|
|
9ae3a8 |
From ea939f77fa0b152746821afb017cfef8170e5500 Mon Sep 17 00:00:00 2001
|
|
|
9ae3a8 |
From: Gerd Hoffmann <kraxel@redhat.com>
|
|
|
9ae3a8 |
Date: Wed, 22 Feb 2017 12:36:21 +0100
|
|
|
9ae3a8 |
Subject: [PATCH 03/24] ui/vnc: optimize dirty bitmap tracking
|
|
|
9ae3a8 |
MIME-Version: 1.0
|
|
|
9ae3a8 |
Content-Type: text/plain; charset=UTF-8
|
|
|
9ae3a8 |
Content-Transfer-Encoding: 8bit
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
RH-Author: Gerd Hoffmann <kraxel@redhat.com>
|
|
|
9ae3a8 |
Message-id: <1487766986-6329-4-git-send-email-kraxel@redhat.com>
|
|
|
9ae3a8 |
Patchwork-id: 73979
|
|
|
9ae3a8 |
O-Subject: [RHEL-7.4 qemu-kvm PATCH 3/8] ui/vnc: optimize dirty bitmap tracking
|
|
|
9ae3a8 |
Bugzilla: 1377977
|
|
|
9ae3a8 |
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
|
|
9ae3a8 |
RH-Acked-by: Marc-André Lureau <mlureau@redhat.com>
|
|
|
9ae3a8 |
RH-Acked-by: Laurent Vivier <lvivier@redhat.com>
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
From: Peter Lieven <pl@kamp.de>
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
vnc_update_client currently scans the dirty bitmap of each client
|
|
|
9ae3a8 |
bitwise which is a very costly operation if only few bits are dirty.
|
|
|
9ae3a8 |
vnc_refresh_server_surface does almost the same.
|
|
|
9ae3a8 |
This patch optimizes both by utilizing the heavily optimized
|
|
|
9ae3a8 |
function find_next_bit to find the offset of the next dirty
|
|
|
9ae3a8 |
bit in the dirty bitmaps.
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
The following artificial test (just the bitmap operation part) running
|
|
|
9ae3a8 |
vnc_update_client 65536 times on a 2560x2048 surface illustrates the
|
|
|
9ae3a8 |
performance difference:
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
All bits clean - vnc_update_client_new: 0.07 secs
|
|
|
9ae3a8 |
vnc_update_client_old: 10.98 secs
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
All bits dirty - vnc_update_client_new: 11.26 secs
|
|
|
9ae3a8 |
vnc_update_client_old: 20.19 secs
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
Few bits dirty - vnc_update_client_new: 0.08 secs
|
|
|
9ae3a8 |
vnc_update_client_old: 10.98 secs
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
The case for all bits dirty is still rather slow, this
|
|
|
9ae3a8 |
is due to the implementation of find_and_clear_dirty_height.
|
|
|
9ae3a8 |
This will be addressed in a separate patch.
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
Signed-off-by: Peter Lieven <pl@kamp.de>
|
|
|
9ae3a8 |
Reviewed-by: Wenchao Xia <xiawenc@linux.vnet.ibm.com>
|
|
|
9ae3a8 |
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
|
|
|
9ae3a8 |
(cherry picked from commit 12b316d4c173bf07f421ef9dc98ba4b53916066e)
|
|
|
9ae3a8 |
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
|
|
|
9ae3a8 |
---
|
|
|
9ae3a8 |
ui/vnc.c | 155 ++++++++++++++++++++++++++++++++++-----------------------------
|
|
|
9ae3a8 |
ui/vnc.h | 4 ++
|
|
|
9ae3a8 |
2 files changed, 88 insertions(+), 71 deletions(-)
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
diff --git a/ui/vnc.c b/ui/vnc.c
|
|
|
9ae3a8 |
index 13fb34b..54530a2 100644
|
|
|
9ae3a8 |
--- a/ui/vnc.c
|
|
|
9ae3a8 |
+++ b/ui/vnc.c
|
|
|
9ae3a8 |
@@ -572,6 +572,15 @@ void *vnc_server_fb_ptr(VncDisplay *vd, int x, int y)
|
|
|
9ae3a8 |
ptr += x * VNC_SERVER_FB_BYTES;
|
|
|
9ae3a8 |
return ptr;
|
|
|
9ae3a8 |
}
|
|
|
9ae3a8 |
+/* this sets only the visible pixels of a dirty bitmap */
|
|
|
9ae3a8 |
+#define VNC_SET_VISIBLE_PIXELS_DIRTY(bitmap, w, h) {\
|
|
|
9ae3a8 |
+ int y;\
|
|
|
9ae3a8 |
+ memset(bitmap, 0x00, sizeof(bitmap));\
|
|
|
9ae3a8 |
+ for (y = 0; y < h; y++) {\
|
|
|
9ae3a8 |
+ bitmap_set(bitmap[y], 0,\
|
|
|
9ae3a8 |
+ DIV_ROUND_UP(w, VNC_DIRTY_PIXELS_PER_BIT));\
|
|
|
9ae3a8 |
+ } \
|
|
|
9ae3a8 |
+ }
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
static void vnc_dpy_switch(DisplayChangeListener *dcl,
|
|
|
9ae3a8 |
DisplaySurface *surface)
|
|
|
9ae3a8 |
@@ -597,7 +606,9 @@ static void vnc_dpy_switch(DisplayChangeListener *dcl,
|
|
|
9ae3a8 |
qemu_pixman_image_unref(vd->guest.fb);
|
|
|
9ae3a8 |
vd->guest.fb = pixman_image_ref(surface->image);
|
|
|
9ae3a8 |
vd->guest.format = surface->format;
|
|
|
9ae3a8 |
- memset(vd->guest.dirty, 0xFF, sizeof(vd->guest.dirty));
|
|
|
9ae3a8 |
+ VNC_SET_VISIBLE_PIXELS_DIRTY(vd->guest.dirty,
|
|
|
9ae3a8 |
+ surface_width(vd->ds),
|
|
|
9ae3a8 |
+ surface_height(vd->ds));
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
QTAILQ_FOREACH(vs, &vd->clients, next) {
|
|
|
9ae3a8 |
vnc_colordepth(vs);
|
|
|
9ae3a8 |
@@ -605,7 +616,9 @@ static void vnc_dpy_switch(DisplayChangeListener *dcl,
|
|
|
9ae3a8 |
if (vs->vd->cursor) {
|
|
|
9ae3a8 |
vnc_cursor_define(vs);
|
|
|
9ae3a8 |
}
|
|
|
9ae3a8 |
- memset(vs->dirty, 0xFF, sizeof(vs->dirty));
|
|
|
9ae3a8 |
+ VNC_SET_VISIBLE_PIXELS_DIRTY(vs->dirty,
|
|
|
9ae3a8 |
+ surface_width(vd->ds),
|
|
|
9ae3a8 |
+ surface_height(vd->ds));
|
|
|
9ae3a8 |
}
|
|
|
9ae3a8 |
}
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
@@ -891,10 +904,9 @@ static int vnc_update_client(VncState *vs, int has_dirty)
|
|
|
9ae3a8 |
VncDisplay *vd = vs->vd;
|
|
|
9ae3a8 |
VncJob *job;
|
|
|
9ae3a8 |
int y;
|
|
|
9ae3a8 |
- int width, height;
|
|
|
9ae3a8 |
+ int height;
|
|
|
9ae3a8 |
int n = 0;
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
-
|
|
|
9ae3a8 |
if (vs->output.offset && !vs->audio_cap && !vs->force_update)
|
|
|
9ae3a8 |
/* kernel send buffers are full -> drop frames to throttle */
|
|
|
9ae3a8 |
return 0;
|
|
|
9ae3a8 |
@@ -910,39 +922,27 @@ static int vnc_update_client(VncState *vs, int has_dirty)
|
|
|
9ae3a8 |
*/
|
|
|
9ae3a8 |
job = vnc_job_new(vs);
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
- width = MIN(pixman_image_get_width(vd->server), vs->client_width);
|
|
|
9ae3a8 |
height = MIN(pixman_image_get_height(vd->server), vs->client_height);
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
- for (y = 0; y < height; y++) {
|
|
|
9ae3a8 |
- int x;
|
|
|
9ae3a8 |
- int last_x = -1;
|
|
|
9ae3a8 |
- for (x = 0; x < width / VNC_DIRTY_PIXELS_PER_BIT; x++) {
|
|
|
9ae3a8 |
- if (test_and_clear_bit(x, vs->dirty[y])) {
|
|
|
9ae3a8 |
- if (last_x == -1) {
|
|
|
9ae3a8 |
- last_x = x;
|
|
|
9ae3a8 |
- }
|
|
|
9ae3a8 |
- } else {
|
|
|
9ae3a8 |
- if (last_x != -1) {
|
|
|
9ae3a8 |
- int h = find_and_clear_dirty_height(vs, y, last_x, x,
|
|
|
9ae3a8 |
- height);
|
|
|
9ae3a8 |
-
|
|
|
9ae3a8 |
- n += vnc_job_add_rect(job,
|
|
|
9ae3a8 |
- last_x * VNC_DIRTY_PIXELS_PER_BIT,
|
|
|
9ae3a8 |
- y,
|
|
|
9ae3a8 |
- (x - last_x) *
|
|
|
9ae3a8 |
- VNC_DIRTY_PIXELS_PER_BIT,
|
|
|
9ae3a8 |
- h);
|
|
|
9ae3a8 |
- }
|
|
|
9ae3a8 |
- last_x = -1;
|
|
|
9ae3a8 |
- }
|
|
|
9ae3a8 |
- }
|
|
|
9ae3a8 |
- if (last_x != -1) {
|
|
|
9ae3a8 |
- int h = find_and_clear_dirty_height(vs, y, last_x, x, height);
|
|
|
9ae3a8 |
- n += vnc_job_add_rect(job, last_x * VNC_DIRTY_PIXELS_PER_BIT,
|
|
|
9ae3a8 |
- y,
|
|
|
9ae3a8 |
- (x - last_x) * VNC_DIRTY_PIXELS_PER_BIT,
|
|
|
9ae3a8 |
- h);
|
|
|
9ae3a8 |
+ y = 0;
|
|
|
9ae3a8 |
+ for (;;) {
|
|
|
9ae3a8 |
+ int x, h;
|
|
|
9ae3a8 |
+ unsigned long x2;
|
|
|
9ae3a8 |
+ unsigned long offset = find_next_bit((unsigned long *) &vs->dirty,
|
|
|
9ae3a8 |
+ height * VNC_DIRTY_BPL(vs),
|
|
|
9ae3a8 |
+ y * VNC_DIRTY_BPL(vs));
|
|
|
9ae3a8 |
+ if (offset == height * VNC_DIRTY_BPL(vs)) {
|
|
|
9ae3a8 |
+ /* no more dirty bits */
|
|
|
9ae3a8 |
+ break;
|
|
|
9ae3a8 |
}
|
|
|
9ae3a8 |
+ y = offset / VNC_DIRTY_BPL(vs);
|
|
|
9ae3a8 |
+ x = offset % VNC_DIRTY_BPL(vs);
|
|
|
9ae3a8 |
+ x2 = find_next_zero_bit((unsigned long *) &vs->dirty[y],
|
|
|
9ae3a8 |
+ VNC_DIRTY_BPL(vs), x);
|
|
|
9ae3a8 |
+ bitmap_clear(vs->dirty[y], x, x2 - x);
|
|
|
9ae3a8 |
+ h = find_and_clear_dirty_height(vs, y, x, x2, height);
|
|
|
9ae3a8 |
+ n += vnc_job_add_rect(job, x * VNC_DIRTY_PIXELS_PER_BIT, y,
|
|
|
9ae3a8 |
+ (x2 - x) * VNC_DIRTY_PIXELS_PER_BIT, h);
|
|
|
9ae3a8 |
}
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
vnc_job_push(job);
|
|
|
9ae3a8 |
@@ -2690,8 +2690,8 @@ static int vnc_refresh_server_surface(VncDisplay *vd)
|
|
|
9ae3a8 |
int width = pixman_image_get_width(vd->guest.fb);
|
|
|
9ae3a8 |
int height = pixman_image_get_height(vd->guest.fb);
|
|
|
9ae3a8 |
int y;
|
|
|
9ae3a8 |
- uint8_t *guest_row;
|
|
|
9ae3a8 |
- uint8_t *server_row;
|
|
|
9ae3a8 |
+ uint8_t *guest_row0 = NULL, *server_row0;
|
|
|
9ae3a8 |
+ int guest_stride = 0, server_stride;
|
|
|
9ae3a8 |
int cmp_bytes;
|
|
|
9ae3a8 |
VncState *vs;
|
|
|
9ae3a8 |
int has_dirty = 0;
|
|
|
9ae3a8 |
@@ -2716,44 +2716,57 @@ static int vnc_refresh_server_surface(VncDisplay *vd)
|
|
|
9ae3a8 |
if (vd->guest.format != VNC_SERVER_FB_FORMAT) {
|
|
|
9ae3a8 |
int width = pixman_image_get_width(vd->server);
|
|
|
9ae3a8 |
tmpbuf = qemu_pixman_linebuf_create(VNC_SERVER_FB_FORMAT, width);
|
|
|
9ae3a8 |
- }
|
|
|
9ae3a8 |
- guest_row = (uint8_t *)pixman_image_get_data(vd->guest.fb);
|
|
|
9ae3a8 |
- server_row = (uint8_t *)pixman_image_get_data(vd->server);
|
|
|
9ae3a8 |
- for (y = 0; y < height; y++) {
|
|
|
9ae3a8 |
- if (!bitmap_empty(vd->guest.dirty[y], VNC_DIRTY_BITS)) {
|
|
|
9ae3a8 |
- int x;
|
|
|
9ae3a8 |
- uint8_t *guest_ptr;
|
|
|
9ae3a8 |
- uint8_t *server_ptr;
|
|
|
9ae3a8 |
-
|
|
|
9ae3a8 |
- if (vd->guest.format != VNC_SERVER_FB_FORMAT) {
|
|
|
9ae3a8 |
- qemu_pixman_linebuf_fill(tmpbuf, vd->guest.fb, width, 0, y);
|
|
|
9ae3a8 |
- guest_ptr = (uint8_t *)pixman_image_get_data(tmpbuf);
|
|
|
9ae3a8 |
- } else {
|
|
|
9ae3a8 |
- guest_ptr = guest_row;
|
|
|
9ae3a8 |
- }
|
|
|
9ae3a8 |
- server_ptr = server_row;
|
|
|
9ae3a8 |
+ } else {
|
|
|
9ae3a8 |
+ guest_row0 = (uint8_t *)pixman_image_get_data(vd->guest.fb);
|
|
|
9ae3a8 |
+ guest_stride = pixman_image_get_stride(vd->guest.fb);
|
|
|
9ae3a8 |
+ }
|
|
|
9ae3a8 |
+ server_row0 = (uint8_t *)pixman_image_get_data(vd->server);
|
|
|
9ae3a8 |
+ server_stride = pixman_image_get_stride(vd->server);
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+ y = 0;
|
|
|
9ae3a8 |
+ for (;;) {
|
|
|
9ae3a8 |
+ int x;
|
|
|
9ae3a8 |
+ uint8_t *guest_ptr, *server_ptr;
|
|
|
9ae3a8 |
+ unsigned long offset = find_next_bit((unsigned long *) &vd->guest.dirty,
|
|
|
9ae3a8 |
+ height * VNC_DIRTY_BPL(&vd->guest),
|
|
|
9ae3a8 |
+ y * VNC_DIRTY_BPL(&vd->guest));
|
|
|
9ae3a8 |
+ if (offset == height * VNC_DIRTY_BPL(&vd->guest)) {
|
|
|
9ae3a8 |
+ /* no more dirty bits */
|
|
|
9ae3a8 |
+ break;
|
|
|
9ae3a8 |
+ }
|
|
|
9ae3a8 |
+ y = offset / VNC_DIRTY_BPL(&vd->guest);
|
|
|
9ae3a8 |
+ x = offset % VNC_DIRTY_BPL(&vd->guest);
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
- for (x = 0; x + VNC_DIRTY_PIXELS_PER_BIT - 1 < width;
|
|
|
9ae3a8 |
- x += VNC_DIRTY_PIXELS_PER_BIT, guest_ptr += cmp_bytes,
|
|
|
9ae3a8 |
- server_ptr += cmp_bytes) {
|
|
|
9ae3a8 |
- if (!test_and_clear_bit((x / VNC_DIRTY_PIXELS_PER_BIT),
|
|
|
9ae3a8 |
- vd->guest.dirty[y])) {
|
|
|
9ae3a8 |
- continue;
|
|
|
9ae3a8 |
- }
|
|
|
9ae3a8 |
- if (memcmp(server_ptr, guest_ptr, cmp_bytes) == 0) {
|
|
|
9ae3a8 |
- continue;
|
|
|
9ae3a8 |
- }
|
|
|
9ae3a8 |
- memcpy(server_ptr, guest_ptr, cmp_bytes);
|
|
|
9ae3a8 |
- if (!vd->non_adaptive)
|
|
|
9ae3a8 |
- vnc_rect_updated(vd, x, y, &tv);
|
|
|
9ae3a8 |
- QTAILQ_FOREACH(vs, &vd->clients, next) {
|
|
|
9ae3a8 |
- set_bit((x / VNC_DIRTY_PIXELS_PER_BIT), vs->dirty[y]);
|
|
|
9ae3a8 |
- }
|
|
|
9ae3a8 |
- has_dirty++;
|
|
|
9ae3a8 |
+ server_ptr = server_row0 + y * server_stride + x * cmp_bytes;
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+ if (vd->guest.format != VNC_SERVER_FB_FORMAT) {
|
|
|
9ae3a8 |
+ qemu_pixman_linebuf_fill(tmpbuf, vd->guest.fb, width, 0, y);
|
|
|
9ae3a8 |
+ guest_ptr = (uint8_t *)pixman_image_get_data(tmpbuf);
|
|
|
9ae3a8 |
+ } else {
|
|
|
9ae3a8 |
+ guest_ptr = guest_row0 + y * guest_stride;
|
|
|
9ae3a8 |
+ }
|
|
|
9ae3a8 |
+ guest_ptr += x * cmp_bytes;
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+ for (; x < DIV_ROUND_UP(width, VNC_DIRTY_PIXELS_PER_BIT);
|
|
|
9ae3a8 |
+ x++, guest_ptr += cmp_bytes, server_ptr += cmp_bytes) {
|
|
|
9ae3a8 |
+ if (!test_and_clear_bit(x, vd->guest.dirty[y])) {
|
|
|
9ae3a8 |
+ continue;
|
|
|
9ae3a8 |
+ }
|
|
|
9ae3a8 |
+ if (memcmp(server_ptr, guest_ptr, cmp_bytes) == 0) {
|
|
|
9ae3a8 |
+ continue;
|
|
|
9ae3a8 |
+ }
|
|
|
9ae3a8 |
+ memcpy(server_ptr, guest_ptr, cmp_bytes);
|
|
|
9ae3a8 |
+ if (!vd->non_adaptive) {
|
|
|
9ae3a8 |
+ vnc_rect_updated(vd, x * VNC_DIRTY_PIXELS_PER_BIT,
|
|
|
9ae3a8 |
+ y, &tv);
|
|
|
9ae3a8 |
}
|
|
|
9ae3a8 |
+ QTAILQ_FOREACH(vs, &vd->clients, next) {
|
|
|
9ae3a8 |
+ set_bit(x, vs->dirty[y]);
|
|
|
9ae3a8 |
+ }
|
|
|
9ae3a8 |
+ has_dirty++;
|
|
|
9ae3a8 |
}
|
|
|
9ae3a8 |
- guest_row += pixman_image_get_stride(vd->guest.fb);
|
|
|
9ae3a8 |
- server_row += pixman_image_get_stride(vd->server);
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
+ y++;
|
|
|
9ae3a8 |
}
|
|
|
9ae3a8 |
qemu_pixman_image_unref(tmpbuf);
|
|
|
9ae3a8 |
return has_dirty;
|
|
|
9ae3a8 |
diff --git a/ui/vnc.h b/ui/vnc.h
|
|
|
9ae3a8 |
index 561f383..ebf4bdd 100644
|
|
|
9ae3a8 |
--- a/ui/vnc.h
|
|
|
9ae3a8 |
+++ b/ui/vnc.h
|
|
|
9ae3a8 |
@@ -88,6 +88,10 @@ typedef void VncSendHextileTile(VncState *vs,
|
|
|
9ae3a8 |
/* VNC_DIRTY_BITS is the number of bits in the dirty bitmap. */
|
|
|
9ae3a8 |
#define VNC_DIRTY_BITS (VNC_MAX_WIDTH / VNC_DIRTY_PIXELS_PER_BIT)
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
+/* VNC_DIRTY_BPL (BPL = bits per line) might be greater than
|
|
|
9ae3a8 |
+ * VNC_DIRTY_BITS due to alignment */
|
|
|
9ae3a8 |
+#define VNC_DIRTY_BPL(x) (sizeof((x)->dirty) / VNC_MAX_HEIGHT * BITS_PER_BYTE)
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
#define VNC_STAT_RECT 64
|
|
|
9ae3a8 |
#define VNC_STAT_COLS (VNC_MAX_WIDTH / VNC_STAT_RECT)
|
|
|
9ae3a8 |
#define VNC_STAT_ROWS (VNC_MAX_HEIGHT / VNC_STAT_RECT)
|
|
|
9ae3a8 |
--
|
|
|
9ae3a8 |
1.8.3.1
|
|
|
9ae3a8 |
|