From 8e8f421cce99543081f225acf46541312cfbc371 Mon Sep 17 00:00:00 2001
From: Laurent Vivier
Date: Tue, 17 Mar 2020 17:05:18 +0000
Subject: [PATCH 1/2] migration: Rate limit inside host pages

RH-Author: Laurent Vivier
Message-id: <20200317170518.9303-1-lvivier@redhat.com>
Patchwork-id: 94374
O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH] migration: Rate limit inside host pages
Bugzilla: 1814336
RH-Acked-by: Peter Xu
RH-Acked-by: Juan Quintela
RH-Acked-by: Dr. David Alan Gilbert

From: "Dr. David Alan Gilbert"

When using hugepages, rate limiting is necessary within each huge
page, since a 1G huge page can take a significant time to send, so
you end up with bursty behaviour.

Fixes: 4c011c37ecb3 ("postcopy: Send whole huge pages")
Reported-by: Lin Ma
Signed-off-by: Dr. David Alan Gilbert
Reviewed-by: Juan Quintela
Reviewed-by: Peter Xu
Signed-off-by: Juan Quintela
(cherry picked from commit 97e1e06780e70f6e98a0d2df881e0c0927d3aeb6)
Signed-off-by: Laurent Vivier

BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1814336
BRANCH: rhel-av-8.2.0
UPSTREAM: Merged
BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=27283241

TESTED: Tested that the migration abort doesn't trigger an error message in the kernel logs on P9

Signed-off-by: Danilo C. L. 
de Paula --- migration/migration.c | 57 ++++++++++++++++++++++++++++---------------------- migration/migration.h | 1 + migration/ram.c | 2 ++ migration/trace-events | 4 ++-- 4 files changed, 37 insertions(+), 27 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index ed18c59..e31d0f5 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -3253,6 +3253,37 @@ void migration_consume_urgent_request(void) qemu_sem_wait(&migrate_get_current()->rate_limit_sem); } +/* Returns true if the rate limiting was broken by an urgent request */ +bool migration_rate_limit(void) +{ + int64_t now = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); + MigrationState *s = migrate_get_current(); + + bool urgent = false; + migration_update_counters(s, now); + if (qemu_file_rate_limit(s->to_dst_file)) { + /* + * Wait for a delay to do rate limiting OR + * something urgent to post the semaphore. + */ + int ms = s->iteration_start_time + BUFFER_DELAY - now; + trace_migration_rate_limit_pre(ms); + if (qemu_sem_timedwait(&s->rate_limit_sem, ms) == 0) { + /* + * We were woken by one or more urgent things but + * the timedwait will have consumed one of them. + * The service routine for the urgent wake will dec + * the semaphore itself for each item it consumes, + * so add this one we just eat back. + */ + qemu_sem_post(&s->rate_limit_sem); + urgent = true; + } + trace_migration_rate_limit_post(urgent); + } + return urgent; +} + /* * Master migration thread on the source VM. * It drives the migration and pumps the data down the outgoing channel. 
@@ -3319,8 +3350,6 @@ static void *migration_thread(void *opaque) trace_migration_thread_setup_complete(); while (migration_is_active(s)) { - int64_t current_time; - if (urgent || !qemu_file_rate_limit(s->to_dst_file)) { MigIterateState iter_state = migration_iteration_run(s); if (iter_state == MIG_ITERATE_SKIP) { @@ -3347,29 +3376,7 @@ static void *migration_thread(void *opaque) update_iteration_initial_status(s); } - current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); - - migration_update_counters(s, current_time); - - urgent = false; - if (qemu_file_rate_limit(s->to_dst_file)) { - /* Wait for a delay to do rate limiting OR - * something urgent to post the semaphore. - */ - int ms = s->iteration_start_time + BUFFER_DELAY - current_time; - trace_migration_thread_ratelimit_pre(ms); - if (qemu_sem_timedwait(&s->rate_limit_sem, ms) == 0) { - /* We were worken by one or more urgent things but - * the timedwait will have consumed one of them. - * The service routine for the urgent wake will dec - * the semaphore itself for each item it consumes, - * so add this one we just eat back. 
- */ - qemu_sem_post(&s->rate_limit_sem); - urgent = true; - } - trace_migration_thread_ratelimit_post(urgent); - } + urgent = migration_rate_limit(); } trace_migration_thread_after_loop(); diff --git a/migration/migration.h b/migration/migration.h index a2b2336..a15e8d8 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -347,5 +347,6 @@ extern bool migrate_pre_2_2; void migration_make_urgent_request(void); void migration_consume_urgent_request(void); +bool migration_rate_limit(void); #endif diff --git a/migration/ram.c b/migration/ram.c index 3891eff..5344c7d 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -2661,6 +2661,8 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss, pages += tmppages; pss->page++; + /* Allow rate limiting to happen in the middle of huge pages */ + migration_rate_limit(); } while ((pss->page & (pagesize_bits - 1)) && offset_in_ramblock(pss->block, pss->page << TARGET_PAGE_BITS)); diff --git a/migration/trace-events b/migration/trace-events index 6dee7b5..2f9129e 100644 --- a/migration/trace-events +++ b/migration/trace-events @@ -138,12 +138,12 @@ migrate_send_rp_recv_bitmap(char *name, int64_t size) "block '%s' size 0x%"PRIi6 migration_completion_file_err(void) "" migration_completion_postcopy_end(void) "" migration_completion_postcopy_end_after_complete(void) "" +migration_rate_limit_pre(int ms) "%d ms" +migration_rate_limit_post(int urgent) "urgent: %d" migration_return_path_end_before(void) "" migration_return_path_end_after(int rp_error) "%d" migration_thread_after_loop(void) "" migration_thread_file_err(void) "" -migration_thread_ratelimit_pre(int ms) "%d ms" -migration_thread_ratelimit_post(int urgent) "urgent: %d" migration_thread_setup_complete(void) "" open_return_path_on_source(void) "" open_return_path_on_source_continue(void) "" -- 1.8.3.1