4ec855
From 1cfbcbeebc6d9ca1f1f7656fff572bf6ac50de76 Mon Sep 17 00:00:00 2001
4ec855
From: "plai@redhat.com" <plai@redhat.com>
4ec855
Date: Tue, 26 Nov 2019 19:36:52 +0000
4ec855
Subject: [PATCH 08/11] kvm: support -overcommit cpu-pm=on|off
4ec855
4ec855
RH-Author: plai@redhat.com
4ec855
Message-id: <1574797015-32564-5-git-send-email-plai@redhat.com>
4ec855
Patchwork-id: 92697
4ec855
O-Subject: [RHEL8.2 qemu-kvm PATCH 4/7] kvm: support -overcommit cpu-pm=on|off
4ec855
Bugzilla: 1634827
4ec855
RH-Acked-by: Eduardo Habkost <ehabkost@redhat.com>
4ec855
RH-Acked-by: Michael S. Tsirkin <mst@redhat.com>
4ec855
RH-Acked-by: Igor Mammedov <imammedo@redhat.com>
4ec855
4ec855
From: "Michael S. Tsirkin" <mst@redhat.com>
4ec855
4ec855
With this flag, kvm allows guest to control host CPU power state.  This
4ec855
increases latency for other processes using same host CPU in an
4ec855
unpredictable way, but it decreases idle entry/exit times for the
4ec855
running VCPU, so to use it QEMU needs a hint about whether host CPU is
4ec855
overcommitted, hence the flag name.
4ec855
4ec855
Follow-up patches will expose this capability to guest
4ec855
(using mwait leaf).
4ec855
4ec855
Based on a patch by Wanpeng Li <kernellwp@gmail.com>.
4ec855
4ec855
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
4ec855
Message-Id: <20180622192148.178309-2-mst@redhat.com>
4ec855
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
4ec855
(cherry picked from commit 6f131f13e68d648a8e4f083c667ab1acd88ce4cd)
4ec855
Signed-off-by: Paul Lai <plai@redhat.com>
4ec855
Signed-off-by: Danilo C. L. de Paula <ddepaula@redhat.com>
4ec855
---
4ec855
 include/sysemu/sysemu.h |  1 +
4ec855
 qemu-options.hx         | 24 ++++++++++++++++++++++++
4ec855
 target/i386/kvm.c       | 23 +++++++++++++++++++++++
4ec855
 vl.c                    | 32 +++++++++++++++++++++++++++++++-
4ec855
 4 files changed, 79 insertions(+), 1 deletion(-)
4ec855
4ec855
diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
4ec855
index f20e4f5..f38fad0 100644
4ec855
--- a/include/sysemu/sysemu.h
4ec855
+++ b/include/sysemu/sysemu.h
4ec855
@@ -131,6 +131,7 @@ extern bool boot_strict;
4ec855
 extern uint8_t *boot_splash_filedata;
4ec855
 extern size_t boot_splash_filedata_size;
4ec855
 extern bool enable_mlock;
4ec855
+extern bool enable_cpu_pm;
4ec855
 extern uint8_t qemu_extra_params_fw[2];
4ec855
 extern QEMUClockType rtc_clock;
4ec855
 extern const char *mem_path;
4ec855
diff --git a/qemu-options.hx b/qemu-options.hx
4ec855
index 1243057..99933a0 100644
4ec855
--- a/qemu-options.hx
4ec855
+++ b/qemu-options.hx
4ec855
@@ -3331,6 +3331,30 @@ mlocking qemu-kvm and guest memory can be enabled via @option{mlock=on}
4ec855
 (enabled by default).
4ec855
 ETEXI
4ec855
 
4ec855
+DEF("overcommit", HAS_ARG, QEMU_OPTION_overcommit,
4ec855
+    "--overcommit [mem-lock=on|off][cpu-pm=on|off]\n"
4ec855
+    "                run qemu with overcommit hints\n"
4ec855
+    "                mem-lock=on|off controls memory lock support (default: off)\n"
4ec855
+    "                cpu-pm=on|off controls cpu power management (default: off)\n",
4ec855
+    QEMU_ARCH_ALL)
4ec855
+STEXI
4ec855
+@item -overcommit mem-lock=on|off
4ec855
+@item -overcommit cpu-pm=on|off
4ec855
+@findex -overcommit
4ec855
+Run qemu with hints about host resource overcommit. The default is
4ec855
+to assume that host overcommits all resources.
4ec855
+
4ec855
+Locking qemu and guest memory can be enabled via @option{mem-lock=on} (disabled
4ec855
+by default).  This works when host memory is not overcommitted and reduces the
4ec855
+worst-case latency for guest.  This is equivalent to @option{realtime}.
4ec855
+
4ec855
+Guest ability to manage power state of host cpus (increasing latency for other
4ec855
+processes on the same host cpu, but decreasing latency for guest) can be
4ec855
+enabled via @option{cpu-pm=on} (disabled by default).  This works best when
4ec855
+host CPU is not overcommitted. When used, host estimates of CPU cycle and power
4ec855
+utilization will be incorrect, not taking into account guest idle time.
4ec855
+ETEXI
4ec855
+
4ec855
 DEF("gdb", HAS_ARG, QEMU_OPTION_gdb, \
4ec855
     "-gdb dev        wait for gdb connection on 'dev'\n", QEMU_ARCH_ALL)
4ec855
 STEXI
4ec855
diff --git a/target/i386/kvm.c b/target/i386/kvm.c
4ec855
index 107c53b..879c3e0 100644
4ec855
--- a/target/i386/kvm.c
4ec855
+++ b/target/i386/kvm.c
4ec855
@@ -1606,6 +1606,29 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
4ec855
         smram_machine_done.notify = register_smram_listener;
4ec855
         qemu_add_machine_init_done_notifier(&smram_machine_done);
4ec855
     }
4ec855
+
4ec855
+    if (enable_cpu_pm) {
4ec855
+        int disable_exits = kvm_check_extension(s, KVM_CAP_X86_DISABLE_EXITS);
4ec855
+        int ret;
4ec855
+
4ec855
+/* Work around for kernel header with a typo. TODO: fix header and drop. */
4ec855
+#if defined(KVM_X86_DISABLE_EXITS_HTL) && !defined(KVM_X86_DISABLE_EXITS_HLT)
4ec855
+#define KVM_X86_DISABLE_EXITS_HLT KVM_X86_DISABLE_EXITS_HTL
4ec855
+#endif
4ec855
+        if (disable_exits) {
4ec855
+            disable_exits &= (KVM_X86_DISABLE_EXITS_MWAIT |
4ec855
+                              KVM_X86_DISABLE_EXITS_HLT |
4ec855
+                              KVM_X86_DISABLE_EXITS_PAUSE);
4ec855
+        }
4ec855
+
4ec855
+        ret = kvm_vm_enable_cap(s, KVM_CAP_X86_DISABLE_EXITS, 0,
4ec855
+                                disable_exits);
4ec855
+        if (ret < 0) {
4ec855
+            error_report("kvm: guest stopping CPU not supported: %s",
4ec855
+                         strerror(-ret));
4ec855
+        }
4ec855
+    }
4ec855
+
4ec855
     return 0;
4ec855
 }
4ec855
 
4ec855
diff --git a/vl.c b/vl.c
4ec855
index 932c1cf..aa08ab5 100644
4ec855
--- a/vl.c
4ec855
+++ b/vl.c
4ec855
@@ -150,6 +150,7 @@ ram_addr_t ram_size;
4ec855
 const char *mem_path = NULL;
4ec855
 int mem_prealloc = 0; /* force preallocation of physical target memory */
4ec855
 bool enable_mlock = false;
4ec855
+bool enable_cpu_pm = false;
4ec855
 int nb_nics;
4ec855
 NICInfo nd_table[MAX_NICS];
4ec855
 int autostart;
4ec855
@@ -428,6 +429,22 @@ static QemuOptsList qemu_realtime_opts = {
4ec855
     },
4ec855
 };
4ec855
 
4ec855
+static QemuOptsList qemu_overcommit_opts = {
4ec855
+    .name = "overcommit",
4ec855
+    .head = QTAILQ_HEAD_INITIALIZER(qemu_overcommit_opts.head),
4ec855
+    .desc = {
4ec855
+        {
4ec855
+            .name = "mem-lock",
4ec855
+            .type = QEMU_OPT_BOOL,
4ec855
+        },
4ec855
+        {
4ec855
+            .name = "cpu-pm",
4ec855
+            .type = QEMU_OPT_BOOL,
4ec855
+        },
4ec855
+        { /* end of list */ }
4ec855
+    },
4ec855
+};
4ec855
+
4ec855
 static QemuOptsList qemu_msg_opts = {
4ec855
     .name = "msg",
4ec855
     .head = QTAILQ_HEAD_INITIALIZER(qemu_msg_opts.head),
4ec855
@@ -4089,7 +4106,20 @@ int main(int argc, char **argv, char **envp)
4ec855
                 if (!opts) {
4ec855
                     exit(1);
4ec855
                 }
4ec855
-                enable_mlock = qemu_opt_get_bool(opts, "mlock", true);
4ec855
+                /* Don't override the -overcommit option if set */
4ec855
+                enable_mlock = enable_mlock ||
4ec855
+                    qemu_opt_get_bool(opts, "mlock", true);
4ec855
+                break;
4ec855
+            case QEMU_OPTION_overcommit:
4ec855
+                opts = qemu_opts_parse_noisily(qemu_find_opts("overcommit"),
4ec855
+                                               optarg, false);
4ec855
+                if (!opts) {
4ec855
+                    exit(1);
4ec855
+                }
4ec855
+                /* Don't override the -realtime option if set */
4ec855
+                enable_mlock = enable_mlock ||
4ec855
+                    qemu_opt_get_bool(opts, "mem-lock", false);
4ec855
+                enable_cpu_pm = qemu_opt_get_bool(opts, "cpu-pm", false);
4ec855
                 break;
4ec855
             case QEMU_OPTION_msg:
4ec855
                 opts = qemu_opts_parse_noisily(qemu_find_opts("msg"), optarg,
4ec855
-- 
4ec855
1.8.3.1
4ec855