Blame SOURCES/0027-Revert-kvm_stat-Remove.patch

4a2fec
From e0425f69f136a05a59ee5cb7022409ed2be94a0b Mon Sep 17 00:00:00 2001
4a2fec
From: "Danilo C. L. de Paula" <ddepaula@redhat.com>
4a2fec
Date: Mon, 16 Jan 2017 11:52:49 +0100
4a2fec
Subject: Revert "kvm_stat: Remove"
4a2fec
4a2fec
RH-Author: ddepaula <ddepaula@redhat.com>
4a2fec
Message-id: <1479302806-10135-2-git-send-email-ddepaula@redhat.com>
4a2fec
Patchwork-id: 72851
4a2fec
O-Subject: [RHEV-7.4 qemu-kvm-rhev PATCH v3 1/3] Revert "kvm_stat: Remove"
4a2fec
Bugzilla: 1389238
4a2fec
RH-Acked-by: John Snow <jsnow@redhat.com>
4a2fec
RH-Acked-by: David Hildenbrand <david@redhat.com>
4a2fec
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
4a2fec
4a2fec
kvm_stat script was removed in QEMU 2.7.0 as it became part of the kernel
4a2fec
tree. However, kvm_stat is shipped in the qemu-kvm-tools package in RHEL.
4a2fec
4a2fec
This reverts commit 60b412dd18362bd4ddc44ba7022aacb6af074b5d.
4a2fec
4a2fec
Signed-off-by: Danilo Cesar Lemes de Paula <ddepaula@redhat.com>
4a2fec
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
4a2fec
4a2fec
Merged patches (2.9.0):
4a2fec
- 1e69b1b Include kvm_stat in qemu-kvm.spec
4a2fec
- 7fcfc94 tools: kvm_stat: Powerpc related fixes
4a2fec
- 7f89136 tools: kvm_stat: Introduce pid monitoring
4a2fec
- c728a6b tools: kvm_stat: Add comments
4a2fec
- 27fb856 Package man page of "kvm_stat" tool
4a2fec
4a2fec
(cherry picked from commit d4a8e35b84072816c79e23f5d0a69a2145217004)
4a2fec
---
4a2fec
 Makefile                      |    8 +
4a2fec
 redhat/qemu-kvm.spec.template |    7 +-
4a2fec
 scripts/kvm/kvm_stat          | 1127 +++++++++++++++++++++++++++++++++++++++++
4a2fec
 scripts/kvm/kvm_stat.texi     |   55 ++
4a2fec
 4 files changed, 1196 insertions(+), 1 deletion(-)
4a2fec
 create mode 100755 scripts/kvm/kvm_stat
4a2fec
 create mode 100644 scripts/kvm/kvm_stat.texi
4a2fec
4a2fec
diff --git a/Makefile b/Makefile
4a2fec
index ba31124..312ed5e 100644
4a2fec
--- a/Makefile
4a2fec
+++ b/Makefile
4a2fec
@@ -209,6 +209,9 @@ ifdef BUILD_DOCS
4a2fec
 DOCS=qemu-doc.html qemu-doc.txt qemu.1 qemu-img.1 qemu-nbd.8 qemu-ga.8
4a2fec
 DOCS+=docs/interop/qemu-qmp-ref.html docs/interop/qemu-qmp-ref.txt docs/interop/qemu-qmp-ref.7
4a2fec
 DOCS+=docs/interop/qemu-ga-ref.html docs/interop/qemu-ga-ref.txt docs/interop/qemu-ga-ref.7
4a2fec
+ifdef CONFIG_LINUX
4a2fec
+DOCS+=kvm_stat.1
4a2fec
+endif
4a2fec
 ifdef CONFIG_VIRTFS
4a2fec
 DOCS+=fsdev/virtfs-proxy-helper.1
4a2fec
 endif
4a2fec
@@ -727,6 +730,11 @@ html: qemu-doc.html docs/interop/qemu-qmp-ref.html docs/interop/qemu-ga-ref.html
4a2fec
 info: qemu-doc.info docs/interop/qemu-qmp-ref.info docs/interop/qemu-ga-ref.info
4a2fec
 pdf: qemu-doc.pdf docs/interop/qemu-qmp-ref.pdf docs/interop/qemu-ga-ref.pdf
4a2fec
 txt: qemu-doc.txt docs/interop/qemu-qmp-ref.txt docs/interop/qemu-ga-ref.txt
4a2fec
+kvm_stat.1: scripts/kvm/kvm_stat.texi
4a2fec
+	$(call quiet-command, \
4a2fec
+	  perl -Ww -- $(SRC_PATH)/scripts/texi2pod.pl $< kvm_stat.pod && \
4a2fec
+	  $(POD2MAN) --section=1 --center=" " --release=" " kvm_stat.pod > $@, \
4a2fec
+	  "  GEN   $@")
4a2fec
 
4a2fec
 qemu-doc.html qemu-doc.info qemu-doc.pdf qemu-doc.txt: \
4a2fec
 	qemu-img.texi qemu-nbd.texi qemu-options.texi qemu-option-trace.texi \
4a2fec
diff --git a/scripts/kvm/kvm_stat b/scripts/kvm/kvm_stat
4a2fec
new file mode 100755
4a2fec
index 0000000..581278c
4a2fec
--- /dev/null
4a2fec
+++ b/scripts/kvm/kvm_stat
4a2fec
@@ -0,0 +1,1127 @@
4a2fec
+#!/usr/bin/python
4a2fec
+#
4a2fec
+# top-like utility for displaying kvm statistics
4a2fec
+#
4a2fec
+# Copyright 2006-2008 Qumranet Technologies
4a2fec
+# Copyright 2008-2011 Red Hat, Inc.
4a2fec
+#
4a2fec
+# Authors:
4a2fec
+#  Avi Kivity <avi@redhat.com>
4a2fec
+#
4a2fec
+# This work is licensed under the terms of the GNU GPL, version 2.  See
4a2fec
+# the COPYING file in the top-level directory.
4a2fec
+"""The kvm_stat module outputs statistics about running KVM VMs
4a2fec
+
4a2fec
+Three different ways of output formatting are available:
4a2fec
+- as a top-like text ui
4a2fec
+- in a key -> value format
4a2fec
+- in an all keys, all values format
4a2fec
+
4a2fec
+The data is sampled from the KVM's debugfs entries and its perf events.
4a2fec
+"""
4a2fec
+
4a2fec
+import curses
4a2fec
+import sys
4a2fec
+import os
4a2fec
+import time
4a2fec
+import optparse
4a2fec
+import ctypes
4a2fec
+import fcntl
4a2fec
+import resource
4a2fec
+import struct
4a2fec
+import re
4a2fec
+from collections import defaultdict
4a2fec
+from time import sleep
4a2fec
+
4a2fec
+VMX_EXIT_REASONS = {
4a2fec
+    'EXCEPTION_NMI':        0,
4a2fec
+    'EXTERNAL_INTERRUPT':   1,
4a2fec
+    'TRIPLE_FAULT':         2,
4a2fec
+    'PENDING_INTERRUPT':    7,
4a2fec
+    'NMI_WINDOW':           8,
4a2fec
+    'TASK_SWITCH':          9,
4a2fec
+    'CPUID':                10,
4a2fec
+    'HLT':                  12,
4a2fec
+    'INVLPG':               14,
4a2fec
+    'RDPMC':                15,
4a2fec
+    'RDTSC':                16,
4a2fec
+    'VMCALL':               18,
4a2fec
+    'VMCLEAR':              19,
4a2fec
+    'VMLAUNCH':             20,
4a2fec
+    'VMPTRLD':              21,
4a2fec
+    'VMPTRST':              22,
4a2fec
+    'VMREAD':               23,
4a2fec
+    'VMRESUME':             24,
4a2fec
+    'VMWRITE':              25,
4a2fec
+    'VMOFF':                26,
4a2fec
+    'VMON':                 27,
4a2fec
+    'CR_ACCESS':            28,
4a2fec
+    'DR_ACCESS':            29,
4a2fec
+    'IO_INSTRUCTION':       30,
4a2fec
+    'MSR_READ':             31,
4a2fec
+    'MSR_WRITE':            32,
4a2fec
+    'INVALID_STATE':        33,
4a2fec
+    'MWAIT_INSTRUCTION':    36,
4a2fec
+    'MONITOR_INSTRUCTION':  39,
4a2fec
+    'PAUSE_INSTRUCTION':    40,
4a2fec
+    'MCE_DURING_VMENTRY':   41,
4a2fec
+    'TPR_BELOW_THRESHOLD':  43,
4a2fec
+    'APIC_ACCESS':          44,
4a2fec
+    'EPT_VIOLATION':        48,
4a2fec
+    'EPT_MISCONFIG':        49,
4a2fec
+    'WBINVD':               54,
4a2fec
+    'XSETBV':               55,
4a2fec
+    'APIC_WRITE':           56,
4a2fec
+    'INVPCID':              58,
4a2fec
+}
4a2fec
+
4a2fec
+SVM_EXIT_REASONS = {
4a2fec
+    'READ_CR0':       0x000,
4a2fec
+    'READ_CR3':       0x003,
4a2fec
+    'READ_CR4':       0x004,
4a2fec
+    'READ_CR8':       0x008,
4a2fec
+    'WRITE_CR0':      0x010,
4a2fec
+    'WRITE_CR3':      0x013,
4a2fec
+    'WRITE_CR4':      0x014,
4a2fec
+    'WRITE_CR8':      0x018,
4a2fec
+    'READ_DR0':       0x020,
4a2fec
+    'READ_DR1':       0x021,
4a2fec
+    'READ_DR2':       0x022,
4a2fec
+    'READ_DR3':       0x023,
4a2fec
+    'READ_DR4':       0x024,
4a2fec
+    'READ_DR5':       0x025,
4a2fec
+    'READ_DR6':       0x026,
4a2fec
+    'READ_DR7':       0x027,
4a2fec
+    'WRITE_DR0':      0x030,
4a2fec
+    'WRITE_DR1':      0x031,
4a2fec
+    'WRITE_DR2':      0x032,
4a2fec
+    'WRITE_DR3':      0x033,
4a2fec
+    'WRITE_DR4':      0x034,
4a2fec
+    'WRITE_DR5':      0x035,
4a2fec
+    'WRITE_DR6':      0x036,
4a2fec
+    'WRITE_DR7':      0x037,
4a2fec
+    'EXCP_BASE':      0x040,
4a2fec
+    'INTR':           0x060,
4a2fec
+    'NMI':            0x061,
4a2fec
+    'SMI':            0x062,
4a2fec
+    'INIT':           0x063,
4a2fec
+    'VINTR':          0x064,
4a2fec
+    'CR0_SEL_WRITE':  0x065,
4a2fec
+    'IDTR_READ':      0x066,
4a2fec
+    'GDTR_READ':      0x067,
4a2fec
+    'LDTR_READ':      0x068,
4a2fec
+    'TR_READ':        0x069,
4a2fec
+    'IDTR_WRITE':     0x06a,
4a2fec
+    'GDTR_WRITE':     0x06b,
4a2fec
+    'LDTR_WRITE':     0x06c,
4a2fec
+    'TR_WRITE':       0x06d,
4a2fec
+    'RDTSC':          0x06e,
4a2fec
+    'RDPMC':          0x06f,
4a2fec
+    'PUSHF':          0x070,
4a2fec
+    'POPF':           0x071,
4a2fec
+    'CPUID':          0x072,
4a2fec
+    'RSM':            0x073,
4a2fec
+    'IRET':           0x074,
4a2fec
+    'SWINT':          0x075,
4a2fec
+    'INVD':           0x076,
4a2fec
+    'PAUSE':          0x077,
4a2fec
+    'HLT':            0x078,
4a2fec
+    'INVLPG':         0x079,
4a2fec
+    'INVLPGA':        0x07a,
4a2fec
+    'IOIO':           0x07b,
4a2fec
+    'MSR':            0x07c,
4a2fec
+    'TASK_SWITCH':    0x07d,
4a2fec
+    'FERR_FREEZE':    0x07e,
4a2fec
+    'SHUTDOWN':       0x07f,
4a2fec
+    'VMRUN':          0x080,
4a2fec
+    'VMMCALL':        0x081,
4a2fec
+    'VMLOAD':         0x082,
4a2fec
+    'VMSAVE':         0x083,
4a2fec
+    'STGI':           0x084,
4a2fec
+    'CLGI':           0x085,
4a2fec
+    'SKINIT':         0x086,
4a2fec
+    'RDTSCP':         0x087,
4a2fec
+    'ICEBP':          0x088,
4a2fec
+    'WBINVD':         0x089,
4a2fec
+    'MONITOR':        0x08a,
4a2fec
+    'MWAIT':          0x08b,
4a2fec
+    'MWAIT_COND':     0x08c,
4a2fec
+    'XSETBV':         0x08d,
4a2fec
+    'NPF':            0x400,
4a2fec
+}
4a2fec
+
4a2fec
+# EC definition of HSR (from arch/arm64/include/asm/kvm_arm.h)
4a2fec
+AARCH64_EXIT_REASONS = {
4a2fec
+    'UNKNOWN':      0x00,
4a2fec
+    'WFI':          0x01,
4a2fec
+    'CP15_32':      0x03,
4a2fec
+    'CP15_64':      0x04,
4a2fec
+    'CP14_MR':      0x05,
4a2fec
+    'CP14_LS':      0x06,
4a2fec
+    'FP_ASIMD':     0x07,
4a2fec
+    'CP10_ID':      0x08,
4a2fec
+    'CP14_64':      0x0C,
4a2fec
+    'ILL_ISS':      0x0E,
4a2fec
+    'SVC32':        0x11,
4a2fec
+    'HVC32':        0x12,
4a2fec
+    'SMC32':        0x13,
4a2fec
+    'SVC64':        0x15,
4a2fec
+    'HVC64':        0x16,
4a2fec
+    'SMC64':        0x17,
4a2fec
+    'SYS64':        0x18,
4a2fec
+    'IABT':         0x20,
4a2fec
+    'IABT_HYP':     0x21,
4a2fec
+    'PC_ALIGN':     0x22,
4a2fec
+    'DABT':         0x24,
4a2fec
+    'DABT_HYP':     0x25,
4a2fec
+    'SP_ALIGN':     0x26,
4a2fec
+    'FP_EXC32':     0x28,
4a2fec
+    'FP_EXC64':     0x2C,
4a2fec
+    'SERROR':       0x2F,
4a2fec
+    'BREAKPT':      0x30,
4a2fec
+    'BREAKPT_HYP':  0x31,
4a2fec
+    'SOFTSTP':      0x32,
4a2fec
+    'SOFTSTP_HYP':  0x33,
4a2fec
+    'WATCHPT':      0x34,
4a2fec
+    'WATCHPT_HYP':  0x35,
4a2fec
+    'BKPT32':       0x38,
4a2fec
+    'VECTOR32':     0x3A,
4a2fec
+    'BRK64':        0x3C,
4a2fec
+}
4a2fec
+
4a2fec
+# From include/uapi/linux/kvm.h, KVM_EXIT_xxx
4a2fec
+USERSPACE_EXIT_REASONS = {
4a2fec
+    'UNKNOWN':          0,
4a2fec
+    'EXCEPTION':        1,
4a2fec
+    'IO':               2,
4a2fec
+    'HYPERCALL':        3,
4a2fec
+    'DEBUG':            4,
4a2fec
+    'HLT':              5,
4a2fec
+    'MMIO':             6,
4a2fec
+    'IRQ_WINDOW_OPEN':  7,
4a2fec
+    'SHUTDOWN':         8,
4a2fec
+    'FAIL_ENTRY':       9,
4a2fec
+    'INTR':             10,
4a2fec
+    'SET_TPR':          11,
4a2fec
+    'TPR_ACCESS':       12,
4a2fec
+    'S390_SIEIC':       13,
4a2fec
+    'S390_RESET':       14,
4a2fec
+    'DCR':              15,
4a2fec
+    'NMI':              16,
4a2fec
+    'INTERNAL_ERROR':   17,
4a2fec
+    'OSI':              18,
4a2fec
+    'PAPR_HCALL':       19,
4a2fec
+    'S390_UCONTROL':    20,
4a2fec
+    'WATCHDOG':         21,
4a2fec
+    'S390_TSCH':        22,
4a2fec
+    'EPR':              23,
4a2fec
+    'SYSTEM_EVENT':     24,
4a2fec
+}
4a2fec
+
4a2fec
+IOCTL_NUMBERS = {
4a2fec
+    'SET_FILTER':  0x40082406,
4a2fec
+    'ENABLE':      0x00002400,
4a2fec
+    'DISABLE':     0x00002401,
4a2fec
+    'RESET':       0x00002403,
4a2fec
+}
4a2fec
+
4a2fec
+class Arch(object):
4a2fec
+    """Encapsulates global architecture specific data.
4a2fec
+
4a2fec
+    Contains the performance event open syscall and ioctl numbers, as
4a2fec
+    well as the VM exit reasons for the architecture it runs on.
4a2fec
+
4a2fec
+    """
4a2fec
+    @staticmethod
4a2fec
+    def get_arch():
4a2fec
+        machine = os.uname()[4]
4a2fec
+
4a2fec
+        if machine.startswith('ppc'):
4a2fec
+            return ArchPPC()
4a2fec
+        elif machine.startswith('aarch64'):
4a2fec
+            return ArchA64()
4a2fec
+        elif machine.startswith('s390'):
4a2fec
+            return ArchS390()
4a2fec
+        else:
4a2fec
+            # X86_64
4a2fec
+            for line in open('/proc/cpuinfo'):
4a2fec
+                if not line.startswith('flags'):
4a2fec
+                    continue
4a2fec
+
4a2fec
+                flags = line.split()
4a2fec
+                if 'vmx' in flags:
4a2fec
+                    return ArchX86(VMX_EXIT_REASONS)
4a2fec
+                if 'svm' in flags:
4a2fec
+                    return ArchX86(SVM_EXIT_REASONS)
4a2fec
+                return
4a2fec
+
4a2fec
+class ArchX86(Arch):
4a2fec
+    def __init__(self, exit_reasons):
4a2fec
+        self.sc_perf_evt_open = 298
4a2fec
+        self.ioctl_numbers = IOCTL_NUMBERS
4a2fec
+        self.exit_reasons = exit_reasons
4a2fec
+
4a2fec
+class ArchPPC(Arch):
4a2fec
+    def __init__(self):
4a2fec
+        self.sc_perf_evt_open = 319
4a2fec
+        self.ioctl_numbers = IOCTL_NUMBERS
4a2fec
+        self.ioctl_numbers['ENABLE'] = 0x20002400
4a2fec
+        self.ioctl_numbers['DISABLE'] = 0x20002401
4a2fec
+        self.ioctl_numbers['RESET'] = 0x20002403
4a2fec
+
4a2fec
+        # PPC comes in 32 and 64 bit and some generated ioctl
4a2fec
+        # numbers depend on the wordsize.
4a2fec
+        char_ptr_size = ctypes.sizeof(ctypes.c_char_p)
4a2fec
+        self.ioctl_numbers['SET_FILTER'] = 0x80002406 | char_ptr_size << 16
4a2fec
+        self.exit_reasons = {}
4a2fec
+
4a2fec
+class ArchA64(Arch):
4a2fec
+    def __init__(self):
4a2fec
+        self.sc_perf_evt_open = 241
4a2fec
+        self.ioctl_numbers = IOCTL_NUMBERS
4a2fec
+        self.exit_reasons = AARCH64_EXIT_REASONS
4a2fec
+
4a2fec
+class ArchS390(Arch):
4a2fec
+    def __init__(self):
4a2fec
+        self.sc_perf_evt_open = 331
4a2fec
+        self.ioctl_numbers = IOCTL_NUMBERS
4a2fec
+        self.exit_reasons = None
4a2fec
+
4a2fec
+ARCH = Arch.get_arch()
4a2fec
+
4a2fec
+
4a2fec
+def walkdir(path):
4a2fec
+    """Returns os.walk() data for specified directory.
4a2fec
+
4a2fec
+    As it is only a wrapper it returns the same 3-tuple of (dirpath,
4a2fec
+    dirnames, filenames).
4a2fec
+    """
4a2fec
+    return next(os.walk(path))
4a2fec
+
4a2fec
+
4a2fec
+def parse_int_list(list_string):
4a2fec
+    """Returns an int list from a string of comma separated integers and
4a2fec
+    integer ranges."""
4a2fec
+    integers = []
4a2fec
+    members = list_string.split(',')
4a2fec
+
4a2fec
+    for member in members:
4a2fec
+        if '-' not in member:
4a2fec
+            integers.append(int(member))
4a2fec
+        else:
4a2fec
+            int_range = member.split('-')
4a2fec
+            integers.extend(range(int(int_range[0]),
4a2fec
+                                  int(int_range[1]) + 1))
4a2fec
+
4a2fec
+    return integers
4a2fec
+
4a2fec
+
4a2fec
+def get_online_cpus():
4a2fec
+    """Returns a list of cpu id integers."""
4a2fec
+    with open('/sys/devices/system/cpu/online') as cpu_list:
4a2fec
+        cpu_string = cpu_list.readline()
4a2fec
+        return parse_int_list(cpu_string)
4a2fec
+
4a2fec
+
4a2fec
+def get_filters():
4a2fec
+    """Returns a dict of trace events, their filter ids and
4a2fec
+    the values that can be filtered.
4a2fec
+
4a2fec
+    Trace events can be filtered for special values by setting a
4a2fec
+    filter string via an ioctl. The string normally has the format
4a2fec
+    identifier==value. For each filter a new event will be created, to
4a2fec
+    be able to distinguish the events.
4a2fec
+
4a2fec
+    """
4a2fec
+    filters = {}
4a2fec
+    filters['kvm_userspace_exit'] = ('reason', USERSPACE_EXIT_REASONS)
4a2fec
+    if ARCH.exit_reasons:
4a2fec
+        filters['kvm_exit'] = ('exit_reason', ARCH.exit_reasons)
4a2fec
+    return filters
4a2fec
+
4a2fec
+libc = ctypes.CDLL('libc.so.6', use_errno=True)
4a2fec
+syscall = libc.syscall
4a2fec
+
4a2fec
+class perf_event_attr(ctypes.Structure):
4a2fec
+    """Struct that holds the necessary data to set up a trace event.
4a2fec
+
4a2fec
+    For an extensive explanation see perf_event_open(2) and
4a2fec
+    include/uapi/linux/perf_event.h, struct perf_event_attr
4a2fec
+
4a2fec
+    All fields that are not initialized in the constructor are 0.
4a2fec
+
4a2fec
+    """
4a2fec
+    _fields_ = [('type', ctypes.c_uint32),
4a2fec
+                ('size', ctypes.c_uint32),
4a2fec
+                ('config', ctypes.c_uint64),
4a2fec
+                ('sample_freq', ctypes.c_uint64),
4a2fec
+                ('sample_type', ctypes.c_uint64),
4a2fec
+                ('read_format', ctypes.c_uint64),
4a2fec
+                ('flags', ctypes.c_uint64),
4a2fec
+                ('wakeup_events', ctypes.c_uint32),
4a2fec
+                ('bp_type', ctypes.c_uint32),
4a2fec
+                ('bp_addr', ctypes.c_uint64),
4a2fec
+                ('bp_len', ctypes.c_uint64),
4a2fec
+                ]
4a2fec
+
4a2fec
+    def __init__(self):
4a2fec
+        super(self.__class__, self).__init__()
4a2fec
+        self.type = PERF_TYPE_TRACEPOINT
4a2fec
+        self.size = ctypes.sizeof(self)
4a2fec
+        self.read_format = PERF_FORMAT_GROUP
4a2fec
+
4a2fec
+def perf_event_open(attr, pid, cpu, group_fd, flags):
4a2fec
+    """Wrapper for the sys_perf_event_open() syscall.
4a2fec
+
4a2fec
+    Used to set up performance events, returns a file descriptor or -1
4a2fec
+    on error.
4a2fec
+
4a2fec
+    Arguments passed to the syscall are:
4a2fec
+    - syscall number
4a2fec
+    - struct perf_event_attr *
4a2fec
+    - pid or -1 to monitor all pids
4a2fec
+    - cpu number or -1 to monitor all cpus
4a2fec
+    - The file descriptor of the group leader or -1 to create a group.
4a2fec
+    - flags
4a2fec
+
4a2fec
+    """
4a2fec
+    return syscall(ARCH.sc_perf_evt_open, ctypes.pointer(attr),
4a2fec
+                   ctypes.c_int(pid), ctypes.c_int(cpu),
4a2fec
+                   ctypes.c_int(group_fd), ctypes.c_long(flags))
4a2fec
+
4a2fec
+PERF_TYPE_TRACEPOINT = 2
4a2fec
+PERF_FORMAT_GROUP = 1 << 3
4a2fec
+
4a2fec
+PATH_DEBUGFS_TRACING = '/sys/kernel/debug/tracing'
4a2fec
+PATH_DEBUGFS_KVM = '/sys/kernel/debug/kvm'
4a2fec
+
4a2fec
+class Group(object):
4a2fec
+    """Represents a perf event group."""
4a2fec
+
4a2fec
+    def __init__(self):
4a2fec
+        self.events = []
4a2fec
+
4a2fec
+    def add_event(self, event):
4a2fec
+        self.events.append(event)
4a2fec
+
4a2fec
+    def read(self):
4a2fec
+        """Returns a dict with 'event name: value' for all events in the
4a2fec
+        group.
4a2fec
+
4a2fec
+        Values are read by reading from the file descriptor of the
4a2fec
+        event that is the group leader. See perf_event_open(2) for
4a2fec
+        details.
4a2fec
+
4a2fec
+        Read format for the used event configuration is:
4a2fec
+        struct read_format {
4a2fec
+            u64 nr; /* The number of events */
4a2fec
+            struct {
4a2fec
+                u64 value; /* The value of the event */
4a2fec
+            } values[nr];
4a2fec
+        };
4a2fec
+
4a2fec
+        """
4a2fec
+        length = 8 * (1 + len(self.events))
4a2fec
+        read_format = 'xxxxxxxx' + 'Q' * len(self.events)
4a2fec
+        return dict(zip([event.name for event in self.events],
4a2fec
+                        struct.unpack(read_format,
4a2fec
+                                      os.read(self.events[0].fd, length))))
4a2fec
+
4a2fec
+class Event(object):
4a2fec
+    """Represents a performance event and manages its life cycle."""
4a2fec
+    def __init__(self, name, group, trace_cpu, trace_pid, trace_point,
4a2fec
+                 trace_filter, trace_set='kvm'):
4a2fec
+        self.name = name
4a2fec
+        self.fd = None
4a2fec
+        self.setup_event(group, trace_cpu, trace_pid, trace_point,
4a2fec
+                         trace_filter, trace_set)
4a2fec
+
4a2fec
+    def __del__(self):
4a2fec
+        """Closes the event's file descriptor.
4a2fec
+
4a2fec
+        As no python file object was created for the file descriptor,
4a2fec
+        python will not reference count the descriptor and will not
4a2fec
+        close it itself automatically, so we do it.
4a2fec
+
4a2fec
+        """
4a2fec
+        if self.fd:
4a2fec
+            os.close(self.fd)
4a2fec
+
4a2fec
+    def setup_event_attribute(self, trace_set, trace_point):
4a2fec
+        """Returns an initialized ctype perf_event_attr struct."""
4a2fec
+
4a2fec
+        id_path = os.path.join(PATH_DEBUGFS_TRACING, 'events', trace_set,
4a2fec
+                               trace_point, 'id')
4a2fec
+
4a2fec
+        event_attr = perf_event_attr()
4a2fec
+        event_attr.config = int(open(id_path).read())
4a2fec
+        return event_attr
4a2fec
+
4a2fec
+    def setup_event(self, group, trace_cpu, trace_pid, trace_point,
4a2fec
+                    trace_filter, trace_set):
4a2fec
+        """Sets up the perf event in Linux.
4a2fec
+
4a2fec
+        Issues the syscall to register the event in the kernel and
4a2fec
+        then sets the optional filter.
4a2fec
+
4a2fec
+        """
4a2fec
+
4a2fec
+        event_attr = self.setup_event_attribute(trace_set, trace_point)
4a2fec
+
4a2fec
+        # First event will be group leader.
4a2fec
+        group_leader = -1
4a2fec
+
4a2fec
+        # All others have to pass the leader's descriptor instead.
4a2fec
+        if group.events:
4a2fec
+            group_leader = group.events[0].fd
4a2fec
+
4a2fec
+        fd = perf_event_open(event_attr, trace_pid,
4a2fec
+                             trace_cpu, group_leader, 0)
4a2fec
+        if fd == -1:
4a2fec
+            err = ctypes.get_errno()
4a2fec
+            raise OSError(err, os.strerror(err),
4a2fec
+                          'while calling sys_perf_event_open().')
4a2fec
+
4a2fec
+        if trace_filter:
4a2fec
+            fcntl.ioctl(fd, ARCH.ioctl_numbers['SET_FILTER'],
4a2fec
+                        trace_filter)
4a2fec
+
4a2fec
+        self.fd = fd
4a2fec
+
4a2fec
+    def enable(self):
4a2fec
+        """Enables the trace event in the kernel.
4a2fec
+
4a2fec
+        Enabling the group leader makes reading counters from it and the
4a2fec
+        events under it possible.
4a2fec
+
4a2fec
+        """
4a2fec
+        fcntl.ioctl(self.fd, ARCH.ioctl_numbers['ENABLE'], 0)
4a2fec
+
4a2fec
+    def disable(self):
4a2fec
+        """Disables the trace event in the kernel.
4a2fec
+
4a2fec
+        Disabling the group leader makes reading all counters under it
4a2fec
+        impossible.
4a2fec
+
4a2fec
+        """
4a2fec
+        fcntl.ioctl(self.fd, ARCH.ioctl_numbers['DISABLE'], 0)
4a2fec
+
4a2fec
+    def reset(self):
4a2fec
+        """Resets the count of the trace event in the kernel."""
4a2fec
+        fcntl.ioctl(self.fd, ARCH.ioctl_numbers['RESET'], 0)
4a2fec
+
4a2fec
+class TracepointProvider(object):
4a2fec
+    """Data provider for the stats class.
4a2fec
+
4a2fec
+    Manages the events/groups from which it acquires its data.
4a2fec
+
4a2fec
+    """
4a2fec
+    def __init__(self):
4a2fec
+        self.group_leaders = []
4a2fec
+        self.filters = get_filters()
4a2fec
+        self._fields = self.get_available_fields()
4a2fec
+        self._pid = 0
4a2fec
+
4a2fec
+    def get_available_fields(self):
4a2fec
+        """Returns a list of available events of format 'event name(filter
4a2fec
+        name)'.
4a2fec
+
4a2fec
+        All available events have directories under
4a2fec
+        /sys/kernel/debug/tracing/events/ which export information
4a2fec
+        about the specific event. Therefore, listing the dirs gives us
4a2fec
+        a list of all available events.
4a2fec
+
4a2fec
+        Some events like the vm exit reasons can be filtered for
4a2fec
+        specific values. To account for that, the routine below
4a2fec
+        creates special fields with the following format:
4a2fec
+        event name(filter name)
4a2fec
+
4a2fec
+        """
4a2fec
+        path = os.path.join(PATH_DEBUGFS_TRACING, 'events', 'kvm')
4a2fec
+        fields = walkdir(path)[1]
4a2fec
+        extra = []
4a2fec
+        for field in fields:
4a2fec
+            if field in self.filters:
4a2fec
+                filter_name_, filter_dicts = self.filters[field]
4a2fec
+                for name in filter_dicts:
4a2fec
+                    extra.append(field + '(' + name + ')')
4a2fec
+        fields += extra
4a2fec
+        return fields
4a2fec
+
4a2fec
+    def setup_traces(self):
4a2fec
+        """Creates all event and group objects needed to be able to retrieve
4a2fec
+        data."""
4a2fec
+        if self._pid > 0:
4a2fec
+            # Fetch list of all threads of the monitored pid, as qemu
4a2fec
+            # starts a thread for each vcpu.
4a2fec
+            path = os.path.join('/proc', str(self._pid), 'task')
4a2fec
+            groupids = walkdir(path)[1]
4a2fec
+        else:
4a2fec
+            groupids = get_online_cpus()
4a2fec
+
4a2fec
+        # The constant is needed as a buffer for python libs, std
4a2fec
+        # streams and other files that the script opens.
4a2fec
+        newlim = len(groupids) * len(self._fields) + 50
4a2fec
+        try:
4a2fec
+            softlim_, hardlim = resource.getrlimit(resource.RLIMIT_NOFILE)
4a2fec
+
4a2fec
+            if hardlim < newlim:
4a2fec
+                # Now we need CAP_SYS_RESOURCE, to increase the hard limit.
4a2fec
+                resource.setrlimit(resource.RLIMIT_NOFILE, (newlim, newlim))
4a2fec
+            else:
4a2fec
+                # Raising the soft limit is sufficient.
4a2fec
+                resource.setrlimit(resource.RLIMIT_NOFILE, (newlim, hardlim))
4a2fec
+
4a2fec
+        except ValueError:
4a2fec
+            sys.exit("NOFILE rlimit could not be raised to {0}".format(newlim))
4a2fec
+
4a2fec
+        for groupid in groupids:
4a2fec
+            group = Group()
4a2fec
+            for name in self._fields:
4a2fec
+                tracepoint = name
4a2fec
+                tracefilter = None
4a2fec
+                match = re.match(r'(.*)\((.*)\)', name)
4a2fec
+                if match:
4a2fec
+                    tracepoint, sub = match.groups()
4a2fec
+                    tracefilter = ('%s==%d\0' %
4a2fec
+                                   (self.filters[tracepoint][0],
4a2fec
+                                    self.filters[tracepoint][1][sub]))
4a2fec
+
4a2fec
+                # From perf_event_open(2):
4a2fec
+                # pid > 0 and cpu == -1
4a2fec
+                # This measures the specified process/thread on any CPU.
4a2fec
+                #
4a2fec
+                # pid == -1 and cpu >= 0
4a2fec
+                # This measures all processes/threads on the specified CPU.
4a2fec
+                trace_cpu = groupid if self._pid == 0 else -1
4a2fec
+                trace_pid = int(groupid) if self._pid != 0 else -1
4a2fec
+
4a2fec
+                group.add_event(Event(name=name,
4a2fec
+                                      group=group,
4a2fec
+                                      trace_cpu=trace_cpu,
4a2fec
+                                      trace_pid=trace_pid,
4a2fec
+                                      trace_point=tracepoint,
4a2fec
+                                      trace_filter=tracefilter))
4a2fec
+
4a2fec
+            self.group_leaders.append(group)
4a2fec
+
4a2fec
+    def available_fields(self):
4a2fec
+        return self.get_available_fields()
4a2fec
+
4a2fec
+    @property
4a2fec
+    def fields(self):
4a2fec
+        return self._fields
4a2fec
+
4a2fec
+    @fields.setter
4a2fec
+    def fields(self, fields):
4a2fec
+        """Enables/disables the (un)wanted events"""
4a2fec
+        self._fields = fields
4a2fec
+        for group in self.group_leaders:
4a2fec
+            for index, event in enumerate(group.events):
4a2fec
+                if event.name in fields:
4a2fec
+                    event.reset()
4a2fec
+                    event.enable()
4a2fec
+                else:
4a2fec
+                    # Do not disable the group leader.
4a2fec
+                    # It would disable all of its events.
4a2fec
+                    if index != 0:
4a2fec
+                        event.disable()
4a2fec
+
4a2fec
+    @property
4a2fec
+    def pid(self):
4a2fec
+        return self._pid
4a2fec
+
4a2fec
+    @pid.setter
4a2fec
+    def pid(self, pid):
4a2fec
+        """Changes the monitored pid by setting new traces."""
4a2fec
+        self._pid = pid
4a2fec
+        # The garbage collector will get rid of all Event/Group
4a2fec
+        # objects and open files after removing the references.
4a2fec
+        self.group_leaders = []
4a2fec
+        self.setup_traces()
4a2fec
+        self.fields = self._fields
4a2fec
+
4a2fec
+    def read(self):
4a2fec
+        """Returns 'event name: current value' for all enabled events."""
4a2fec
+        ret = defaultdict(int)
4a2fec
+        for group in self.group_leaders:
4a2fec
+            for name, val in group.read().iteritems():
4a2fec
+                if name in self._fields:
4a2fec
+                    ret[name] += val
4a2fec
+        return ret
4a2fec
+
4a2fec
+class DebugfsProvider(object):
4a2fec
+    """Provides data from the files that KVM creates in the kvm debugfs
4a2fec
+    folder."""
4a2fec
+    def __init__(self):
4a2fec
+        self._fields = self.get_available_fields()
4a2fec
+        self._pid = 0
4a2fec
+        self.do_read = True
4a2fec
+
4a2fec
+    def get_available_fields(self):
4a2fec
+        """Returns a list of available fields.
4a2fec
+
4a2fec
+        The fields are all available KVM debugfs files
4a2fec
+
4a2fec
+        """
4a2fec
+        return walkdir(PATH_DEBUGFS_KVM)[2]
4a2fec
+
4a2fec
+    @property
4a2fec
+    def fields(self):
4a2fec
+        return self._fields
4a2fec
+
4a2fec
+    @fields.setter
4a2fec
+    def fields(self, fields):
4a2fec
+        self._fields = fields
4a2fec
+
4a2fec
+    @property
4a2fec
+    def pid(self):
4a2fec
+        return self._pid
4a2fec
+
4a2fec
+    @pid.setter
4a2fec
+    def pid(self, pid):
4a2fec
+        if pid != 0:
4a2fec
+            self._pid = pid
4a2fec
+
4a2fec
+            vms = walkdir(PATH_DEBUGFS_KVM)[1]
4a2fec
+            if len(vms) == 0:
4a2fec
+                self.do_read = False
4a2fec
+
4a2fec
+            self.paths = filter(lambda x: "{}-".format(pid) in x, vms)
4a2fec
+
4a2fec
+        else:
4a2fec
+            self.paths = ['']
4a2fec
+            self.do_read = True
4a2fec
+
4a2fec
+    def read(self):
4a2fec
+        """Returns a dict with format:'file name / field -> current value'."""
4a2fec
+        results = {}
4a2fec
+
4a2fec
+        # If no debugfs filtering support is available, then don't read.
4a2fec
+        if not self.do_read:
4a2fec
+            return results
4a2fec
+
4a2fec
+        for path in self.paths:
4a2fec
+            for field in self._fields:
4a2fec
+                results[field] = results.get(field, 0) \
4a2fec
+                                 + self.read_field(field, path)
4a2fec
+
4a2fec
+        return results
4a2fec
+
4a2fec
+    def read_field(self, field, path):
4a2fec
+        """Returns the value of a single field from a specific VM."""
4a2fec
+        try:
4a2fec
+            return int(open(os.path.join(PATH_DEBUGFS_KVM,
4a2fec
+                                         path,
4a2fec
+                                         field))
4a2fec
+                       .read())
4a2fec
+        except IOError:
4a2fec
+            return 0
4a2fec
+
4a2fec
+class Stats(object):
4a2fec
+    """Manages the data providers and the data they provide.
4a2fec
+
4a2fec
+    It is used to set filters on the provider's data and collect all
4a2fec
+    provider data.
4a2fec
+
4a2fec
+    """
4a2fec
+    def __init__(self, providers, pid, fields=None):
4a2fec
+        self.providers = providers
4a2fec
+        self._pid_filter = pid
4a2fec
+        self._fields_filter = fields
4a2fec
+        self.values = {}
4a2fec
+        self.update_provider_pid()
4a2fec
+        self.update_provider_filters()
4a2fec
+
4a2fec
+    def update_provider_filters(self):
4a2fec
+        """Propagates fields filters to providers."""
4a2fec
+        def wanted(key):
4a2fec
+            if not self._fields_filter:
4a2fec
+                return True
4a2fec
+            return re.match(self._fields_filter, key) is not None
4a2fec
+
4a2fec
+        # As we reset the counters when updating the fields we can
4a2fec
+        # also clear the cache of old values.
4a2fec
+        self.values = {}
4a2fec
+        for provider in self.providers:
4a2fec
+            provider_fields = [key for key in provider.get_available_fields()
4a2fec
+                               if wanted(key)]
4a2fec
+            provider.fields = provider_fields
4a2fec
+
4a2fec
+    def update_provider_pid(self):
4a2fec
+        """Propagates pid filters to providers."""
4a2fec
+        for provider in self.providers:
4a2fec
+            provider.pid = self._pid_filter
4a2fec
+
4a2fec
+    @property
4a2fec
+    def fields_filter(self):
4a2fec
+        return self._fields_filter
4a2fec
+
4a2fec
+    @fields_filter.setter
4a2fec
+    def fields_filter(self, fields_filter):
4a2fec
+        self._fields_filter = fields_filter
4a2fec
+        self.update_provider_filters()
4a2fec
+
4a2fec
+    @property
4a2fec
+    def pid_filter(self):
4a2fec
+        return self._pid_filter
4a2fec
+
4a2fec
+    @pid_filter.setter
4a2fec
+    def pid_filter(self, pid):
4a2fec
+        self._pid_filter = pid
4a2fec
+        self.values = {}
4a2fec
+        self.update_provider_pid()
4a2fec
+
4a2fec
+    def get(self):
4a2fec
+        """Returns a dict with field -> (value, delta to last value) of all
4a2fec
+        provider data."""
4a2fec
+        for provider in self.providers:
4a2fec
+            new = provider.read()
4a2fec
+            for key in provider.fields:
4a2fec
+                oldval = self.values.get(key, (0, 0))
4a2fec
+                newval = new.get(key, 0)
4a2fec
+                newdelta = None
4a2fec
+                if oldval is not None:
4a2fec
+                    newdelta = newval - oldval[0]
4a2fec
+                self.values[key] = (newval, newdelta)
4a2fec
+        return self.values
4a2fec
+
4a2fec
+LABEL_WIDTH = 40
4a2fec
+NUMBER_WIDTH = 10
4a2fec
+
4a2fec
+class Tui(object):
4a2fec
+    """Instruments curses to draw a nice text ui."""
4a2fec
+    def __init__(self, stats):
4a2fec
+        self.stats = stats
4a2fec
+        self.screen = None
4a2fec
+        self.drilldown = False
4a2fec
+        self.update_drilldown()
4a2fec
+
4a2fec
+    def __enter__(self):
4a2fec
+        """Initialises curses for later use.  Based on curses.wrapper
4a2fec
+           implementation from the Python standard library."""
4a2fec
+        self.screen = curses.initscr()
4a2fec
+        curses.noecho()
4a2fec
+        curses.cbreak()
4a2fec
+
4a2fec
+        # The try/catch works around a minor bit of
4a2fec
+        # over-conscientiousness in the curses module, the error
4a2fec
+        # return from C start_color() is ignorable.
4a2fec
+        try:
4a2fec
+            curses.start_color()
4a2fec
+        except:
4a2fec
+            pass
4a2fec
+
4a2fec
+        curses.use_default_colors()
4a2fec
+        return self
4a2fec
+
4a2fec
+    def __exit__(self, *exception):
4a2fec
+        """Resets the terminal to its normal state.  Based on curses.wrapper
4a2fec
+           implementation from the Python standard library."""
4a2fec
+        if self.screen:
4a2fec
+            self.screen.keypad(0)
4a2fec
+            curses.echo()
4a2fec
+            curses.nocbreak()
4a2fec
+            curses.endwin()
4a2fec
+
4a2fec
+    def update_drilldown(self):
4a2fec
+        """Sets or removes a filter that only allows fields without braces."""
4a2fec
+        if not self.stats.fields_filter:
4a2fec
+            self.stats.fields_filter = r'^[^\(]*$'
4a2fec
+
4a2fec
+        elif self.stats.fields_filter == r'^[^\(]*$':
4a2fec
+            self.stats.fields_filter = None
4a2fec
+
4a2fec
+    def update_pid(self, pid):
4a2fec
+        """Propagates pid selection to stats object."""
4a2fec
+        self.stats.pid_filter = pid
4a2fec
+
4a2fec
+    def refresh(self, sleeptime):
4a2fec
+        """Refreshes on-screen data."""
4a2fec
+        self.screen.erase()
4a2fec
+        if self.stats.pid_filter > 0:
4a2fec
+            self.screen.addstr(0, 0, 'kvm statistics - pid {0}'
4a2fec
+                               .format(self.stats.pid_filter),
4a2fec
+                               curses.A_BOLD)
4a2fec
+        else:
4a2fec
+            self.screen.addstr(0, 0, 'kvm statistics - summary', curses.A_BOLD)
4a2fec
+        self.screen.addstr(2, 1, 'Event')
4a2fec
+        self.screen.addstr(2, 1 + LABEL_WIDTH + NUMBER_WIDTH -
4a2fec
+                           len('Total'), 'Total')
4a2fec
+        self.screen.addstr(2, 1 + LABEL_WIDTH + NUMBER_WIDTH + 8 -
4a2fec
+                           len('Current'), 'Current')
4a2fec
+        row = 3
4a2fec
+        stats = self.stats.get()
4a2fec
+        def sortkey(x):
4a2fec
+            if stats[x][1]:
4a2fec
+                return (-stats[x][1], -stats[x][0])
4a2fec
+            else:
4a2fec
+                return (0, -stats[x][0])
4a2fec
+        for key in sorted(stats.keys(), key=sortkey):
4a2fec
+
4a2fec
+            if row >= self.screen.getmaxyx()[0]:
4a2fec
+                break
4a2fec
+            values = stats[key]
4a2fec
+            if not values[0] and not values[1]:
4a2fec
+                break
4a2fec
+            col = 1
4a2fec
+            self.screen.addstr(row, col, key)
4a2fec
+            col += LABEL_WIDTH
4a2fec
+            self.screen.addstr(row, col, '%10d' % (values[0],))
4a2fec
+            col += NUMBER_WIDTH
4a2fec
+            if values[1] is not None:
4a2fec
+                self.screen.addstr(row, col, '%8d' % (values[1] / sleeptime,))
4a2fec
+            row += 1
4a2fec
+        self.screen.refresh()
4a2fec
+
4a2fec
+    def show_filter_selection(self):
4a2fec
+        """Draws filter selection mask.
4a2fec
+
4a2fec
+        Asks for a valid regex and sets the fields filter accordingly.
4a2fec
+
4a2fec
+        """
4a2fec
+        while True:
4a2fec
+            self.screen.erase()
4a2fec
+            self.screen.addstr(0, 0,
4a2fec
+                               "Show statistics for events matching a regex.",
4a2fec
+                               curses.A_BOLD)
4a2fec
+            self.screen.addstr(2, 0,
4a2fec
+                               "Current regex: {0}"
4a2fec
+                               .format(self.stats.fields_filter))
4a2fec
+            self.screen.addstr(3, 0, "New regex: ")
4a2fec
+            curses.echo()
4a2fec
+            regex = self.screen.getstr()
4a2fec
+            curses.noecho()
4a2fec
+            if len(regex) == 0:
4a2fec
+                return
4a2fec
+            try:
4a2fec
+                re.compile(regex)
4a2fec
+                self.stats.fields_filter = regex
4a2fec
+                return
4a2fec
+            except re.error:
4a2fec
+                continue
4a2fec
+
4a2fec
+    def show_vm_selection(self):
4a2fec
+        """Draws PID selection mask.
4a2fec
+
4a2fec
+        Asks for a pid until a valid pid or 0 has been entered.
4a2fec
+
4a2fec
+        """
4a2fec
+        while True:
4a2fec
+            self.screen.erase()
4a2fec
+            self.screen.addstr(0, 0,
4a2fec
+                               'Show statistics for specific pid.',
4a2fec
+                               curses.A_BOLD)
4a2fec
+            self.screen.addstr(1, 0,
4a2fec
+                               'This might limit the shown data to the trace '
4a2fec
+                               'statistics.')
4a2fec
+
4a2fec
+            curses.echo()
4a2fec
+            self.screen.addstr(3, 0, "Pid [0 or pid]: ")
4a2fec
+            pid = self.screen.getstr()
4a2fec
+            curses.noecho()
4a2fec
+
4a2fec
+            try:
4a2fec
+                pid = int(pid)
4a2fec
+
4a2fec
+                if pid == 0:
4a2fec
+                    self.update_pid(pid)
4a2fec
+                    break
4a2fec
+                else:
4a2fec
+                    if not os.path.isdir(os.path.join('/proc/', str(pid))):
4a2fec
+                        continue
4a2fec
+                    else:
4a2fec
+                        self.update_pid(pid)
4a2fec
+                        break
4a2fec
+
4a2fec
+            except ValueError:
4a2fec
+                continue
4a2fec
+
4a2fec
+    def show_stats(self):
4a2fec
+        """Refreshes the screen and processes user input."""
4a2fec
+        sleeptime = 0.25
4a2fec
+        while True:
4a2fec
+            self.refresh(sleeptime)
4a2fec
+            curses.halfdelay(int(sleeptime * 10))
4a2fec
+            sleeptime = 3
4a2fec
+            try:
4a2fec
+                char = self.screen.getkey()
4a2fec
+                if char == 'x':
4a2fec
+                    self.drilldown = not self.drilldown
4a2fec
+                    self.update_drilldown()
4a2fec
+                if char == 'q':
4a2fec
+                    break
4a2fec
+                if char == 'f':
4a2fec
+                    self.show_filter_selection()
4a2fec
+                if char == 'p':
4a2fec
+                    self.show_vm_selection()
4a2fec
+            except KeyboardInterrupt:
4a2fec
+                break
4a2fec
+            except curses.error:
4a2fec
+                continue
4a2fec
+
4a2fec
+def batch(stats):
4a2fec
+    """Prints statistics in a key, value format."""
4a2fec
+    s = stats.get()
4a2fec
+    time.sleep(1)
4a2fec
+    s = stats.get()
4a2fec
+    for key in sorted(s.keys()):
4a2fec
+        values = s[key]
4a2fec
+        print '%-42s%10d%10d' % (key, values[0], values[1])
4a2fec
+
4a2fec
+def log(stats):
4a2fec
+    """Prints statistics as reiterating key block, multiple value blocks."""
4a2fec
+    keys = sorted(stats.get().iterkeys())
4a2fec
+    def banner():
4a2fec
+        for k in keys:
4a2fec
+            print '%s' % k,
4a2fec
+        print
4a2fec
+    def statline():
4a2fec
+        s = stats.get()
4a2fec
+        for k in keys:
4a2fec
+            print ' %9d' % s[k][1],
4a2fec
+        print
4a2fec
+    line = 0
4a2fec
+    banner_repeat = 20
4a2fec
+    while True:
4a2fec
+        time.sleep(1)
4a2fec
+        if line % banner_repeat == 0:
4a2fec
+            banner()
4a2fec
+        statline()
4a2fec
+        line += 1
4a2fec
+
4a2fec
+def get_options():
4a2fec
+    """Returns processed program arguments."""
4a2fec
+    description_text = """
4a2fec
+This script displays various statistics about VMs running under KVM.
4a2fec
+The statistics are gathered from the KVM debugfs entries and / or the
4a2fec
+currently available perf traces.
4a2fec
+
4a2fec
+The monitoring takes additional cpu cycles and might affect the VM's
4a2fec
+performance.
4a2fec
+
4a2fec
+Requirements:
4a2fec
+- Access to:
4a2fec
+    /sys/kernel/debug/kvm
4a2fec
+    /sys/kernel/debug/trace/events/*
4a2fec
+    /proc/pid/task
4a2fec
+- /proc/sys/kernel/perf_event_paranoid < 1 if user has no
4a2fec
+  CAP_SYS_ADMIN and perf events are used.
4a2fec
+- CAP_SYS_RESOURCE if the hard limit is not high enough to allow
4a2fec
+  the large number of files that are possibly opened.
4a2fec
+"""
4a2fec
+
4a2fec
+    class PlainHelpFormatter(optparse.IndentedHelpFormatter):
4a2fec
+        def format_description(self, description):
4a2fec
+            if description:
4a2fec
+                return description + "\n"
4a2fec
+            else:
4a2fec
+                return ""
4a2fec
+
4a2fec
+    optparser = optparse.OptionParser(description=description_text,
4a2fec
+                                      formatter=PlainHelpFormatter())
4a2fec
+    optparser.add_option('-1', '--once', '--batch',
4a2fec
+                         action='store_true',
4a2fec
+                         default=False,
4a2fec
+                         dest='once',
4a2fec
+                         help='run in batch mode for one second',
4a2fec
+                         )
4a2fec
+    optparser.add_option('-l', '--log',
4a2fec
+                         action='store_true',
4a2fec
+                         default=False,
4a2fec
+                         dest='log',
4a2fec
+                         help='run in logging mode (like vmstat)',
4a2fec
+                         )
4a2fec
+    optparser.add_option('-t', '--tracepoints',
4a2fec
+                         action='store_true',
4a2fec
+                         default=False,
4a2fec
+                         dest='tracepoints',
4a2fec
+                         help='retrieve statistics from tracepoints',
4a2fec
+                         )
4a2fec
+    optparser.add_option('-d', '--debugfs',
4a2fec
+                         action='store_true',
4a2fec
+                         default=False,
4a2fec
+                         dest='debugfs',
4a2fec
+                         help='retrieve statistics from debugfs',
4a2fec
+                         )
4a2fec
+    optparser.add_option('-f', '--fields',
4a2fec
+                         action='store',
4a2fec
+                         default=None,
4a2fec
+                         dest='fields',
4a2fec
+                         help='fields to display (regex)',
4a2fec
+                         )
4a2fec
+    optparser.add_option('-p', '--pid',
4a2fec
+                        action='store',
4a2fec
+                        default=0,
4a2fec
+                        type=int,
4a2fec
+                        dest='pid',
4a2fec
+                        help='restrict statistics to pid',
4a2fec
+                        )
4a2fec
+    (options, _) = optparser.parse_args(sys.argv)
4a2fec
+    return options
4a2fec
+
4a2fec
+def get_providers(options):
4a2fec
+    """Returns a list of data providers depending on the passed options."""
4a2fec
+    providers = []
4a2fec
+
4a2fec
+    if options.tracepoints:
4a2fec
+        providers.append(TracepointProvider())
4a2fec
+    if options.debugfs:
4a2fec
+        providers.append(DebugfsProvider())
4a2fec
+    if len(providers) == 0:
4a2fec
+        providers.append(TracepointProvider())
4a2fec
+
4a2fec
+    return providers
4a2fec
+
4a2fec
+def check_access(options):
4a2fec
+    """Exits on inaccessible paths; returns options, maybe forcing debugfs."""
4a2fec
+    if not os.path.exists('/sys/kernel/debug'):
4a2fec
+        sys.stderr.write('Please enable CONFIG_DEBUG_FS in your kernel.\n')
4a2fec
+        sys.exit(1)
4a2fec
+
4a2fec
+    if not os.path.exists(PATH_DEBUGFS_KVM):
4a2fec
+        sys.stderr.write("Please make sure, that debugfs is mounted and "
4a2fec
+                         "readable by the current user:\n"
4a2fec
+                         "('mount -t debugfs debugfs /sys/kernel/debug')\n"
4a2fec
+                         "Also ensure, that the kvm modules are loaded.\n")
4a2fec
+        sys.exit(1)
4a2fec
+
4a2fec
+    if not os.path.exists(PATH_DEBUGFS_TRACING) and (options.tracepoints
4a2fec
+                                                     or not options.debugfs):
4a2fec
+        sys.stderr.write("Please enable CONFIG_TRACING in your kernel "
4a2fec
+                         "when using the option -t (default).\n"
4a2fec
+                         "If it is enabled, make {0} readable by the "
4a2fec
+                         "current user.\n"
4a2fec
+                         .format(PATH_DEBUGFS_TRACING))
4a2fec
+        if options.tracepoints:  # tracepoints explicitly requested: give up
4a2fec
+            sys.exit(1)
4a2fec
+
4a2fec
+        sys.stderr.write("Falling back to debugfs statistics!\n")
4a2fec
+        options.debugfs = True
4a2fec
+        time.sleep(5)  # presumably leaves time to read the notice above
4a2fec
+
4a2fec
+    return options
4a2fec
+
4a2fec
+def main():
4a2fec
+    options = get_options()
4a2fec
+    options = check_access(options)
4a2fec
+
4a2fec
+    if (options.pid > 0 and
4a2fec
+        not os.path.isdir(os.path.join('/proc/',
4a2fec
+                                       str(options.pid)))):
4a2fec
+        sys.stderr.write('Did you use a (unsupported) tid instead of a pid?\n')
4a2fec
+        sys.exit('Specified pid does not exist.')
4a2fec
+
4a2fec
+    providers = get_providers(options)
4a2fec
+    stats = Stats(providers, options.pid, fields=options.fields)
4a2fec
+
4a2fec
+    if options.log:
4a2fec
+        log(stats)
4a2fec
+    elif not options.once:
4a2fec
+        with Tui(stats) as tui:
4a2fec
+            tui.show_stats()
4a2fec
+    else:
4a2fec
+        batch(stats)
4a2fec
+
4a2fec
+if __name__ == "__main__":
4a2fec
+    main()
4a2fec
diff --git a/scripts/kvm/kvm_stat.texi b/scripts/kvm/kvm_stat.texi
4a2fec
new file mode 100644
4a2fec
index 0000000..4faf1a6
4a2fec
--- /dev/null
4a2fec
+++ b/scripts/kvm/kvm_stat.texi
4a2fec
@@ -0,0 +1,55 @@
4a2fec
+@example
4a2fec
+@c man begin SYNOPSIS
4a2fec
+usage: kvm_stat [OPTION]...
4a2fec
+@c man end
4a2fec
+@end example
4a2fec
+
4a2fec
+@c man begin DESCRIPTION
4a2fec
+
4a2fec
+kvm_stat prints counts of KVM kernel module trace events.  These events signify
4a2fec
+state transitions such as guest mode entry and exit.
4a2fec
+
4a2fec
+This tool is useful for observing guest behavior from the host perspective.
4a2fec
+Often conclusions about performance or buggy behavior can be drawn from the
4a2fec
+output.
4a2fec
+
4a2fec
+The set of KVM kernel module trace events may be specific to the kernel version
4a2fec
+or architecture.  It is best to check the KVM kernel module source code for the
4a2fec
+meaning of events.
4a2fec
+
4a2fec
+@c man end
4a2fec
+
4a2fec
+@c man begin OPTIONS
4a2fec
+@table @option
4a2fec
+@item -1, --once, --batch
4a2fec
+  run in batch mode for one second
4a2fec
+@item -l, --log
4a2fec
+  run in logging mode (like vmstat)
4a2fec
+@item -t, --tracepoints
4a2fec
+  retrieve statistics from tracepoints
4a2fec
+@item -d, --debugfs
4a2fec
+  retrieve statistics from debugfs
4a2fec
+@item -p, --pid=@var{pid}
4a2fec
+  limit statistics to one virtual machine (pid)
4a2fec
+@item -f, --fields=@var{fields}
4a2fec
+  fields to display (regex)
4a2fec
+@item -h, --help
4a2fec
+  show help message
4a2fec
+@end table
4a2fec
+
4a2fec
+@c man end
4a2fec
+
4a2fec
+@ignore
4a2fec
+
4a2fec
+@setfilename kvm_stat
4a2fec
+@settitle Report KVM kernel module event counters.
4a2fec
+
4a2fec
+@c man begin AUTHOR
4a2fec
+Stefan Hajnoczi <stefanha@redhat.com>
4a2fec
+@c man end
4a2fec
+
4a2fec
+@c man begin SEEALSO
4a2fec
+perf(1), trace-cmd(1)
4a2fec
+@c man end
4a2fec
+
4a2fec
+@end ignore
4a2fec
-- 
4a2fec
1.8.3.1
4a2fec