diff --git a/.gitignore b/.gitignore index 4c91121..5d96e3a 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1 @@ -SOURCES/man-pages-overrides-7.5.2.tar.xz +SOURCES/man-pages-overrides-7.6.2.tar.xz diff --git a/.man-pages-overrides.metadata b/.man-pages-overrides.metadata index a02778a..c0a9250 100644 --- a/.man-pages-overrides.metadata +++ b/.man-pages-overrides.metadata @@ -1 +1 @@ -fbf8271558624890e28b44c7812bc97944c7c4c9 SOURCES/man-pages-overrides-7.5.2.tar.xz +94ed0b85b36e1f37784f3f1dc8ddb2e51facdeb6 SOURCES/man-pages-overrides-7.6.2.tar.xz diff --git a/SOURCES/1021967-mpo-7.1.0-socat.1.patch b/SOURCES/1021967-mpo-7.1.0-socat.1.patch deleted file mode 100644 index a51620a..0000000 --- a/SOURCES/1021967-mpo-7.1.0-socat.1.patch +++ /dev/null @@ -1,25 +0,0 @@ -From 201baa7c77eeac03b0e98064aff6d371140cf394 Mon Sep 17 00:00:00 2001 -From: Jan Chaloupka -Date: Mon, 29 Sep 2014 19:49:55 +0200 -Subject: [PATCH] socat.1 bold too far - ---- - socat/man1/socat.1 | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/socat/man1/socat.1 b/socat/man1/socat.1 -index a07563e..b59fe36 100644 ---- a/socat/man1/socat.1 -+++ b/socat/man1/socat.1 -@@ -1969,7 +1969,7 @@ address\&. - Changes the (owner) and groups of the process after - processing the address (example)\&. This call might require root privilege\&. - .IP "\fB\f(CWsu\-d=\fP\fP" --Short name for \fB\f(CWsubstuser\-delayed\fP\fP\&. -+Short name for \fBsubstuser\-delayed\fP. - Changes the - (owner) and groups of the process after processing the address (example)\&. 
- The user and his groups are retrieved \fIbefore\fP a possible --- -1.9.3 - diff --git a/SOURCES/1439724-mpo-7.5.0-vsftpd.conf.5.patch b/SOURCES/1439724-mpo-7.5.0-vsftpd.conf.5.patch deleted file mode 100644 index 350c1f3..0000000 --- a/SOURCES/1439724-mpo-7.5.0-vsftpd.conf.5.patch +++ /dev/null @@ -1,27 +0,0 @@ -From 8ea4d5cf9f5463765d7fdc0cbb9717f773ce848b Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Nikola=20Forr=C3=B3?= -Date: Wed, 1 Nov 2017 13:31:47 +0100 -Subject: [PATCH 2/4] vsftpd.conf.5: mention conflict between chroot_local_user - and text_userdb_names options - ---- - vsftpd/man5/vsftpd.conf.5 | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/vsftpd/man5/vsftpd.conf.5 b/vsftpd/man5/vsftpd.conf.5 -index 6f36b1b..c7642ce 100644 ---- a/vsftpd/man5/vsftpd.conf.5 -+++ b/vsftpd/man5/vsftpd.conf.5 -@@ -578,6 +578,9 @@ Default: NO - By default, numeric IDs are shown in the user and group fields of directory - listings. You can get textual names by enabling this parameter. It is off - by default for performance reasons. -+Note that textual names are not guaranteed when -+.BR chroot_local_user -+is set to YES. - - Default: NO - .TP --- -2.13.6 - diff --git a/SOURCES/1495572-mpo-7.6.0-mount.cifs.8.patch b/SOURCES/1495572-mpo-7.6.0-mount.cifs.8.patch new file mode 100644 index 0000000..1b44970 --- /dev/null +++ b/SOURCES/1495572-mpo-7.6.0-mount.cifs.8.patch @@ -0,0 +1,28 @@ +From 785c2a1e47ce8fa251f9355ca760412b22784d28 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Nikola=20Forr=C3=B3?= +Date: Wed, 20 Jun 2018 11:11:09 +0200 +Subject: [PATCH] mount.cifs.8: describe seal option +--- + cifs-utils/man8/mount.cifs.8 | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/cifs-utils/man8/mount.cifs.8 b/cifs-utils/man8/mount.cifs.8 +index 01579f6..2a751ab 100644 +--- a/cifs-utils/man8/mount.cifs.8 ++++ b/cifs-utils/man8/mount.cifs.8 +@@ -440,6 +440,12 @@ The default in mainline kernel versions prior to v3.8 was sec=ntlm. 
In v3.8, the + If the server requires signing during protocol negotiation, then it may be enabled automatically. Packet signing may also be enabled automatically if it's enabled in /proc/fs/cifs/SecurityFlags. + .RE + .PP ++seal ++.RS 4 ++Request encryption at the SMB layer. Encryption is only supported in ++SMBv3 and above. The encryption algorithm used is AES\-128\-CCM. ++.RE ++.PP + nobrl + .RS 4 + Do not send byte range lock requests to the server\&. This is necessary for certain applications that break with cifs style mandatory byte range locks (and most cifs servers do not yet support requesting advisory byte range locks)\&. +-- +2.17.1 + diff --git a/SOURCES/1507143-mpo-7.5.0-vsftpd.conf.5.patch b/SOURCES/1507143-mpo-7.5.0-vsftpd.conf.5.patch deleted file mode 100644 index 3aa836a..0000000 --- a/SOURCES/1507143-mpo-7.5.0-vsftpd.conf.5.patch +++ /dev/null @@ -1,32 +0,0 @@ -From 97f21ce933af151c0951d79f73233177d9a6f202 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Nikola=20Forr=C3=B3?= -Date: Wed, 1 Nov 2017 16:35:56 +0100 -Subject: [PATCH 3/4] vsftpd.conf.5: document allow_writeable_chroot option - ---- - vsftpd/man5/vsftpd.conf.5 | 9 +++++++++ - 1 file changed, 9 insertions(+) - -diff --git a/vsftpd/man5/vsftpd.conf.5 b/vsftpd/man5/vsftpd.conf.5 -index c7642ce..f50c9c7 100644 ---- a/vsftpd/man5/vsftpd.conf.5 -+++ b/vsftpd/man5/vsftpd.conf.5 -@@ -58,6 +58,15 @@ connections. - - Default: NO - .TP -+.B allow_writeable_chroot -+Allow chroot()'ing a user to a directory writable by that user. Note that -+setting this to YES is potentially dangerous. For example, if the user -+creates an 'etc' directory in the new root directory, they could potentially -+trick the C library into loading a user-created configuration file from the -+/etc/ directory. -+ -+Default: NO -+.TP - .B anon_mkdir_write_enable - If set to YES, anonymous users will be permitted to create new directories - under certain conditions. 
For this to work, the option --- -2.13.6 - diff --git a/SOURCES/1517216-mpo-7.5.0-vsftpd.conf.5.patch b/SOURCES/1517216-mpo-7.5.0-vsftpd.conf.5.patch deleted file mode 100644 index f3f0b03..0000000 --- a/SOURCES/1517216-mpo-7.5.0-vsftpd.conf.5.patch +++ /dev/null @@ -1,49 +0,0 @@ -From ad678bf89f8c1174058bb6d16e5a458e9c290bc0 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Nikola=20Forr=C3=B3?= -Date: Tue, 28 Nov 2017 11:44:40 +0100 -Subject: [PATCH] vsftpd.conf.5: extend description of ascii_download_enable - and ascii_upload_enable - ---- - vsftpd/man5/vsftpd.conf.5 | 20 ++++++++++++++++++++ - 1 file changed, 20 insertions(+) - -diff --git a/vsftpd/man5/vsftpd.conf.5 b/vsftpd/man5/vsftpd.conf.5 -index f50c9c7..6aef05e 100644 ---- a/vsftpd/man5/vsftpd.conf.5 -+++ b/vsftpd/man5/vsftpd.conf.5 -@@ -113,11 +113,31 @@ Default: YES - .TP - .B ascii_download_enable - When enabled, ASCII mode data transfers will be honoured on downloads. -+When disabled, the server will pretend to allow ASCII mode but in fact -+ignore requests to activate it. So the client will think the ASCII mode -+is active and therefore may still translate any -+.BR -+character sequences in the received file. See the following article for -+a detailed explanation of the behaviour: -+https://access.redhat.com/articles/3250241. -+ -+Turn this option on to have the server actually do -+ASCII mangling on files when in ASCII mode. - - Default: NO - .TP - .B ascii_upload_enable - When enabled, ASCII mode data transfers will be honoured on uploads. -+When disabled, the server will pretend to allow ASCII mode but in fact -+ignore requests to activate it. So the client will think the ASCII mode -+is active and will translate native line terminators to the standard -+.BR -+line terminators for transmission, but the server will not do -+any translation. See the following article for a detailed explanation -+of the behaviour: https://access.redhat.com/articles/3250241. 
-+ -+Turn this option on to have the server actually do -+ASCII mangling on files when in ASCII mode. - - Default: NO - .TP --- -2.13.6 - diff --git a/SOURCES/1538499-mpo-7.6.0-iproute.8.patch b/SOURCES/1538499-mpo-7.6.0-iproute.8.patch new file mode 100644 index 0000000..89c2a5e --- /dev/null +++ b/SOURCES/1538499-mpo-7.6.0-iproute.8.patch @@ -0,0 +1,24 @@ +From 38ecc86a0dfc594b47c5aaec9e7b67be082c6094 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Nikola=20Forr=C3=B3?= +Date: Thu, 21 Jun 2018 11:02:31 +0200 +Subject: [PATCH 1/2] ip-route.8: fix synopsis of ssthresh parameter +--- + iproute/man8/ip-route.8 | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/iproute/man8/ip-route.8 b/iproute/man8/ip-route.8 +index 346fbd0..e85c4e4 100644 +--- a/iproute/man8/ip-route.8 ++++ b/iproute/man8/ip-route.8 +@@ -120,7 +120,7 @@ replace " } " + .B cwnd + .IR NUMBER " ] [ " + .B ssthresh +-.IR REALM " ] [ " ++.IR NUMBER " ] [ " + .B realms + .IR REALM " ] [ " + .B rto_min +-- +2.17.1 + diff --git a/SOURCES/1543420-mpo-7.6.0-tc-vlan.8.patch b/SOURCES/1543420-mpo-7.6.0-tc-vlan.8.patch new file mode 100644 index 0000000..3e0e1ab --- /dev/null +++ b/SOURCES/1543420-mpo-7.6.0-tc-vlan.8.patch @@ -0,0 +1,24 @@ +From 3d73f0746dedba90ef58eece2d1e720bc5019eee Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Nikola=20Forr=C3=B3?= +Date: Thu, 21 Jun 2018 11:06:45 +0200 +Subject: [PATCH 2/2] tc-vlan.8: fix incorrect example +--- + iproute/man8/tc-vlan.8 | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/iproute/man8/tc-vlan.8 b/iproute/man8/tc-vlan.8 +index af3de1c..a526f66 100644 +--- a/iproute/man8/tc-vlan.8 ++++ b/iproute/man8/tc-vlan.8 +@@ -103,7 +103,7 @@ into VLAN ID 123: + #tc qdisc add dev eth0 handle ffff: ingress + #tc filter add dev eth0 parent ffff: pref 11 protocol ip \\ + u32 match ip protocol 1 0xff flowid 1:1 \\ +- u32 match ip src 10.0.0.2 flowid 1:1 \\ ++ match ip src 10.0.0.2 flowid 1:1 \\ + action vlan push id 123 + .EE + .RE +-- +2.17.1 + 
diff --git a/SOURCES/1560191-mpo-7.6.0-madvise.2.patch b/SOURCES/1560191-mpo-7.6.0-madvise.2.patch new file mode 100644 index 0000000..4002832 --- /dev/null +++ b/SOURCES/1560191-mpo-7.6.0-madvise.2.patch @@ -0,0 +1,126 @@ +From 1bcf4b3aaf39cedd42bb5b34a81c6044b8a02d5a Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Nikola=20Forr=C3=B3?= +Date: Tue, 19 Jun 2018 13:13:18 +0200 +Subject: [PATCH] madvise.2: document MADV_WIPEONFORK and MADV_KEEPONFORK +--- + man-pages/en/man2/madvise.2 | 39 +++++++++++++++++++++++++++++++++++++ + man-pages/man2/madvise.2 | 39 +++++++++++++++++++++++++++++++++++++ + 2 files changed, 78 insertions(+) + +diff --git a/man-pages/en/man2/madvise.2 b/man-pages/en/man2/madvise.2 +index 4e693da..8a1c46d 100644 +--- a/man-pages/en/man2/madvise.2 ++++ b/man-pages/en/man2/madvise.2 +@@ -265,6 +265,33 @@ file (see + .BR MADV_DODUMP " (since Linux 3.4)" + Undo the effect of an earlier + .BR MADV_DONTDUMP . ++.TP ++.BR MADV_WIPEONFORK " (since Linux 4.14)" ++.\" commit d2cd9ede6e193dd7d88b6d27399e96229a551b19 ++Present the child process with zero-filled memory in this range after a ++.BR fork (2). ++This is useful in forking servers in order to ensure ++that sensitive per-process data ++(for example, PRNG seeds, cryptographic secrets, and so on) ++is not handed to child processes. ++.IP ++The ++.B MADV_WIPEONFORK ++operation can be applied only to private anonymous pages (see ++.BR mmap (2)). ++.IP ++Within the child created by ++.BR fork (2), ++the ++.B MADV_WIPEONFORK ++setting remains in place on the specified address range. ++This setting is cleared during ++.BR execve (2). ++.TP ++.BR MADV_KEEPONFORK " (since Linux 4.14)" ++.\" commit d2cd9ede6e193dd7d88b6d27399e96229a551b19 ++Undo the effect of an earlier ++.BR MADV_WIPEONFORK . + .SH RETURN VALUE + On success + .BR madvise () +@@ -308,6 +335,18 @@ but the kernel was not configured with + .BR CONFIG_KSM . 
+ .RE + .TP ++.B EINVAL ++.I advice ++is ++.BR MADV_FREE ++or ++.BR MADV_WIPEONFORK ++but the specified address range includes file, Huge TLB, ++.BR MAP_SHARED , ++or ++.BR VM_PFNMAP ++ranges. ++.TP + .B EIO + (for + .BR MADV_WILLNEED ) +diff --git a/man-pages/man2/madvise.2 b/man-pages/man2/madvise.2 +index 4e693da..8a1c46d 100644 +--- a/man-pages/man2/madvise.2 ++++ b/man-pages/man2/madvise.2 +@@ -265,6 +265,33 @@ file (see + .BR MADV_DODUMP " (since Linux 3.4)" + Undo the effect of an earlier + .BR MADV_DONTDUMP . ++.TP ++.BR MADV_WIPEONFORK " (since Linux 4.14)" ++.\" commit d2cd9ede6e193dd7d88b6d27399e96229a551b19 ++Present the child process with zero-filled memory in this range after a ++.BR fork (2). ++This is useful in forking servers in order to ensure ++that sensitive per-process data ++(for example, PRNG seeds, cryptographic secrets, and so on) ++is not handed to child processes. ++.IP ++The ++.B MADV_WIPEONFORK ++operation can be applied only to private anonymous pages (see ++.BR mmap (2)). ++.IP ++Within the child created by ++.BR fork (2), ++the ++.B MADV_WIPEONFORK ++setting remains in place on the specified address range. ++This setting is cleared during ++.BR execve (2). ++.TP ++.BR MADV_KEEPONFORK " (since Linux 4.14)" ++.\" commit d2cd9ede6e193dd7d88b6d27399e96229a551b19 ++Undo the effect of an earlier ++.BR MADV_WIPEONFORK . + .SH RETURN VALUE + On success + .BR madvise () +@@ -308,6 +335,18 @@ but the kernel was not configured with + .BR CONFIG_KSM . + .RE + .TP ++.B EINVAL ++.I advice ++is ++.BR MADV_FREE ++or ++.BR MADV_WIPEONFORK ++but the specified address range includes file, Huge TLB, ++.BR MAP_SHARED , ++or ++.BR VM_PFNMAP ++ranges. 
++.TP + .B EIO + (for + .BR MADV_WILLNEED ) +-- +2.17.1 + diff --git a/SOURCES/1573281-mpo-7.6.0-mlock.2-mlock2.2.patch b/SOURCES/1573281-mpo-7.6.0-mlock.2-mlock2.2.patch new file mode 100644 index 0000000..c3c623b --- /dev/null +++ b/SOURCES/1573281-mpo-7.6.0-mlock.2-mlock2.2.patch @@ -0,0 +1,280 @@ +From 88a7af51d8ef4d9199ff9073bc09eabb5182fe80 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Nikola=20Forr=C3=B3?= +Date: Tue, 19 Jun 2018 13:14:13 +0200 +Subject: [PATCH 2/3] mlock.2, mlock2.2: document mlock2(2) and MCL_ONFAULT +--- + man-pages/man2/mlock.2 | 152 ++++++++++++++++++++++++++++++++++++----- + 1 file changed, 136 insertions(+), 16 deletions(-) + +diff --git a/man-pages/man2/mlock.2 b/man-pages/man2/mlock.2 +index 27189d8..595c9e9 100644 +--- a/man-pages/man2/mlock.2 ++++ b/man-pages/man2/mlock.2 +@@ -23,35 +23,39 @@ + .\" . + .\" %%%LICENSE_END + .\" +-.TH MLOCK 2 2011-09-14 "Linux" "Linux Programmer's Manual" ++.TH MLOCK 2 2015-08-28 "Linux" "Linux Programmer's Manual" + .SH NAME +-mlock, munlock, mlockall, munlockall \- lock and unlock memory ++mlock, mlock2, munlock, mlockall, munlockall \- lock and unlock memory + .SH SYNOPSIS + .nf + .B #include + .sp + .BI "int mlock(const void *" addr ", size_t " len ); ++.BI "int mlock2(const void *" addr ", size_t " len ", int " flags ); + .BI "int munlock(const void *" addr ", size_t " len ); + .sp + .BI "int mlockall(int " flags ); + .B int munlockall(void); + .fi + .SH DESCRIPTION +-.BR mlock () ++.BR mlock (), ++.BR mlock2 (), + and + .BR mlockall () +-respectively lock part or all of the calling process's virtual address ++lock part or all of the calling process's virtual address + space into RAM, preventing that memory from being paged to the + swap area. 
++ + .BR munlock () + and + .BR munlockall () + perform the converse operation, +-respectively unlocking part or all of the calling process's virtual ++unlocking part or all of the calling process's virtual + address space, so that pages in the specified virtual address range may + once more to be swapped out if required by the kernel memory manager. ++ + Memory locking and unlocking are performed in units of whole pages. +-.SS mlock() and munlock() ++.SS mlock(), mlock2(), and munlock() + .BR mlock () + locks pages in the address range starting at + .I addr +@@ -62,6 +66,41 @@ All pages that contain a part of the specified address range are + guaranteed to be resident in RAM when the call returns successfully; + the pages are guaranteed to stay in RAM until later unlocked. + ++.BR mlock2 () ++.\" commit a8ca5d0ecbdde5cc3d7accacbd69968b0c98764e ++.\" commit de60f5f10c58d4f34b68622442c0e04180367f3f ++.\" commit b0f205c2a3082dd9081f9a94e50658c5fa906ff1 ++also locks pages in the specified range starting at ++.I addr ++and continuing for ++.I len ++bytes. ++However, the state of the pages contained in that range after the call ++returns successfully will depend on the value in the ++.I flags ++argument. ++ ++The ++.I flags ++argument can be either 0 or the following constant: ++.TP ++.B MLOCK_ONFAULT ++Lock pages that are currently resident and mark the entire range to have ++pages locked when they are populated by the page fault. ++.PP ++ ++If ++.I flags ++is 0, ++.BR mlock2 () ++behaves exactly the same as ++.BR mlock (). ++ ++Note: currently, there is not a glibc wrapper for ++.BR mlock2 (), ++so it will need to be invoked using ++.BR syscall (2). ++ + .BR munlock () + unlocks pages in the address range starting at + .I addr +@@ -93,9 +132,36 @@ the process. + .B MCL_FUTURE + Lock all pages which will become mapped into the address space of the + process in the future. 
+-These could be for instance new pages required +-by a growing heap and stack as well as new memory mapped files or ++These could be, for instance, new pages required ++by a growing heap and stack as well as new memory-mapped files or + shared memory regions. ++.TP ++.BR MCL_ONFAULT " (since Linux 4.4)" ++Used together with ++.BR MCL_CURRENT , ++.BR MCL_FUTURE , ++or both. ++Mark all current (with ++.BR MCL_CURRENT ) ++or future (with ++.BR MCL_FUTURE ) ++mappings to lock pages when they are faulted in. ++When used with ++.BR MCL_CURRENT , ++all present pages are locked, but ++.BR mlockall () ++will not fault in non-present pages. ++When used with ++.BR MCL_FUTURE , ++all future mappings will be marked to lock pages when they are faulted ++in, but they will not be populated by the lock when the mapping is ++created. ++.B MCL_ONFAULT ++must be used with either ++.B MCL_CURRENT ++or ++.B MCL_FUTURE ++or both. + .PP + If + .B MCL_FUTURE +@@ -148,7 +214,8 @@ to perform the requested operation. + .\"SVr4 documents an additional EAGAIN error code. + .LP + For +-.BR mlock () ++.BR mlock (), ++.BR mlock2 (), + and + .BR munlock (): + .TP +@@ -157,9 +224,9 @@ Some or all of the specified address range could not be locked. + .TP + .B EINVAL + The result of the addition +-.IR start + len ++.IR addr + len + was less than +-.IR start ++.IR addr + (e.g., the addition may have resulted in an overflow). + .TP + .B EINVAL +@@ -172,19 +239,36 @@ Some of the specified address range does not correspond to mapped + pages in the address space of the process. + .LP + For +-.BR mlockall (): ++.BR mlock2 (): + .TP + .B EINVAL + Unknown \fIflags\fP were specified. + .LP + For ++.BR mlockall (): ++.TP ++.B EINVAL ++Unknown \fIflags\fP were specified or ++.B MCL_ONFAULT ++was specified without either ++.B MCL_FUTURE ++or ++.BR MCL_CURRENT . ++.LP ++For + .BR munlockall (): + .TP + .B EPERM + (Linux 2.6.8 and earlier) The caller was not privileged + .RB ( CAP_IPC_LOCK ). 
++.SH VERSIONS ++.BR mlock2 (2) ++is available since Linux 4.4. + .SH CONFORMING TO +-POSIX.1-2001, SVr4. ++POSIX.1-2001, POSIX.1-2008, SVr4. ++ ++mlock2 () ++is Linux specific. + .SH AVAILABILITY + On POSIX systems on which + .BR mlock () +@@ -247,6 +331,15 @@ Memory locks are not inherited by a child created via + and are automatically removed (unlocked) during an + .BR execve (2) + or when the process terminates. ++The ++.BR mlockall () ++.B MCL_FUTURE ++and ++.B MCL_FUTURE | MCL_ONFAULT ++settings are not inherited by a child created via ++.BR fork (2) ++and are cleared during an ++.BR execve (2). + + The memory lock on an address range is automatically removed + if the address range is unmapped via +@@ -254,7 +347,8 @@ if the address range is unmapped via + + Memory locks do not stack, that is, pages which have been locked several times + by calls to +-.BR mlock () ++.BR mlock (), ++.BR mlock2 (), + or + .BR mlockall () + will be unlocked by a single call to +@@ -264,15 +358,40 @@ for the corresponding range or by + Pages which are mapped to several locations or by several processes stay + locked into RAM as long as they are locked at least at one location or by + at least one process. ++ ++If a call to ++.BR mlockall () ++which uses the ++.B MCL_FUTURE ++flag is followed by another call that does not specify this flag, the ++changes made by the ++.B MCL_FUTURE ++call will be lost. ++ ++The ++.BR mlock2 () ++.B MLOCK_ONFAULT ++flag and the ++.BR mlockall () ++.B MCL_ONFAULT ++flag allow efficient memory locking for applications that deal with ++large mappings where only a (small) portion of pages in the mapping are touched. ++In such cases, locking all of the pages in a mapping would incur ++a significant penalty for memory locking. + .SS Linux notes + Under Linux, +-.BR mlock () ++.BR mlock (), ++.BR mlock2 (), + and + .BR munlock () + automatically round + .I addr + down to the nearest page boundary. 
+-However, POSIX.1-2001 allows an implementation to require that ++However, the POSIX.1 specification of ++.BR mlock () ++and ++.BR munlock () ++allows an implementation to require that + .I addr + is page aligned, so portable applications should ensure this. + +@@ -284,6 +403,7 @@ file shows how many kilobytes of memory the process with ID + .I PID + has locked using + .BR mlock (), ++.BR mlock2 (), + .BR mlockall (), + and + .BR mmap (2) +-- +2.17.1 + diff --git a/SOURCES/1577265-mpo-7.6.0-host.conf.5.patch b/SOURCES/1577265-mpo-7.6.0-host.conf.5.patch new file mode 100644 index 0000000..12ba02b --- /dev/null +++ b/SOURCES/1577265-mpo-7.6.0-host.conf.5.patch @@ -0,0 +1,107 @@ +From 9ad552df64b1de224e452d7d788f3b3473dbf945 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Nikola=20Forr=C3=B3?= +Date: Tue, 19 Jun 2018 14:00:17 +0200 +Subject: [PATCH] host.conf.5: remove description of never-implemented spoof + options +--- + man-pages/man5/host.conf.5 | 63 +------------------------------------- + 1 file changed, 1 insertion(+), 62 deletions(-) + +diff --git a/man-pages/man5/host.conf.5 b/man-pages/man5/host.conf.5 +index c85fefe..3f24518 100644 +--- a/man-pages/man5/host.conf.5 ++++ b/man-pages/man5/host.conf.5 +@@ -33,7 +33,7 @@ contains configuration information specific to the resolver library. + It should contain one configuration keyword per line, followed by + appropriate configuration information. + The keywords recognized are +-.IR trim ", " multi ", " nospoof ", " spoof ", and " reorder . ++.IR trim ", " multi ", and " reorder . + These keywords are described below. + .TP + .I trim +@@ -68,52 +68,6 @@ This is + by default, as it may cause a substantial performance loss at sites + with large hosts files. + .TP +-.I nospoof +-Valid values are +-.IR on " and " off . +-If set to +-.IR on , +-the resolv+ library will attempt to prevent hostname spoofing to +-enhance the security of +-.BR rlogin " and " rsh . 
+-It works as follows: after performing a host address lookup, resolv+ +-will perform a hostname lookup for that address. +-If the two hostnames +-do not match, the query will fail. +-The default value is +-.IR off . +-.TP +-.I spoofalert +-Valid values are +-.IR on " and " off . +-If this option is set to +-.I on +-and the +-.I nospoof +-option is also set, resolv+ will log a warning of the error via the +-syslog facility. +-The default value is +-.IR off . +-.TP +-.I spoof +-Valid values are +-.IR off ", " nowarn " and " warn . +-If this option is set to +-.IR off , +-spoofed addresses are permitted and no warnings will be emitted +-via the syslog facility. +-If this option is set to +-.IR warn , +-resolv+ will attempt to prevent hostname spoofing to +-enhance the security and log a warning of the error via the syslog +-facility. +-If this option is set to +-.IR nowarn , +-the resolv+ library will attempt to prevent hostname spoofing to +-enhance the security but not emit warnings via the syslog facility. +-Setting this option to anything else is equal to setting it to +-.IR nowarn . +-.TP + .I reorder + Valid values are + .IR on " and " off . +@@ -135,15 +89,6 @@ override the behavior which is configured in + If set this variable points to a file that should be read instead of + .IR /etc/host.conf . + .TP +-.B RESOLV_SPOOF_CHECK +-Overrides the +-.IR nospoof ", " spoofalert " and " spoof +-commands in the same way as the +-.I spoof +-command is parsed. +-Valid values are +-.IR off ", " nowarn " and " warn . +-.TP + .B RESOLV_MULTI + Overrides the + .I multi +@@ -178,12 +123,6 @@ Resolver configuration file + Local hosts database + .SH NOTES + The following differences exist compared to the original implementation. +-A new command +-.I spoof +-and a new environment variable +-.B RESOLV_SPOOF_CHECK +-can take arguments like +-.IR off ", " nowarn " and " warn . + Line comments can appear anywhere and not only at the beginning of a line. 
+ .SH SEE ALSO + .BR gethostbyname (3), +-- +2.17.1 + diff --git a/SOURCES/1598288-mpo-7.6.2-resolv.conf.5.patch b/SOURCES/1598288-mpo-7.6.2-resolv.conf.5.patch new file mode 100644 index 0000000..c15daea --- /dev/null +++ b/SOURCES/1598288-mpo-7.6.2-resolv.conf.5.patch @@ -0,0 +1,31 @@ +From bab019a04a4d1a4039a504ddd7f7167b326d548c Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Nikola=20Forr=C3=B3?= +Date: Wed, 22 Aug 2018 12:35:19 +0200 +Subject: [PATCH 1/2] resolv.conf.5: document no-reload (RES_NORELOAD) option + +--- + man-pages/man5/resolv.conf.5 | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/man-pages/man5/resolv.conf.5 b/man-pages/man5/resolv.conf.5 +index 2495dde..8dc55c0 100644 +--- a/man-pages/man5/resolv.conf.5 ++++ b/man-pages/man5/resolv.conf.5 +@@ -269,6 +269,14 @@ as if it were a top level domain (TLD). + This option can cause problems if the site has ``localhost'' as a TLD + rather than having localhost on one or more elements of the search list. + This option has no effect if neither RES_DEFNAMES or RES_DNSRCH is set. ++.\" aef16cc8a4c670036d45590877d411a97f01e0cd ++.TP ++.BR no\-reload " (since glibc 2.26)" ++sets ++.BR RES_NORELOAD ++in ++.IR _res.options . ++This option disables automatic reloading of a changed configuration file. + .RE + .LP + The \fIdomain\fP and \fIsearch\fP keywords are mutually exclusive. 
+-- +2.17.1 + diff --git a/SOURCES/1607318-mpo-7.6.2-proc.5.patch b/SOURCES/1607318-mpo-7.6.2-proc.5.patch new file mode 100644 index 0000000..928e500 --- /dev/null +++ b/SOURCES/1607318-mpo-7.6.2-proc.5.patch @@ -0,0 +1,301 @@ +From cfd0231bfc49d6f040f9ec90243ae35e1af216d0 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Nikola=20Forr=C3=B3?= +Date: Wed, 22 Aug 2018 12:35:44 +0200 +Subject: [PATCH 2/2] proc.5: document missing fields in /proc/[pid]/smaps and + /proc/[pid]/status + +--- + man-pages/man5/proc.5 | 198 +++++++++++++++++++++++++++++++++++------- + 1 file changed, 168 insertions(+), 30 deletions(-) + +diff --git a/man-pages/man5/proc.5 b/man-pages/man5/proc.5 +index e4d2fd7..fa8505d 100644 +--- a/man-pages/man5/proc.5 ++++ b/man-pages/man5/proc.5 +@@ -961,28 +961,93 @@ For each of mappings there is a series of lines such as the following: + .in +4n + .nf + +-08048000-080bc000 r-xp 00000000 03:02 13130 /bin/bash +-Size: 464 kB +-Rss: 424 kB +-Shared_Clean: 424 kB +-Shared_Dirty: 0 kB +-Private_Clean: 0 kB +-Private_Dirty: 0 kB ++00400000\-0048a000 r\-xp 00000000 fd:03 960637 /bin/bash ++Size: 552 kB ++Rss: 460 kB ++Pss: 100 kB ++Shared_Clean: 452 kB ++Shared_Dirty: 0 kB ++Private_Clean: 8 kB ++Private_Dirty: 0 kB ++Referenced: 460 kB ++Anonymous: 0 kB ++AnonHugePages: 0 kB ++Swap: 0 kB ++KernelPageSize: 4 kB ++MMUPageSize: 4 kB ++KernelPageSize: 4 kB ++MMUPageSize: 4 kB ++Locked: 0 kB ++VmFlags: rd ex mr mw me dw + + .fi + .in + The first of these lines shows the same information as is displayed + for the mapping in + .IR /proc/[pid]/maps . +-The remaining lines show the size of the mapping, +-the amount of the mapping that is currently resident in RAM, ++The following lines show the size of the mapping, ++the amount of the mapping that is currently resident in RAM ("Rss"), ++the process's proportional share of this mapping ("Pss"), + the number of clean and dirty shared pages in the mapping, + and the number of clean and dirty private pages in the mapping. 
+- +-This file is present only if the +-.B CONFIG_MMU +-kernel configuration +-option is enabled. ++"Referenced" indicates the amount of memory currently marked as ++referenced or accessed. ++"Anonymous" shows the amount of memory ++that does not belong to any file. ++"Swap" shows how much ++would-be-anonymous memory is also used, but out on swap. ++.IP ++The "KernelPageSize" line (available since Linux 2.6.29) ++is the page size used by the kernel to back the virtual memory area. ++This matches the size used by the MMU in the majority of cases. ++However, one counter-example occurs on PPC64 kernels ++whereby a kernel using 64kB as a base page size may still use 4kB ++pages for the MMU on older processors. ++To distinguish the two attributes, the "MMUPageSize" line ++(also available since Linux 2.6.29) ++reports the page size used by the MMU. ++.IP ++The "Locked" indicates whether the mapping is locked in memory ++or not. ++.IP ++The "VmFlags" line (available since Linux 3.8) ++represents the kernel flags associated with the virtual memory area, ++encoded using the following two-letter codes: ++.IP ++ rd - readable ++ wr - writable ++ ex - executable ++ sh - shared ++ mr - may read ++ mw - may write ++ me - may execute ++ ms - may share ++ gd - stack segment grows down ++ pf - pure PFN range ++ dw - disabled write to the mapped file ++ lo - pages are locked in memory ++ io - memory mapped I/O area ++ sr - sequential read advise provided ++ rr - random read advise provided ++ dc - do not copy area on fork ++ de - do not expand area on remapping ++ ac - area is accountable ++ nr - swap space is not reserved for the area ++ ht - area uses huge tlb pages ++ nl - non-linear mapping ++ ar - architecture specific flag ++ dd - do not include area into core dump ++ sd - soft-dirty flag ++ mm - mixed map area ++ hg - huge page advise flag ++ nh - no-huge page advise flag ++ mg - mergeable advise flag ++.IP ++The ++.IR /proc/[pid]/smaps ++file is present only if the ++.B 
CONFIG_PROC_PAGE_MONITOR ++kernel configuration option is enabled. + .TP + .I /proc/[pid]/stat + Status information about the process. +@@ -1300,25 +1365,31 @@ Here's an example: + + .RB "$" " cat /proc/$$/status" + Name: bash ++Umask: 0022 + State: S (sleeping) +-Tgid: 3515 +-Pid: 3515 +-PPid: 3452 ++Tgid: 17248 ++Ngid: 0 ++Pid: 17248 ++PPid: 17200 + TracerPid: 0 + Uid: 1000 1000 1000 1000 + Gid: 100 100 100 100 + FDSize: 256 + Groups: 16 33 100 +-VmPeak: 9136 kB +-VmSize: 7896 kB ++VmPeak: 131168 kB ++VmSize: 131168 kB + VmLck: 0 kB +-VmHWM: 7572 kB +-VmRSS: 6316 kB +-VmData: 5224 kB +-VmStk: 88 kB +-VmExe: 572 kB +-VmLib: 1708 kB +-VmPTE: 20 kB ++VmPin: 0 kB ++VmHWM: 13484 kB ++VmRSS: 13484 kB ++RssAnon: 10264 kB ++RssFile: 3220 kB ++RssShmem: 0 kB ++VmData: 10332 kB ++VmStk: 136 kB ++VmExe: 992 kB ++VmLib: 2104 kB ++VmPTE: 76 kB + Threads: 1 + SigQ: 0/3067 + SigPnd: 0000000000000000 +@@ -1330,6 +1401,9 @@ CapInh: 0000000000000000 + CapPrm: 0000000000000000 + CapEff: 0000000000000000 + CapBnd: ffffffffffffffff ++CapAmb: 0000000000000000 ++Seccomp: 0 ++Speculation_Store_Bypass: vulnerable + Cpus_allowed: 00000001 + Cpus_allowed_list: 0 + Mems_allowed: 1 +@@ -1345,6 +1419,11 @@ The fields are as follows: + .IR Name : + Command run by this process. + .IP * ++.IR Umask : ++Process umask, expressed in octal with a leading zero; see ++.BR umask (2). ++(Since Linux 4.7.) ++.IP * + .IR State : + Current state of the process. + One of +@@ -1360,6 +1439,9 @@ or + .IR Tgid : + Thread group ID (i.e., Process ID). + .IP * ++.IR Ngid : ++NUMA group ID (0 if none; since Linux 3.13). ++.IP * + .IR Pid : + Thread ID (see + .BR gettid (2)). +@@ -1371,7 +1453,7 @@ PID of parent process. + PID of process tracing this process (0 if not being traced). + .IP * + .IR Uid ", " Gid : +-Real, effective, saved set, and file system UIDs (GIDs). ++Real, effective, saved set, and filesystem UIDs (GIDs). + .IP * + .IR FDSize : + Number of file descriptor slots currently allocated. 
+@@ -1387,13 +1469,43 @@ Virtual memory size. + .IP * + .IR VmLck : + Locked memory size (see +-.BR mlock (3)). ++.BR mlock (2)). ++.IP * ++.IR VmPin : ++Pinned memory size ++.\" commit bc3e53f682d93df677dbd5006a404722b3adfe18 ++(since Linux 3.2). ++These are pages that can't be moved because something needs to ++directly access physical memory. + .IP * + .IR VmHWM : + Peak resident set size ("high water mark"). + .IP * + .IR VmRSS : + Resident set size. ++Note that the value here is the sum of ++.IR RssAnon , ++.IR RssFile , ++and ++.IR RssShmem . ++.IP * ++.IR RssAnon : ++Size of resident anonymous memory. ++.\" commit bf9683d6990589390b5178dafe8fd06808869293 ++(since Linux 4.5). ++.IP * ++.IR RssFile : ++Size of resident file mappings. ++.\" commit bf9683d6990589390b5178dafe8fd06808869293 ++(since Linux 4.5). ++.IP * ++.IR RssShmem : ++Size of resident shared memory (includes System V shared memory, ++mappings from ++.BR tmpfs (5), ++and shared anonymous mappings). ++.\" commit bf9683d6990589390b5178dafe8fd06808869293 ++(since Linux 4.5). + .IP * + .IR VmData ", " VmStk ", " VmExe : + Size of data, stack, and text segments. +@@ -1435,9 +1547,35 @@ Masks of capabilities enabled in inheritable, permitted, and effective sets + .IP * + .IR CapBnd : + Capability Bounding set +-(since kernel 2.6.26, see ++(since Linux 2.6.26, see ++.BR capabilities (7)). ++.IP * ++.IR CapAmb : ++Ambient capability set ++(since Linux 4.3, see + .BR capabilities (7)). + .IP * ++.IR Seccomp : ++.\" commit 2f4b3bf6b2318cfaa177ec5a802f4d8d6afbd816 ++Seccomp mode of the process ++(since Linux 3.8, see ++.BR seccomp (2)). ++0 means ++.BR SECCOMP_MODE_DISABLED ; ++1 means ++.BR SECCOMP_MODE_STRICT ; ++2 means ++.BR SECCOMP_MODE_FILTER . ++This field is provided only if the kernel was built with the ++.BR CONFIG_SECCOMP ++kernel configuration option enabled. 
++.IP * ++.IR Speculation_Store_Bypass : ++.\" commit fae1fa0fc6cca8beee3ab8ed71d54f9a78fa3f64 ++Speculation flaw mitigation state ++(since Linux 4.17, see ++.BR prctl (2)). ++.IP * + .IR Cpus_allowed : + Mask of CPUs on which this process may run + (since Linux 2.6.24, see +@@ -1458,7 +1596,7 @@ Same as previous, but in "list format" + (since Linux 2.6.26, see + .BR cpuset (7)). + .IP * +-.IR voluntary_context_switches ", " nonvoluntary_context_switches : ++.IR voluntary_ctxt_switches ", " nonvoluntary_ctxt_switches : + Number of voluntary and involuntary context switches (since Linux 2.6.23). + .RE + .TP +-- +2.17.1 + diff --git a/SOURCES/1619272-mpo-7.6.2-perf_event_open.2.patch b/SOURCES/1619272-mpo-7.6.2-perf_event_open.2.patch new file mode 100644 index 0000000..54da751 --- /dev/null +++ b/SOURCES/1619272-mpo-7.6.2-perf_event_open.2.patch @@ -0,0 +1,3582 @@ +From b4bd8040f471c245455a66c32c8979d3262faa2f Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Nikola=20Forr=C3=B3?= +Date: Wed, 22 Aug 2018 11:11:05 +0200 +Subject: [PATCH 3/3] perf_event_open.2: sync with upstream + +--- + man-pages/man2/perf_event_open.2 | 2448 +++++++++++++++++++++++------- + 1 file changed, 1861 insertions(+), 587 deletions(-) + +diff --git a/man-pages/man2/perf_event_open.2 b/man-pages/man2/perf_event_open.2 +index 2ab3eb9..0f29d96 100644 +--- a/man-pages/man2/perf_event_open.2 ++++ b/man-pages/man2/perf_event_open.2 +@@ -24,19 +24,19 @@ + .\" This document is based on the perf_event.h header file, the + .\" tools/perf/design.txt file, and a lot of bitter experience. 
+ .\" +-.TH PERF_EVENT_OPEN 2 2013-07-16 "Linux" "Linux Programmer's Manual" ++.TH PERF_EVENT_OPEN 2 2018-02-02 "Linux" "Linux Programmer's Manual" + .SH NAME + perf_event_open \- set up performance monitoring + .SH SYNOPSIS + .nf + .B #include + .B #include +-.sp ++.PP + .BI "int perf_event_open(struct perf_event_attr *" attr , + .BI " pid_t " pid ", int " cpu ", int " group_fd , + .BI " unsigned long " flags ); + .fi +- ++.PP + .IR Note : + There is no glibc wrapper for this system call; see NOTES. + .SH DESCRIPTION +@@ -56,7 +56,7 @@ to measure multiple events simultaneously. + Events can be enabled and disabled in two ways: via + .BR ioctl (2) + and via +-.BR prctl (2) . ++.BR prctl (2). + When an event is disabled it does not count or generate overflows but does + continue to exist and maintain its count value. + .PP +@@ -72,58 +72,48 @@ A + .I sampling + event periodically writes measurements to a buffer that can then + be accessed via +-.BR mmap (2) . ++.BR mmap (2). + .SS Arguments +-.P +-The argument +-.I pid +-allows events to be attached to processes in various ways. +-If +-.I pid +-is 0, measurements happen on the current thread, if +-.I pid +-is greater than 0, the process indicated by +-.I pid +-is measured, and if +-.I pid +-is \-1, all processes are counted. +- ++.PP + The +-.I cpu +-argument allows measurements to be specific to a CPU. +-If +-.I cpu +-is greater than or equal to 0, +-measurements are restricted to the specified CPU; +-if +-.I cpu +-is \-1, the events are measured on all CPUs. +-.P +-Note that the combination of +-.IR pid " == \-1" +-and +-.IR cpu " == \-1" +-is not valid. +-.P +-A +-.IR pid " > 0" +-and +-.IR cpu " == \-1" +-setting measures per-process and follows that process to whatever CPU the +-process gets scheduled to. +-Per-process events can be created by any user. +-.P +-A +-.IR pid " == \-1" ++.I pid + and +-.IR cpu " >= 0" +-setting is per-CPU and measures all processes on the specified CPU. 
+-Per-CPU events need the ++.I cpu ++arguments allow specifying which process and CPU to monitor: ++.TP ++.BR "pid == 0" " and " "cpu == \-1" ++This measures the calling process/thread on any CPU. ++.TP ++.BR "pid == 0" " and " "cpu >= 0" ++This measures the calling process/thread only ++when running on the specified CPU. ++.TP ++.BR "pid > 0" " and " "cpu == \-1" ++This measures the specified process/thread on any CPU. ++.TP ++.BR "pid > 0" " and " "cpu >= 0" ++This measures the specified process/thread only ++when running on the specified CPU. ++.TP ++.BR "pid == \-1" " and " "cpu >= 0" ++This measures all processes/threads on the specified CPU. ++This requires + .B CAP_SYS_ADMIN + capability or a + .I /proc/sys/kernel/perf_event_paranoid + value of less than 1. +-.P ++.TP ++.BR "pid == \-1" " and " "cpu == \-1" ++This setting is invalid and will return an error. ++.PP ++When ++.I pid ++is greater than zero, permission to perform this system call ++is governed by a ptrace access mode ++.B PTRACE_MODE_READ_REALCREDS ++check; see ++.BR ptrace (2). ++.PP + The + .I group_fd + argument allows event groups to be created. +@@ -134,7 +124,7 @@ The rest of the group members are created with subsequent + .BR perf_event_open () + calls with + .IR group_fd +-being set to the fd of the group leader. ++being set to the file descriptor of the group leader. + (A single event on its own is created with + .IR group_fd " = \-1" + and is considered to be a group with only 1 member.) +@@ -142,29 +132,51 @@ An event group is scheduled onto the CPU as a unit: it will + be put onto the CPU only if all of the events in the group can be put onto + the CPU. + This means that the values of the member events can be +-meaningfully compared, added, divided (to get ratios), etc., with each ++meaningfully compared\(emadded, divided (to get ratios), and so on\(emwith each + other, since they have counted events for the same set of executed + instructions. 
+-.P ++.PP + The + .I flags + argument is formed by ORing together zero or more of the following values: + .TP ++.BR PERF_FLAG_FD_CLOEXEC " (since Linux 3.14)" ++.\" commit a21b0b354d4ac39be691f51c53562e2c24443d9e ++This flag enables the close-on-exec flag for the created ++event file descriptor, ++so that the file descriptor is automatically closed on ++.BR execve (2). ++Setting the close-on-exec flag at creation time, rather than later with ++.BR fcntl (2), ++avoids potential race conditions where the calling thread invokes ++.BR perf_event_open () ++and ++.BR fcntl (2) ++at the same time as another thread calls ++.BR fork (2) ++then ++.BR execve (2). ++.TP + .BR PERF_FLAG_FD_NO_GROUP +-.\" FIXME The following sentence is unclear +-This flag allows creating an event as part of an event group but +-having no group leader. +-It is unclear why this is useful. +-.\" FIXME So, why is it useful? ++This flag tells the event to ignore the ++.IR group_fd ++parameter except for the purpose of setting up output redirection ++using the ++.B PERF_FLAG_FD_OUTPUT ++flag. + .TP +-.BR PERF_FLAG_FD_OUTPUT +-This flag re-routes the output from an event to the group leader. ++.BR PERF_FLAG_FD_OUTPUT " (broken since Linux 2.6.35)" ++.\" commit ac9721f3f54b27a16c7e1afb2481e7ee95a70318 ++This flag re-routes the event's sampled output to instead ++be included in the mmap buffer of the event specified by ++.IR group_fd . + .TP +-.BR PERF_FLAG_PID_CGROUP " (Since Linux 2.6.39)." ++.BR PERF_FLAG_PID_CGROUP " (since Linux 2.6.39)" ++.\" commit e5d1367f17ba6a6fed5fd8b74e4d5720923e0c25 + This flag activates per-container system-wide monitoring. + A container +-is an abstraction that isolates a set of resources for finer grain +-control (CPUs, memory, etc...). ++is an abstraction that isolates a set of resources for finer-grained ++control (CPUs, memory, etc.).
+ In this mode, the event is measured + only if the thread running on the monitored CPU belongs to the designated + container (cgroup). +@@ -182,52 +194,58 @@ must be passed as the + parameter. + cgroup monitoring is available only + for system-wide events and may therefore require extra permissions. +-.P ++.PP + The + .I perf_event_attr + structure provides detailed configuration information + for the event being created. +- ++.PP + .in +4n +-.nf ++.EX + struct perf_event_attr { +- __u32 type; /* Type of event */ +- __u32 size; /* Size of attribute structure */ +- __u64 config; /* Type-specific configuration */ ++ __u32 type; /* Type of event */ ++ __u32 size; /* Size of attribute structure */ ++ __u64 config; /* Type-specific configuration */ + + union { + __u64 sample_period; /* Period of sampling */ + __u64 sample_freq; /* Frequency of sampling */ + }; + +- __u64 sample_type; /* Specifies values included in sample */ +- __u64 read_format; /* Specifies values returned in read */ +- +- __u64 disabled : 1, /* off by default */ +- inherit : 1, /* children inherit it */ +- pinned : 1, /* must always be on PMU */ +- exclusive : 1, /* only group on PMU */ +- exclude_user : 1, /* don't count user */ +- exclude_kernel : 1, /* don't count kernel */ +- exclude_hv : 1, /* don't count hypervisor */ +- exclude_idle : 1, /* don't count when idle */ +- mmap : 1, /* include mmap data */ +- comm : 1, /* include comm data */ +- freq : 1, /* use freq, not period */ +- inherit_stat : 1, /* per task counts */ +- enable_on_exec : 1, /* next exec enables */ +- task : 1, /* trace fork/exit */ +- watermark : 1, /* wakeup_watermark */ +- precise_ip : 2, /* skid constraint */ +- mmap_data : 1, /* non-exec mmap data */ +- sample_id_all : 1, /* sample_type all events */ +- exclude_host : 1, /* don't count in host */ +- exclude_guest : 1, /* don't count in guest */ +- exclude_callchain_kernel : 1, +- /* exclude kernel callchains */ +- exclude_callchain_user : 1, +- /* exclude user callchains 
*/ +- __reserved_1 : 41; ++ __u64 sample_type; /* Specifies values included in sample */ ++ __u64 read_format; /* Specifies values returned in read */ ++ ++ __u64 disabled : 1, /* off by default */ ++ inherit : 1, /* children inherit it */ ++ pinned : 1, /* must always be on PMU */ ++ exclusive : 1, /* only group on PMU */ ++ exclude_user : 1, /* don't count user */ ++ exclude_kernel : 1, /* don't count kernel */ ++ exclude_hv : 1, /* don't count hypervisor */ ++ exclude_idle : 1, /* don't count when idle */ ++ mmap : 1, /* include mmap data */ ++ comm : 1, /* include comm data */ ++ freq : 1, /* use freq, not period */ ++ inherit_stat : 1, /* per task counts */ ++ enable_on_exec : 1, /* next exec enables */ ++ task : 1, /* trace fork/exit */ ++ watermark : 1, /* wakeup_watermark */ ++ precise_ip : 2, /* skid constraint */ ++ mmap_data : 1, /* non-exec mmap data */ ++ sample_id_all : 1, /* sample_type all events */ ++ exclude_host : 1, /* don't count in host */ ++ exclude_guest : 1, /* don't count in guest */ ++ exclude_callchain_kernel : 1, ++ /* exclude kernel callchains */ ++ exclude_callchain_user : 1, ++ /* exclude user callchains */ ++ mmap2 : 1, /* include mmap with inode data */ ++ comm_exec : 1, /* flag comm events that are ++ due to exec */ ++ use_clockid : 1, /* use clockid for time fields */ ++ context_switch : 1, /* context switch data */ ++ ++ __reserved_1 : 37; + + union { + __u32 wakeup_events; /* wakeup every n events */ +@@ -238,23 +256,31 @@ struct perf_event_attr { + + union { + __u64 bp_addr; /* breakpoint address */ ++ __u64 kprobe_func; /* for perf_kprobe */ ++ __u64 uprobe_path; /* for perf_uprobe */ + __u64 config1; /* extension of config */ + }; + + union { + __u64 bp_len; /* breakpoint length */ ++ __u64 kprobe_addr; /* with kprobe_func == NULL */ ++ __u64 probe_offset; /* for perf_[k,u]probe */ + __u64 config2; /* extension of config1 */ + }; +- __u64 branch_sample_type; /* enum perf_branch_sample_type */ +- __u64 sample_regs_user; /* 
user regs to dump on samples */ +- __u32 sample_stack_user; /* size of stack to dump on ++ __u64 branch_sample_type; /* enum perf_branch_sample_type */ ++ __u64 sample_regs_user; /* user regs to dump on samples */ ++ __u32 sample_stack_user; /* size of stack to dump on + samples */ +- __u32 __reserved_2; /* Align to u64 */ ++ __s32 clockid; /* clock to use for time fields */ ++ __u64 sample_regs_intr; /* regs to dump on samples */ ++ __u32 aux_watermark; /* aux bytes before wakeup */ ++ __u16 sample_max_stack; /* max frames in callchain */ ++ __u16 __reserved_2; /* align to u64 */ + + }; +-.fi ++.EE + .in +- ++.PP + The fields of the + .I perf_event_attr + structure are described in more detail below: +@@ -289,13 +315,15 @@ field definition. + This indicates a "raw" implementation-specific event in the + .IR config " field." + .TP +-.BR PERF_TYPE_BREAKPOINT " (Since Linux 2.6.33)" ++.BR PERF_TYPE_BREAKPOINT " (since Linux 2.6.33)" ++.\" commit 24f1e32c60c45c89a997c73395b69c8af6f0a84e + This indicates a hardware breakpoint as provided by the CPU. + Breakpoints can be read/write accesses to an address as well as + execution of an instruction address. + .TP + .RB "dynamic PMU" +-Since Linux 2.6.39, ++Since Linux 2.6.38, ++.\" commit 2e80a82a49c4c7eca4e35734380f28298ba5db19 + .BR perf_event_open () + can support multiple PMUs. + To enable this, a value exported by the kernel can be used in the +@@ -304,7 +332,7 @@ field to indicate which PMU to use. + The value to use can be found in the sysfs filesystem: + there is a subdirectory per PMU instance under + .IR /sys/bus/event_source/devices . +-In each sub-directory there is a ++In each subdirectory there is a + .I type + file whose content is an integer that can be used in the + .I type +@@ -312,6 +340,17 @@ field. + For instance, + .I /sys/bus/event_source/devices/cpu/type + contains the value for the core CPU PMU, which is usually 4. 
++.TP ++.BR kprobe " and " uprobe " (since Linux 4.17)" ++.\" commit 65074d43fc77bcae32776724b7fa2696923c78e4 ++.\" commit e12f03d7031a977356e3d7b75a68c2185ff8d155 ++.\" commit 33ea4b24277b06dbc55d7f5772a46f029600255e ++These two dynamic PMUs create a kprobe/uprobe and attach it to the ++file descriptor generated by perf_event_open. ++The kprobe/uprobe will be destroyed on the destruction of the file descriptor. ++See fields ++.IR kprobe_func ", " uprobe_path ", " kprobe_addr ", and " probe_offset ++for more details. + .RE + .TP + .I "size" +@@ -322,21 +361,36 @@ Set this using + .I sizeof(struct perf_event_attr) + to allow the kernel to see + the struct size at the time of compilation. +- ++.IP + The related define + .B PERF_ATTR_SIZE_VER0 + is set to 64; this was the size of the first published struct. + .B PERF_ATTR_SIZE_VER1 + is 72, corresponding to the addition of breakpoints in Linux 2.6.33. ++.\" commit cb5d76999029ae7a517cb07dfa732c1b5a934fc2 ++.\" this was added much later when PERF_ATTR_SIZE_VER2 happened ++.\" but the actual attr_size had increased in 2.6.33 + .B PERF_ATTR_SIZE_VER2 + is 80 corresponding to the addition of branch sampling in Linux 3.4. +-.B PERF_ATR_SIZE_VER3 ++.\" commit cb5d76999029ae7a517cb07dfa732c1b5a934fc2 ++.B PERF_ATTR_SIZE_VER3 + is 96 corresponding to the addition + of + .I sample_regs_user + and + .I sample_stack_user + in Linux 3.7. ++.\" commit 1659d129ed014b715b0b2120e6fd929bdd33ed03 ++.B PERF_ATTR_SIZE_VER4 ++is 104 corresponding to the addition of ++.I sample_regs_intr ++in Linux 3.19. ++.\" commit 60e2364e60e86e81bc6377f49779779e6120977f ++.B PERF_ATTR_SIZE_VER5 ++is 112 corresponding to the addition of ++.I aux_watermark ++in Linux 4.1. ++.\" commit 1a5941312414c71dece6717da9a0fa1303127afa + .TP + .I "config" + This specifies which event you want, in conjunction with +@@ -348,13 +402,7 @@ The + fields are also taken into account in cases where 64 bits is not + enough to fully specify the event. 
+ The encoding of these fields are event dependent. +- +-The most significant bit (bit 63) of +-.I config +-signifies CPU-specific (raw) counter configuration data; +-if the most significant bit is unset, the next 7 bits are an event +-type and the rest of the bits are the event identifier. +- ++.IP + There are various ways to set the + .I config + field that are dependent on the value of the previously +@@ -365,7 +413,7 @@ What follows are various possible settings for + .I config + separated out by + .IR type . +- ++.IP + If + .I type + is +@@ -379,12 +427,12 @@ to one of the following: + .TP + .B PERF_COUNT_HW_CPU_CYCLES + Total cycles. +-Be wary of what happens during CPU frequency scaling ++Be wary of what happens during CPU frequency scaling. + .TP + .B PERF_COUNT_HW_INSTRUCTIONS + Retired instructions. + Be careful, these can be affected by various +-issues, most notably hardware interrupt counts ++issues, most notably hardware interrupt counts. + .TP + .B PERF_COUNT_HW_CACHE_REFERENCES + Cache accesses. +@@ -402,8 +450,9 @@ event to calculate cache miss rates. + .TP + .B PERF_COUNT_HW_BRANCH_INSTRUCTIONS + Retired branch instructions. +-Prior to Linux 2.6.34, this used ++Prior to Linux 2.6.35, this used + the wrong event on AMD processors. ++.\" commit f287d332ce835f77a4f5077d2c0ef1e3f9ea42d2 + .TP + .B PERF_COUNT_HW_BRANCH_MISSES + Mispredicted branch instructions. +@@ -411,13 +460,16 @@ Mispredicted branch instructions. + .B PERF_COUNT_HW_BUS_CYCLES + Bus cycles, which can be different from total cycles. + .TP +-.BR PERF_COUNT_HW_STALLED_CYCLES_FRONTEND " (Since Linux 3.0)" ++.BR PERF_COUNT_HW_STALLED_CYCLES_FRONTEND " (since Linux 3.0)" ++.\" commit 8f62242246351b5a4bc0c1f00c0c7003edea128a + Stalled cycles during issue. + .TP +-.BR PERF_COUNT_HW_STALLED_CYCLES_BACKEND " (Since Linux 3.0)" ++.BR PERF_COUNT_HW_STALLED_CYCLES_BACKEND " (since Linux 3.0)" ++.\" commit 8f62242246351b5a4bc0c1f00c0c7003edea128a + Stalled cycles during retirement. 
+ .TP +-.BR PERF_COUNT_HW_REF_CPU_CYCLES " (Since Linux 3.3)" ++.BR PERF_COUNT_HW_REF_CPU_CYCLES " (since Linux 3.3)" ++.\" commit c37e17497e01fc0f5d2d6feb5723b210b3ab8890 + Total cycles; not affected by CPU frequency scaling. + .RE + .IP +@@ -444,6 +496,7 @@ This reports the number of page faults. + This counts context switches. + Until Linux 2.6.34, these were all reported as user-space + events, after that they are reported as happening in the kernel. ++.\" commit e49a5bd38159dfb1928fd25b173bc9de4bbadb21 + .TP + .B PERF_COUNT_SW_CPU_MIGRATIONS + This reports the number of times the process +@@ -457,19 +510,29 @@ These did not require disk I/O to handle. + This counts the number of major page faults. + These required disk I/O to handle. + .TP +-.BR PERF_COUNT_SW_ALIGNMENT_FAULTS " (Since Linux 2.6.33)" ++.BR PERF_COUNT_SW_ALIGNMENT_FAULTS " (since Linux 2.6.33)" ++.\" commit f7d7986060b2890fc26db6ab5203efbd33aa2497 + This counts the number of alignment faults. + These happen when unaligned memory accesses happen; the kernel + can handle these but it reduces performance. + This happens only on some architectures (never on x86). + .TP +-.BR PERF_COUNT_SW_EMULATION_FAULTS " (Since Linux 2.6.33)" ++.BR PERF_COUNT_SW_EMULATION_FAULTS " (since Linux 2.6.33)" ++.\" commit f7d7986060b2890fc26db6ab5203efbd33aa2497 + This counts the number of emulation faults. + The kernel sometimes traps on unimplemented instructions + and emulates them for user space. + This can negatively impact performance. ++.TP ++.BR PERF_COUNT_SW_DUMMY " (since Linux 3.12)" ++.\" commit fa0097ee690693006ab1aea6c01ad3c851b65c77 ++This is a placeholder event that counts nothing. ++Informational sample record types such as mmap or comm ++must be associated with an active event. ++This dummy event allows gathering such records without requiring ++a counting event. 
+ .RE +- ++.PP + .RS + If + .I type +@@ -482,7 +545,7 @@ can be obtained from under debugfs + .I tracing/events/*/*/id + if ftrace is enabled in the kernel. + .RE +- ++.PP + .RS + If + .I type +@@ -492,13 +555,13 @@ then we are measuring a hardware CPU cache event. + To calculate the appropriate + .I config + value use the following equation: ++.PP + .RS 4 + .nf +- + (perf_hw_cache_id) | (perf_hw_cache_op_id << 8) | + (perf_hw_cache_op_result_id << 16) + .fi +-.P ++.PP + where + .I perf_hw_cache_id + is one of: +@@ -522,13 +585,14 @@ for measuring the Instruction TLB + .B PERF_COUNT_HW_CACHE_BPU + for measuring the branch prediction unit + .TP +-.BR PERF_COUNT_HW_CACHE_NODE " (Since Linux 3.0)" ++.BR PERF_COUNT_HW_CACHE_NODE " (since Linux 3.1)" ++.\" commit 89d6c0b5bdbb1927775584dcf532d98b3efe1477 + for measuring local memory accesses + .RE +-.P ++.PP + and + .I perf_hw_cache_op_id +-is one of ++is one of: + .RS 4 + .TP + .B PERF_COUNT_HW_CACHE_OP_READ +@@ -540,10 +604,10 @@ for write accesses + .B PERF_COUNT_HW_CACHE_OP_PREFETCH + for prefetch accesses + .RE +-.P ++.PP + and + .I perf_hw_cache_op_result_id +-is one of ++is one of: + .RS 4 + .TP + .B PERF_COUNT_HW_CACHE_RESULT_ACCESS +@@ -553,7 +617,7 @@ to measure accesses + to measure misses + .RE + .RE +- ++.PP + If + .I type + is +@@ -569,7 +633,7 @@ The libpfm4 library can be used to translate from the name in the + architectural manuals to the raw hex value + .BR perf_event_open () + expects in this field. +- ++.PP + If + .I type + is +@@ -578,23 +642,63 @@ then leave + .I config + set to zero. + Its parameters are set in other places. ++.PP ++If ++.I type ++is ++.BR kprobe ++or ++.BR uprobe , ++set ++.IR retprobe ++(bit 0 of ++.IR config , ++see ++.IR /sys/bus/event_source/devices/[k,u]probe/format/retprobe ) ++for kretprobe/uretprobe. ++See fields ++.IR kprobe_func ", " uprobe_path ", " kprobe_addr ", and " probe_offset ++for more details. 
+ .RE + .TP ++.IR kprobe_func ", " uprobe_path ", " kprobe_addr ", and " probe_offset ++These fields describe the kprobe/uprobe for dynamic PMUs ++.BR kprobe ++and ++.BR uprobe . ++For ++.BR kprobe : ++use ++.I kprobe_func ++and ++.IR probe_offset , ++or use ++.I kprobe_addr ++and leave ++.I kprobe_func ++as NULL. ++For ++.BR uprobe : ++use ++.I uprobe_path ++and ++.IR probe_offset . ++.TP + .IR sample_period ", " sample_freq +-A "sampling" counter is one that generates an interrupt ++A "sampling" event is one that generates an overflow notification + every N events, where N is given by + .IR sample_period . +-A sampling counter has ++A sampling event has + .IR sample_period " > 0." +-When an overflow interrupt occurs, requested data is recorded ++When an overflow occurs, requested data is recorded + in the mmap buffer. + The + .I sample_type +-field controls what data is recorded on each interrupt. +- ++field controls what data is recorded on each overflow. ++.IP + .I sample_freq + can be used if you wish to use frequency rather than period. +-In this case you set the ++In this case, you set the + .I freq + flag. + The kernel will adjust the sampling period +@@ -647,39 +751,106 @@ Records a unique ID for the opened event. + Unlike + .B PERF_SAMPLE_ID + the actual ID is returned, not the group leader. +-This ID is the same as the one returned by PERF_FORMAT_ID. ++This ID is the same as the one returned by ++.BR PERF_FORMAT_ID . + .TP + .B PERF_SAMPLE_RAW + Records additional data, if applicable. + Usually returned by tracepoint events. + .TP +-.BR PERF_SAMPLE_BRANCH_STACK " (Since Linux 3.4)" ++.BR PERF_SAMPLE_BRANCH_STACK " (since Linux 3.4)" ++.\" commit bce38cd53e5ddba9cb6d708c4ef3d04a4016ec7e + This provides a record of recent branches, as provided + by CPU branch sampling hardware (such as Intel Last Branch Record). + Not all hardware supports this feature. +- ++.IP + See the + .I branch_sample_type + field for how to filter which branches are reported. 
+ .TP +-.BR PERF_SAMPLE_REGS_USER " (Since Linux 3.7)" ++.BR PERF_SAMPLE_REGS_USER " (since Linux 3.7)" ++.\" commit 4018994f3d8785275ef0e7391b75c3462c029e56 + Records the current user-level CPU register state + (the values in the process before the kernel was called). + .TP +-.BR PERF_SAMPLE_STACK_USER " (Since Linux 3.7)" ++.BR PERF_SAMPLE_STACK_USER " (since Linux 3.7)" ++.\" commit c5ebcedb566ef17bda7b02686e0d658a7bb42ee7 + Records the user level stack, allowing stack unwinding. + .TP +-.BR PERF_SAMPLE_WEIGHT " (Since Linux 3.10)" ++.BR PERF_SAMPLE_WEIGHT " (since Linux 3.10)" ++.\" commit c3feedf2aaf9ac8bad6f19f5d21e4ee0b4b87e9c + Records a hardware provided weight value that expresses how + costly the sampled event was. + This allows the hardware to highlight expensive events in + a profile. + .TP +-.BR PERF_SAMPLE_DATA_SRC " (Since Linux 3.10)" ++.BR PERF_SAMPLE_DATA_SRC " (since Linux 3.10)" ++.\" commit d6be9ad6c960f43800a6f118932bc8a5a4eadcd1 + Records the data source: where in the memory hierarchy + the data associated with the sampled instruction came from. +-This is only available if the underlying hardware ++This is available only if the underlying hardware + supports this feature. ++.TP ++.BR PERF_SAMPLE_IDENTIFIER " (since Linux 3.12)" ++.\" commit ff3d527cebc1fa3707c617bfe9e74f53fcfb0955 ++Places the ++.B SAMPLE_ID ++value in a fixed position in the record, ++either at the beginning (for sample events) or at the end ++(if a non-sample event). ++.IP ++This was necessary because a sample stream may have ++records from various different event sources with different ++.I sample_type ++settings. ++Parsing the event stream properly was not possible because the ++format of the record was needed to find ++.BR SAMPLE_ID , ++but ++the format could not be found without knowing what ++event the sample belonged to (causing a circular ++dependency). 
++.IP ++The ++.B PERF_SAMPLE_IDENTIFIER ++setting makes the event stream always parsable ++by putting ++.B SAMPLE_ID ++in a fixed location, even though ++it means having duplicate ++.B SAMPLE_ID ++values in records. ++.TP ++.BR PERF_SAMPLE_TRANSACTION " (since Linux 3.13)" ++.\" commit fdfbbd07e91f8fe387140776f3fd94605f0c89e5 ++Records reasons for transactional memory abort events ++(for example, from Intel TSX transactional memory support). ++.IP ++The ++.I precise_ip ++setting must be greater than 0 and a transactional memory abort ++event must be measured or no values will be recorded. ++Also note that some perf_event measurements, such as sampled ++cycle counting, may cause extraneous aborts (by causing an ++interrupt during a transaction). ++.TP ++.BR PERF_SAMPLE_REGS_INTR " (since Linux 3.19)" ++.\" commit 60e2364e60e86e81bc6377f49779779e6120977f ++Records a subset of the current CPU register state ++as specified by ++.IR sample_regs_intr . ++Unlike ++.B PERF_SAMPLE_REGS_USER ++the register values will return kernel register ++state if the overflow happened while kernel ++code is running. ++If the CPU supports hardware sampling of ++register state (i.e., PEBS on Intel x86) and ++.I precise_ip ++is set higher than zero then the register ++values returned are those captured by ++hardware at the time of the sampled ++instruction's retirement. + .RE + .TP + .IR "read_format" +@@ -702,7 +873,7 @@ Adds the 64-bit + .I time_running + field. + This can be used to calculate estimated totals if +-the PMU is overcommitted and multiplexing is happening. ++the PMU is overcommitted and multiplexing is happening. + .TP + .B PERF_FORMAT_ID + Adds a 64-bit unique value that corresponds to the event group. +@@ -720,6 +891,17 @@ If disabled, the event can later be enabled by + .BR prctl (2), + or + .IR enable_on_exec . 
++.IP ++When creating an event group, typically the group leader is initialized ++with ++.I disabled ++set to 1 and any child events are initialized with ++.I disabled ++set to 0. ++Despite ++.I disabled ++being 0, the child events will not start until the group leader ++is enabled. + .TP + .IR "inherit" + The +@@ -729,10 +911,10 @@ tasks as well as the task specified. + This applies only to new children, not to any existing children at + the time the counter is created (nor to any new children of + existing children). +- ++.IP + Inherit does not work for some combinations of +-.IR read_format s, +-such as ++.IR read_format ++values, such as + .BR PERF_FORMAT_GROUP . + .TP + .IR "pinned" +@@ -756,12 +938,19 @@ it should be the only group using the CPU's counters. + In the future this may allow monitoring programs to + support PMU features that need to run alone so that they do not + disrupt other hardware counters. ++.IP ++Note that many unexpected situations may prevent events with the ++.I exclusive ++bit set from ever running. ++This includes any users running a system-wide ++measurement as well as any kernel use of the performance counters ++(including the commonly enabled NMI Watchdog Timer interface). + .TP + .IR "exclude_user" + If this bit is set, the count excludes events that happen in user space. + .TP + .IR "exclude_kernel" +-If this bit is set, the count excludes events that happen in kernel-space. ++If this bit is set, the count excludes events that happen in kernel space. + .TP + .IR "exclude_hv" + If this bit is set, the count excludes events that happen in the +@@ -772,23 +961,42 @@ Extra support is needed for handling hypervisor measurements on most + machines. + .TP + .IR "exclude_idle" +-If set, don't count when the CPU is idle. ++If set, don't count when the CPU is running the idle task. ++While you can currently enable this for any event type, it is ignored ++for all but software events. 
+ .TP + .IR "mmap" + The + .I mmap +-bit enables recording of exec mmap events. ++bit enables generation of ++.B PERF_RECORD_MMAP ++samples for every ++.BR mmap (2) ++call that has ++.B PROT_EXEC ++set. ++This allows tools to notice new executable code being mapped into ++a program (dynamic shared libraries for example) ++so that addresses can be mapped back to the original code. + .TP + .IR "comm" + The + .I comm + bit enables tracking of process command name as modified by the +-.IR exec (2) ++.BR exec (2) + and +-.IR prctl (PR_SET_NAME) +-system calls. +-Unfortunately for tools, +-there is no way to distinguish one system call versus the other. ++.BR prctl (PR_SET_NAME) ++system calls as well as writing to ++.IR /proc/self/comm . ++If the ++.I comm_exec ++flag is also successfully set (possible since Linux 3.16), ++.\" commit 82b897782d10fcc4930c9d4a15b175348fdd2871 ++then the misc flag ++.B PERF_RECORD_MISC_COMM_EXEC ++can be used to differentiate the ++.BR exec (2) ++case from the others. + .TP + .IR "freq" + If this bit is set, then +@@ -814,14 +1022,15 @@ If this bit is set, then + fork/exit notifications are included in the ring buffer. + .TP + .IR "watermark" +-If set, have a sampling interrupt happen when we cross the ++If set, have an overflow notification happen when we cross the + .I wakeup_watermark + boundary. +-Otherwise interrupts happen after ++Otherwise, overflow notifications happen after + .I wakeup_events + samples. + .TP +-.IR "precise_ip" " (Since Linux 2.6.35)" ++.IR "precise_ip" " (since Linux 2.6.35)" ++.\" commit ab608344bcbde4f55ec4cd911b686b0ce3eae076 + This controls the amount of skid. + Skid is how many instructions + execute between an event of interest happening and the kernel +@@ -830,95 +1039,200 @@ Smaller skid is + better and allows more accurate reporting of which events + correspond to which instructions, but hardware is often limited + with how small this can be. 
+- +-The values of this are the following: ++.IP ++The possible values of this field are the following: + .RS +-.TP +-0 - ++.IP 0 3 + .B SAMPLE_IP +-can have arbitrary skid +-.TP +-1 - ++can have arbitrary skid. ++.IP 1 + .B SAMPLE_IP +-must have constant skid +-.TP +-2 - ++must have constant skid. ++.IP 2 + .B SAMPLE_IP +-requested to have 0 skid +-.TP +-3 - ++requested to have 0 skid. ++.IP 3 + .B SAMPLE_IP + must have 0 skid. +-See also ++See also the description of + .BR PERF_RECORD_MISC_EXACT_IP . + .RE + .TP +-.IR "mmap_data" " (Since Linux 2.6.36)" +-The counterpart of the ++.IR "mmap_data" " (since Linux 2.6.36)" ++.\" commit 3af9e859281bda7eb7c20b51879cf43aa788ac2e ++This is the counterpart of the + .I mmap +-field, but enables including data mmap events +-in the ring-buffer. ++field. ++This enables generation of ++.B PERF_RECORD_MMAP ++samples for ++.BR mmap (2) ++calls that do not have ++.B PROT_EXEC ++set (for example data and SysV shared memory). + .TP +-.IR "sample_id_all" " (Since Linux 2.6.38)" +-If set, then TID, TIME, ID, CPU, and STREAM_ID can ++.IR "sample_id_all" " (since Linux 2.6.38)" ++.\" commit c980d1091810df13f21aabbce545fd98f545bbf7 ++If set, then TID, TIME, ID, STREAM_ID, and CPU can + additionally be included in + .RB non- PERF_RECORD_SAMPLE s + if the corresponding + .I sample_type + is selected. ++.IP ++If ++.B PERF_SAMPLE_IDENTIFIER ++is specified, then an additional ID value is included ++as the last value to ease parsing the record stream. ++This may lead to the ++.I id ++value appearing twice. 
++.IP ++The layout is described by this pseudo-structure: ++.IP ++.in +4n ++.EX ++struct sample_id { ++ { u32 pid, tid; } /* if PERF_SAMPLE_TID set */ ++ { u64 time; } /* if PERF_SAMPLE_TIME set */ ++ { u64 id; } /* if PERF_SAMPLE_ID set */ ++ { u64 stream_id;} /* if PERF_SAMPLE_STREAM_ID set */ ++ { u32 cpu, res; } /* if PERF_SAMPLE_CPU set */ ++ { u64 id; } /* if PERF_SAMPLE_IDENTIFIER set */ ++}; ++.EE ++.in + .TP +-.IR "exclude_host" " (Since Linux 3.2)" +-Do not measure time spent in VM host +-.TP +-.IR "exclude_guest" " (Since Linux 3.2)" +-Do not measure time spent in VM guest +-.TP +-.IR "exclude_callchain_kernel" " (Since Linux 3.7)" ++.IR "exclude_host" " (since Linux 3.2)" ++.\" commit a240f76165e6255384d4bdb8139895fac7988799 ++When conducting measurements that include processes running ++VM instances (i.e., have executed a ++.B KVM_RUN ++.BR ioctl (2)), ++only measure events happening inside a guest instance. ++This is only meaningful outside the guests; this setting does ++not change counts gathered inside of a guest. ++Currently, this functionality is x86 only. ++.TP ++.IR "exclude_guest" " (since Linux 3.2)" ++.\" commit a240f76165e6255384d4bdb8139895fac7988799 ++When conducting measurements that include processes running ++VM instances (i.e., have executed a ++.B KVM_RUN ++.BR ioctl (2)), ++do not measure events happening inside guest instances. ++This is only meaningful outside the guests; this setting does ++not change counts gathered inside of a guest. ++Currently, this functionality is x86 only. ++.TP ++.IR "exclude_callchain_kernel" " (since Linux 3.7)" ++.\" commit d077526485d5c9b12fe85d0b2b3b7041e6bc5f91 + Do not include kernel callchains. + .TP +-.IR "exclude_callchain_user" " (Since Linux 3.7)" ++.IR "exclude_callchain_user" " (since Linux 3.7)" ++.\" commit d077526485d5c9b12fe85d0b2b3b7041e6bc5f91 + Do not include user callchains. 
+ .TP ++.IR "mmap2" " (since Linux 3.16)" ++.\" commit 13d7a2410fa637f450a29ecb515ac318ee40c741 ++.\" This is tricky; was committed during 3.12 development ++.\" but right before release was disabled. ++.\" So while you could select mmap2 starting with 3.12 ++.\" it did not work until 3.16 ++.\" commit a5a5ba72843dd05f991184d6cb9a4471acce1005 ++Generate an extended executable mmap record that contains enough ++additional information to uniquely identify shared mappings. ++The ++.I mmap ++flag must also be set for this to work. ++.TP ++.IR "comm_exec" " (since Linux 3.16)" ++.\" commit 82b897782d10fcc4930c9d4a15b175348fdd2871 ++This is purely a feature-detection flag, it does not change ++kernel behavior. ++If this flag can successfully be set, then, when ++.I comm ++is enabled, the ++.B PERF_RECORD_MISC_COMM_EXEC ++flag will be set in the ++.I misc ++field of a comm record header if the rename event being ++reported was caused by a call to ++.BR exec (2). ++This allows tools to distinguish between the various ++types of process renaming. ++.TP ++.IR "use_clockid" " (since Linux 4.1)" ++.\" commit 34f439278cef7b1177f8ce24f9fc81dfc6221d3b ++This allows selecting which internal Linux clock to use ++when generating timestamps via the ++.I clockid ++field. ++This can make it easier to correlate perf sample times with ++timestamps generated by other tools. ++.TP ++.IR "context_switch" " (since Linux 4.3)" ++.\" commit 45ac1403f564f411c6a383a2448688ba8dd705a4 ++This enables the generation of ++.B PERF_RECORD_SWITCH ++records when a context switch occurs. ++It also enables the generation of ++.B PERF_RECORD_SWITCH_CPU_WIDE ++records when sampling in CPU-wide mode. ++This functionality is in addition to existing tracepoint and ++software events for measuring context switches. ++The advantage of this method is that it will give full ++information even with strict ++.I perf_event_paranoid ++settings. 
++.TP + .IR "wakeup_events" ", " "wakeup_watermark" + This union sets how many samples + .RI ( wakeup_events ) + or bytes + .RI ( wakeup_watermark ) +-happen before an overflow signal happens. ++happen before an overflow notification happens. + Which one is used is selected by the + .I watermark +-bitflag. +- ++bit flag. ++.IP + .I wakeup_events +-only counts ++counts only + .B PERF_RECORD_SAMPLE + record types. +-To receive a signal for every incoming ++To receive overflow notification for all + .B PERF_RECORD +-type set ++types choose watermark and set + .I wakeup_watermark + to 1. ++.IP ++Prior to Linux 3.0, setting ++.\" commit f506b3dc0ec454a16d40cab9ee5d75435b39dc50 ++.I wakeup_events ++to 0 resulted in no overflow notifications; ++more recent kernels treat 0 the same as 1. + .TP +-.IR "bp_type" " (Since Linux 2.6.33)" ++.IR "bp_type" " (since Linux 2.6.33)" ++.\" commit 24f1e32c60c45c89a997c73395b69c8af6f0a84e + This chooses the breakpoint type. + It is one of: + .RS + .TP + .BR HW_BREAKPOINT_EMPTY +-no breakpoint ++No breakpoint. + .TP + .BR HW_BREAKPOINT_R +-count when we read the memory location ++Count when we read the memory location. + .TP + .BR HW_BREAKPOINT_W +-count when we write the memory location ++Count when we write the memory location. + .TP + .BR HW_BREAKPOINT_RW +-count when we read or write the memory location ++Count when we read or write the memory location. + .TP + .BR HW_BREAKPOINT_X +-count when we execute code at the memory location +-.LP ++Count when we execute code at the memory location. ++.PP + The values can be combined via a bitwise or, but the + combination of + .B HW_BREAKPOINT_R +@@ -929,21 +1243,23 @@ with + is not allowed. + .RE + .TP +-.IR "bp_addr" " (Since Linux 2.6.33)" +-.I bp_addr +-address of the breakpoint. 
+-For execution breakpoints this is the memory address of the instruction +-of interest; for read and write breakpoints it is the memory address ++.IR "bp_addr" " (since Linux 2.6.33)" ++.\" commit 24f1e32c60c45c89a997c73395b69c8af6f0a84e ++This is the address of the breakpoint. ++For execution breakpoints, this is the memory address of the instruction ++of interest; for read and write breakpoints, it is the memory address + of the memory location of interest. + .TP +-.IR "config1" " (Since Linux 2.6.39)" ++.IR "config1" " (since Linux 2.6.39)" ++.\" commit a7e3ed1e470116c9d12c2f778431a481a6be8ab6 + .I config1 + is used for setting events that need an extra register or otherwise + do not fit in the regular config field. + Raw OFFCORE_EVENTS on Nehalem/Westmere/SandyBridge use this field +-on 3.3 and later kernels. ++on Linux 3.3 and later kernels. + .TP +-.IR "bp_len" " (Since Linux 2.6.33)" ++.IR "bp_len" " (since Linux 2.6.33)" ++.\" commit 24f1e32c60c45c89a997c73395b69c8af6f0a84e + .I bp_len + is the length of the breakpoint being measured if + .I type +@@ -953,100 +1269,167 @@ Options are + .BR HW_BREAKPOINT_LEN_1 , + .BR HW_BREAKPOINT_LEN_2 , + .BR HW_BREAKPOINT_LEN_4 , ++and + .BR HW_BREAKPOINT_LEN_8 . + For an execution breakpoint, set this to + .IR sizeof(long) . + .TP +-.IR "config2" " (Since Linux 2.6.39)" +- ++.IR "config2" " (since Linux 2.6.39)" ++.\" commit a7e3ed1e470116c9d12c2f778431a481a6be8ab6 + .I config2 + is a further extension of the + .I config1 + field. + .TP +-.IR "branch_sample_type" " (Since Linux 3.4)" ++.IR "branch_sample_type" " (since Linux 3.4)" ++.\" commit bce38cd53e5ddba9cb6d708c4ef3d04a4016ec7e + If + .B PERF_SAMPLE_BRANCH_STACK + is enabled, then this specifies what branches to include + in the branch record. ++.IP ++The first part of the value is the privilege level, which ++is a combination of one of the values listed below. 
+ If the user does not set privilege level explicitly, the kernel + will use the event's privilege level. + Event and branch privilege levels do not have to match. +-The value is formed by ORing together zero or more of the following values, +-although +-.B PERF_SAMPLE_BRANCH_ANY +-covers all branch types. + .RS + .TP + .B PERF_SAMPLE_BRANCH_USER +-Branch target is in user space ++Branch target is in user space. + .TP + .B PERF_SAMPLE_BRANCH_KERNEL +-Branch target is in kernel space ++Branch target is in kernel space. + .TP + .B PERF_SAMPLE_BRANCH_HV +-Branch target is in hypervisor ++Branch target is in hypervisor. ++.TP ++.B PERF_SAMPLE_BRANCH_PLM_ALL ++A convenience value that is the three preceding values ORed together. ++.PP ++In addition to the privilege value, at least one or more of the ++following bits must be set. + .TP + .B PERF_SAMPLE_BRANCH_ANY + Any branch type. + .TP + .B PERF_SAMPLE_BRANCH_ANY_CALL +-Any call branch ++Any call branch (includes direct calls, indirect calls, and far jumps). + .TP +-.B PERF_SAMPLE_BRANCH_ANY_RETURN +-Any return branch ++.B PERF_SAMPLE_BRANCH_IND_CALL ++Indirect calls. + .TP +-.BR PERF_SAMPLE_BRANCH_IND_CALL +-Indirect calls ++.BR PERF_SAMPLE_BRANCH_CALL " (since Linux 4.4)" ++.\" commit c229bf9dc179d2023e185c0f705bdf68484c1e73 ++Direct calls. + .TP +-.BR PERF_SAMPLE_BRANCH_PLM_ALL +-User, kernel, and hv ++.B PERF_SAMPLE_BRANCH_ANY_RETURN ++Any return branch. ++.TP ++.BR PERF_SAMPLE_BRANCH_IND_JUMP " (since Linux 4.2)" ++.\" commit c9fdfa14c3792c0160849c484e83aa57afd80ccc ++Indirect jumps. ++.TP ++.BR PERF_SAMPLE_BRANCH_COND " (since Linux 3.16)" ++.\" commit bac52139f0b7ab31330e98fd87fc5a2664951050 ++Conditional branches. ++.TP ++.BR PERF_SAMPLE_BRANCH_ABORT_TX " (since Linux 3.11)" ++.\" commit 135c5612c460f89657c4698fe2ea753f6f667963 ++Transactional memory aborts. 
++.TP ++.BR PERF_SAMPLE_BRANCH_IN_TX " (since Linux 3.11)" ++.\" commit 135c5612c460f89657c4698fe2ea753f6f667963 ++Branch in transactional memory transaction. ++.TP ++.BR PERF_SAMPLE_BRANCH_NO_TX " (since Linux 3.11)" ++.\" commit 135c5612c460f89657c4698fe2ea753f6f667963 ++Branch not in transactional memory transaction. ++.BR PERF_SAMPLE_BRANCH_CALL_STACK " (since Linux 4.1)" ++.\" commit 2c44b1936bb3b135a3fac8b3493394d42e51cf70 ++Branch is part of a hardware-generated call stack. ++This requires hardware support, currently only found ++on Intel x86 Haswell or newer. + .RE + .TP +-.IR "sample_regs_user" " (Since Linux 3.7)" +-This bitmask defines the set of user CPU registers to dump on samples. +-The layout of the register mask is architecture specific and +-described in the kernel header ++.IR "sample_regs_user" " (since Linux 3.7)" ++.\" commit 4018994f3d8785275ef0e7391b75c3462c029e56 ++This bit mask defines the set of user CPU registers to dump on samples. ++The layout of the register mask is architecture-specific and ++is described in the kernel header file + .IR arch/ARCH/include/uapi/asm/perf_regs.h . + .TP +-.IR "sample_stack_user" " (Since Linux 3.7)" ++.IR "sample_stack_user" " (since Linux 3.7)" ++.\" commit c5ebcedb566ef17bda7b02686e0d658a7bb42ee7 + This defines the size of the user stack to dump if + .B PERF_SAMPLE_STACK_USER + is specified. ++.TP ++.IR "clockid" " (since Linux 4.1)" ++.\" commit 34f439278cef7b1177f8ce24f9fc81dfc6221d3b ++If ++.I use_clockid ++is set, then this field selects which internal Linux timer to ++use for timestamps. ++The available timers are defined in ++.IR linux/time.h , ++with ++.BR CLOCK_MONOTONIC , ++.BR CLOCK_MONOTONIC_RAW , ++.BR CLOCK_REALTIME , ++.BR CLOCK_BOOTTIME , ++and ++.B CLOCK_TAI ++currently supported. ++.TP ++.IR "aux_watermark" " (since Linux 4.1)" ++.\" commit 1a5941312414c71dece6717da9a0fa1303127afa ++This specifies how much data is required to trigger a ++.B PERF_RECORD_AUX ++sample. 
++.TP ++.IR "sample_max_stack" " (since Linux 4.8)" ++.\" commit 97c79a38cd454602645f0470ffb444b3b75ce574 ++When ++.I sample_type ++includes ++.BR PERF_SAMPLE_CALLCHAIN , ++this field specifies how many stack frames to report when ++generating the callchain. + .SS Reading results + Once a + .BR perf_event_open () +-file descriptor has been opened, the values ++file descriptor has been opened, the values + of the events can be read from the file descriptor. + The values that are there are specified by the + .I read_format + field in the + .I attr + structure at open time. +- ++.PP + If you attempt to read into a buffer that is not big enough to hold the +-data ++data, the error + .B ENOSPC +-is returned +- ++results. ++.PP + Here is the layout of the data returned by a read: + .IP * 2 + If + .B PERF_FORMAT_GROUP + was specified to allow reading all events in a group at once: +- ++.IP + .in +4n +-.nf ++.EX + struct read_format { + u64 nr; /* The number of events */ + u64 time_enabled; /* if PERF_FORMAT_TOTAL_TIME_ENABLED */ + u64 time_running; /* if PERF_FORMAT_TOTAL_TIME_RUNNING */ +- struct ++ struct { + u64 value; /* The value of the event */ + u64 id; /* if PERF_FORMAT_ID */ + } values[nr]; + }; +-.fi ++.EE + .in + .IP * + If +@@ -1054,33 +1437,33 @@ If + was + .I not + specified: +- ++.IP + .in +4n +-.nf ++.EX + struct read_format { + u64 value; /* The value of the event */ + u64 time_enabled; /* if PERF_FORMAT_TOTAL_TIME_ENABLED */ + u64 time_running; /* if PERF_FORMAT_TOTAL_TIME_RUNNING */ + u64 id; /* if PERF_FORMAT_ID */ + }; +-.fi ++.EE + .in + .PP + The values read are as follows: + .TP + .I nr + The number of events in this file descriptor. +-Only available if ++Available only if + .B PERF_FORMAT_GROUP + was specified. + .TP + .IR time_enabled ", " time_running + Total time the event was enabled and running. +-Normally these are the same. 
+-If more events are started +-than available counter slots on the PMU, then multiplexing ++Normally these values are the same. ++If more events are started ++than available counter slots on the PMU, then multiplexing + happens and events run only part of the time. +-In that case the ++In that case, the + .I time_enabled + and + .I time running +@@ -1090,7 +1473,7 @@ values can be used to scale an estimated value for the count. + An unsigned 64-bit value containing the counter result. + .TP + .I id +-A globally unique value for this particular event, only there if ++A globally unique value for this particular event; only present if + .B PERF_FORMAT_ID + was specified in + .IR read_format . +@@ -1104,45 +1487,57 @@ mmap tracking) + are logged into a ring-buffer. + This ring-buffer is created and accessed through + .BR mmap (2). +- ++.PP + The mmap size should be 1+2^n pages, where the first page is a + metadata page + .RI ( "struct perf_event_mmap_page" ) + that contains various + bits of information such as where the ring-buffer head is. +- +-Before kernel 2.6.39, there is a bug that means you must allocate a mmap ++.PP ++Before kernel 2.6.39, there is a bug that means you must allocate an mmap + ring buffer when sampling even if you do not plan to access it. 
+- ++.PP + The structure of the first metadata mmap page is as follows: +- ++.PP + .in +4n +-.nf ++.EX + struct perf_event_mmap_page { +- __u32 version; /* version number of this structure */ +- __u32 compat_version; /* lowest version this is compat with */ +- __u32 lock; /* seqlock for synchronization */ +- __u32 index; /* hardware counter identifier */ +- __s64 offset; /* add to hardware counter value */ +- __u64 time_enabled; /* time event active */ +- __u64 time_running; /* time event on CPU */ ++ __u32 version; /* version number of this structure */ ++ __u32 compat_version; /* lowest version this is compat with */ ++ __u32 lock; /* seqlock for synchronization */ ++ __u32 index; /* hardware counter identifier */ ++ __s64 offset; /* add to hardware counter value */ ++ __u64 time_enabled; /* time event active */ ++ __u64 time_running; /* time event on CPU */ + union { + __u64 capabilities; +- __u64 cap_usr_time : 1, +- cap_usr_rdpmc : 1, ++ struct { ++ __u64 cap_usr_time / cap_usr_rdpmc / cap_bit0 : 1, ++ cap_bit0_is_deprecated : 1, ++ cap_user_rdpmc : 1, ++ cap_user_time : 1, ++ cap_user_time_zero : 1, ++ }; + }; +- __u16 pmc_width; +- __u16 time_shift; +- __u32 time_mult; +- __u64 time_offset; +- __u64 __reserved[120]; /* Pad to 1k */ +- __u64 data_head; /* head in the data section */ +- __u64 data_tail; /* user-space written tail */ ++ __u16 pmc_width; ++ __u16 time_shift; ++ __u32 time_mult; ++ __u64 time_offset; ++ __u64 __reserved[120]; /* Pad to 1 k */ ++ __u64 data_head; /* head in the data section */ ++ __u64 data_tail; /* user-space written tail */ ++ __u64 data_offset; /* where the buffer starts */ ++ __u64 data_size; /* data buffer size */ ++ __u64 aux_head; ++ __u64 aux_tail; ++ __u64 aux_offset; ++ __u64 aux_size; ++ + } +-.fi ++.EE + .in +- +-The following looks at the fields in the ++.PP ++The following list describes the fields in the + .I perf_event_mmap_page + structure in more detail: + .TP +@@ -1159,8 +1554,9 @@ A seqlock for synchronization. 
+ A unique hardware counter identifier. + .TP + .I offset +-.\" FIXME clarify +-Add this to hardware counter value?? ++When using rdpmc for reads this offset value ++must be added to the one returned by rdpmc to get ++the current total event count. + .TP + .I time_enabled + Time the event was active. +@@ -1168,20 +1564,56 @@ Time the event was active. + .I time_running + Time the event was running. + .TP ++.IR cap_usr_time " / " cap_usr_rdpmc " / " cap_bit0 " (since Linux 3.4)" ++.\" commit c7206205d00ab375839bd6c7ddb247d600693c09 ++There was a bug in the definition of + .I cap_usr_time +-User time capability +-.TP ++and ++.I cap_usr_rdpmc ++from Linux 3.4 until Linux 3.11. ++Both bits were defined to point to the same location, so it was ++impossible to know if ++.I cap_usr_time ++or ++.I cap_usr_rdpmc ++were actually set. ++.IP ++Starting with Linux 3.12, these are renamed to ++.\" commit fa7315871046b9a4c48627905691dbde57e51033 ++.I cap_bit0 ++and you should use the ++.I cap_user_time ++and ++.I cap_user_rdpmc ++fields instead. ++.TP ++.IR cap_bit0_is_deprecated " (since Linux 3.12)" ++.\" commit fa7315871046b9a4c48627905691dbde57e51033 ++If set, this bit indicates that the kernel supports ++the properly separated ++.I cap_user_time ++and ++.I cap_user_rdpmc ++bits. ++.IP ++If not-set, it indicates an older kernel where ++.I cap_usr_time ++and + .I cap_usr_rdpmc ++map to the same bit and thus both features should ++be used with caution. 
++.TP ++.IR cap_user_rdpmc " (since Linux 3.12)" ++.\" commit fa7315871046b9a4c48627905691dbde57e51033 + If the hardware supports user-space read of performance counters + without syscall (this is the "rdpmc" instruction on x86), then + the following code can be used to do a read: +- ++.IP + .in +4n +-.nf ++.EX + u32 seq, time_mult, time_shift, idx, width; + u64 count, enabled, running; + u64 cyc, time_offset; +-s64 pmc = 0; + + do { + seq = pc\->lock; +@@ -1201,45 +1633,59 @@ do { + + if (pc\->cap_usr_rdpmc && idx) { + width = pc\->pmc_width; +- pmc = rdpmc(idx \- 1); ++ count += rdpmc(idx \- 1); + } + + barrier(); + } while (pc\->lock != seq); +-.fi ++.EE + .in + .TP ++.IR cap_user_time " (since Linux 3.12)" ++.\" commit fa7315871046b9a4c48627905691dbde57e51033 ++This bit indicates the hardware has a constant, nonstop ++timestamp counter (TSC on x86). ++.TP ++.IR cap_user_time_zero " (since Linux 3.12)" ++.\" commit fa7315871046b9a4c48627905691dbde57e51033 ++Indicates the presence of ++.I time_zero ++which allows mapping timestamp values to ++the hardware clock. ++.TP + .I pmc_width + If + .IR cap_usr_rdpmc , + this field provides the bit-width of the value + read using the rdpmc or equivalent instruction. + This can be used to sign extend the result like: +- ++.IP + .in +4n +-.nf ++.EX + pmc <<= 64 \- pmc_width; + pmc >>= 64 \- pmc_width; // signed shift right + count += pmc; +-.fi ++.EE + .in + .TP + .IR time_shift ", " time_mult ", " time_offset +- ++.IP + If + .IR cap_usr_time , + these fields can be used to compute the time +-delta since time_enabled (in nanoseconds) using rdtsc or similar. ++delta since ++.I time_enabled ++(in nanoseconds) using rdtsc or similar. 
++.IP + .nf +- + u64 quot, rem; + u64 delta; + quot = (cyc >> time_shift); +- rem = cyc & ((1 << time_shift) \- 1); ++ rem = cyc & (((u64)1 << time_shift) \- 1); + delta = time_offset + quot * time_mult + + ((rem * time_mult) >> time_shift); + .fi +- ++.IP + Where + .IR time_offset , + .IR time_mult , +@@ -1250,8 +1696,8 @@ are read in the + seqcount loop described above. + This delta can then be added to + enabled and possible running (if idx), improving the scaling: ++.IP + .nf +- + enabled += delta; + if (idx) + running += delta; +@@ -1260,25 +1706,102 @@ enabled and possible running (if idx), improving the scaling: + count = quot * enabled + (rem * enabled) / running; + .fi + .TP ++.IR time_zero " (since Linux 3.12)" ++.\" commit fa7315871046b9a4c48627905691dbde57e51033 ++.IP ++If ++.I cap_user_time_zero ++is set, then the hardware clock (the TSC timestamp counter on x86) ++can be calculated from the ++.IR time_zero ", " time_mult ", and " time_shift " values:" ++.IP ++.nf ++ time = timestamp - time_zero; ++ quot = time / time_mult; ++ rem = time % time_mult; ++ cyc = (quot << time_shift) + (rem << time_shift) / time_mult; ++.fi ++.IP ++And vice versa: ++.IP ++.nf ++ quot = cyc >> time_shift; ++ rem = cyc & (((u64)1 << time_shift) - 1); ++ timestamp = time_zero + quot * time_mult + ++ ((rem * time_mult) >> time_shift); ++.fi ++.TP + .I data_head + This points to the head of the data section. + The value continuously increases, it does not wrap. + The value needs to be manually wrapped by the size of the mmap buffer + before accessing the samples. +- +-On SMP-capable platforms, after reading the data_head value, ++.IP ++On SMP-capable platforms, after reading the ++.I data_head ++value, + user space should issue an rmb(). + .TP +-.I data_tail; ++.I data_tail + When the mapping is + .BR PROT_WRITE , + the + .I data_tail + value should be written by user space to reflect the last read data. +-In this case the kernel will not over-write unread data. 
++In this case, the kernel will not overwrite unread data. ++.TP ++.IR data_offset " (since Linux 4.1)" ++.\" commit e8c6deac69629c0cb97c3d3272f8631ef17f8f0f ++Contains the offset of the location in the mmap buffer ++where perf sample data begins. ++.TP ++.IR data_size " (since Linux 4.1)" ++.\" commit e8c6deac69629c0cb97c3d3272f8631ef17f8f0f ++Contains the size of the perf sample region within ++the mmap buffer. ++.TP ++.IR aux_head ", " aux_tail ", " aux_offset ", " aux_size " (since Linux 4.1)" ++.\" commit 45bfb2e50471abbbfd83d40d28c986078b0d24ff ++The AUX region allows mmapping a separate sample buffer for ++high-bandwidth data streams (separate from the main perf sample buffer). ++An example of a high-bandwidth stream is instruction tracing support, ++as is found in newer Intel processors. ++.IP ++To set up an AUX area, first ++.I aux_offset ++needs to be set with an offset greater than ++.IR data_offset + data_size ++and ++.I aux_size ++needs to be set to the desired buffer size. ++The desired offset and size must be page aligned, and the size ++must be a power of two. ++These values are then passed to mmap in order to map the AUX buffer. ++Pages in the AUX buffer are included as part of the ++.BR RLIMIT_MEMLOCK ++resource limit (see ++.BR setrlimit (2)), ++and also as part of the ++.I perf_event_mlock_kb ++allowance. ++.IP ++By default, the AUX buffer will be truncated if it will not fit ++in the available space in the ring buffer. ++If the AUX buffer is mapped as a read-only buffer, then it will ++operate in ring buffer mode where old data will be overwritten ++by new. ++In overwrite mode, it might not be possible to infer where the ++new data began, and it is the consumer's job to disable ++measurement while reading to avoid possible data races. ++.IP ++The ++.IR aux_head " and " aux_tail ++ring buffer pointers have the same behavior and ordering ++rules as the previously described ++.IR data_head " and " data_tail . 
+ .PP + The following 2^n ring-buffer pages have the layout described below. +- ++.PP + If + .I perf_event_attr.sample_id_all + is set, then all event types will +@@ -1288,46 +1811,136 @@ an event took place (TID, TIME, ID, CPU, STREAM_ID) described in + below, it will be stashed just after the + .I perf_event_header + and the fields already present for the existing +-fields, i.e., at the end of the payload. +-That way a newer perf.data +-file will be supported by older perf tools, with these new optional ++fields, that is, at the end of the payload. ++This allows a newer perf.data ++file to be supported by older perf tools, with the new optional + fields being ignored. +- ++.PP + The mmap values start with a header: +- ++.PP + .in +4n +-.nf ++.EX + struct perf_event_header { + __u32 type; + __u16 misc; + __u16 size; + }; +-.fi ++.EE + .in +- ++.PP + Below, we describe the + .I perf_event_header + fields in more detail. ++For ease of reading, ++the fields with shorter descriptions are presented first. + .TP +-.I type ++.I size ++This indicates the size of the record. ++.TP ++.I misc + The +-.I type +-value is one of the below. +-The values in the corresponding record (that follows the header) +-depend on the +-.I type +-selected as shown. +-.RS +-.TP 4 ++.I misc ++field contains additional information about the sample. ++.IP ++The CPU mode can be determined from this value by masking with ++.B PERF_RECORD_MISC_CPUMODE_MASK ++and looking for one of the following (note these are not ++bit masks, only one can be set at a time): ++.RS ++.TP ++.B PERF_RECORD_MISC_CPUMODE_UNKNOWN ++Unknown CPU mode. ++.TP ++.B PERF_RECORD_MISC_KERNEL ++Sample happened in the kernel. ++.TP ++.B PERF_RECORD_MISC_USER ++Sample happened in user code. ++.TP ++.B PERF_RECORD_MISC_HYPERVISOR ++Sample happened in the hypervisor. ++.TP ++.BR PERF_RECORD_MISC_GUEST_KERNEL " (since Linux 2.6.35)" ++.\" commit 39447b386c846bbf1c56f6403c5282837486200f ++Sample happened in the guest kernel. 
++.TP ++.BR PERF_RECORD_MISC_GUEST_USER " (since Linux 2.6.35)" ++.\" commit 39447b386c846bbf1c56f6403c5282837486200f ++Sample happened in guest user code. ++.RE ++.PP ++.RS ++Since the following three statuses are generated by ++different record types, they alias to the same bit: ++.TP ++.BR PERF_RECORD_MISC_MMAP_DATA " (since Linux 3.10)" ++.\" commit 2fe85427e3bf65d791700d065132772fc26e4d75 ++This is set when the mapping is not executable; ++otherwise the mapping is executable. ++.TP ++.BR PERF_RECORD_MISC_COMM_EXEC " (since Linux 3.16)" ++.\" commit 82b897782d10fcc4930c9d4a15b175348fdd2871 ++This is set for a ++.B PERF_RECORD_COMM ++record on kernels more recent than Linux 3.16 ++if a process name change was caused by an ++.BR exec (2) ++system call. ++.TP ++.BR PERF_RECORD_MISC_SWITCH_OUT " (since Linux 4.3)" ++.\" commit 45ac1403f564f411c6a383a2448688ba8dd705a4 ++When a ++.BR PERF_RECORD_SWITCH ++or ++.BR PERF_RECORD_SWITCH_CPU_WIDE ++record is generated, this bit indicates that the ++context switch is away from the current process ++(instead of into the current process). ++.RE ++.PP ++.RS ++In addition, the following bits can be set: ++.TP ++.B PERF_RECORD_MISC_EXACT_IP ++This indicates that the content of ++.B PERF_SAMPLE_IP ++points ++to the actual instruction that triggered the event. ++See also ++.IR perf_event_attr.precise_ip . ++.TP ++.BR PERF_RECORD_MISC_EXT_RESERVED " (since Linux 2.6.35)" ++.\" commit 1676b8a077c352085d52578fb4f29350b58b6e74 ++This indicates there is extended data available (currently not used). ++.TP ++.B PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT ++.\" commit 930e6fcd2bcce9bcd9d4aa7e755678d33f3fe6f4 ++This bit is not set by the kernel. ++It is reserved for the user-space perf utility to indicate that ++.I /proc/[pid]/maps ++parsing was taking too long and was stopped, and thus the mmap ++records may be truncated. ++.RE ++.TP ++.I type ++The ++.I type ++value is one of the below. 
++The values in the corresponding record (that follows the header) ++depend on the ++.I type ++selected as shown. ++.RS ++.TP 4 + .B PERF_RECORD_MMAP + The MMAP events record the + .B PROT_EXEC + mappings so that we can correlate + user-space IPs to code. + They have the following structure: +- ++.IP + .in +4n +-.nf ++.EX + struct { + struct perf_event_header header; + u32 pid, tid; +@@ -1336,20 +1949,38 @@ struct { + u64 pgoff; + char filename[]; + }; +-.fi ++.EE + .in ++.RS ++.TP ++.I pid ++is the process ID. ++.TP ++.I tid ++is the thread ID. ++.TP ++.I addr ++is the address of the allocated memory. ++.I len ++is the length of the allocated memory. ++.I pgoff ++is the page offset of the allocated memory. ++.I filename ++is a string describing the backing of the allocated memory. ++.RE + .TP + .B PERF_RECORD_LOST + This record indicates when events are lost. +- ++.IP + .in +4n +-.nf ++.EX + struct { + struct perf_event_header header; +- u64 id; +- u64 lost; ++ u64 id; ++ u64 lost; ++ struct sample_id sample_id; + }; +-.fi ++.EE + .in + .RS + .TP +@@ -1362,106 +1993,140 @@ is the number of events that were lost. + .TP + .B PERF_RECORD_COMM + This record indicates a change in the process name. +- ++.IP + .in +4n +-.nf ++.EX + struct { + struct perf_event_header header; +- u32 pid, tid; +- char comm[]; ++ u32 pid; ++ u32 tid; ++ char comm[]; ++ struct sample_id sample_id; + }; +-.fi ++.EE + .in ++.RS ++.TP ++.I pid ++is the process ID. ++.TP ++.I tid ++is the thread ID. ++.TP ++.I comm ++is a string containing the new name of the process. ++.RE + .TP + .B PERF_RECORD_EXIT + This record indicates a process exit event. +- ++.IP + .in +4n +-.nf ++.EX + struct { + struct perf_event_header header; +- u32 pid, ppid; +- u32 tid, ptid; +- u64 time; ++ u32 pid, ppid; ++ u32 tid, ptid; ++ u64 time; ++ struct sample_id sample_id; + }; +-.fi ++.EE + .in + .TP + .BR PERF_RECORD_THROTTLE ", " PERF_RECORD_UNTHROTTLE + This record indicates a throttle/unthrottle event. 
+- ++.IP + .in +4n +-.nf ++.EX + struct { + struct perf_event_header header; +- u64 time; +- u64 id; +- u64 stream_id; ++ u64 time; ++ u64 id; ++ u64 stream_id; ++ struct sample_id sample_id; + }; +-.fi ++.EE + .in + .TP + .B PERF_RECORD_FORK + This record indicates a fork event. +- ++.IP + .in +4n +-.nf ++.EX + struct { + struct perf_event_header header; +- u32 pid, ppid; +- u32 tid, ptid; +- u64 time; ++ u32 pid, ppid; ++ u32 tid, ptid; ++ u64 time; ++ struct sample_id sample_id; + }; +-.fi ++.EE + .in + .TP + .B PERF_RECORD_READ + This record indicates a read event. +- ++.IP + .in +4n +-.nf ++.EX + struct { + struct perf_event_header header; +- u32 pid, tid; ++ u32 pid, tid; + struct read_format values; ++ struct sample_id sample_id; + }; +-.fi ++.EE + .in + .TP + .B PERF_RECORD_SAMPLE + This record indicates a sample. +- ++.IP + .in +4n +-.nf ++.EX + struct { + struct perf_event_header header; +- u64 ip; /* if PERF_SAMPLE_IP */ +- u32 pid, tid; /* if PERF_SAMPLE_TID */ +- u64 time; /* if PERF_SAMPLE_TIME */ +- u64 addr; /* if PERF_SAMPLE_ADDR */ +- u64 id; /* if PERF_SAMPLE_ID */ +- u64 stream_id; /* if PERF_SAMPLE_STREAM_ID */ +- u32 cpu, res; /* if PERF_SAMPLE_CPU */ +- u64 period; /* if PERF_SAMPLE_PERIOD */ +- struct read_format v; /* if PERF_SAMPLE_READ */ +- u64 nr; /* if PERF_SAMPLE_CALLCHAIN */ +- u64 ips[nr]; /* if PERF_SAMPLE_CALLCHAIN */ +- u32 size; /* if PERF_SAMPLE_RAW */ +- char data[size]; /* if PERF_SAMPLE_RAW */ +- u64 bnr; /* if PERF_SAMPLE_BRANCH_STACK */ ++ u64 sample_id; /* if PERF_SAMPLE_IDENTIFIER */ ++ u64 ip; /* if PERF_SAMPLE_IP */ ++ u32 pid, tid; /* if PERF_SAMPLE_TID */ ++ u64 time; /* if PERF_SAMPLE_TIME */ ++ u64 addr; /* if PERF_SAMPLE_ADDR */ ++ u64 id; /* if PERF_SAMPLE_ID */ ++ u64 stream_id; /* if PERF_SAMPLE_STREAM_ID */ ++ u32 cpu, res; /* if PERF_SAMPLE_CPU */ ++ u64 period; /* if PERF_SAMPLE_PERIOD */ ++ struct read_format v; ++ /* if PERF_SAMPLE_READ */ ++ u64 nr; /* if PERF_SAMPLE_CALLCHAIN */ ++ u64 ips[nr]; /* if 
PERF_SAMPLE_CALLCHAIN */ ++ u32 size; /* if PERF_SAMPLE_RAW */ ++ char data[size]; /* if PERF_SAMPLE_RAW */ ++ u64 bnr; /* if PERF_SAMPLE_BRANCH_STACK */ + struct perf_branch_entry lbr[bnr]; +- /* if PERF_SAMPLE_BRANCH_STACK */ +- u64 abi; /* if PERF_SAMPLE_REGS_USER */ +- u64 regs[weight(mask)]; +- /* if PERF_SAMPLE_REGS_USER */ +- u64 size; /* if PERF_SAMPLE_STACK_USER */ +- char data[size]; /* if PERF_SAMPLE_STACK_USER */ +- u64 dyn_size; /* if PERF_SAMPLE_STACK_USER */ +- u64 weight; /* if PERF_SAMPLE_WEIGHT */ +- u64 data_src; /* if PERF_SAMPLE_DATA_SRC */ ++ /* if PERF_SAMPLE_BRANCH_STACK */ ++ u64 abi; /* if PERF_SAMPLE_REGS_USER */ ++ u64 regs[weight(mask)]; ++ /* if PERF_SAMPLE_REGS_USER */ ++ u64 size; /* if PERF_SAMPLE_STACK_USER */ ++ char data[size]; /* if PERF_SAMPLE_STACK_USER */ ++ u64 dyn_size; /* if PERF_SAMPLE_STACK_USER && ++ size != 0 */ ++ u64 weight; /* if PERF_SAMPLE_WEIGHT */ ++ u64 data_src; /* if PERF_SAMPLE_DATA_SRC */ ++ u64 transaction; /* if PERF_SAMPLE_TRANSACTION */ ++ u64 abi; /* if PERF_SAMPLE_REGS_INTR */ ++ u64 regs[weight(mask)]; ++ /* if PERF_SAMPLE_REGS_INTR */ + }; +-.fi +-.RS ++.EE ++.RS 4 ++.TP 4 ++.I sample_id ++If ++.B PERF_SAMPLE_IDENTIFIER ++is enabled, a 64-bit unique ID is included. ++This is a duplication of the ++.B PERF_SAMPLE_ID ++.I id ++value, but included at the beginning of the sample ++so parsers can easily obtain the value. + .TP + .I ip + If +@@ -1546,7 +2211,7 @@ If + is enabled, then a 32-bit value indicating size + is included followed by an array of 8-bit values of length size. + The values are padded with 0 to have 64-bit alignment. +- ++.IP + This RAW record data is opaque with respect to the ABI. 
+ The ABI doesn't make any promises with respect to the stability + of its content, it may vary depending +@@ -1563,39 +2228,57 @@ structures which each include the fields: + .RS + .TP + .I from +-indicating the source instruction (may not be a branch) ++This indicates the source instruction (may not be a branch). + .TP + .I to +-the branch target ++The branch target. + .TP + .I mispred +-the branch target was mispredicted ++The branch target was mispredicted. + .TP + .I predicted +-the branch target was predicted. +-.RE ++The branch target was predicted. ++.TP ++.IR in_tx " (since Linux 3.11)" ++.\" commit 135c5612c460f89657c4698fe2ea753f6f667963 ++The branch was in a transactional memory transaction. ++.TP ++.IR abort " (since Linux 3.11)" ++.\" commit 135c5612c460f89657c4698fe2ea753f6f667963 ++The branch was in an aborted transactional memory transaction. ++.TP ++.IR cycles " (since Linux 4.3)" ++.\" commit 71ef3c6b9d4665ee7afbbe4c208a98917dcfc32f ++This reports the number of cycles elapsed since the ++previous branch stack update. ++.PP + The entries are from most to least recent, so the first entry + has the most recent branch. +- ++.PP + Support for +-.I mispred ++.IR mispred , ++.IR predicted , + and +-.I predicted +-is optional; if not supported, both ++.IR cycles ++is optional; if not supported, those + values will be 0. +- ++.PP ++The type of branches recorded is specified by the ++.I branch_sample_type ++field. ++.RE + .TP + .IR abi ", " regs[weight(mask)] + If + .B PERF_SAMPLE_REGS_USER + is enabled, then the user CPU registers are recorded. +- ++.IP + The + .I abi + field is one of + .BR PERF_SAMPLE_REGS_ABI_NONE ", " PERF_SAMPLE_REGS_ABI_32 " or " + .BR PERF_SAMPLE_REGS_ABI_64 . +- ++.IP + The + .I regs + field is an array of the CPU registers that were specified by +@@ -1604,26 +2287,33 @@ the + attr field. + The number of values is the number of bits set in the + .I sample_regs_user +-bitmask. ++bit mask. 
+ .TP + .IR size ", " data[size] ", " dyn_size + If + .B PERF_SAMPLE_STACK_USER +-is enabled, then record the user stack to enable backtracing. ++is enabled, then the user stack is recorded. ++This can be used to generate stack backtraces. + .I size + is the size requested by the user in +-.I stack_user_size ++.I sample_stack_user + or else the maximum record size. + .I data +-is the stack data. ++is the stack data (a raw dump of the memory pointed to by the ++stack pointer at the time of sampling). + .I dyn_size + is the amount of data actually dumped (can be less than + .IR size ). ++Note that ++.I dyn_size ++is omitted if ++.I size ++is 0. + .TP + .I weight + If + .B PERF_SAMPLE_WEIGHT +-is enabled, then a 64 bit value provided by the hardware ++is enabled, then a 64-bit value provided by the hardware + is recorded that indicates how costly the event was. + This allows expensive events to stand out more clearly + in profiles. +@@ -1631,211 +2321,508 @@ in profiles. + .I data_src + If + .B PERF_SAMPLE_DATA_SRC +-is enabled, then a 64 bit value is recorded that is made up of ++is enabled, then a 64-bit value is recorded that is made up of + the following fields: + .RS +-.TP ++.TP 4 + .I mem_op +-type of opcode, a bitwise combination of ++Type of opcode, a bitwise combination of: ++.IP ++.PD 0 ++.RS ++.TP 24 + .B PERF_MEM_OP_NA +-(not available), ++Not available ++.TP + .B PERF_MEM_OP_LOAD +-(load instruction), ++Load instruction ++.TP + .B PERF_MEM_OP_STORE +-(store instruction), ++Store instruction ++.TP + .B PERF_MEM_OP_PFETCH +-(prefetch), and ++Prefetch ++.TP + .B PERF_MEM_OP_EXEC +-(executable code). 
++Executable code ++.RE ++.PD + .TP + .I mem_lvl +-memory hierarchy level hit or miss, a bitwise combination of ++Memory hierarchy level hit or miss, a bitwise combination of ++the following, shifted left by ++.BR PERF_MEM_LVL_SHIFT : ++.IP ++.PD 0 ++.RS ++.TP 24 + .B PERF_MEM_LVL_NA +-(not available), ++Not available ++.TP + .B PERF_MEM_LVL_HIT +-(hit), ++Hit ++.TP + .B PERF_MEM_LVL_MISS +-(miss), ++Miss ++.TP + .B PERF_MEM_LVL_L1 +-(level 1 cache), ++Level 1 cache ++.TP + .B PERF_MEM_LVL_LFB +-(line fill buffer), ++Line fill buffer ++.TP + .B PERF_MEM_LVL_L2 +-(level 2 cache), ++Level 2 cache ++.TP + .B PERF_MEM_LVL_L3 +-(level 3 cache), ++Level 3 cache ++.TP + .B PERF_MEM_LVL_LOC_RAM +-(local DRAM), ++Local DRAM ++.TP + .B PERF_MEM_LVL_REM_RAM1 +-(remote DRAM 1 hop), ++Remote DRAM 1 hop ++.TP + .B PERF_MEM_LVL_REM_RAM2 +-(remote DRAM 2 hops), ++Remote DRAM 2 hops ++.TP + .B PERF_MEM_LVL_REM_CCE1 +-(remote cache 1 hop), ++Remote cache 1 hop ++.TP + .B PERF_MEM_LVL_REM_CCE2 +-(remote cache 2 hops), ++Remote cache 2 hops ++.TP + .B PERF_MEM_LVL_IO +-(I/O memory), and ++I/O memory ++.TP + .B PERF_MEM_LVL_UNC +-(uncached memory). ++Uncached memory ++.RE ++.PD + .TP + .I mem_snoop +-snoop mode, a bitwise combination of ++Snoop mode, a bitwise combination of the following, shifted left by ++.BR PERF_MEM_SNOOP_SHIFT : ++.IP ++.PD 0 ++.RS ++.TP 24 + .B PERF_MEM_SNOOP_NA +-(not available), ++Not available ++.TP + .B PERF_MEM_SNOOP_NONE +-(no snoop), ++No snoop ++.TP + .B PERF_MEM_SNOOP_HIT +-(snoop hit), ++Snoop hit ++.TP + .B PERF_MEM_SNOOP_MISS +-(snoop miss), and ++Snoop miss ++.TP + .B PERF_MEM_SNOOP_HITM +-(snoop hit modified). 
++Snoop hit modified ++.RE ++.PD + .TP + .I mem_lock +-lock instruction, a bitwise combination of ++Lock instruction, a bitwise combination of the following, shifted left by ++.BR PERF_MEM_LOCK_SHIFT : ++.IP ++.PD 0 ++.RS ++.TP 24 + .B PERF_MEM_LOCK_NA +-(not available) and ++Not available ++.TP + .B PERF_MEM_LOCK_LOCKED +-(locked transaction). ++Locked transaction ++.RE ++.PD + .TP + .I mem_dtlb +-tlb access hit or miss, a bitwise combination of ++TLB access hit or miss, a bitwise combination of the following, shifted ++left by ++.BR PERF_MEM_TLB_SHIFT : ++.IP ++.PD 0 ++.RS ++.TP 24 + .B PERF_MEM_TLB_NA +-(not available), ++Not available ++.TP + .B PERF_MEM_TLB_HIT +-(hit), ++Hit ++.TP + .B PERF_MEM_TLB_MISS +-(miss), ++Miss ++.TP + .B PERF_MEM_TLB_L1 +-(level 1 TLB), ++Level 1 TLB ++.TP + .B PERF_MEM_TLB_L2 +-(level 2 TLB), ++Level 2 TLB ++.TP + .B PERF_MEM_TLB_WK +-(hardware walker), and ++Hardware walker ++.TP + .B PERF_MEM_TLB_OS +-(OS fault handler). ++OS fault handler + .RE ++.PD + .RE ++.TP ++.I transaction ++If the ++.B PERF_SAMPLE_TRANSACTION ++flag is set, then a 64-bit field is recorded describing ++the sources of any transactional memory aborts. ++.IP ++The field is a bitwise combination of the following values: ++.RS ++.TP ++.B PERF_TXN_ELISION ++Abort from an elision type transaction (Intel-CPU-specific). ++.TP ++.B PERF_TXN_TRANSACTION ++Abort from a generic transaction. ++.TP ++.B PERF_TXN_SYNC ++Synchronous abort (related to the reported instruction). ++.TP ++.B PERF_TXN_ASYNC ++Asynchronous abort (not related to the reported instruction). ++.TP ++.B PERF_TXN_RETRY ++Retryable abort (retrying the transaction may have succeeded). ++.TP ++.B PERF_TXN_CONFLICT ++Abort due to memory conflicts with other threads. ++.TP ++.B PERF_TXN_CAPACITY_WRITE ++Abort due to write capacity overflow. ++.TP ++.B PERF_TXN_CAPACITY_READ ++Abort due to read capacity overflow. 
+ .RE ++.IP ++In addition, a user-specified abort code can be obtained from ++the high 32 bits of the field by shifting right by ++.B PERF_TXN_ABORT_SHIFT ++and masking with the value ++.BR PERF_TXN_ABORT_MASK . + .TP +-.I misc ++.IR abi ", " regs[weight(mask)] ++If ++.B PERF_SAMPLE_REGS_INTR ++is enabled, then the user CPU registers are recorded. ++.IP + The +-.I misc +-field contains additional information about the sample. +- +-The CPU mode can be determined from this value by masking with +-.B PERF_RECORD_MISC_CPUMODE_MASK +-and looking for one of the following (note these are not +-bit masks, only one can be set at a time): ++.I abi ++field is one of ++.BR PERF_SAMPLE_REGS_ABI_NONE , ++.BR PERF_SAMPLE_REGS_ABI_32 , ++or ++.BR PERF_SAMPLE_REGS_ABI_64 . ++.IP ++The ++.I regs ++field is an array of the CPU registers that were specified by ++the ++.I sample_regs_intr ++attr field. ++The number of values is the number of bits set in the ++.I sample_regs_intr ++bit mask. ++.RE ++.TP ++.B PERF_RECORD_MMAP2 ++This record includes extended information on ++.BR mmap (2) ++calls returning executable mappings. ++The format is similar to that of the ++.B PERF_RECORD_MMAP ++record, but includes extra values that allow uniquely identifying ++shared mappings. ++.IP ++.in +4n ++.EX ++struct { ++ struct perf_event_header header; ++ u32 pid; ++ u32 tid; ++ u64 addr; ++ u64 len; ++ u64 pgoff; ++ u32 maj; ++ u32 min; ++ u64 ino; ++ u64 ino_generation; ++ u32 prot; ++ u32 flags; ++ char filename[]; ++ struct sample_id sample_id; ++}; ++.EE + .RS + .TP +-.B PERF_RECORD_MISC_CPUMODE_UNKNOWN +-Unknown CPU mode. ++.I pid ++is the process ID. + .TP +-.B PERF_RECORD_MISC_KERNEL +-Sample happened in the kernel. ++.I tid ++is the thread ID. + .TP +-.B PERF_RECORD_MISC_USER +-Sample happened in user code. ++.I addr ++is the address of the allocated memory. + .TP +-.B PERF_RECORD_MISC_HYPERVISOR +-Sample happened in the hypervisor. ++.I len ++is the length of the allocated memory. 
+ .TP +-.B PERF_RECORD_MISC_GUEST_KERNEL +-Sample happened in the guest kernel. ++.I pgoff ++is the page offset of the allocated memory. + .TP +-.B PERF_RECORD_MISC_GUEST_USER +-Sample happened in guest user code. ++.I maj ++is the major ID of the underlying device. ++.TP ++.I min ++is the minor ID of the underlying device. ++.TP ++.I ino ++is the inode number. ++.TP ++.I ino_generation ++is the inode generation. ++.TP ++.I prot ++is the protection information. ++.TP ++.I flags ++is the flags information. ++.TP ++.I filename ++is a string describing the backing of the allocated memory. + .RE +- ++.TP ++.BR PERF_RECORD_AUX " (since Linux 4.1)" ++.\" commit 68db7e98c3a6ebe7284b6cf14906ed7c55f3f7f0 ++This record reports that new data is available in the separate ++AUX buffer region. ++.IP ++.in +4n ++.EX ++struct { ++ struct perf_event_header header; ++ u64 aux_offset; ++ u64 aux_size; ++ u64 flags; ++ struct sample_id sample_id; ++}; ++.EE + .RS +-In addition, one of the following bits can be set: + .TP +-.B PERF_RECORD_MISC_MMAP_DATA +-This is set when the mapping is not executable; +-otherwise the mapping is executable. ++.I aux_offset ++offset in the AUX mmap region where the new data begins. + .TP +-.B PERF_RECORD_MISC_EXACT_IP +-This indicates that the content of +-.B PERF_SAMPLE_IP +-points +-to the actual instruction that triggered the event. +-See also +-.IR perf_event_attr.precise_ip . ++.I aux_size ++size of the data made available. + .TP +-.B PERF_RECORD_MISC_EXT_RESERVED +-This indicates there is extended data available (currently not used). ++.I flags ++describes the AUX update. ++.RS ++.TP ++.B PERF_AUX_FLAG_TRUNCATED ++if set, then the data returned was truncated to fit the available ++buffer size. ++.TP ++.B PERF_AUX_FLAG_OVERWRITE ++.\" commit 2023a0d2829e521fe6ad6b9907f3f90bfbf57142 ++if set, then the data returned has overwritten previous data. ++.RE + .RE + .TP +-.I size +-This indicates the size of the record.
++.BR PERF_RECORD_ITRACE_START " (since Linux 4.1)" ++.\" ec0d7729bbaed4b9d2d3fada693278e13a3d1368 ++This record indicates which process has initiated an instruction ++trace event, allowing tools to properly correlate the instruction ++addresses in the AUX buffer with the proper executable. ++.IP ++.in +4n ++.EX ++struct { ++ struct perf_event_header header; ++ u32 pid; ++ u32 tid; ++}; ++.EE ++.RS ++.TP ++.I pid ++process ID of the thread starting an instruction trace. ++.TP ++.I tid ++thread ID of the thread starting an instruction trace. ++.RE ++.TP ++.BR PERF_RECORD_LOST_SAMPLES " (since Linux 4.2)" ++.\" f38b0dbb491a6987e198aa6b428db8692a6480f8 ++When using hardware sampling (such as Intel PEBS) this record ++indicates some number of samples that may have been lost. ++.IP ++.in +4n ++.EX ++struct { ++ struct perf_event_header header; ++ u64 lost; ++ struct sample_id sample_id; ++}; ++.EE ++.RS ++.TP ++.I lost ++the number of potentially lost samples. + .RE +-.SS Signal overflow +-Events can be set to deliver a signal when a threshold is crossed. +-The signal handler is set up using the ++.TP ++.BR PERF_RECORD_SWITCH " (since Linux 4.3)" ++.\" commit 45ac1403f564f411c6a383a2448688ba8dd705a4 ++This record indicates a context switch has happened. ++The ++.B PERF_RECORD_MISC_SWITCH_OUT ++bit in the ++.I misc ++field indicates whether it was a context switch into ++or away from the current process. ++.IP ++.in +4n ++.EX ++struct { ++ struct perf_event_header header; ++ struct sample_id sample_id; ++}; ++.EE ++.TP ++.BR PERF_RECORD_SWITCH_CPU_WIDE " (since Linux 4.3)" ++.\" commit 45ac1403f564f411c6a383a2448688ba8dd705a4 ++As with ++.B PERF_RECORD_SWITCH ++this record indicates a context switch has happened, ++but it only occurs when sampling in CPU-wide mode ++and provides additional information on the process ++being switched to/from.
+The ++.B PERF_RECORD_MISC_SWITCH_OUT ++bit in the ++.I misc ++field indicates whether it was a context switch into ++or away from the current process. ++.IP ++.in +4n ++.EX ++struct { ++ struct perf_event_header header; ++ u32 next_prev_pid; ++ u32 next_prev_tid; ++ struct sample_id sample_id; ++}; ++.EE ++.RS ++.TP ++.I next_prev_pid ++The process ID of the previous (if switching in) ++or next (if switching out) process on the CPU. ++.TP ++.I next_prev_tid ++The thread ID of the previous (if switching in) ++or next (if switching out) thread on the CPU. ++.RE ++.RE ++.SS Overflow handling ++Events can be set to notify when a threshold is crossed, ++indicating an overflow. ++Overflow conditions can be captured by monitoring the ++event file descriptor with + .BR poll (2), + .BR select (2), +-.BR epoll (2) ++or ++.BR epoll (7). ++Alternatively, the overflow events can be captured via a signal handler, ++by enabling I/O signaling on the file descriptor; see the discussion of the ++.BR F_SETOWN + and +-.BR fcntl (2), +-system calls. +- +-To generate signals, sampling must be enabled ++.BR F_SETSIG ++operations in ++.BR fcntl (2). ++.PP ++Overflows are generated only by sampling events + .RI ( sample_period +-must have a non-zero value). +- +-There are two ways to generate signals. +- ++must have a nonzero value). ++.PP ++There are two ways to generate overflow notifications. ++.PP + The first is to set a + .I wakeup_events + or + .I wakeup_watermark +-value that will generate a signal if a certain number of samples ++value that will trigger if a certain number of samples + or bytes have been written to the mmap ring buffer. +-In this case a signal of type ++In this case, + .B POLL_IN +-is sent. +- ++is indicated. ++.PP + The other way is by use of the + .B PERF_EVENT_IOC_REFRESH + ioctl. + This ioctl adds to a counter that decrements each time the event overflows.
+-When non-zero, a ++When nonzero, + .B POLL_IN +-signal is sent on overflow, but +-once the value reaches 0, a signal is sent of type ++is indicated, but ++once the counter reaches 0 + .B POLL_HUP +-and ++is indicated and + the underlying event is disabled. +- +-Note: on newer kernels (definitely noticed with 3.2) +-.\" FIXME(Vince) : Find out when this was introduced +-a signal is provided for every overflow, even if +-.I wakeup_events +-is not set. ++.PP ++Refreshing an event group leader refreshes all siblings and ++refreshing with a parameter of 0 currently enables infinite ++refreshes; ++these behaviors are unsupported and should not be relied on. ++.\" See https://lkml.org/lkml/2011/5/24/337 ++.PP ++Starting with Linux 3.18, ++.\" commit 179033b3e064d2cd3f5f9945e76b0a0f0fbf4883 ++.B POLL_HUP ++is indicated if the event being monitored is attached to a different ++process and that process exits. + .SS rdpmc instruction + Starting with Linux 3.4 on x86, you can use the ++.\" commit c7206205d00ab375839bd6c7ddb247d600693c09 + .I rdpmc + instruction to get low-latency reads without having to enter the kernel. + Note that using + .I rdpmc + is not necessarily faster than other methods for reading event values. +- ++.PP + Support for this can be detected with the + .I cap_usr_rdpmc + field in the mmap page; documentation on how + to calculate event values can be found in that section. ++.PP ++Originally, when rdpmc support was enabled, any process (not just ones ++with an active perf event) could use the rdpmc instruction to access ++the counters. ++Starting with Linux 4.0, ++.\" 7911d3f7af14a614617e38245fedf98a724e46a9 ++rdpmc support is only allowed if an event is currently enabled ++in a process's context. ++To restore the old behavior, write the value 2 to ++.IR /sys/devices/cpu/rdpmc . 
+ .SS perf_event ioctl calls + .PP + Various ioctls act on + .BR perf_event_open () +-file descriptors ++file descriptors: + .TP + .B PERF_EVENT_IOC_ENABLE +-Enables the individual event or event group specified by the ++This enables the individual event or event group specified by the + file descriptor argument. +- ++.IP + If the + .B PERF_IOC_FLAG_GROUP + bit is set in the ioctl argument, then all events in a group are +@@ -1843,16 +2830,16 @@ enabled, even if the event specified is not the group leader + (but see BUGS). + .TP + .B PERF_EVENT_IOC_DISABLE +-Disables the individual counter or event group specified by the ++This disables the individual counter or event group specified by the + file descriptor argument. +- ++.IP + Enabling or disabling the leader of a group enables or disables the + entire group; that is, while the group leader is disabled, none of the + counters in the group will count. + Enabling or disabling a member of a group other than the leader + affects only that counter; disabling a non-leader + stops that counter from counting but doesn't affect any other counter. +- ++.IP + If the + .B PERF_IOC_FLAG_GROUP + bit is set in the ioctl argument, then all events in a group are +@@ -1865,11 +2852,11 @@ to enable a counter for a number of overflows specified by the argument, + after which it is disabled. + Subsequent calls of this ioctl add the argument value to the current + count. +-A signal with ++An overflow notification with + .B POLL_IN + set will happen on each overflow until the +-count reaches 0; when that happens a signal with +-POLL_HUP ++count reaches 0; when that happens a notification with ++.B POLL_HUP + set is sent and the event is disabled. + Using an argument of 0 is considered undefined behavior. + .TP +@@ -1882,7 +2869,7 @@ multiplexing + or + .I time_running + values. 
+- ++.IP + If the + .B PERF_IOC_FLAG_GROUP + bit is set in the ioctl argument, then all events in a group are +@@ -1890,63 +2877,168 @@ reset, even if the event specified is not the group leader + (but see BUGS). + .TP + .B PERF_EVENT_IOC_PERIOD +-IOC_PERIOD is the command to update the period; it +-does not update the current period but instead defers until next. +- ++This updates the overflow period for the event. ++.IP ++Since Linux 3.7 (on ARM) ++.\" commit 3581fe0ef37ce12ac7a4f74831168352ae848edc ++and Linux 3.14 (all other architectures), ++.\" commit bad7192b842c83e580747ca57104dd51fe08c223 ++the new period takes effect immediately. ++On older kernels, the new period did not take effect until ++after the next overflow. ++.IP + The argument is a pointer to a 64-bit value containing the + desired new period. ++.IP ++Prior to Linux 2.6.36, ++.\" commit ad0cf3478de8677f720ee06393b3147819568d6a ++this ioctl always failed due to a bug ++in the kernel. + .TP + .B PERF_EVENT_IOC_SET_OUTPUT + This tells the kernel to report event notifications to the specified + file descriptor rather than the default one. + The file descriptors must all be on the same CPU. +- ++.IP + The argument specifies the desired file descriptor, or \-1 if + output should be ignored. + .TP +-.BR PERF_EVENT_IOC_SET_FILTER " (Since Linux 2.6.33)" ++.BR PERF_EVENT_IOC_SET_FILTER " (since Linux 2.6.33)" ++.\" commit 6fb2915df7f0747d9044da9dbff5b46dc2e20830 + This adds an ftrace filter to this event. +- ++.IP + The argument is a pointer to the desired ftrace filter. +-.SS Using prctl +-A process can enable or disable all the event groups that are +-attached to it using the ++.TP ++.BR PERF_EVENT_IOC_ID " (since Linux 3.12)" ++.\" commit cf4957f17f2a89984915ea808876d9c82225b862 ++This returns the event ID value for the given event file descriptor. ++.IP ++The argument is a pointer to a 64-bit unsigned integer ++to hold the result. 
++.TP ++.BR PERF_EVENT_IOC_SET_BPF " (since Linux 4.1)" ++.\" commit 2541517c32be2531e0da59dfd7efc1ce844644f5 ++This allows attaching a Berkeley Packet Filter (BPF) ++program to an existing kprobe tracepoint event. ++You need ++.B CAP_SYS_ADMIN ++privileges to use this ioctl. ++.IP ++The argument is a BPF program file descriptor that was created by ++a previous ++.BR bpf (2) ++system call. ++.TP ++.BR PERF_EVENT_IOC_PAUSE_OUTPUT " (since Linux 4.7)" ++.\" commit 86e7972f690c1017fd086cdfe53d8524e68c661c ++This allows pausing and resuming the event's ring-buffer. ++A paused ring-buffer does not prevent generation of samples, ++but simply discards them. ++The discarded samples are considered lost, and cause a ++.BR PERF_RECORD_LOST ++sample to be generated when possible. ++An overflow signal may still be triggered by the discarded sample ++even though the ring-buffer remains empty. ++.IP ++The argument is an unsigned 32-bit integer. ++A nonzero value pauses the ring-buffer, while a ++zero value resumes the ring-buffer. ++.TP ++.BR PERF_EVENT_IOC_MODIFY_ATTRIBUTES " (since Linux 4.17)" ++.\" commit 32ff77e8cc9e66cc4fb38098f64fd54cc8f54573 ++This allows modifying an existing event without the overhead ++of closing and reopening a new event. ++Currently this is supported only for breakpoint events. ++.IP ++The argument is a pointer to a ++.I perf_event_attr ++structure containing the updated event settings. ++.TP ++.BR PERF_EVENT_IOC_QUERY_BPF " (since Linux 4.16)" ++.\" commit f371b304f12e31fe30207c41ca7754564e0ea4dc ++This allows querying which Berkeley Packet Filter (BPF) ++programs are attached to an existing kprobe tracepoint. ++You can only attach one BPF program per event, but you can ++have multiple events attached to a tracepoint. ++Querying this value on one tracepoint event returns the IDs ++of all BPF programs in all events attached to the tracepoint. ++You need ++.B CAP_SYS_ADMIN ++privileges to use this ioctl.
++.IP ++The argument is a pointer to a structure ++.in +4n ++.EX ++struct perf_event_query_bpf { ++ __u32 ids_len; ++ __u32 prog_cnt; ++ __u32 ids[0]; ++}; ++.EE ++.IP ++The ++.I ids_len ++field indicates the number of ids that can fit in the provided ++.I ids ++array. ++The ++.I prog_cnt ++value is filled in by the kernel with the number of attached ++BPF programs. ++The ++.I ids ++array is filled with the id of each attached BPF program. ++If there are more programs than will fit in the array, then the ++kernel will return ++.B ENOSPC ++and ++.I ids_len ++will indicate the number of program IDs that were successfully copied. ++.\" ++.SS Using prctl(2) ++A process can enable or disable all currently open event groups ++using the + .BR prctl (2) + .B PR_TASK_PERF_EVENTS_ENABLE + and + .B PR_TASK_PERF_EVENTS_DISABLE + operations. +-This applies to all counters on the current process, whether created by +-this process or by another, and does not affect any counters that this +-process has created on other processes. +-It enables or disables only +-the group leaders, not any other members in the groups. ++This applies only to events created locally by the calling process. ++This does not apply to events created by other processes attached ++to the calling process or inherited events from a parent process. ++Only group leaders are enabled and disabled, ++not any other members of the groups. + .SS perf_event related configuration files ++.PP + Files in + .I /proc/sys/kernel/ + .RS 4 + .TP + .I /proc/sys/kernel/perf_event_paranoid +- + The + .I perf_event_paranoid + file can be set to restrict access to the performance counters. +- +-2 - only allow user-space measurements +- +-1 - (default) allow both kernel and user measurements +- +-0 - allow access to CPU-specific data but not raw tracepoint samples +- +-\-1 - no restrictions +- ++.IP ++.PD 0 ++.RS ++.IP 2 4 ++allow only user-space measurements (default since Linux 4.6). 
++.\" default changed in commit 0161028b7c8aebef64194d3d73e43bc3b53b5c66 ++.IP 1 ++allow both kernel and user measurements (default before Linux 4.6). ++.IP 0 ++allow access to CPU-specific data but not raw tracepoint samples. ++.IP \-1 ++no restrictions. ++.RE ++.PD ++.IP + The existence of the + .I perf_event_paranoid + file is the official method for determining if a kernel supports + .BR perf_event_open (). + .TP + .I /proc/sys/kernel/perf_event_max_sample_rate +- + This sets the maximum sample rate. + Setting this too high can allow + users to sample at a rate that impacts overall machine performance +@@ -1954,71 +3046,96 @@ and potentially lock up the machine. + The default value is + 100000 (samples per second). + .TP ++.I /proc/sys/kernel/perf_event_max_stack ++.\" Introduced in c5dfd78eb79851e278b7973031b9ca363da87a7e ++This file sets the maximum depth of stack frame entries reported ++when generating a call trace. ++.TP + .I /proc/sys/kernel/perf_event_mlock_kb +- +-Maximum number of pages an unprivileged user can mlock (2) . ++Maximum number of pages an unprivileged user can ++.BR mlock (2). + The default is 516 (kB). +- + .RE ++.PP + Files in + .I /sys/bus/event_source/devices/ ++.PP + .RS 4 +-Since Linux 2.6.34 the kernel supports having multiple PMUs ++Since Linux 2.6.34, the kernel supports having multiple PMUs + available for monitoring. + Information on how to program these PMUs can be found under + .IR /sys/bus/event_source/devices/ . + Each subdirectory corresponds to a different PMU. + .TP +-.IR /sys/bus/event_source/devices/*/type " (Since Linux 2.6.38)" ++.IR /sys/bus/event_source/devices/*/type " (since Linux 2.6.38)" ++.\" commit abe43400579d5de0078c2d3a760e6598e183f871 + This contains an integer that can be used in the + .I type +-field of perf_event_attr to indicate you wish to use this PMU. ++field of ++.I perf_event_attr ++to indicate that you wish to use this PMU. 
+ .TP +-.IR /sys/bus/event_source/devices/*/rdpmc " (Since Linux 3.4)" ++.IR /sys/bus/event_source/devices/cpu/rdpmc " (since Linux 3.4)" ++.\" commit 0c9d42ed4cee2aa1dfc3a260b741baae8615744f + If this file is 1, then direct user-space access to the + performance counter registers is allowed via the rdpmc instruction. + This can be disabled by echoing 0 to the file. +-.TP +-.IR /sys/bus/event_source/devices/*/format/ " (Since Linux 3.4)" +-This sub-directory contains information on the architecture-specific +-sub-fields available for programming the various ++.IP ++As of Linux 4.0 ++.\" a66734297f78707ce39d756b656bfae861d53f62 ++.\" 7911d3f7af14a614617e38245fedf98a724e46a9 ++the behavior has changed, so that 1 now means only allow access ++to processes with active perf events, with 2 indicating the old ++allow-anyone-access behavior. ++.TP ++.IR /sys/bus/event_source/devices/*/format/ " (since Linux 3.4)" ++.\" commit 641cc938815dfd09f8fa1ec72deb814f0938ac33 ++This subdirectory contains information on the architecture-specific ++subfields available for programming the various + .I config +-fields in the perf_event_attr struct. +- ++fields in the ++.I perf_event_attr ++struct. ++.IP + The content of each file is the name of the config field, followed + by a colon, followed by a series of integer bit ranges separated by + commas. + For example, the file + .I event + may contain the value +-.I config1:1,6-10,44 +-which indicates that event is an attribute that occupies bits 1,6-10, and 44 +-of perf_event_attr::config1. ++.I config1:1,6\-10,44 ++which indicates that event is an attribute that occupies bits 1,6\(en10, and 44 ++of ++.IR perf_event_attr::config1 . + .TP +-.IR /sys/bus/event_source/devices/*/events/ " (Since Linux 3.4)" +-This sub-directory contains files with pre-defined events. ++.IR /sys/bus/event_source/devices/*/events/ " (since Linux 3.4)" ++.\" commit 641cc938815dfd09f8fa1ec72deb814f0938ac33 ++This subdirectory contains files with predefined events. 
+ The contents are strings describing the event settings + expressed in terms of the fields found in the previously mentioned + .I ./format/ + directory. + These are not necessarily complete lists of all events supported by + a PMU, but usually a subset of events deemed useful or interesting. +- ++.IP + The content of each file is a list of attribute names + separated by commas. + Each entry has an optional value (either hex or decimal). +-If no value is specified than it is assumed to be a single-bit ++If no value is specified, then it is assumed to be a single-bit + field with a value of 1. + An example entry may look like this: +-.I event=0x2,inv,ldlat=3 ++.IR event=0x2,inv,ldlat=3 . + .TP + .I /sys/bus/event_source/devices/*/uevent + This file is the standard kernel device interface + for injecting hotplug events. + .TP +-.IR /sys/bus/event_source/devices/*/cpumask " (Since Linux 3.7)" +-The cpumask file contains a comma-separated list of integers that +-indicate a representative cpu number for each socket (package) ++.IR /sys/bus/event_source/devices/*/cpumask " (since Linux 3.7)" ++.\" commit 314d9f63f385096580e9e2a06eaa0745d92fe4ac ++The ++.I cpumask ++file contains a comma-separated list of integers that ++indicate a representative CPU number for each socket (package) + on the motherboard. + This is needed when setting up uncore or northbridge events, as + those PMUs present socket-wide events. +@@ -2030,34 +3147,172 @@ returns the new file descriptor, or \-1 if an error occurred + .I errno + is set appropriately). + .SH ERRORS ++The errors returned by ++.BR perf_event_open () ++can be inconsistent, and may ++vary across processor architectures and performance monitoring units. ++.TP ++.B E2BIG ++Returned if the ++.I perf_event_attr ++.I size ++value is too small ++(smaller than ++.BR PERF_ATTR_SIZE_VER0 ), ++too big (larger than the page size), ++or larger than the kernel supports and the extra bytes are not zero. 
++When ++.B E2BIG ++is returned, the ++.I perf_event_attr ++.I size ++field is overwritten by the kernel to be the size of the structure ++it was expecting. ++.TP ++.B EACCES ++Returned when the requested event requires ++.B CAP_SYS_ADMIN ++permissions (or a more permissive perf_event paranoid setting). ++Some common cases where an unprivileged process ++may encounter this error: ++attaching to a process owned by a different user; ++monitoring all processes on a given CPU (i.e., specifying the ++.I pid ++argument as \-1); ++and not setting ++.I exclude_kernel ++when the paranoid setting requires it. ++.TP ++.B EBADF ++Returned if the ++.I group_fd ++file descriptor is not valid, or, if ++.B PERF_FLAG_PID_CGROUP ++is set, ++the cgroup file descriptor in ++.I pid ++is not valid. ++.TP ++.BR EBUSY " (since Linux 4.1)" ++.\" bed5b25ad9c8a2f5d735ef0bc746ec870c01c1b0 ++Returned if another event already has exclusive ++access to the PMU. ++.TP ++.B EFAULT ++Returned if the ++.I attr ++pointer points at an invalid memory address. + .TP + .B EINVAL +-Returned if the specified event is not available. ++Returned if the specified event is invalid. ++There are many possible reasons for this. ++A not-exhaustive list: ++.I sample_freq ++is higher than the maximum setting; ++the ++.I cpu ++to monitor does not exist; ++.I read_format ++is out of range; ++.I sample_type ++is out of range; ++the ++.I flags ++value is out of range; ++.I exclusive ++or ++.I pinned ++set and the event is not a group leader; ++the event ++.I config ++values are out of range or set reserved bits; ++the generic event selected is not supported; or ++there is not enough room to add the selected event. ++.TP ++.B EMFILE ++Each opened event uses one file descriptor. ++If a large number of events are opened, ++the per-process limit on the number of open file descriptors will be reached, ++and no more events can be created. 
++.TP ++.B ENODEV ++Returned when the event involves a feature not supported ++by the current CPU. ++.TP ++.B ENOENT ++Returned if the ++.I type ++setting is not valid. ++This error is also returned for ++some unsupported generic events. + .TP + .B ENOSPC + Prior to Linux 3.3, if there was not enough room for the event, ++.\" commit aa2bc1ade59003a379ffc485d6da2d92ea3370a6 + .B ENOSPC + was returned. +-Linus did not like this, and this was changed to ++In Linux 3.3, this was changed to + .BR EINVAL . + .B ENOSPC +-is still returned if you try to read results into +-too small of a buffer. ++is still returned if you try to add more breakpoint events ++than supported by the hardware. ++.TP ++.B ENOSYS ++Returned if ++.B PERF_SAMPLE_STACK_USER ++is set in ++.I sample_type ++and it is not supported by hardware. ++.TP ++.B EOPNOTSUPP ++Returned if an event requiring a specific hardware feature is ++requested but there is no hardware support. ++This includes requesting low-skid events if not supported, ++branch tracing if it is not available, sampling if no PMU ++interrupt is available, and branch stacks for software events. ++.TP ++.BR EOVERFLOW " (since Linux 4.8)" ++.\" 97c79a38cd454602645f0470ffb444b3b75ce574 ++Returned if ++.B PERF_SAMPLE_CALLCHAIN ++is requested and ++.I sample_max_stack ++is larger than the maximum specified in ++.IR /proc/sys/kernel/perf_event_max_stack . ++.TP ++.B EPERM ++Returned on many (but not all) architectures when an unsupported ++.IR exclude_hv ", " exclude_idle ", " exclude_user ", or " exclude_kernel ++setting is specified. ++.IP ++It can also happen, as with ++.BR EACCES , ++when the requested event requires ++.B CAP_SYS_ADMIN ++permissions (or a more permissive perf_event paranoid setting). ++This includes setting a breakpoint on a kernel address, ++and (since Linux 3.13) setting a kernel function-trace tracepoint. 
++.\" commit a4e95fc2cbb31d70a65beffeaf8773f881328c34 ++.TP ++.B ESRCH ++Returned if attempting to attach to a process that does not exist. + .SH VERSION + .BR perf_event_open () + was introduced in Linux 2.6.31 but was called +-.BR perf_counter_open () . ++.\" commit 0793a61d4df8daeac6492dbf8d2f3e5713caae5e ++.BR perf_counter_open (). + It was renamed in Linux 2.6.32. ++.\" commit cdd6c482c9ff9c55475ee7392ec8f672eddb7be6 + .SH CONFORMING TO + This + .BR perf_event_open () +-system call Linux- specific ++system call is Linux-specific + and should not be used in programs intended to be portable. + .SH NOTES + Glibc does not provide a wrapper for this system call; call it using + .BR syscall (2). + See the example below. +- ++.PP + The official way of knowing if + .BR perf_event_open () + support is enabled is checking +@@ -2070,56 +3325,74 @@ option to + .BR fcntl (2) + is needed to properly get overflow signals in threads. + This was introduced in Linux 2.6.32. +- ++.\" commit ba0a6c9f6fceed11c6a99e8326f0477fe383e6b5 ++.PP ++Prior to Linux 2.6.33 (at least for x86), ++.\" commit b690081d4d3f6a23541493f1682835c3cd5c54a1 ++the kernel did not check + if events could be scheduled together until read time. + The same happens on all known kernels if the NMI watchdog is enabled. + This means to see if a given set of events works you have to + .BR perf_event_open (), + start, then read before you know for sure you + can get valid measurements. +- ++.PP ++Prior to Linux 2.6.34, ++.\" FIXME . cannot find a kernel commit for this one ++event constraints were not enforced by the kernel. + In that case, some events would silently return "0" if the kernel + scheduled them in an improper counter slot.
+- +-Prior to Linux 2.6.34 there was a bug when multiplexing where the ++.PP ++Prior to Linux 2.6.34, there was a bug when multiplexing where the + wrong results could be returned. +- ++.\" commit 45e16a6834b6af098702e5ea6c9a40de42ff77d8 ++.PP + Kernels from Linux 2.6.35 to Linux 2.6.39 can quickly crash the kernel if + "inherit" is enabled and many threads are started. +- ++.\" commit 38b435b16c36b0d863efcf3f07b34a6fac9873fd ++.PP + Prior to Linux 2.6.35, ++.\" commit 050735b08ca8a016bbace4445fa025b88fee770b + .B PERF_FORMAT_GROUP + did not work with attached processes. +- +-In older Linux 2.6 versions, +-refreshing an event group leader refreshed all siblings, +-and refreshing with a parameter of 0 enabled infinite refresh. +-This behavior is unsupported and should not be relied on. +- ++.PP + There is a bug in the kernel code between + Linux 2.6.36 and Linux 3.0 that ignores the + "watermark" field and acts as if a wakeup_event + was chosen if the union has a +-non-zero value in it. +- ++nonzero value in it. ++.\" commit 4ec8363dfc1451f8c8f86825731fe712798ada02 ++.PP + From Linux 2.6.31 to Linux 3.4, the + .B PERF_IOC_FLAG_GROUP + ioctl argument was broken and would repeatedly operate + on the event specified rather than iterating across + all sibling events in a group. +- ++.\" commit 724b6daa13e100067c30cfc4d1ad06629609dc4e ++.PP ++From Linux 3.4 to Linux 3.11, the mmap ++.\" commit fa7315871046b9a4c48627905691dbde57e51033 ++.I cap_usr_rdpmc ++and ++.I cap_usr_time ++bits mapped to the same location. ++Code should migrate to the new ++.I cap_user_rdpmc ++and ++.I cap_user_time ++fields instead. ++.PP + Always double-check your results! + Various generalized events have had wrong values. + For example, retired branches measured + the wrong thing on AMD machines until Linux 2.6.35. ++.\" commit f287d332ce835f77a4f5077d2c0ef1e3f9ea42d2 + .SH EXAMPLE + The following is a short example that measures the total + instruction count of a call to + .BR printf (3). 
+-.nf +- ++.PP ++.EX + #include + #include + #include +@@ -2128,7 +3401,7 @@ instruction count of a call to + #include + #include + +-long ++static long + perf_event_open(struct perf_event_attr *hw_event, pid_t pid, + int cpu, int group_fd, unsigned long flags) + { +@@ -2172,8 +3445,9 @@ main(int argc, char **argv) + + close(fd); + } +-.fi ++.EE + .SH SEE ALSO ++.BR perf (1), + .BR fcntl (2), + .BR mmap (2), + .BR open (2), +-- +2.17.1 + diff --git a/SPECS/man-pages-overrides.spec b/SPECS/man-pages-overrides.spec index 1f5bb84..f5e25f2 100644 --- a/SPECS/man-pages-overrides.spec +++ b/SPECS/man-pages-overrides.spec @@ -2,7 +2,7 @@ Summary: Complementary and updated manual pages Name: man-pages-overrides -Version: 7.5.2 +Version: 7.6.2 Release: 1%{?dist} # license is the same as for the man-pages package License: GPL+ and GPLv2+ and BSD and MIT and Copyright only and IEEE @@ -13,7 +13,7 @@ Source: man-pages-overrides-%{version}.tar.xz Patch0: 1073718-mpo-7.1.0-open.2.patch Patch1: 1086994-mpo-7.1.0-proc.5.patch Patch2: 1112307-mpo-7.3.0-cciss.4.patch -Patch3: 1021967-mpo-7.1.0-socat.1.patch +Patch3: 1619272-mpo-7.6.2-perf_event_open.2.patch Patch4: 1131853-mpo-7.1.0-proc.5-proc-fs-not-empty.patch Patch5: 1085531-mpo-7.4.0-ipvsadm.8.patch Patch6: 1255283-mpo-7.3.0-captest.8.patch @@ -51,12 +51,17 @@ Patch35: 1360898-mpo-7.3.2-prctl.2-capabilities.7.patch Patch36: 1390935-mpo-7.4.0-nsswitch.conf.5.patch Patch37: 1404478-mpo-7.4.0-packet.7.patch Patch38: 1452368-mpo-7.4.2-clone.2.patch -Patch39: 1439724-mpo-7.5.0-vsftpd.conf.5.patch +Patch39: 1560191-mpo-7.6.0-madvise.2.patch Patch40: 1488506-mpo-7.5.0-imtest.1.patch -Patch41: 1507143-mpo-7.5.0-vsftpd.conf.5.patch -Patch42: 1517216-mpo-7.5.0-vsftpd.conf.5.patch +Patch41: 1573281-mpo-7.6.0-mlock.2-mlock2.2.patch +Patch42: 1577265-mpo-7.6.0-host.conf.5.patch Patch43: 1437003-mpo-7.5.1-aide.1-aide.conf.5.patch Patch44: 1422950-mpo-7.5.2-recvmmsg.2-sendmmsg.2.patch +Patch45: 1495572-mpo-7.6.0-mount.cifs.8.patch +Patch46: 
1538499-mpo-7.6.0-iproute.8.patch +Patch47: 1543420-mpo-7.6.0-tc-vlan.8.patch +Patch48: 1598288-mpo-7.6.2-resolv.conf.5.patch +Patch49: 1607318-mpo-7.6.2-proc.5.patch %description A collection of manual ("man") pages to complement other packages or update @@ -65,6 +70,8 @@ installed. %prep %autosetup -p1 +# remove unwanted *.orig files +find -name "*.orig" -delete %build %ifarch aarch64 @@ -113,6 +120,55 @@ done %{_mandir}/overrides/ %changelog +* Wed Aug 22 2018 Nikola Forró - 7.6.2-1 +- Upload new tarball + related: #1592800 +- remove bug fixed in original component: #1147562 (socat) +- perf_event_open.2: sync with upstream + resolves: #1619272 +- resolv.conf.5: document no-reload (RES_NORELOAD) option + resolves: #1598288 +- proc.5: document missing fields in /proc/[pid]/smaps and /proc/[pid]/status + resolves: #1607318 + +* Fri Aug 17 2018 Nikola Forró - 7.6.1-4 +- patch also localized madvise.2 man page + related: #1560191 + +* Mon Aug 13 2018 Nikola Forró - 7.6.1-3 +- remove spoof options also from description section of host.conf.5 + related: #1577265 + +* Mon Jul 09 2018 Nikola Forró - 7.6.1-2 +- remove unwanted (*.orig) files + related: #1278492, #1560191, #1577265 + +* Wed Jul 04 2018 Nikola Forró - 7.6.1-1 +- Upload new tarball +- move mlock2.2 to a proper directory + related: #1573281 + +* Tue Jun 19 2018 Nikola Forró - 7.6.0-1 +- Upload new tarball + resolves: #1592800 +- remove bug fixed in original component: #1508021 (vsftpd) +- remove bug fixed in original component: #1508022 (vsftpd) +- remove bug fixed in original component: #1517227 (vsftpd) +- madvise.2: document MADV_WIPEONFORK and MADV_KEEPONFORK + resolves: #1560191 +- mlock.2, mlock2.2: document mlock2(2) and MCL_ONFAULT + resolves: #1573281 +- host.conf.5: remove description of never-implemented spoof options + resolves: #1577265 +- membarrier.2: add new man page + resolves: #1566315 +- mount.cifs.8: describe seal option + resolves: #1495572 +- ip-route.8: fix synopsis of ssthresh 
parameter + resolves: #1538499 +- tc-vlan.8: fix incorrect example + resolves: #1543420 + * Wed Jan 10 2018 Nikola Forró - 7.5.2-1 - Upload new tarball - remove obsolete patch