| From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 |
| From: Peter Jones <pjones@redhat.com> |
| Date: Tue, 7 Nov 2017 17:12:17 -0500 |
| Subject: [PATCH] Make pmtimer tsc calibration not take 51 seconds to fail. |
| |
| On my laptop running at 2.4GHz, if I run a VM where tsc calibration |
| using pmtimer will fail presuming a broken pmtimer, it takes ~51 seconds |
| to do so (as measured with the stopwatch on my phone), with a tsc delta |
| of 0x1cd1c85300, or around 125 billion cycles. |
| |
| If instead of trying to wait for 5-200ms to show up on the pmtimer, we try |
| to wait for 5-200us, it decides it's broken in ~0x2626aa0 TSCs, aka ~2.4 |
| million cycles, or more or less instantly. |
| |
| Additionally, this reading the pmtimer was returning 0xffffffff anyway, |
| and that's obviously an invalid return. I've added a check for that and |
| 0 so we don't bother waiting for the test if what we're seeing is dead |
| pins with no response at all. |
| |
| If "debug" is includes "pmtimer", you will see one of the following |
| three outcomes. If pmtimer gives all 0 or all 1 bits, you will see: |
| |
| kern/i386/tsc_pmtimer.c:77: pmtimer: 0xffffff bad_reads: 1 |
| kern/i386/tsc_pmtimer.c:77: pmtimer: 0xffffff bad_reads: 2 |
| kern/i386/tsc_pmtimer.c:77: pmtimer: 0xffffff bad_reads: 3 |
| kern/i386/tsc_pmtimer.c:77: pmtimer: 0xffffff bad_reads: 4 |
| kern/i386/tsc_pmtimer.c:77: pmtimer: 0xffffff bad_reads: 5 |
| kern/i386/tsc_pmtimer.c:77: pmtimer: 0xffffff bad_reads: 6 |
| kern/i386/tsc_pmtimer.c:77: pmtimer: 0xffffff bad_reads: 7 |
| kern/i386/tsc_pmtimer.c:77: pmtimer: 0xffffff bad_reads: 8 |
| kern/i386/tsc_pmtimer.c:77: pmtimer: 0xffffff bad_reads: 9 |
| kern/i386/tsc_pmtimer.c:77: pmtimer: 0xffffff bad_reads: 10 |
| kern/i386/tsc_pmtimer.c:78: timer is broken; giving up. |
| |
| This outcome was tested using qemu+kvm with UEFI (OVMF) firmware and |
| these options: -machine pc-q35-2.10 -cpu Broadwell-noTSX |
| |
| If pmtimer gives any other bit patterns but is not actually marching |
| forward fast enough to use for clock calibration, you will see: |
| |
| kern/i386/tsc_pmtimer.c:121: pmtimer delta is 0x0 (1904 iterations) |
| kern/i386/tsc_pmtimer.c:124: tsc delta is implausible: 0x2626aa0 |
| |
| This outcome was tested using grub compiled with GRUB_PMTIMER_IGNORE_BAD_READS |
| defined (so as not to trip the bad read test) using qemu+kvm with UEFI |
| (OVMF) firmware, and these options: -machine pc-q35-2.10 -cpu Broadwell-noTSX |
| |
| If pmtimer actually works, you'll see something like: |
| |
| kern/i386/tsc_pmtimer.c:121: pmtimer delta is 0x0 (1904 iterations) |
| kern/i386/tsc_pmtimer.c:124: tsc delta is implausible: 0x2626aa0 |
| |
| This outcome was tested using qemu+kvm with UEFI (OVMF) firmware, and |
| these options: -machine pc-i440fx-2.4 -cpu Broadwell-noTSX |
| |
| I've also tested this outcome on a real Intel Xeon E3-1275v3 on an Intel |
| Server Board S1200V3RPS using the SDV.RP.B8 "Release" build here: |
| https://firmware.intel.com/sites/default/files/UEFIDevKit_S1200RP_vB8.zip |
| |
| Signed-off-by: Peter Jones <pjones@redhat.com> |
| |
| grub-core/kern/i386/tsc_pmtimer.c | 109 +++++++++++++++++++++++++++++++------- |
| 1 file changed, 89 insertions(+), 20 deletions(-) |
| |
| diff --git a/grub-core/kern/i386/tsc_pmtimer.c b/grub-core/kern/i386/tsc_pmtimer.c |
| index c9c36169978..ca15c3aacd7 100644 |
| |
| |
| @@ -28,40 +28,101 @@ |
| #include <grub/acpi.h> |
| #include <grub/cpu/io.h> |
| |
| +/* |
| + * Define GRUB_PMTIMER_IGNORE_BAD_READS if you're trying to test a timer that's |
| + * present but doesn't keep time well. |
| + */ |
| +// #define GRUB_PMTIMER_IGNORE_BAD_READS |
| + |
| grub_uint64_t |
| grub_pmtimer_wait_count_tsc (grub_port_t pmtimer, |
| grub_uint16_t num_pm_ticks) |
| { |
| grub_uint32_t start; |
| - grub_uint32_t last; |
| - grub_uint32_t cur, end; |
| + grub_uint64_t cur, end; |
| grub_uint64_t start_tsc; |
| grub_uint64_t end_tsc; |
| - int num_iter = 0; |
| + unsigned int num_iter = 0; |
| +#ifndef GRUB_PMTIMER_IGNORE_BAD_READS |
| + int bad_reads = 0; |
| +#endif |
| |
| - start = grub_inl (pmtimer) & 0xffffff; |
| - last = start; |
| + /* |
| + * Some timers are 24-bit and some are 32-bit, but it doesn't make much |
| + * difference to us. Caring which one we have isn't really worth it since |
| + * the low-order digits will give us enough data to calibrate TSC. So just |
| + * mask the top-order byte off. |
| + */ |
| + cur = start = grub_inl (pmtimer) & 0xffffffUL; |
| end = start + num_pm_ticks; |
| start_tsc = grub_get_tsc (); |
| while (1) |
| { |
| - cur = grub_inl (pmtimer) & 0xffffff; |
| - if (cur < last) |
| - cur |= 0x1000000; |
| - num_iter++; |
| + cur &= 0xffffffffff000000ULL; |
| + cur |= grub_inl (pmtimer) & 0xffffffUL; |
| + |
| + end_tsc = grub_get_tsc(); |
| + |
| +#ifndef GRUB_PMTIMER_IGNORE_BAD_READS |
| + /* |
| + * If we get 10 reads in a row that are obviously dead pins, there's no |
| + * reason to do this thousands of times. |
| + */ |
| + if (cur == 0xffffffUL || cur == 0) |
| + { |
| + bad_reads++; |
| + grub_dprintf ("pmtimer", |
| + "pmtimer: 0x%"PRIxGRUB_UINT64_T" bad_reads: %d\n", |
| + cur, bad_reads); |
| + grub_dprintf ("pmtimer", "timer is broken; giving up.\n"); |
| + |
| + if (bad_reads == 10) |
| + return 0; |
| + } |
| +#endif |
| + |
| + if (cur < start) |
| + cur += 0x1000000; |
| + |
| if (cur >= end) |
| { |
| - end_tsc = grub_get_tsc (); |
| + grub_dprintf ("pmtimer", "pmtimer delta is 0x%"PRIxGRUB_UINT64_T"\n", |
| + cur - start); |
| + grub_dprintf ("pmtimer", "tsc delta is 0x%"PRIxGRUB_UINT64_T"\n", |
| + end_tsc - start_tsc); |
| return end_tsc - start_tsc; |
| } |
| - /* Check for broken PM timer. |
| - 50000000 TSCs is between 5 ms (10GHz) and 200 ms (250 MHz) |
| - if after this time we still don't have 1 ms on pmtimer, then |
| - pmtimer is broken. |
| + |
| + /* |
| + * Check for broken PM timer. 1ms at 10GHz should be 1E+7 TSCs; at |
| + * 250MHz it should be 2.5E6. So if after 4E+7 TSCs on a 10GHz machine, |
| + * we should have seen pmtimer show 4ms of change (i.e. cur =~ |
| + * start+14320); on a 250MHz machine that should be 16ms (start+57280). |
| + * If after this a time we still don't have 1ms on pmtimer, then pmtimer |
| + * is broken. |
| + * |
| + * Likewise, if our code is perfectly efficient and introduces no delays |
| + * whatsoever, on a 10GHz system we should see a TSC delta of 3580 in |
| + * ~3580 iterations. On a 250MHz machine that should be ~900 iterations. |
| + * |
| + * With those factors in mind, there are two limits here. There's a hard |
| + * limit here at 8x our desired pm timer delta, picked as an arbitrarily |
| + * large value that's still not a lot of time to humans, because if we |
| + * get that far this is either an implausibly fast machine or the pmtimer |
| + * is not running. And there's another limit on 4x our 10GHz tsc delta |
| + * without seeing cur converge on our target value. |
| */ |
| - if ((num_iter & 0xffffff) == 0 && grub_get_tsc () - start_tsc > 5000000) { |
| - return 0; |
| - } |
| + if ((++num_iter > (grub_uint32_t)num_pm_ticks << 3UL) || |
| + end_tsc - start_tsc > 40000000) |
| + { |
| + grub_dprintf ("pmtimer", |
| + "pmtimer delta is 0x%"PRIxGRUB_UINT64_T" (%u iterations)\n", |
| + cur - start, num_iter); |
| + grub_dprintf ("pmtimer", |
| + "tsc delta is implausible: 0x%"PRIxGRUB_UINT64_T"\n", |
| + end_tsc - start_tsc); |
| + return 0; |
| + } |
| } |
| } |
| |
| @@ -74,12 +135,20 @@ grub_tsc_calibrate_from_pmtimer (void) |
| |
| fadt = grub_acpi_find_fadt (); |
| if (!fadt) |
| - return 0; |
| + { |
| + grub_dprintf ("pmtimer", "No FADT found; not using pmtimer.\n"); |
| + return 0; |
| + } |
| pmtimer = fadt->pmtimer; |
| if (!pmtimer) |
| - return 0; |
| + { |
| + grub_dprintf ("pmtimer", "FADT does not specify pmtimer; skipping.\n"); |
| + return 0; |
| + } |
| |
| - /* It's 3.579545 MHz clock. Wait 1 ms. */ |
| + /* |
| + * It's 3.579545 MHz clock. Wait 1 ms. |
| + */ |
| tsc_diff = grub_pmtimer_wait_count_tsc (pmtimer, 3580); |
| if (tsc_diff == 0) |
| return 0; |