#
# Configuration file for mcelog.
# For further options, see the mcelog manpage and documentation.
#

# Filter out known broken events by default.
filter = yes
# Uncomment to stop logging memory errors individually.
#filter-memory-errors = yes

# Uncomment to emit undecoded raw output, which is easier for machines to read.
#raw = yes

[server]
# An upstream bug prevents this from being disabled.
# Only allow root to connect by default.
client-user = root
# Path of the socket that clients use to connect.
socket-path = /var/run/mcelog-client

[dimm]
# Enable DIMM tracking.
dimm-tracking-enabled = yes
# Disable DIMM DMI pre-population unless it is supported on your system.
dmi-prepopulate = no

# Execute these triggers when the rate of corrected or uncorrected
# errors per DIMM exceeds the threshold.
# The default of 10/24h was reasonable for server-quality
# DDR3 DIMMs as of 2009/10. Newer systems can benefit from
# more aggressive page offlining when corrected errors are seen.
# See:
# https://www.intel.com/content/dam/www/public/us/en/documents/intel-and-samsung-mrt-improving-memory-reliability-at-data-centers.pdf
# for details.
# NOTE(review): the rationale above talks about page offlining, but the
# lowered value below is the per-DIMM corrected-error threshold; the
# [page] memory-ce-threshold is configured separately. Confirm this is intended.
uc-error-trigger = dimm-error-trigger
uc-error-threshold = 1 / 24h
ce-error-trigger = dimm-error-trigger
ce-error-threshold = 2 / 24h

[socket]
# Memory error accounting per socket.
# The key is spelled "tracking" (as in dimm-tracking-enabled), not "tracing".
socket-tracking-enabled = yes
mem-uc-error-threshold = 100 / 24h
mem-ce-error-trigger = socket-memory-error-trigger
mem-ce-error-threshold = 100 / 24h
mem-ce-error-log = yes

[cache]
# Attempt to off-line CPUs that are causing cache errors.
cache-threshold-trigger = cache-error-trigger
cache-threshold-log = yes

[page]
# Try to soft-offline a 4K page if it exceeds the threshold.
memory-ce-threshold = 10 / 24h
memory-ce-trigger = page-error-trigger
memory-ce-log = yes
memory-ce-action = soft

[trigger]
# Maximum number of running triggers.
children-max = 2
# Presumably the directory the trigger scripts named in the other sections
# are run from; confirm against the mcelog manpage.
directory = /etc/mcelog/triggers