Blame SOURCES/check-debug-symbols.py

8c3e2c
#!/usr/bin/python3
8c3e2c
8c3e2c
"""
8c3e2c
Check that debug symbols are present in shared objects and can identify
8c3e2c
code.
8c3e2c
8c3e2c
It starts scanning from a directory and recursively scans all ELF
8c3e2c
files found in it for various symbols to ensure all debuginfo is
8c3e2c
present and nothing has been stripped.
8c3e2c
8c3e2c
Usage:
8c3e2c
8c3e2c
./check-debug-symbols /path/of/dir/to/scan/
8c3e2c
8c3e2c
8c3e2c
Example:
8c3e2c
8c3e2c
./check-debug-symbols /usr/lib64
8c3e2c
"""
8c3e2c
8c3e2c
# This technique was explained to me by Mark Wielaard (mjw).
8c3e2c
8c3e2c
import collections
8c3e2c
import os
8c3e2c
import re
8c3e2c
import subprocess
8c3e2c
import sys
8c3e2c
8c3e2c
# Per-file scan outcome. Apart from file_name, every field is a flag computed
# by scan_file() from eu-readelf output.
ScanResult = collections.namedtuple(
    'ScanResult',
    ['file_name', 'debug_info', 'debug_abbrev', 'file_symbols', 'gnu_debuglink'],
)
8c3e2c
8c3e2c
8c3e2c
def scan_file(file):
    """Scan the provided file and return a ScanResult containing results of the scan.

    Runs ``eu-readelf -S`` and ``eu-readelf -s`` on *file* and records:

    * debug_info / debug_abbrev: a ``] .debug_info`` / ``] .debug_abbrev``
      entry appears in the ``-S`` output,
    * file_symbols: the ``-s`` output contains a line describing a
      FILE / LOCAL / DEFAULT / ABS symbol,
    * gnu_debuglink: a ``] .gnu_debuglink`` entry appears in the ``-S`` output.

    Raises subprocess.CalledProcessError when eu-readelf exits with a non-zero
    status (both invocations use check=True).
    """

    def contains_file_symbols(line):
        "Return a truthy value when a line of `eu-readelf -s` output describes a FILE symbol."
        parts = line.split()
        if len(parts) < 8:
            return False
        # NOTE(review): the trailing '?' makes the whole pattern optional, so
        # re.match() succeeds for any name and the source-file-name test is
        # effectively a no-op. Left unchanged to avoid altering scan results;
        # confirm whether a strict match was intended.
        return \
            parts[2] == '0' and parts[3] == 'FILE' and parts[4] == 'LOCAL' and parts[5] == 'DEFAULT' and \
            parts[6] == 'ABS' and re.match(r'((.*/)?[-_a-zA-Z0-9]+\.(c|cc|cpp|cxx))?', parts[7])

    # Test for .debug_* sections in the shared object. This is the main test.
    # Stripped objects will not contain these.
    readelf_S_result = subprocess.run(['eu-readelf', '-S', file],
                                      stdout=subprocess.PIPE, encoding='utf-8', check=True)
    section_lines = readelf_S_result.stdout.split('\n')
    has_debug_info = any('] .debug_info' in line for line in section_lines)
    has_debug_abbrev = any('] .debug_abbrev' in line for line in section_lines)

    # Test FILE symbols. These will most likely be removed by anything that
    # manipulates symbol tables because it's generally useless. So a nice test
    # that nothing has messed with symbols.
    readelf_s_result = subprocess.run(['eu-readelf', '-s', file],
                                      stdout=subprocess.PIPE, encoding='utf-8', check=True)
    symbol_lines = readelf_s_result.stdout.split('\n')
    has_file_symbols = any(contains_file_symbols(line) for line in symbol_lines)

    # Test that there are no .gnu_debuglink sections pointing to another
    # debuginfo file. There shouldn't be any debuginfo files, so the link makes
    # no sense either. This is a section, so it must be looked up in the
    # section listing (-S), not in the symbol table listing (-s).
    has_gnu_debuglink = any('] .gnu_debuglink' in line for line in section_lines)

    return ScanResult(file, has_debug_info, has_debug_abbrev, has_file_symbols, has_gnu_debuglink)
8c3e2c
8c3e2c
def is_elf(file):
    """Tell whether the `file` utility describes *file* as a 64-bit ELF binary.

    Returns the regex match object (truthy) when the description mentions a
    64-bit LSB/MSB executable or shared object, optionally marked "pie";
    otherwise returns None. Raises subprocess.CalledProcessError if the
    `file` command exits with a non-zero status.
    """
    elf_pattern = r'ELF 64-bit [LM]SB (?:pie )?(?:executable|shared object)'
    completed = subprocess.run(
        ['file', file],
        stdout=subprocess.PIPE,
        encoding='utf-8',
        check=True,
    )
    description = completed.stdout
    return re.search(elf_pattern, description)
8c3e2c
8c3e2c
def scan_file_if_sensible(file):
    """Scan *file* only when is_elf() accepts it.

    Returns the ScanResult produced by scan_file(), or None for files that
    is_elf() rejects.
    """
    if not is_elf(file):
        return None
    return scan_file(file)
8c3e2c
8c3e2c
def scan_dir(dir):
    """Recursively scan every file below *dir*.

    Walks the tree with os.walk() and returns a list with the truthy results
    of scan_file_if_sensible(), in walk order; files it skips (None) are
    left out.
    """
    candidate_paths = (
        os.path.join(parent, entry)
        for parent, _, entries in os.walk(dir)
        for entry in entries
    )
    outcomes = (scan_file_if_sensible(path) for path in candidate_paths)
    return [outcome for outcome in outcomes if outcome]
8c3e2c
8c3e2c
def scan(file):
    """Scan a path and return a list of ScanResult objects.

    A directory is scanned recursively via scan_dir(). A regular file yields
    a one-element list when scan_file_if_sensible() produces a result, and an
    empty list when the file is skipped, so the returned list never contains
    None.

    Raises FileNotFoundError when the path is neither a directory nor a
    regular file (for example, it does not exist). Failing loudly here keeps
    a mistyped path from being reported as a successful check.
    """
    path = os.path.abspath(file)
    if os.path.isdir(path):
        return scan_dir(path)
    if os.path.isfile(path):
        result = scan_file_if_sensible(path)
        # Skipped (non-ELF) files contribute nothing, matching scan_dir().
        return [result] if result else []
    raise FileNotFoundError('not a regular file or directory: ' + path)
8c3e2c
8c3e2c
def is_bad_result(result):
    """Tell whether a ScanResult describes a problem.

    A result is bad when any of debug_info, debug_abbrev or file_symbols is
    falsy, or when gnu_debuglink is truthy.
    """
    required = (result.debug_info, result.debug_abbrev, result.file_symbols)
    if not all(required):
        return True
    return result.gnu_debuglink
8c3e2c
8c3e2c
def print_scan_results(results, verbose):
    """Print one 'error: ...' line per problem found in each result.

    When *verbose* is true, a result without any problem is reported with an
    'OK: ' line; otherwise clean results produce no output.
    """
    for entry in results:
        name = entry.file_name
        complaints = []
        if not entry.debug_info:
            complaints.append('error: missing .debug_info section in')
        if not entry.debug_abbrev:
            complaints.append('error: missing .debug_abbrev section in')
        if not entry.file_symbols:
            complaints.append('error: missing FILE symbols in')
        if entry.gnu_debuglink:
            complaints.append('error: unexpected .gnu_debuglink section in')

        for complaint in complaints:
            print(complaint, name)
        if verbose and not complaints:
            print('OK: ', name)
8c3e2c
8c3e2c
def main(args):
    """Scan every path named in *args* and report the findings.

    '--verbose' / '-v' anywhere in *args* enables verbose output; every other
    argument is treated as a path to scan. Returns 1 when at least one result
    is bad according to is_bad_result(), otherwise 0.
    """
    verbose_flags = ('--verbose', '-v')
    verbose = any(arg in verbose_flags for arg in args)
    paths = [arg for arg in args if arg not in verbose_flags]

    results = []
    for path in paths:
        results.extend(scan(path))

    print_scan_results(results, verbose)

    return 1 if any(is_bad_result(outcome) for outcome in results) else 0
8c3e2c
8c3e2c
8c3e2c
if __name__ == '__main__':
    # Use main()'s return value as the process exit status.
    exit_status = main(sys.argv[1:])
    sys.exit(exit_status)