From ddb7c4abddd746d7ec354fb89be16a2411a92cba Mon Sep 17 00:00:00 2001 From: Mark Wielaard Date: Mon, 27 Feb 2017 16:28:18 +0100 Subject: [PATCH 13/54] debugedit: Support String/Line table rewriting for larger/smaller paths. debugedit --base to --dest rewriting of debug source file paths only supported dest paths that were smaller than or equal to the base path (and, unless equal, the sizes had to differ by more than 1 character for correct debug lines). All paths were changed "in place", which could in theory mess up debug str sharing. This rewrite supports base and dest strings of any size (some limitations, see below). This is done by reconstructing the debug_str and debug_line tables and updating the references in the debug_info attributes pointing to these tables. Plus, if necessary (only for ET_REL kernel modules), updating any relocations for the debug_info and debug_line sections. This has the nice benefit of merging any duplicate strings in the debug_str table, which might result in slightly smaller files. Kernel modules are ET_REL files that often contain a lot of duplicate strings. The rewrite uses elfutils (either libebl or libdw) to reconstruct the debug_str table. Since we are changing some section sizes now, we cannot just use mmap and rawdata to poke the values, but need to read in and write out the changed sections. This does take a bit more memory because we now also need to keep track of all string/line references. There are still some limitations (already in the original debugedit) not fixed by this rewrite: - DW_AT_comp_dir in .debug_info using DW_FORM_string cannot be made larger. We only warn about that now instead of failing. The only producer of DW_FORM_string comp_dirs is binutils gas. It seems simpler to fix gas than to try to support resizing the debug_info section. - A DW_AT_name on a DW_TAG_compile_unit is only rewritten for DW_FORM_strp not for DW_FORM_string. Probably no problem in practice since this wasn't supported originally either. 
- The debug_line program isn't scanned for DW_LNE_define_file which could in theory define an absolute path that might need rewriting. Again, this is probably not a problem because this wasn't supported before and there are no known producers for this construct. To support the upcoming DWARFv5 in gcc 7 (not on by default), we will need to add support for the new debug_line format and scan the new debug_macro section that can have references to the debug_str table. Signed-off-by: Mark Wielaard (cherry picked from commit 88989572fff1f31e0c4f972a6895585e4742ef4b) --- Makefile.am | 8 +- configure.ac | 6 + tools/debugedit.c | 1569 ++++++++++++++++++++++++++++++++++++++++++++--------- 3 files changed, 1330 insertions(+), 253 deletions(-) diff --git a/Makefile.am b/Makefile.am index 6b37b5898..1b77730aa 100644 --- a/Makefile.am +++ b/Makefile.am @@ -156,13 +156,18 @@ rpm2archive_LDADD += @WITH_NSS_LIB@ @WITH_POPT_LIB@ @WITH_ZLIB_LIB@ @WITH_ARCHIV if LIBELF if LIBDWARF +if LIBDW rpmconfig_SCRIPTS += scripts/find-debuginfo.sh rpmlibexec_PROGRAMS += debugedit debugedit_SOURCES = tools/debugedit.c tools/hashtab.c tools/hashtab.h debugedit_LDADD = rpmio/librpmio.la debugedit_LDADD += @WITH_LIBELF_LIB@ @WITH_POPT_LIB@ - +if HAVE_LIBDW_STRTAB +debugedit_LDADD += @WITH_LIBDW_LIB@ +else +debugedit_LDADD += @WITH_LIBDW_LIB@ -lebl +endif rpmlibexec_PROGRAMS += elfdeps elfdeps_SOURCES = tools/elfdeps.c elfdeps_LDADD = rpmio/librpmio.la @@ -173,6 +178,7 @@ sepdebugcrcfix_SOURCES = tools/sepdebugcrcfix.c sepdebugcrcfix_LDADD = @WITH_LIBELF_LIB@ endif endif +endif rpmlibexec_PROGRAMS += rpmdeps rpmdeps_SOURCES = tools/rpmdeps.c diff --git a/configure.ac b/configure.ac index 9596a97b3..e6362535b 100644 --- a/configure.ac +++ b/configure.ac @@ -362,18 +362,24 @@ AM_CONDITIONAL(WITH_ARCHIVE,[test "$with_archive" = yes]) #================= # Check for elfutils libdw library with dwelf_elf_gnu_build_id. 
WITH_LIBDW_LIB= +HAVE_LIBDW_STRTAB= AS_IF([test "$WITH_LIBELF" = yes],[ AC_CHECK_HEADERS([elfutils/libdwelf.h],[ + # dwelf_elf_gnu_build_id was introduced in elfutils 0.159 AC_CHECK_LIB(dw, dwelf_elf_gnu_build_id, [ AC_DEFINE(HAVE_LIBDW, 1, [Define to 1 if you have elfutils libdw library]) WITH_LIBDW_LIB="-ldw" WITH_LIBDW=yes + # If possible we also want the strtab functions from elfutils 0.167. + # But we can fall back on the (unsupported) ebl alternatives if not. + AC_CHECK_LIB(dw, dwelf_strtab_init, [HAVE_LIBDW_STRTAB=yes]) ]) ]) ]) AC_SUBST(WITH_LIBDW_LIB) AM_CONDITIONAL(LIBDW,[test "$WITH_LIBDW" = yes]) +AM_CONDITIONAL(HAVE_LIBDW_STRTAB,[test "$HAVE_LIBDW_STRTAB" = yes]) #================= # Process --with/without-external-db diff --git a/tools/debugedit.c b/tools/debugedit.c index c0147f086..4798c6370 100644 --- a/tools/debugedit.c +++ b/tools/debugedit.c @@ -1,6 +1,7 @@ -/* Copyright (C) 2001-2003, 2005, 2007, 2009-2011, 2016 Red Hat, Inc. +/* Copyright (C) 2001-2003, 2005, 2007, 2009-2011, 2016, 2017 Red Hat, Inc. Written by Alexander Larsson , 2002 Based on code by Jakub Jelinek , 2001. + String/Line table rewriting by Mark Wielaard , 2017. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -30,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -39,6 +41,35 @@ #include #include +/* Unfortunately strtab manipulation functions were only officially added + to elfutils libdw in 0.167. Before that there were internal unsupported + ebl variants. While libebl.h isn't supported we'll try to use it anyway + if the elfutils we build against is too old. 
*/ +#include +#if _ELFUTILS_PREREQ (0, 167) +#include +typedef Dwelf_Strent Strent; +typedef Dwelf_Strtab Strtab; +#define strtab_init dwelf_strtab_init +#define strtab_add(X,Y) dwelf_strtab_add(X,Y) +#define strtab_add_len(X,Y,Z) dwelf_strtab_add_len(X,Y,Z) +#define strtab_free dwelf_strtab_free +#define strtab_finalize dwelf_strtab_finalize +#define strent_offset dwelf_strent_off +#else +#include +typedef struct Ebl_Strent Strent; +typedef struct Ebl_Strtab Strtab; +#define strtab_init ebl_strtabinit +#define strtab_add(X,Y) ebl_strtabadd(X,Y,0) +#define strtab_add_len(X,Y,Z) ebl_strtabadd(X,Y,Z) +#define strtab_free ebl_strtabfree +#define strtab_finalize ebl_strtabfinalize +#define strent_offset ebl_strtaboffset +#endif + +#include + #include #include #include "tools/hashtab.h" @@ -56,6 +87,99 @@ int list_file_fd = -1; int do_build_id = 0; char *build_id_seed = NULL; +/* We go over the debug sections in two phases. In phase zero we keep + track of any needed changes and collect strings, indexes and + sizes. In phase one we do the actual replacements updating the + strings, indexes and writing out new debug sections. The following + keep track of various changes that might be needed. */ + +/* Whether we need to do any literal string (DW_FORM_string) replacements + in debug_info. */ +static bool need_string_replacement = false; +/* Whether we need to do any updates of the string indexes (DW_FORM_strp) + in debug_info for string indexes. */ +static bool need_strp_update = false; +/* If the debug_line changes size we will need to update the + DW_AT_stmt_list attributes indexes in the debug_info. */ +static bool need_stmt_update = false; + +/* Storage for dynamically allocated strings to put into string + table. Keep together in memory blocks of 16K. 
*/ +#define STRMEMSIZE (16 * 1024) +struct strmemblock +{ + struct strmemblock *next; + char memory[0]; +}; + +/* We keep track of each index in the original string table and the + associated entry in the new table so we don't insert identical + strings into the new string table. If constructed correctly the + original strtab shouldn't contain duplicate strings anyway. Any + actual identical strings could be deduplicated, but searching for + and comparing the indexes is much faster than comparing strings + (and we don't have to construct replacement strings). */ +struct stridxentry +{ + uint32_t idx; /* Original index in the string table. */ + Strent *entry; /* Entry in the new table. */ +}; + +/* Storage for new string table entries. Keep together in memory to + quickly search through them with tsearch. */ +#define STRIDXENTRIES ((16 * 1024) / sizeof (struct stridxentry)) +struct strentblock +{ + struct strentblock *next; + struct stridxentry entry[0]; +}; + +/* All data to keep track of the existing and new string table. */ +struct strings +{ + Strtab *str_tab; /* The new string table. */ + char *str_buf; /* New Elf_Data d_buf. */ + struct strmemblock *blocks; /* The first strmemblock. */ + struct strmemblock *last_block; /* The currently used strmemblock. */ + size_t stridx; /* Next free byte in last block. */ + struct strentblock *entries; /* The first string index block. */ + struct strentblock *last_entries; /* The currently used strentblock. */ + size_t entryidx; /* Next free entry in the last block. */ + void *strent_root; /* strent binary search tree root. */ +}; + +struct line_table +{ + size_t old_idx; /* Original offset. */ + size_t new_idx; /* Offset in new debug_line section. */ + ssize_t size_diff; /* Difference in (header) size. */ + bool replace_dirs; /* Whether to replace any dir paths. */ + bool replace_files; /* Whether to replace any file paths. */ + + /* Header fields. 
*/ + uint32_t unit_length; + uint16_t version; + uint32_t header_length; + uint8_t min_instr_len; + uint8_t max_op_per_instr; /* Only if version >= 4 */ + uint8_t default_is_stmt; + int8_t line_base; + uint8_t line_range; + uint8_t opcode_base; +}; + +struct debug_lines +{ + struct line_table *table; /* Malloc/Realloced. */ + size_t size; /* Total number of line_tables. + Updated by get_line_table. */ + size_t used; /* Used number of line_tables. + Updated by get_line_table. */ + size_t debug_lines_len; /* Total size of new debug_line section. + updated by edit_dwarf2_line. */ + char *line_buf; /* New Elf_Data d_buf. */ +}; + typedef struct { Elf *elf; @@ -63,15 +187,42 @@ typedef struct Elf_Scn **scn; const char *filename; int lastscn; + size_t phnum; + struct strings strings; + struct debug_lines lines; GElf_Shdr shdr[0]; } DSO; +static void +setup_lines (struct debug_lines *lines) +{ + lines->table = NULL; + lines->size = 0; + lines->used = 0; + lines->debug_lines_len = 0; + lines->line_buf = NULL; +} + +static void +destroy_lines (struct debug_lines *lines) +{ + free (lines->table); + free (lines->line_buf); +} + typedef struct { unsigned char *ptr; uint32_t addend; + int ndx; } REL; +typedef struct +{ + Elf64_Addr r_offset; + int ndx; +} LINE_REL; + #define read_uleb128(ptr) ({ \ unsigned int ret = 0; \ unsigned int c; \ @@ -88,9 +239,23 @@ typedef struct ret; \ }) +#define write_uleb128(ptr,val) ({ \ + uint32_t valv = (val); \ + do \ + { \ + unsigned char c = valv & 0x7f; \ + valv >>= 7; \ + if (valv) \ + c |= 0x80; \ + *ptr++ = c; \ + } \ + while (valv); \ +}) + static uint16_t (*do_read_16) (unsigned char *ptr); static uint32_t (*do_read_32) (unsigned char *ptr); -static void (*write_32) (unsigned char *ptr, GElf_Addr val); +static void (*do_write_16) (unsigned char *ptr, uint16_t val); +static void (*do_write_32) (unsigned char *ptr, uint32_t val); static int ptr_size; static int cu_version; @@ -129,7 +294,7 @@ strptr (DSO *dso, int sec, off_t offset) if 
(offset >= 0 && (GElf_Addr) offset < dso->shdr[sec].sh_size) { data = NULL; - while ((data = elf_rawdata (scn, data)) != NULL) + while ((data = elf_getdata (scn, data)) != NULL) { if (data->d_buf && offset >= data->d_off @@ -142,7 +307,7 @@ strptr (DSO *dso, int sec, off_t offset) } -#define read_1(ptr) *ptr++ +#define read_8(ptr) *ptr++ #define read_16(ptr) ({ \ uint16_t ret = do_read_16 (ptr); \ @@ -183,28 +348,73 @@ int reltype; }) static void -dwarf2_write_le32 (unsigned char *p, GElf_Addr val) +dwarf2_write_le16 (unsigned char *p, uint16_t v) { - uint32_t v = (uint32_t) val; + p[0] = v; + p[1] = v >> 8; +} +static void +dwarf2_write_le32 (unsigned char *p, uint32_t v) +{ p[0] = v; p[1] = v >> 8; p[2] = v >> 16; p[3] = v >> 24; } - static void -dwarf2_write_be32 (unsigned char *p, GElf_Addr val) +dwarf2_write_be16 (unsigned char *p, uint16_t v) { - uint32_t v = (uint32_t) val; + p[1] = v; + p[0] = v >> 8; +} +static void +dwarf2_write_be32 (unsigned char *p, uint32_t v) +{ p[3] = v; p[2] = v >> 8; p[1] = v >> 16; p[0] = v >> 24; } +#define write_8(ptr,val) ({ \ + *ptr++ = (val); \ +}) + +#define write_16(ptr,val) ({ \ + do_write_16 (ptr,val); \ + ptr += 2; \ +}) + +#define write_32(ptr,val) ({ \ + do_write_32 (ptr,val); \ + ptr += 4; \ +}) + +/* relocated writes can only be called immediately after + do_read_32_relocated. ptr must be equal to relptr->ptr (or + relend). Might just update the addend. So relocations need to be + updated at the end. 
*/ + +#define do_write_32_relocated(ptr,val) ({ \ + if (relptr && relptr < relend && relptr->ptr == ptr) \ + { \ + if (reltype == SHT_REL) \ + do_write_32 (ptr, val - relptr->addend); \ + else \ + relptr->addend = val; \ + } \ + else \ + do_write_32 (ptr,val); \ +}) + +#define write_32_relocated(ptr,val) ({ \ + do_write_32_relocated (ptr,val); \ + ptr += 4; \ +}) + static struct { const char *name; @@ -448,90 +658,638 @@ canonicalize_path (const char *s, char *d) return rv; } +/* Returns the rest of PATH if it starts with DIR_PREFIX, skipping any + / path separators, or NULL if PATH doesn't start with + DIR_PREFIX. Might return the empty string if PATH equals DIR_PREFIX + (modulo trailing slashes). Never returns path starting with '/'. */ +static const char * +skip_dir_prefix (const char *path, const char *dir_prefix) +{ + size_t prefix_len = strlen (dir_prefix); + if (strncmp (path, dir_prefix, prefix_len) == 0) + { + path += prefix_len; + while (IS_DIR_SEPARATOR (path[0])) + path++; + return path; + } + + return 0; +} + +/* Most strings will be in the existing debug string table. But to + replace the base/dest directory prefix we need some new storage. + Keep new strings somewhat close together for faster comparison and + copying. SIZE should be at least one (and includes space for the + zero terminator). The returned pointer points to uninitialized + data. */ +static char * +new_string_storage (struct strings *strings, size_t size) +{ + assert (size > 0); + + /* If the string is extra long just create a whole block for + it. Normally strings are much smaller than STRMEMSIZE. 
*/ + if (strings->last_block == NULL + || size > STRMEMSIZE + || strings->stridx > STRMEMSIZE + || (STRMEMSIZE - strings->stridx) < size) + { + struct strmemblock *newblock = malloc (sizeof (struct strmemblock) + + MAX (STRMEMSIZE, size)); + if (newblock == NULL) + return NULL; + + newblock->next = NULL; + + if (strings->blocks == NULL) + strings->blocks = newblock; + + if (strings->last_block != NULL) + strings->last_block->next = newblock; + + strings->last_block = newblock; + strings->stridx = 0; + } + + size_t stridx = strings->stridx; + strings->stridx += size + 1; + return &strings->last_block->memory[stridx]; +} + +/* Comparison function used for tsearch. */ static int -has_prefix (const char *str, - const char *prefix) +strent_compare (const void *a, const void *b) { - size_t str_len; - size_t prefix_len; + struct stridxentry *entry_a = (struct stridxentry *)a; + struct stridxentry *entry_b = (struct stridxentry *)b; + size_t idx_a = entry_a->idx; + size_t idx_b = entry_b->idx; - str_len = strlen (str); - prefix_len = strlen (prefix); + if (idx_a < idx_b) + return -1; - if (str_len < prefix_len) - return 0; + if (idx_a > idx_b) + return 1; - return strncmp (str, prefix, prefix_len) == 0; + return 0; } -static int dirty_elf; +/* Allocates and inserts a new entry for the old index if not yet + seen. Returns a stridxentry if the given index has not yet been + seen and needs to be filled in with the associated string (either + the original string or the replacement string). Returns NULL if the + idx is already known. Use in phase 0 to add all strings seen. In + phase 1 use string_find_entry instead to get existing entries. */ +static struct stridxentry * +string_find_new_entry (struct strings *strings, size_t old_idx) +{ + /* Use next entry in the pool for lookup so we can use it directly + if this is a new index. */ + struct stridxentry *entry; + + /* Keep entries close together to make key comparison fast. 
*/ + if (strings->last_entries == NULL || strings->entryidx >= STRIDXENTRIES) + { + size_t entriessz = (sizeof (struct strentblock) + + (STRIDXENTRIES * sizeof (struct stridxentry))); + struct strentblock *newentries = malloc (entriessz); + if (newentries == NULL) + error (1, errno, "Couldn't allocate new string entries block"); + else + { + if (strings->entries == NULL) + strings->entries = newentries; + + if (strings->last_entries != NULL) + strings->last_entries->next = newentries; + + strings->last_entries = newentries; + strings->last_entries->next = NULL; + strings->entryidx = 0; + } + } + + entry = &strings->last_entries->entry[strings->entryidx]; + entry->idx = old_idx; + struct stridxentry **tres = tsearch (entry, &strings->strent_root, + strent_compare); + if (tres == NULL) + error (1, ENOMEM, "Couldn't insert new strtab idx"); + else if (*tres == entry) + { + /* idx not yet seen, must add actual str. */ + strings->entryidx++; + return entry; + } + + return NULL; /* We already know about this idx, entry already complete. */ +} + +static struct stridxentry * +string_find_entry (struct strings *strings, size_t old_idx) +{ + struct stridxentry **ret; + struct stridxentry key; + key.idx = old_idx; + ret = tfind (&key, &strings->strent_root, strent_compare); + assert (ret != NULL); /* Can only happen for a bad/non-existing old_idx. */ + return *ret; +} + +/* Adds a string_idx_entry given an index into the old/existing string + table. Should be used in phase 0. Does nothing if the index was + already registered. Otherwise it checks the string associated with + the index. If the old string doesn't start with base_dir an entry + will be recorded for the index with the same string. Otherwise a + string will be recorded where the base_dir prefix will be replaced + by dest_dir. Returns true if this is a not yet seen index and there + a replacement file string has been recorded for it, otherwise + returns false. 
*/ +static bool +record_file_string_entry_idx (struct strings *strings, size_t old_idx) +{ + bool ret = false; + struct stridxentry *entry = string_find_new_entry (strings, old_idx); + if (entry != NULL) + { + Strent *strent; + const char *old_str = (char *)debug_sections[DEBUG_STR].data + old_idx; + const char *file = skip_dir_prefix (old_str, base_dir); + if (file == NULL) + { + /* Just record the existing string. */ + strent = strtab_add_len (strings->str_tab, old_str, + strlen (old_str) + 1); + } + else + { + /* Create and record the altered file path. */ + size_t dest_len = strlen (dest_dir); + size_t file_len = strlen (file); + size_t nsize = dest_len + 1; /* + '\0' */ + if (file_len > 0) + nsize += 1 + file_len; /* + '/' */ + char *nname = new_string_storage (strings, nsize); + if (nname == NULL) + error (1, ENOMEM, "Couldn't allocate new string storage"); + memcpy (nname, dest_dir, dest_len); + if (file_len > 0) + { + nname[dest_len] = '/'; + memcpy (nname + dest_len + 1, file, file_len + 1); + } + else + nname[dest_len] = '\0'; + + strent = strtab_add_len (strings->str_tab, nname, nsize); + ret = true; + } + if (strent == NULL) + error (1, ENOMEM, "Could not create new string table entry"); + else + entry->entry = strent; + } + + return ret; +} + +/* Same as record_new_string_file_string_entry_idx but doesn't replace + base_dir with dest_dir, just records the existing string associated + with the index. 
*/ static void -dirty_section (unsigned int sec) +record_existing_string_entry_idx (struct strings *strings, size_t old_idx) { - elf_flagdata (debug_sections[sec].elf_data, ELF_C_SET, ELF_F_DIRTY); - dirty_elf = 1; + struct stridxentry *entry = string_find_new_entry (strings, old_idx); + if (entry != NULL) + { + const char *str = (char *)debug_sections[DEBUG_STR].data + old_idx; + Strent *strent = strtab_add_len (strings->str_tab, + str, strlen (str) + 1); + if (strent == NULL) + error (1, ENOMEM, "Could not create new string table entry"); + else + entry->entry = strent; + } } -static int -edit_dwarf2_line (DSO *dso, uint32_t off, char *comp_dir, int phase) +static void +setup_strings (struct strings *strings) { - unsigned char *ptr = debug_sections[DEBUG_LINE].data, *dir; - unsigned char **dirt; - unsigned char *endsec = ptr + debug_sections[DEBUG_LINE].size; - unsigned char *endcu, *endprol; - unsigned char opcode_base; - uint32_t value, dirt_cnt; - size_t comp_dir_len = !comp_dir ? 0 : strlen (comp_dir); - size_t abs_file_cnt = 0, abs_dir_cnt = 0; + strings->str_tab = strtab_init (false); + strings->str_buf = NULL; + strings->blocks = NULL; + strings->last_block = NULL; + strings->entries = NULL; + strings->last_entries = NULL; + strings->strent_root = NULL; +} - if (phase != 0) - return 0; +/* Noop for tdestroy. */ +static void free_node (void *p __attribute__((__unused__))) { } - /* XXX: RhBug:929365, should we error out instead of ignoring? */ +static void +destroy_strings (struct strings *strings) +{ + struct strmemblock *smb = strings->blocks; + while (smb != NULL) + { + void *old = smb; + smb = smb->next; + free (old); + } + + struct strentblock *emb = strings->entries; + while (emb != NULL) + { + void *old = emb; + emb = emb->next; + free (old); + } + + strtab_free (strings->str_tab); + tdestroy (strings->strent_root, &free_node); + free (strings->str_buf); +} + +/* The minimum number of line tables we pre-allocate. 
*/ +#define MIN_LINE_TABLES 64 + +/* Gets a line_table at offset. Returns true if not yet know and + successfully read, false otherwise. Sets *table to NULL and + outputs a warning if there was a problem reading the table at the + given offset. */ +static bool +get_line_table (DSO *dso, size_t off, struct line_table **table) +{ + struct debug_lines *lines = &dso->lines; + /* Assume there aren't that many, just do a linear search. The + array is probably already sorted because the stmt_lists are + probably inserted in order. But we cannot rely on that (maybe we + should check that to make searching quicker if possible?). Once + we have all line tables for phase 1 (rewriting) we do explicitly + sort the array.*/ + for (int i = 0; i < lines->used; i++) + if (lines->table[i].old_idx == off) + { + *table = &lines->table[i]; + return false; + } + + if (lines->size == lines->used) + { + struct line_table *new_table = realloc (lines->table, + (sizeof (struct line_table) + * (lines->size + + MIN_LINE_TABLES))); + if (new_table == NULL) + { + error (0, ENOMEM, "Couldn't add more debug_line tables"); + *table = NULL; + return false; + } + lines->table = new_table; + lines->size += MIN_LINE_TABLES; + } + + struct line_table *t = &lines->table[lines->used]; + *table = NULL; + + t->old_idx = off; + t->size_diff = 0; + t->replace_dirs = false; + t->replace_files = false; + + unsigned char *ptr = debug_sections[DEBUG_LINE].data; + unsigned char *endsec = ptr + debug_sections[DEBUG_LINE].size; if (ptr == NULL) - return 0; + { + error (0, 0, "%s: No .line_table section", dso->filename); + return false; + } + if (off > debug_sections[DEBUG_LINE].size) + { + error (0, 0, "%s: Invalid .line_table offset 0x%zx", + dso->filename, off); + return false; + } ptr += off; - endcu = ptr + 4; - endcu += read_32 (ptr); + /* unit_length */ + unsigned char *endcu = ptr + 4; + t->unit_length = read_32 (ptr); + endcu += t->unit_length; if (endcu == ptr + 0xffffffff) { error (0, 0, "%s: 64-bit DWARF 
not supported", dso->filename); - return 1; + return false; } if (endcu > endsec) { error (0, 0, "%s: .debug_line CU does not fit into section", dso->filename); - return 1; + return false; } - value = read_16 (ptr); - if (value != 2 && value != 3 && value != 4) + /* version */ + t->version = read_16 (ptr); + if (t->version != 2 && t->version != 3 && t->version != 4) { error (0, 0, "%s: DWARF version %d unhandled", dso->filename, - value); - return 1; + t->version); + return false; } - endprol = ptr + 4; - endprol += read_32 (ptr); + /* header_length */ + unsigned char *endprol = ptr + 4; + t->header_length = read_32 (ptr); + endprol += t->header_length; if (endprol > endcu) { error (0, 0, "%s: .debug_line CU prologue does not fit into CU", dso->filename); - return 1; + return false; + } + + /* min instr len */ + t->min_instr_len = *ptr++; + + /* max op per instr, if version >= 4 */ + if (t->version >= 4) + t->max_op_per_instr = *ptr++; + + /* default is stmt */ + t->default_is_stmt = *ptr++; + + /* line base */ + t->line_base = (*(int8_t *)ptr++); + + /* line range */ + t->line_range = *ptr++; + + /* opcode base */ + t->opcode_base = *ptr++; + + if (ptr + t->opcode_base - 1 >= endcu) + { + error (0, 0, "%s: .debug_line opcode table does not fit into CU", + dso->filename); + return false; } + lines->used++; + *table = t; + return true; +} - opcode_base = ptr[4 + (value >= 4)]; - ptr = dir = ptr + 4 + (value >= 4) + opcode_base; +static int dirty_elf; +static void +dirty_section (unsigned int sec) +{ + elf_flagdata (debug_sections[sec].elf_data, ELF_C_SET, ELF_F_DIRTY); + dirty_elf = 1; +} + +static int +line_table_cmp (const void *a, const void *b) +{ + struct line_table *ta = (struct line_table *) a; + struct line_table *tb = (struct line_table *) b; + + if (ta->old_idx < tb->old_idx) + return -1; + + if (ta->old_idx > tb->old_idx) + return 1; + + return 0; +} + + +/* Called after phase zero (which records all adjustments needed for + the line tables referenced 
from debug_info) and before phase one + starts (phase one will adjust the .debug_line section stmt + references using the updated data structures). */ +static void +edit_dwarf2_line (DSO *dso) +{ + Elf_Data *linedata = debug_sections[DEBUG_LINE].elf_data; + int linendx = debug_sections[DEBUG_LINE].sec; + Elf_Scn *linescn = dso->scn[linendx]; + unsigned char *old_buf = linedata->d_buf; + + /* Out with the old. */ + linedata->d_size = 0; + + /* In with the new. */ + linedata = elf_newdata (linescn); + + dso->lines.line_buf = malloc (dso->lines.debug_lines_len); + if (dso->lines.line_buf == NULL) + error (1, ENOMEM, "No memory for new .debug_line table (0x%zx bytes)", + dso->lines.debug_lines_len); + + linedata->d_size = dso->lines.debug_lines_len; + linedata->d_buf = dso->lines.line_buf; + debug_sections[DEBUG_LINE].size = linedata->d_size; + + /* Make sure the line tables are sorted on the old index. */ + qsort (dso->lines.table, dso->lines.used, sizeof (struct line_table), + line_table_cmp); + + unsigned char *ptr = linedata->d_buf; + for (int ldx = 0; ldx < dso->lines.used; ldx++) + { + struct line_table *t = &dso->lines.table[ldx]; + unsigned char *optr = old_buf + t->old_idx; + t->new_idx = ptr - (unsigned char *) linedata->d_buf; + + /* Just copy the whole table if nothing needs replacing. */ + if (! t->replace_dirs && ! t->replace_files) + { + assert (t->size_diff == 0); + memcpy (ptr, optr, t->unit_length + 4); + ptr += t->unit_length + 4; + continue; + } + + /* Header fields. 
*/ + write_32 (ptr, t->unit_length + t->size_diff); + write_16 (ptr, t->version); + write_32 (ptr, t->header_length + t->size_diff); + write_8 (ptr, t->min_instr_len); + if (t->version >= 4) + write_8 (ptr, t->max_op_per_instr); + write_8 (ptr, t->default_is_stmt); + write_8 (ptr, t->line_base); + write_8 (ptr, t->line_range); + write_8 (ptr, t->opcode_base); + + optr += (4 /* unit len */ + + 2 /* version */ + + 4 /* header len */ + + 1 /* min instr len */ + + (t->version >= 4) /* max op per instr, if version >= 4 */ + + 1 /* default is stmt */ + + 1 /* line base */ + + 1 /* line range */ + + 1); /* opcode base */ + + /* opcode len table. */ + memcpy (ptr, optr, t->opcode_base - 1); + optr += t->opcode_base - 1; + ptr += t->opcode_base - 1; + + /* directory table. We need to find the end (start of file + table) anyway, so loop over all dirs, even if replace_dirs is + false. */ + while (*optr != 0) + { + const char *dir = (const char *) optr; + const char *file_path = NULL; + if (t->replace_dirs) + { + file_path = skip_dir_prefix (dir, base_dir); + if (file_path != NULL) + { + size_t dest_len = strlen (dest_dir); + size_t file_len = strlen (file_path); + memcpy (ptr, dest_dir, dest_len); + ptr += dest_len; + if (file_len > 0) + { + *ptr++ = '/'; + memcpy (ptr, file_path, file_len); + ptr += file_len; + } + *ptr++ = '\0'; + } + } + if (file_path == NULL) + { + size_t dir_len = strlen (dir); + memcpy (ptr, dir, dir_len + 1); + ptr += dir_len + 1; + } + + optr = (unsigned char *) strchr (dir, 0) + 1; + } + optr++; + *ptr++ = '\0'; + + /* file table */ + if (t->replace_files) + { + while (*optr != 0) + { + const char *file = (const char *) optr; + const char *file_path = NULL; + if (t->replace_dirs) + { + file_path = skip_dir_prefix (file, base_dir); + if (file_path != NULL) + { + size_t dest_len = strlen (dest_dir); + size_t file_len = strlen (file_path); + memcpy (ptr, dest_dir, dest_len); + ptr += dest_len; + if (file_len > 0) + { + *ptr++ = '/'; + memcpy (ptr, 
file_path, file_len); + ptr += file_len; + } + *ptr++ = '\0'; + } + } + if (file_path == NULL) + { + size_t file_len = strlen (file); + memcpy (ptr, file, file_len + 1); + ptr += file_len + 1; + } + + optr = (unsigned char *) strchr (file, 0) + 1; + + /* dir idx, time, len */ + uint32_t dir_idx = read_uleb128 (optr); + write_uleb128 (ptr, dir_idx); + uint32_t time = read_uleb128 (optr); + write_uleb128 (ptr, time); + uint32_t len = read_uleb128 (optr); + write_uleb128 (ptr, len); + } + optr++; + *ptr++ = '\0'; + } + + /* line number program (and file table if not copied above). */ + size_t remaining = (t->unit_length + 4 + - (optr - (old_buf + t->old_idx))); + memcpy (ptr, optr, remaining); + ptr += remaining; + } +} + +/* Called during phase zero for each debug_line table referenced from + .debug_info. Outputs all source files seen and records any + adjustments needed in the debug_list data structures. Returns true + if line_table needs to be rewrite either the dir or file paths. */ +static bool +read_dwarf2_line (DSO *dso, uint32_t off, char *comp_dir) +{ + unsigned char *ptr, *dir; + unsigned char **dirt; + uint32_t value, dirt_cnt; + size_t comp_dir_len = !comp_dir ? 0 : strlen (comp_dir); + struct line_table *table; + + if (get_line_table (dso, off, &table) == false + || table == NULL) + { + if (table != NULL) + error (0, 0, ".debug_line offset 0x%x referenced multiple times", + off); + return false; + } + + /* Skip to the directory table. The rest of the header has already + been read and checked by get_line_table. 
*/ + ptr = debug_sections[DEBUG_LINE].data + off; + ptr += (4 /* unit len */ + + 2 /* version */ + + 4 /* header len */ + + 1 /* min instr len */ + + (table->version >= 4) /* max op per instr, if version >= 4 */ + + 1 /* default is stmt */ + + 1 /* line base */ + + 1 /* line range */ + + 1 /* opcode base */ + + table->opcode_base - 1); /* opcode len table */ + dir = ptr; /* dir table: */ value = 1; while (*ptr != 0) { + if (base_dir && dest_dir) + { + /* Do we need to replace any of the dirs? Calculate new size. */ + const char *file_path = skip_dir_prefix ((const char *)ptr, + base_dir); + if (file_path != NULL) + { + size_t old_size = strlen ((const char *)ptr) + 1; + size_t file_len = strlen (file_path); + size_t new_size = strlen (dest_dir) + 1; + if (file_len > 0) + new_size += 1 + file_len; + table->size_diff += (new_size - old_size); + table->replace_dirs = true; + } + } + ptr = (unsigned char *) strchr ((char *)ptr, 0) + 1; ++value; } @@ -561,21 +1319,34 @@ edit_dwarf2_line (DSO *dso, uint32_t off, char *comp_dir, int phase) { error (0, 0, "%s: Wrong directory table index %u", dso->filename, value); - return 1; + return false; } file_len = strlen (file); + if (base_dir && dest_dir) + { + /* Do we need to replace any of the files? Calculate new size. 
*/ + const char *file_path = skip_dir_prefix (file, base_dir); + if (file_path != NULL) + { + size_t old_size = file_len + 1; + size_t file_len = strlen (file_path); + size_t new_size = strlen (dest_dir) + 1; + if (file_len > 0) + new_size += 1 + file_len; + table->size_diff += (new_size - old_size); + table->replace_files = true; + } + } dir_len = strlen ((char *)dirt[value]); s = malloc (comp_dir_len + 1 + file_len + 1 + dir_len + 1); if (s == NULL) { error (0, ENOMEM, "%s: Reading file table", dso->filename); - return 1; + return false; } if (*file == '/') { memcpy (s, file, file_len + 1); - if (dest_dir && has_prefix (file, base_dir)) - ++abs_file_cnt; } else if (*dirt[value] == '/') { @@ -599,13 +1370,15 @@ edit_dwarf2_line (DSO *dso, uint32_t off, char *comp_dir, int phase) canonicalize_path (s, s); if (list_file_fd != -1) { - char *p = NULL; + const char *p = NULL; if (base_dir == NULL) p = s; - else if (has_prefix (s, base_dir)) - p = s + strlen (base_dir); - else if (has_prefix (s, dest_dir)) - p = s + strlen (dest_dir); + else + { + p = skip_dir_prefix (s, base_dir); + if (p == NULL && dest_dir != NULL) + p = skip_dir_prefix (s, dest_dir); + } if (p) { @@ -626,112 +1399,28 @@ edit_dwarf2_line (DSO *dso, uint32_t off, char *comp_dir, int phase) read_uleb128 (ptr); read_uleb128 (ptr); } - ++ptr; - - if (dest_dir) - { - unsigned char *srcptr, *buf = NULL; - size_t base_len = strlen (base_dir); - size_t dest_len = strlen (dest_dir); - size_t shrank = 0; - - if (dest_len == base_len) - abs_file_cnt = 0; - if (abs_file_cnt) - { - srcptr = buf = malloc (ptr - dir); - memcpy (srcptr, dir, ptr - dir); - ptr = dir; - } - else - ptr = srcptr = dir; - while (*srcptr != 0) - { - size_t len = strlen ((char *)srcptr) + 1; - const unsigned char *readptr = srcptr; - - char *orig = strdup ((const char *) srcptr); - - if (*srcptr == '/' && has_prefix ((char *)srcptr, base_dir)) - { - if (dest_len < base_len) - ++abs_dir_cnt; - memcpy (ptr, dest_dir, dest_len); - ptr += 
dest_len; - readptr += base_len; - } - srcptr += len; - shrank += srcptr - readptr; - canonicalize_path ((char *)readptr, (char *)ptr); - len = strlen ((char *)ptr) + 1; - shrank -= len; - ptr += len; - - if (memcmp (orig, ptr - len, len)) - dirty_section (DEBUG_STR); - free (orig); - } - - if (shrank > 0) - { - if (--shrank == 0) - error (EXIT_FAILURE, 0, - "canonicalization unexpectedly shrank by one character"); - else - { - memset (ptr, 'X', shrank); - ptr += shrank; - *ptr++ = '\0'; - } - } - - if (abs_dir_cnt + abs_file_cnt != 0) - { - size_t len = (abs_dir_cnt + abs_file_cnt) * (base_len - dest_len); - - if (len == 1) - error (EXIT_FAILURE, 0, "-b arg has to be either the same length as -d arg, or more than 1 char longer"); - memset (ptr, 'X', len - 1); - ptr += len - 1; - *ptr++ = '\0'; - } - *ptr++ = '\0'; - ++srcptr; - - while (*srcptr != 0) - { - size_t len = strlen ((char *)srcptr) + 1; + dso->lines.debug_lines_len += 4 + table->unit_length + table->size_diff; + return table->replace_dirs || table->replace_files; +} - if (*srcptr == '/' && has_prefix ((char *)srcptr, base_dir)) - { - memcpy (ptr, dest_dir, dest_len); - if (dest_len < base_len) - { - memmove (ptr + dest_len, srcptr + base_len, - len - base_len); - ptr += dest_len - base_len; - } - dirty_section (DEBUG_STR); - } - else if (ptr != srcptr) - memmove (ptr, srcptr, len); - srcptr += len; - ptr += len; - dir = srcptr; - read_uleb128 (srcptr); - read_uleb128 (srcptr); - read_uleb128 (srcptr); - if (ptr != dir) - memmove (ptr, dir, srcptr - dir); - ptr += srcptr - dir; - } - *ptr = '\0'; - free (buf); - } - return 0; +/* Called during phase one, after the table has been sorted. 
*/ +static size_t +find_new_list_offs (struct debug_lines *lines, size_t idx) +{ + struct line_table key; + key.old_idx = idx; + struct line_table *table = bsearch (&key, lines->table, + lines->used, + sizeof (struct line_table), + line_table_cmp); + return table->new_idx; } +/* This scans the attributes of one DIE described by the given abbrev_tag. + PTR points to the data in the debug_info. It will be advanced till all + abbrev data is consumed. In phase zero data is collected, in phase one + data might be replaced/updated. */ static unsigned char * edit_attributes (DSO *dso, unsigned char *ptr, struct abbrev_tag *t, int phase) { @@ -747,20 +1436,36 @@ edit_attributes (DSO *dso, unsigned char *ptr, struct abbrev_tag *t, int phase) { uint32_t form = t->attr[i].form; size_t len = 0; - size_t base_len, dest_len; - while (1) { + /* Whether we already handled a string as file for this + attribute. If we did then we don't need to handle/record + it again when handling the DW_FORM_strp later. */ + bool handled_strp = false; + + /* A stmt_list points into the .debug_line section. In + phase zero record all offsets. Then in phase one replace + them with the new offsets if we rewrote the line + tables. */ if (t->attr[i].attr == DW_AT_stmt_list) { if (form == DW_FORM_data4 || form == DW_FORM_sec_offset) { list_offs = do_read_32_relocated (ptr); - found_list_offs = 1; + if (phase == 0) + found_list_offs = 1; + else if (need_stmt_update) /* phase one */ + { + size_t idx, new_idx; + idx = do_read_32_relocated (ptr); + new_idx = find_new_list_offs (&dso->lines, idx); + do_write_32_relocated (ptr, new_idx); + } } } + /* DW_AT_comp_dir is the current working directory. 
*/ if (t->attr[i].attr == DW_AT_comp_dir) { if (form == DW_FORM_string) @@ -768,44 +1473,65 @@ edit_attributes (DSO *dso, unsigned char *ptr, struct abbrev_tag *t, int phase) free (comp_dir); comp_dir = strdup ((char *)ptr); - if (phase == 1 && dest_dir && has_prefix ((char *)ptr, base_dir)) + if (dest_dir) { - base_len = strlen (base_dir); - dest_len = strlen (dest_dir); - - memcpy (ptr, dest_dir, dest_len); - if (dest_len < base_len) + /* In phase zero we are just collecting dir/file + names and check whether any need to be + adjusted. If so, in phase one we replace + those dir/files. */ + const char *file = skip_dir_prefix (comp_dir, base_dir); + if (file != NULL && phase == 0) + need_string_replacement = true; + else if (file != NULL && phase == 1) { - memset(ptr + dest_len, '/', - base_len - dest_len); - + size_t orig_len = strlen (comp_dir); + size_t dest_len = strlen (dest_dir); + size_t file_len = strlen (file); + size_t new_len = dest_len; + if (file_len > 0) + new_len += 1 + file_len; /* + '/' */ + + /* We don't want to rewrite the whole + debug_info section, so we only replace + the comp_dir with something equal or + smaller, possibly adding some slashes + at the end of the new compdir. This + normally doesn't happen since most + producers will use DW_FORM_strp which is + more efficient. */ + if (orig_len < new_len) + fprintf (stderr, "Warning, not replacing comp_dir " + "'%s' prefix ('%s' -> '%s') encoded as " + "DW_FORM_string. " + "Replacement too large.\n", + comp_dir, base_dir, dest_dir); + else + { + /* Add one or more slashes in between to + fill up all space (replacement must be + of the same length). 
*/ + memcpy (ptr, dest_dir, dest_len); + memset (ptr + dest_len, '/', + orig_len - new_len + 1); + } } - dirty_section (DEBUG_INFO); } } else if (form == DW_FORM_strp && debug_sections[DEBUG_STR].data) { - char *dir; - - dir = (char *) debug_sections[DEBUG_STR].data - + do_read_32_relocated (ptr); + const char *dir; + size_t idx = do_read_32_relocated (ptr); + dir = (char *) debug_sections[DEBUG_STR].data + idx; free (comp_dir); comp_dir = strdup (dir); - if (phase == 1 && dest_dir && has_prefix (dir, base_dir)) + if (dest_dir != NULL && phase == 0) { - base_len = strlen (base_dir); - dest_len = strlen (dest_dir); - - memcpy (dir, dest_dir, dest_len); - if (dest_len < base_len) - { - memmove (dir + dest_len, dir + base_len, - strlen (dir + base_len) + 1); - } - dirty_section (DEBUG_STR); + if (record_file_string_entry_idx (&dso->strings, idx)) + need_strp_update = true; + handled_strp = true; } } } @@ -815,10 +1541,13 @@ edit_attributes (DSO *dso, unsigned char *ptr, struct abbrev_tag *t, int phase) && form == DW_FORM_strp && debug_sections[DEBUG_STR].data) { + /* DW_AT_name is the primary file for this compile + unit. If starting with / it is a full path name. + Note that we don't handle DW_FORM_string in this + case. */ char *name; - - name = (char *) debug_sections[DEBUG_STR].data - + do_read_32_relocated (ptr); + size_t idx = do_read_32_relocated (ptr); + name = (char *) debug_sections[DEBUG_STR].data + idx; if (*name == '/' && comp_dir == NULL) { char *enddir = strrchr (name, '/'); @@ -833,18 +1562,14 @@ edit_attributes (DSO *dso, unsigned char *ptr, struct abbrev_tag *t, int phase) comp_dir = strdup ("/"); } - if (phase == 1 && dest_dir && has_prefix (name, base_dir)) + /* First pass (0) records the new name to be + added to the debug string pool, the second + pass (1) stores it (the new index). 
*/ + if (dest_dir && phase == 0) { - base_len = strlen (base_dir); - dest_len = strlen (dest_dir); - - memcpy (name, dest_dir, dest_len); - if (dest_len < base_len) - { - memmove (name + dest_len, name + base_len, - strlen (name + base_len) + 1); - } - dirty_section (DEBUG_STR); + if (record_file_string_entry_idx (&dso->strings, idx)) + need_strp_update = true; + handled_strp = true; } } @@ -886,6 +1611,29 @@ edit_attributes (DSO *dso, unsigned char *ptr, struct abbrev_tag *t, int phase) read_uleb128 (ptr); break; case DW_FORM_strp: + /* In the first pass we collect all strings, in the + second we put the new references back (if there are + any changes). */ + if (phase == 0) + { + /* handled_strp is set for attributes referring to + files. If it is set the string is already + recorded. */ + if (! handled_strp) + { + size_t idx = do_read_32_relocated (ptr); + record_existing_string_entry_idx (&dso->strings, idx); + } + } + else if (need_strp_update) /* && phase == 1 */ + { + struct stridxentry *entry; + size_t idx, new_idx; + idx = do_read_32_relocated (ptr); + entry = string_find_entry (&dso->strings, idx); + new_idx = strent_offset (entry->entry); + do_write_32_relocated (ptr, new_idx); + } ptr += 4; break; case DW_FORM_string: @@ -930,14 +1678,17 @@ edit_attributes (DSO *dso, unsigned char *ptr, struct abbrev_tag *t, int phase) CU current dir subdirectories. 
*/ if (comp_dir && list_file_fd != -1) { - char *p; + const char *p = NULL; size_t size; - if (base_dir && has_prefix (comp_dir, base_dir)) - p = comp_dir + strlen (base_dir); - else if (dest_dir && has_prefix (comp_dir, dest_dir)) - p = comp_dir + strlen (dest_dir); - else + if (base_dir) + { + p = skip_dir_prefix (comp_dir, base_dir); + if (p == NULL && dest_dir != NULL) + p = skip_dir_prefix (comp_dir, dest_dir); + } + + if (p == NULL) p = comp_dir; size = strlen (p) + 1; @@ -951,8 +1702,13 @@ edit_attributes (DSO *dso, unsigned char *ptr, struct abbrev_tag *t, int phase) } } - if (found_list_offs) - edit_dwarf2_line (dso, list_offs, comp_dir, phase); + /* In phase zero we collect all file names (we need the comp_dir for + that). Note that calculating the new size and offsets is done + separately (at the end of phase zero after all CUs have been + scanned in dwarf2_edit). */ + if (phase == 0 && found_list_offs + && read_dwarf2_line (dso, list_offs, comp_dir)) + need_stmt_update = true; free (comp_dir); @@ -974,6 +1730,20 @@ rel_cmp (const void *a, const void *b) } static int +line_rel_cmp (const void *a, const void *b) +{ + LINE_REL *rela = (LINE_REL *) a, *relb = (LINE_REL *) b; + + if (rela->r_offset < relb->r_offset) + return -1; + + if (rela->r_offset > relb->r_offset) + return 1; + + return 0; +} + +static int edit_dwarf2 (DSO *dso) { Elf_Data *data; @@ -1009,9 +1779,9 @@ edit_dwarf2 (DSO *dso) } scn = dso->scn[i]; - data = elf_rawdata (scn, NULL); + data = elf_getdata (scn, NULL); assert (data != NULL && data->d_buf != NULL); - assert (elf_rawdata (scn, data) == NULL); + assert (elf_getdata (scn, data) == NULL); assert (data->d_off == 0); assert (data->d_size == dso->shdr[i].sh_size); debug_sections[j].data = data->d_buf; @@ -1050,13 +1820,15 @@ edit_dwarf2 (DSO *dso) { do_read_16 = buf_read_ule16; do_read_32 = buf_read_ule32; - write_32 = dwarf2_write_le32; + do_write_16 = dwarf2_write_le16; + do_write_32 = dwarf2_write_le32; } else if 
(dso->ehdr.e_ident[EI_DATA] == ELFDATA2MSB) { do_read_16 = buf_read_ube16; do_read_32 = buf_read_ube32; - write_32 = dwarf2_write_be32; + do_write_16 = dwarf2_write_be16; + do_write_32 = dwarf2_write_be32; } else { @@ -1179,6 +1951,7 @@ edit_dwarf2 (DSO *dso) relend->ptr = debug_sections[DEBUG_INFO].data + (rela.r_offset - base); relend->addend = rela.r_addend; + relend->ndx = ndx; ++relend; } if (relbuf == relend) @@ -1193,6 +1966,13 @@ edit_dwarf2 (DSO *dso) for (phase = 0; phase < 2; phase++) { + /* If we don't need to update anything, skip phase 1. */ + if (phase == 1 + && !need_strp_update + && !need_string_replacement + && !need_stmt_update) + break; + ptr = debug_sections[DEBUG_INFO].data; relptr = relbuf; endsec = ptr + debug_sections[DEBUG_INFO].size; @@ -1240,7 +2020,7 @@ edit_dwarf2 (DSO *dso) if (ptr_size == 0) { - ptr_size = read_1 (ptr); + ptr_size = read_8 (ptr); if (ptr_size != 4 && ptr_size != 8) { error (0, 0, "%s: Invalid DWARF pointer size %d", @@ -1248,7 +2028,7 @@ edit_dwarf2 (DSO *dso) return 1; } } - else if (read_1 (ptr) != ptr_size) + else if (read_8 (ptr) != ptr_size) { error (0, 0, "%s: DWARF pointer size differs between CUs", dso->filename); @@ -1281,7 +2061,185 @@ edit_dwarf2 (DSO *dso) htab_delete (abbrev); } + + /* We might have to recalculate/rewrite the debug_line + section. We need to do that before going into phase one + so we have all new offsets. We do this separately from + scanning the dirs/file names because the DW_AT_stmt_lists + might not be in order or skip some padding we might have + to (re)move. */ + if (phase == 0 && need_stmt_update) + { + edit_dwarf2_line (dso); + + /* The line table programs will be moved + forward/backwards a bit in the new data. Update the + debug_line relocations to the new offsets. 
*/ + int rndx = debug_sections[DEBUG_LINE].relsec; + if (rndx != 0) + { + LINE_REL *rbuf; + size_t rels; + Elf_Data *rdata = elf_getdata (dso->scn[rndx], NULL); + int rtype = dso->shdr[rndx].sh_type; + rels = dso->shdr[rndx].sh_size / dso->shdr[rndx].sh_entsize; + rbuf = malloc (rels * sizeof (LINE_REL)); + if (rbuf == NULL) + error (1, errno, "%s: Could not allocate line relocations", + dso->filename); + + /* Sort them by offset into section. */ + for (size_t i = 0; i < rels; i++) + { + if (rtype == SHT_RELA) + { + GElf_Rela rela; + if (gelf_getrela (rdata, i, &rela) == NULL) + error (1, 0, "Couldn't get relocation: %s", + elf_errmsg (-1)); + rbuf[i].r_offset = rela.r_offset; + rbuf[i].ndx = i; + } + else + { + GElf_Rel rel; + if (gelf_getrel (rdata, i, &rel) == NULL) + error (1, 0, "Couldn't get relocation: %s", + elf_errmsg (-1)); + rbuf[i].r_offset = rel.r_offset; + rbuf[i].ndx = i; + } + } + qsort (rbuf, rels, sizeof (LINE_REL), line_rel_cmp); + + size_t lndx = 0; + for (size_t i = 0; i < rels; i++) + { + /* These relocations only happen in ET_REL files + and are section offsets. */ + GElf_Addr r_offset; + size_t ndx = rbuf[i].ndx; + + GElf_Rel rel; + GElf_Rela rela; + if (rtype == SHT_RELA) + { + if (gelf_getrela (rdata, ndx, &rela) == NULL) + error (1, 0, "Couldn't get relocation: %s", + elf_errmsg (-1)); + r_offset = rela.r_offset; + } + else + { + if (gelf_getrel (rdata, ndx, &rel) == NULL) + error (1, 0, "Couldn't get relocation: %s", + elf_errmsg (-1)); + r_offset = rel.r_offset; + } + + while (r_offset > (dso->lines.table[lndx].old_idx + + 4 + + dso->lines.table[lndx].unit_length) + && lndx < dso->lines.used) + lndx++; + + if (lndx >= dso->lines.used) + error (1, 0, + ".debug_line relocation offset out of range"); + + /* Offset (pointing into the line program) moves + from old to new index including the header + size diff. 
*/ + r_offset += ((dso->lines.table[lndx].new_idx + - dso->lines.table[lndx].old_idx) + + dso->lines.table[lndx].size_diff); + + if (rtype == SHT_RELA) + { + rela.r_offset = r_offset; + if (gelf_update_rela (rdata, ndx, &rela) == 0) + error (1, 0, "Couldn't update relocation: %s", + elf_errmsg (-1)); + } + else + { + rel.r_offset = r_offset; + if (gelf_update_rel (rdata, ndx, &rel) == 0) + error (1, 0, "Couldn't update relocation: %s", + elf_errmsg (-1)); + } + } + + elf_flagdata (rdata, ELF_C_SET, ELF_F_DIRTY); + free (rbuf); + } + } + + /* Same for the debug_str section. Make sure everything is + in place for phase 1 updating of debug_info + references. */ + if (phase == 0 && need_strp_update) + { + Strtab *strtab = dso->strings.str_tab; + Elf_Data *strdata = debug_sections[DEBUG_STR].elf_data; + int strndx = debug_sections[DEBUG_STR].sec; + Elf_Scn *strscn = dso->scn[strndx]; + + /* Out with the old. */ + strdata->d_size = 0; + /* In with the new. */ + strdata = elf_newdata (strscn); + + /* We really should check whether we had enough memory, + but the old ebl version will just abort on out of + memory... */ + strtab_finalize (strtab, strdata); + debug_sections[DEBUG_STR].size = strdata->d_size; + dso->strings.str_buf = strdata->d_buf; + } + + } + + /* After phase 1 we might have rewritten the debug_info with + new strp, strings and/or linep offsets. */ + if (need_strp_update || need_string_replacement || need_stmt_update) + dirty_section (DEBUG_INFO); + + /* Update any debug_info relocations addends we might have touched. 
*/ + if (relbuf != NULL && reltype == SHT_RELA) + { + Elf_Data *symdata; + int relsec_ndx = debug_sections[DEBUG_INFO].relsec; + data = elf_getdata (dso->scn[relsec_ndx], NULL); + symdata = elf_getdata (dso->scn[dso->shdr[relsec_ndx].sh_link], + NULL); + + relptr = relbuf; + while (relptr < relend) + { + GElf_Sym sym; + GElf_Rela rela; + int ndx = relptr->ndx; + + if (gelf_getrela (data, ndx, &rela) == NULL) + error (1, 0, "Couldn't get relocation: %s", + elf_errmsg (-1)); + + if (gelf_getsym (symdata, GELF_R_SYM (rela.r_info), + &sym) == NULL) + error (1, 0, "Couldn't get symbol: %s", elf_errmsg (-1)); + + rela.r_addend = relptr->addend - sym.st_value; + + if (gelf_update_rela (data, ndx, &rela) == 0) + error (1, 0, "Couldn't update relocations: %s", + elf_errmsg (-1)); + + ++relptr; + } + elf_flagdata (data, ELF_C_SET, ELF_F_DIRTY); } + free (relbuf); } @@ -1310,8 +2268,9 @@ fdopen_dso (int fd, const char *name) GElf_Ehdr ehdr; int i; DSO *dso = NULL; + size_t phnum; - elf = elf_begin (fd, ELF_C_RDWR_MMAP, NULL); + elf = elf_begin (fd, ELF_C_RDWR, NULL); if (elf == NULL) { error (0, 0, "cannot open ELF file: %s", elf_errmsg (-1)); @@ -1348,10 +2307,20 @@ fdopen_dso (int fd, const char *name) goto error_out; } - elf_flagelf (elf, ELF_C_SET, ELF_F_LAYOUT); + if (elf_getphdrnum (elf, &phnum) != 0) + { + error (0, 0, "Couldn't get number of phdrs: %s", elf_errmsg (-1)); + goto error_out; + } + + /* If there are phdrs we want to maintain the layout of the + allocated sections in the file. 
*/ + if (phnum != 0) + elf_flagelf (elf, ELF_C_SET, ELF_F_LAYOUT); memset (dso, 0, sizeof(DSO)); dso->elf = elf; + dso->phnum = phnum; dso->ehdr = ehdr; dso->scn = (Elf_Scn **) &dso->shdr[ehdr.e_shnum + 20]; @@ -1362,12 +2331,16 @@ fdopen_dso (int fd, const char *name) } dso->filename = (const char *) strdup (name); + setup_strings (&dso->strings); + setup_lines (&dso->lines); return dso; error_out: if (dso) { free ((char *) dso->filename); + destroy_strings (&dso->strings); + destroy_lines (&dso->lines); free (dso); } if (elf) @@ -1406,13 +2379,6 @@ handle_build_id (DSO *dso, Elf_Data *build_id, if (!dirty_elf && build_id_seed == NULL) goto print; - if (elf_update (dso->elf, ELF_C_NULL) < 0) - { - fprintf (stderr, "Failed to update file: %s\n", - elf_errmsg (elf_errno ())); - exit (1); - } - /* Clear the old bits so they do not affect the new hash. */ memset ((char *) build_id->d_buf + build_id_offset, 0, build_id_size); @@ -1475,7 +2441,7 @@ handle_build_id (DSO *dso, Elf_Data *build_id, if (u.shdr.sh_type != SHT_NOBITS) { - Elf_Data *d = elf_rawdata (dso->scn[i], NULL); + Elf_Data *d = elf_getdata (dso->scn[i], NULL); if (d == NULL) goto bad; rpmDigestUpdate(ctx, d->d_buf, d->d_size); @@ -1509,7 +2475,6 @@ main (int argc, char *argv[]) int nextopt; const char **args; struct stat stat_buf; - char *p; Elf_Data *build_id = NULL; size_t build_id_offset = 0, build_id_size = 0; @@ -1541,11 +2506,6 @@ main (int argc, char *argv[]) fprintf (stderr, "You must specify a base dir if you specify a dest dir\n"); exit (1); } - if (strlen (dest_dir) > strlen (base_dir)) - { - fprintf (stderr, "Dest dir longer than base dir is not supported\n"); - exit (1); - } } if (build_id_seed != NULL && do_build_id == 0) @@ -1561,30 +2521,13 @@ main (int argc, char *argv[]) exit (1); } - /* Ensure clean paths, users can muck with these */ + /* Ensure clean paths, users can muck with these. Also removes any + trailing '/' from the paths. 
*/ if (base_dir) canonicalize_path(base_dir, base_dir); if (dest_dir) canonicalize_path(dest_dir, dest_dir); - /* Make sure there are trailing slashes in dirs */ - if (base_dir != NULL && base_dir[strlen (base_dir)-1] != '/') - { - p = malloc (strlen (base_dir) + 2); - strcpy (p, base_dir); - strcat (p, "/"); - free (base_dir); - base_dir = p; - } - if (dest_dir != NULL && dest_dir[strlen (dest_dir)-1] != '/') - { - p = malloc (strlen (dest_dir) + 2); - strcpy (p, dest_dir); - strcat (p, "/"); - free (dest_dir); - dest_dir = p; - } - if (list_file != NULL) { list_file_fd = open (list_file, O_WRONLY|O_CREAT|O_APPEND, 0644); @@ -1641,7 +2584,7 @@ main (int argc, char *argv[]) && build_id == NULL && (dso->shdr[i].sh_flags & SHF_ALLOC)) { /* Look for a build-ID note here. */ - Elf_Data *data = elf_rawdata (elf_getscn (dso->elf, i), NULL); + Elf_Data *data = elf_getdata (elf_getscn (dso->elf, i), NULL); Elf32_Nhdr nh; Elf_Data dst = { @@ -1679,6 +2622,123 @@ main (int argc, char *argv[]) } } + /* We might have changed the size of some debug sections. If so make + sure the section headers are updated and the data offsets are + correct. We set ELF_F_LAYOUT above because we don't want libelf + to move any allocated sections around itself if there are any + phdrs. Which means we are responsible for setting the section size + and offset fields. Plus the shdr offsets. We don't want to change + anything for the phdrs allocated sections. Keep the offset of + allocated sections so they are at the same place in the file. Add + unallocated ones after the allocated ones. */ + if (dso->phnum != 0 && (need_strp_update || need_stmt_update)) + { + Elf *elf = dso->elf; + GElf_Off last_offset; + /* We position everything after the phdrs (which normally would + be at the start of the ELF file after the ELF header). */ + last_offset = (dso->ehdr.e_phoff + gelf_fsize (elf, ELF_T_PHDR, + dso->phnum, EV_CURRENT)); + + /* First find the last allocated section. 
*/ + Elf_Scn *scn = NULL; + while ((scn = elf_nextscn (elf, scn)) != NULL) + { + GElf_Shdr shdr_mem; + GElf_Shdr *shdr = gelf_getshdr (scn, &shdr_mem); + if (shdr == NULL) + error (1, 0, "Couldn't get shdr: %s\n", elf_errmsg (-1)); + + /* Any sections we have changed aren't allocated sections, + so we don't need to lookup any changed section sizes. */ + if ((shdr->sh_flags & SHF_ALLOC) != 0) + { + GElf_Off off = shdr->sh_offset + (shdr->sh_type != SHT_NOBITS + ? shdr->sh_size : 0); + if (last_offset < off) + last_offset = off; + } + } + + /* Now adjust any sizes and offsets for the unallocated sections. */ + scn = NULL; + while ((scn = elf_nextscn (elf, scn)) != NULL) + { + GElf_Shdr shdr_mem; + GElf_Shdr *shdr = gelf_getshdr (scn, &shdr_mem); + if (shdr == NULL) + error (1, 0, "Couldn't get shdr: %s\n", elf_errmsg (-1)); + + /* A bug in elfutils before 0.169 means we have to write out + all section data, even when nothing changed. + https://sourceware.org/bugzilla/show_bug.cgi?id=21199 */ +#if !_ELFUTILS_PREREQ (0, 169) + if (shdr->sh_type != SHT_NOBITS) + { + Elf_Data *d = elf_getdata (scn, NULL); + elf_flagdata (d, ELF_C_SET, ELF_F_DIRTY); + } +#endif + if ((shdr->sh_flags & SHF_ALLOC) == 0) + { + GElf_Off sec_offset = shdr->sh_offset; + GElf_Xword sec_size = shdr->sh_size; + + /* We might have changed the size (and content) of the + debug_str or debug_line section. */ + size_t secnum = elf_ndxscn (scn); + if (secnum == debug_sections[DEBUG_STR].sec) + sec_size = debug_sections[DEBUG_STR].size; + if (secnum == debug_sections[DEBUG_LINE].sec) + sec_size = debug_sections[DEBUG_LINE].size; + + /* Zero means one. No alignment constraints. 
*/ + size_t addralign = shdr->sh_addralign ?: 1; + last_offset = (last_offset + addralign - 1) & ~(addralign - 1); + sec_offset = last_offset; + if (shdr->sh_type != SHT_NOBITS) + last_offset += sec_size; + + if (shdr->sh_size != sec_size + || shdr->sh_offset != sec_offset) + { + /* Make sure unchanged section data is written out + at the new location. */ + if (shdr->sh_offset != sec_offset + && shdr->sh_type != SHT_NOBITS) + { + Elf_Data *d = elf_getdata (scn, NULL); + elf_flagdata (d, ELF_C_SET, ELF_F_DIRTY); + } + + shdr->sh_size = sec_size; + shdr->sh_offset = sec_offset; + if (gelf_update_shdr (scn, shdr) == 0) + error (1, 0, "Couldn't update shdr: %s\n", + elf_errmsg (-1)); + } + } + } + + /* Position the shdrs after the last (unallocated) section. */ + const size_t offsize = gelf_fsize (elf, ELF_T_OFF, 1, EV_CURRENT); + GElf_Off new_offset = ((last_offset + offsize - 1) + & ~((GElf_Off) (offsize - 1))); + if (dso->ehdr.e_shoff != new_offset) + { + dso->ehdr.e_shoff = new_offset; + if (gelf_update_ehdr (elf, &dso->ehdr) == 0) + error (1, 0, "Couldn't update ehdr: %s\n", elf_errmsg (-1)); + } + } + + if (elf_update (dso->elf, ELF_C_NULL) < 0) + { + fprintf (stderr, "Failed to update file: %s\n", + elf_errmsg (elf_errno ())); + exit (1); + } + if (do_build_id && build_id != NULL) handle_build_id (dso, build_id, build_id_offset, build_id_size); @@ -1697,6 +2757,11 @@ main (int argc, char *argv[]) /* Restore old access rights */ chmod (file, stat_buf.st_mode); + free ((char *) dso->filename); + destroy_strings (&dso->strings); + destroy_lines (&dso->lines); + free (dso); + poptFreeContext (optCon); return 0; -- 2.13.2