Blob Blame History Raw
From FEDORA_PATCHES Mon Sep 17 00:00:00 2001
From: Sergio Durigan Junior <sergiodj@redhat.com>
Date: Fri, 11 Jan 2019 10:52:17 -0500
Subject: 
 gdb-rhbz1560010-fix-assertion-symbol-language-dict-language-1of5.patch

;; Fix 'Assertion `SYMBOL_LANGUAGE (sym) == DICT_LANGUAGE (dict)->la_language' failed.'
;; Keith Seitz, RHBZ#1560010.

gdb/23712: Introduce multidictionary's

gdb/23712 is a new manifestation of the now-infamous (at least to me)
symtab/23010 assertion failure (DICT_LANGUAGE == SYMBOL_LANGAUGE).

An example of the problem (using test case from symtab/23010):

Reading symbols from /home/rdiez/rdiez/arduino/JtagDue/BuildOutput/JtagDue-obj-release/firmware.elf...done.
(gdb) p SysTick_Handler
dwarf2read.c:9715: internal-error: void dw2_add_symbol_to_list(symbol*, pending**): Assertion `(*listhead) == NULL || (SYMBOL_LANGUAGE ((*listhead)->symbol[0]) == SYMBOL_LANGUAGE (symbol))' failed.
A problem internal to GDB has been detected,
further debugging may prove unreliable.
Quit this debugging session? (y or n)

This assertion was added specifically to catch this condition (of adding
symbols of different languages to a single pending list).

The problems we're now seeing on systems utilizing DWARF debugging seem to
be caused by the use of LTO, which adds a CU with an artificial DIE of
language C99 which references DIEs in other CUs of language C++.

Thus, we create a dictionary containing symbols of C99 but end up
stuffing C++ symbols into it, and the dw2_add_symbol_to_list triggers.

The approach taken here to fix this is to introduce multi-language
dictionaries to "replace" the standard, single-language dictionaries
used today.

Note to reviewers: This patch introduces some temporary functions to
aide with review.  This and other artifacts (such as "See dictionary.h"
which appear incorrect) will all be valid at the end of the series.

This first patch introduces the new multidictionary and its API (which
is, by design, identical to the old dictionary interface).  It also
mutates dict_create_hashed and dict_create_linear so that they take
a std::vector instead of the usual struct pending linked list.  This will
be needed later on.

This patch does /not/ actually enable multidictionary's.  That is left
for a subsequent patch in the series.

I've done exhaustive performance testing with this approach, and I've
attempted to minimize the overhead for the (overwhelmingly) most common
one-language scenario.

On average, a -g3 -O0 GDB (the one we developers use) will see
approximately a 4% slowdown when initially reading symbols. [I've
tested only GDB and firefox with -readnow.]  When using -O2, this
difference shrinks to ~0.5%.  Since a number of runs with these
patches actually run /faster/ than unpatched GDB, I conclude that
these tests have at least a 0.5% error margin.

On our own gdb.perf test suite, again, results appear to be pretty
negligible.  Differences to unpatched GDB range from -7.8% (yes,
patched version is again faster than unpatched) to 27%.  All tests
lying outside "negligible," such as the 27% slowdown, involve a total
run time of 0.0007 (or less) with smaller numbers of CUs/DSOs (usually 10
or 100).  In all cases, the follow-up tests with more CUs/DSOs is never
more than 3% difference to the baseline, unpatched GDB.

In my opinion, these results are satisfactory.

gdb/ChangeLog:

	PR gdb/23712
	PR symtab/23010
	* dictionary.c: Include unordered_map.
	(pending_to_vector): New function.
	(dict_create_hashed_1, dict_create_linear_1, dict_add_pending_1):
	Rewrite the non-"_1" functions to take vector instead
	of linked list.
	(dict_create_hashed, dict_create_linear, dict_add_pending): Use the
	"new" _1 versions of the same name.
	(multidictionary): Define.
	(std::hash<enum language): New definition.
	(collate_pending_symbols_by_language, mdict_create_hashed)
	(mdict_create_hashed_expandable, mdict_create_linear)
	(mdict_create_linear_expandable, mdict_free)
	(find_language_dictionary, create_new_language_dictionary)
	(mdict_add_symbol, mdict_add_pending, mdict_iterator_first)
	(mdict_iterator_next, mdict_iter_match_first, mdict_iter_match_next)
	(mdict_size, mdict_empty): New functions.
	* dictionary.h (mdict_iterator): Define.

diff --git a/gdb/dictionary.c b/gdb/dictionary.c
--- a/gdb/dictionary.c
+++ b/gdb/dictionary.c
@@ -27,6 +27,7 @@
 #include "buildsym.h"
 #include "dictionary.h"
 #include "safe-ctype.h"
+#include <unordered_map>
 
 /* This file implements dictionaries, which are tables that associate
    symbols to names.  They are represented by an opaque type 'struct
@@ -341,53 +342,66 @@ static void insert_symbol_hashed (struct dictionary *dict,
 
 static void expand_hashtable (struct dictionary *dict);
 
+/* A function to convert a linked list into a vector.  */
+
+static std::vector<symbol *>
+pending_to_vector (const struct pending *symbol_list)
+{
+  std::vector<symbol *> symlist;
+
+  for (const struct pending *list_counter = symbol_list;
+       list_counter != nullptr; list_counter = list_counter->next)
+    {
+      for (int i = list_counter->nsyms - 1; i >= 0; --i)
+	symlist.push_back (list_counter->symbol[i]);
+    }
+
+  return symlist;
+}
+
 /* The creation functions.  */
 
-/* See dictionary.h.  */
+/* A function to transition dict_create_hashed to new API.  */
 
-struct dictionary *
-dict_create_hashed (struct obstack *obstack,
-		    enum language language,
-		    const struct pending *symbol_list)
+static struct dictionary *
+dict_create_hashed_1 (struct obstack *obstack,
+		      enum language language,
+		      const std::vector<symbol *> &symbol_list)
 {
-  struct dictionary *retval;
-  int nsyms = 0, nbuckets, i;
-  struct symbol **buckets;
-  const struct pending *list_counter;
-
-  retval = XOBNEW (obstack, struct dictionary);
+  /* Allocate the dictionary.  */
+  struct dictionary *retval = XOBNEW (obstack, struct dictionary);
   DICT_VECTOR (retval) = &dict_hashed_vector;
   DICT_LANGUAGE (retval) = language_def (language);
 
-  /* Calculate the number of symbols, and allocate space for them.  */
-  for (list_counter = symbol_list;
-       list_counter != NULL;
-       list_counter = list_counter->next)
-    {
-      nsyms += list_counter->nsyms;
-    }
-  nbuckets = DICT_HASHTABLE_SIZE (nsyms);
+  /* Allocate space for symbols.  */
+  int nsyms = symbol_list.size ();
+  int nbuckets = DICT_HASHTABLE_SIZE (nsyms);
   DICT_HASHED_NBUCKETS (retval) = nbuckets;
-  buckets = XOBNEWVEC (obstack, struct symbol *, nbuckets);
+  struct symbol **buckets = XOBNEWVEC (obstack, struct symbol *, nbuckets);
   memset (buckets, 0, nbuckets * sizeof (struct symbol *));
   DICT_HASHED_BUCKETS (retval) = buckets;
 
   /* Now fill the buckets.  */
-  for (list_counter = symbol_list;
-       list_counter != NULL;
-       list_counter = list_counter->next)
-    {
-      for (i = list_counter->nsyms - 1; i >= 0; --i)
-	{
-	  insert_symbol_hashed (retval, list_counter->symbol[i]);
-	}
-    }
+  for (const auto &sym : symbol_list)
+    insert_symbol_hashed (retval, sym);
 
   return retval;
 }
 
 /* See dictionary.h.  */
 
+struct dictionary *
+dict_create_hashed (struct obstack *obstack,
+		    enum language language,
+		    const struct pending *symbol_list)
+{
+  std::vector<symbol *> symlist = pending_to_vector (symbol_list);
+
+  return dict_create_hashed_1 (obstack, language, symlist);
+}
+
+/* See dictionary.h.  */
+
 extern struct dictionary *
 dict_create_hashed_expandable (enum language language)
 {
@@ -403,52 +417,45 @@ dict_create_hashed_expandable (enum language language)
   return retval;
 }
 
-/* See dictionary.h.  */
+/* A function to transition dict_create_linear to new API.  */
 
-struct dictionary *
-dict_create_linear (struct obstack *obstack,
-		    enum language language,
-		    const struct pending *symbol_list)
+static struct dictionary *
+dict_create_linear_1 (struct obstack *obstack,
+		      enum language language,
+		      const std::vector<symbol *> &symbol_list)
 {
-  struct dictionary *retval;
-  int nsyms = 0, i, j;
-  struct symbol **syms;
-  const struct pending *list_counter;
-
-  retval = XOBNEW (obstack, struct dictionary);
+  struct dictionary *retval = XOBNEW (obstack, struct dictionary);
   DICT_VECTOR (retval) = &dict_linear_vector;
   DICT_LANGUAGE (retval) = language_def (language);
 
-  /* Calculate the number of symbols, and allocate space for them.  */
-  for (list_counter = symbol_list;
-       list_counter != NULL;
-       list_counter = list_counter->next)
-    {
-      nsyms += list_counter->nsyms;
-    }
+  /* Allocate space for symbols.  */
+  int nsyms = symbol_list.size ();
   DICT_LINEAR_NSYMS (retval) = nsyms;
-  syms = XOBNEWVEC (obstack, struct symbol *, nsyms );
+  struct symbol **syms = XOBNEWVEC (obstack, struct symbol *, nsyms);
   DICT_LINEAR_SYMS (retval) = syms;
 
-  /* Now fill in the symbols.  Start filling in from the back, so as
-     to preserve the original order of the symbols.  */
-  for (list_counter = symbol_list, j = nsyms - 1;
-       list_counter != NULL;
-       list_counter = list_counter->next)
-    {
-      for (i = list_counter->nsyms - 1;
-	   i >= 0;
-	   --i, --j)
-	{
-	  syms[j] = list_counter->symbol[i];
-	}
-    }
+  /* Now fill in the symbols.  */
+  int idx = nsyms - 1;
+  for (const auto &sym : symbol_list)
+    syms[idx--] = sym;
 
   return retval;
 }
 
 /* See dictionary.h.  */
 
+struct dictionary *
+dict_create_linear (struct obstack *obstack,
+		    enum language language,
+		    const struct pending *symbol_list)
+{
+  std::vector<symbol *> symlist = pending_to_vector (symbol_list);
+
+  return dict_create_linear_1 (obstack, language, symlist);
+}
+
+/* See dictionary.h.  */
+
 struct dictionary *
 dict_create_linear_expandable (enum language language)
 {
@@ -483,20 +490,26 @@ dict_add_symbol (struct dictionary *dict, struct symbol *sym)
   (DICT_VECTOR (dict))->add_symbol (dict, sym);
 }
 
+/* A function to transition dict_add_pending to new API.  */
+
+static void
+dict_add_pending_1 (struct dictionary *dict,
+		    const std::vector<symbol *> &symbol_list)
+{
+  /* Preserve ordering by reversing the list.  */
+  for (auto sym = symbol_list.rbegin (); sym != symbol_list.rend (); ++sym)
+    dict_add_symbol (dict, *sym);
+}
+
 /* Utility to add a list of symbols to a dictionary.
    DICT must be an expandable dictionary.  */
 
 void
 dict_add_pending (struct dictionary *dict, const struct pending *symbol_list)
 {
-  const struct pending *list;
-  int i;
+  std::vector<symbol *> symlist = pending_to_vector (symbol_list);
 
-  for (list = symbol_list; list != NULL; list = list->next)
-    {
-      for (i = 0; i < list->nsyms; ++i)
-	dict_add_symbol (dict, list->symbol[i]);
-    }
+  dict_add_pending_1 (dict, symlist);
 }
 
 /* Initialize ITERATOR to point at the first symbol in DICT, and
@@ -929,3 +942,408 @@ add_symbol_linear_expandable (struct dictionary *dict,
 
   DICT_LINEAR_SYM (dict, nsyms - 1) = sym;
 }
+
+/* Multi-language dictionary support.  */
+
+/* The structure describing a multi-language dictionary.  */
+
+struct multidictionary
+{
+  /* An array of dictionaries, one per language.  All dictionaries
+     must be of the same type.  This should be free'd for expandable
+     dictionary types.  */
+  struct dictionary **dictionaries;
+
+  /* The number of language dictionaries currently allocated.
+     Only used for expandable dictionaries.  */
+  unsigned short n_allocated_dictionaries;
+};
+
+/* A hasher for enum language.  Injecting this into std is a convenience
+   when using unordered_map with C++11.  */
+
+namespace std
+{
+  template<> struct hash<enum language>
+  {
+    typedef enum language argument_type;
+    typedef std::size_t result_type;
+
+    result_type operator() (const argument_type &l) const noexcept
+    {
+      return static_cast<result_type> (l);
+    }
+  };
+} /* namespace std */
+
+/* A helper function to collate symbols on the pending list by language.  */
+
+static std::unordered_map<enum language, std::vector<symbol *>>
+collate_pending_symbols_by_language (const struct pending *symbol_list)
+{
+  std::unordered_map<enum language, std::vector<symbol *>> nsyms;
+
+  for (const struct pending *list_counter = symbol_list;
+       list_counter != nullptr; list_counter = list_counter->next)
+    {
+      for (int i = list_counter->nsyms - 1; i >= 0; --i)
+	{
+	  enum language language = SYMBOL_LANGUAGE (list_counter->symbol[i]);
+	  nsyms[language].push_back (list_counter->symbol[i]);
+	}
+    }
+
+  return nsyms;
+}
+
+/* See dictionary.h.  */
+
+struct multidictionary *
+mdict_create_hashed (struct obstack *obstack,
+		     const struct pending *symbol_list)
+{
+  struct multidictionary *retval
+    = XOBNEW (obstack, struct multidictionary);
+  std::unordered_map<enum language, std::vector<symbol *>> nsyms
+    = collate_pending_symbols_by_language (symbol_list);
+
+  /* Loop over all languages and create/populate dictionaries.  */
+  retval->dictionaries
+    = XOBNEWVEC (obstack, struct dictionary *, nsyms.size ());
+  retval->n_allocated_dictionaries = nsyms.size ();
+
+  int idx = 0;
+  for (const auto &pair : nsyms)
+    {
+      enum language language = pair.first;
+      std::vector<symbol *> symlist = pair.second;
+
+      retval->dictionaries[idx++]
+	= dict_create_hashed_1 (obstack, language, symlist);
+    }
+
+  return retval;
+}
+
+/* See dictionary.h.  */
+
+struct multidictionary *
+mdict_create_hashed_expandable (enum language language)
+{
+  struct multidictionary *retval = XNEW (struct multidictionary);
+
+  /* We have no symbol list to populate, but we create an empty
+     dictionary of the requested language to populate later.  */
+  retval->n_allocated_dictionaries = 1;
+  retval->dictionaries = XNEW (struct dictionary *);
+  retval->dictionaries[0] = dict_create_hashed_expandable (language);
+
+  return retval;
+}
+
+/* See dictionary.h.  */
+
+struct multidictionary *
+mdict_create_linear (struct obstack *obstack,
+		     const struct pending *symbol_list)
+{
+  struct multidictionary *retval
+    = XOBNEW (obstack, struct multidictionary);
+  std::unordered_map<enum language, std::vector<symbol *>> nsyms
+    = collate_pending_symbols_by_language (symbol_list);
+
+  /* Loop over all languages and create/populate dictionaries.  */
+  retval->dictionaries
+    = XOBNEWVEC (obstack, struct dictionary *, nsyms.size ());
+  retval->n_allocated_dictionaries = nsyms.size ();
+
+  int idx = 0;
+  for (const auto &pair : nsyms)
+    {
+      enum language language = pair.first;
+      std::vector<symbol *> symlist = pair.second;
+
+      retval->dictionaries[idx++]
+	= dict_create_linear_1 (obstack, language, symlist);
+    }
+
+  return retval;
+}
+
+/* See dictionary.h.  */
+
+struct multidictionary *
+mdict_create_linear_expandable (enum language language)
+{
+  struct multidictionary *retval = XNEW (struct multidictionary);
+
+  /* We have no symbol list to populate, but we create an empty
+     dictionary to populate later.  */
+  retval->n_allocated_dictionaries = 1;
+  retval->dictionaries = XNEW (struct dictionary *);
+  retval->dictionaries[0] = dict_create_linear_expandable (language);
+
+  return retval;
+}
+
+/* See dictionary.h.  */
+
+void
+mdict_free (struct multidictionary *mdict)
+{
+  /* Grab the type of dictionary being used.  */
+  enum dict_type type = mdict->dictionaries[0]->vector->type;
+
+  /* Loop over all dictionaries and free them.  */
+  for (unsigned short idx = 0; idx < mdict->n_allocated_dictionaries; ++idx)
+    dict_free (mdict->dictionaries[idx]);
+
+  /* Free the dictionary list, if needed.  */
+  switch (type)
+    {
+    case DICT_HASHED:
+    case DICT_LINEAR:
+      /* Memory was allocated on an obstack when created.  */
+      break;
+
+    case DICT_HASHED_EXPANDABLE:
+    case DICT_LINEAR_EXPANDABLE:
+      xfree (mdict->dictionaries);
+      break;
+    }
+}
+
+/* Helper function to find the dictionary associated with LANGUAGE
+   or NULL if there is no dictionary of that language.  */
+
+static struct dictionary *
+find_language_dictionary (const struct multidictionary *mdict,
+			  enum language language)
+{
+  for (unsigned short idx = 0; idx < mdict->n_allocated_dictionaries; ++idx)
+    {
+      if (DICT_LANGUAGE (mdict->dictionaries[idx])->la_language == language)
+	return mdict->dictionaries[idx];
+    }
+
+  return nullptr;
+}
+
+/* Create a new language dictionary for LANGUAGE and add it to the
+   multidictionary MDICT's list of dictionaries.  If MDICT is not
+   based on expandable dictionaries, this function throws an
+   internal error.  */
+
+static struct dictionary *
+create_new_language_dictionary (struct multidictionary *mdict,
+				enum language language)
+{
+  struct dictionary *retval = nullptr;
+
+  /* We use the first dictionary entry to decide what create function
+     to call.  Not optimal but sufficient.  */
+  gdb_assert (mdict->dictionaries[0] != nullptr);
+  switch (mdict->dictionaries[0]->vector->type)
+    {
+    case DICT_HASHED:
+    case DICT_LINEAR:
+      internal_error (__FILE__, __LINE__,
+		      _("create_new_language_dictionary: attempted to expand "
+			"non-expandable multidictionary"));
+
+    case DICT_HASHED_EXPANDABLE:
+      retval = dict_create_hashed_expandable (language);
+      break;
+
+    case DICT_LINEAR_EXPANDABLE:
+      retval = dict_create_linear_expandable (language);
+      break;
+    }
+
+  /* Grow the dictionary vector and save the new dictionary.  */
+  mdict->dictionaries
+    = (struct dictionary **) xrealloc (mdict->dictionaries,
+				       (++mdict->n_allocated_dictionaries
+					* sizeof (struct dictionary *)));
+  mdict->dictionaries[mdict->n_allocated_dictionaries - 1] = retval;
+
+  return retval;
+}
+
+/* See dictionary.h.  */
+
+void
+mdict_add_symbol (struct multidictionary *mdict, struct symbol *sym)
+{
+  struct dictionary *dict
+    = find_language_dictionary (mdict, SYMBOL_LANGUAGE (sym));
+
+  if (dict == nullptr)
+    {
+      /* SYM is of a new language that we haven't previously seen.
+	 Create a new dictionary for it.  */
+      dict = create_new_language_dictionary (mdict, SYMBOL_LANGUAGE (sym));
+    }
+
+  dict_add_symbol (dict, sym);
+}
+
+/* See dictionary.h.  */
+
+void
+mdict_add_pending (struct multidictionary *mdict,
+		   const struct pending *symbol_list)
+{
+  std::unordered_map<enum language, std::vector<symbol *>> nsyms
+    = collate_pending_symbols_by_language (symbol_list);
+
+  for (const auto &pair : nsyms)
+    {
+      enum language language = pair.first;
+      std::vector<symbol *> symlist = pair.second;
+      struct dictionary *dict = find_language_dictionary (mdict, language);
+
+      if (dict == nullptr)
+	{
+	  /* The language was not previously seen.  Create a new dictionary
+	     for it.  */
+	  dict = create_new_language_dictionary (mdict, language);
+	}
+
+      dict_add_pending_1 (dict, symlist);
+    }
+}
+
+/* See dictionary.h.  */
+
+struct symbol *
+mdict_iterator_first (const multidictionary *mdict,
+		      struct mdict_iterator *miterator)
+{
+  miterator->mdict = mdict;
+  miterator->current_idx = 0;
+
+  for (unsigned short idx = miterator->current_idx;
+       idx < mdict->n_allocated_dictionaries; ++idx)
+    {
+      struct symbol *result
+	= dict_iterator_first (mdict->dictionaries[idx], &miterator->iterator);
+
+      if (result != nullptr)
+	{
+	  miterator->current_idx = idx;
+	  return result;
+	}
+    }
+
+  return nullptr;
+}
+
+/* See dictionary.h.  */
+
+struct symbol *
+mdict_iterator_next (struct mdict_iterator *miterator)
+{
+  struct symbol *result = dict_iterator_next (&miterator->iterator);
+
+  if (result != nullptr)
+    return result;
+
+  /* The current dictionary had no matches -- move to the next
+     dictionary, if any.  */
+  for (unsigned short idx = ++miterator->current_idx;
+       idx < miterator->mdict->n_allocated_dictionaries; ++idx)
+    {
+      result
+	= dict_iterator_first (miterator->mdict->dictionaries[idx],
+			       &miterator->iterator);
+      if (result != nullptr)
+	{
+	  miterator->current_idx = idx;
+	  return result;
+	}
+    }
+
+  return nullptr;
+}
+
+/* See dictionary.h.  */
+
+struct symbol *
+mdict_iter_match_first (const struct multidictionary *mdict,
+			const lookup_name_info &name,
+			struct mdict_iterator *miterator)
+{
+  miterator->mdict = mdict;
+  miterator->current_idx = 0;
+
+  for (unsigned short idx = miterator->current_idx;
+       idx < mdict->n_allocated_dictionaries; ++idx)
+    {
+      struct symbol *result
+	= dict_iter_match_first (mdict->dictionaries[idx], name,
+				 &miterator->iterator);
+
+      if (result != nullptr)
+	return result;
+    }
+
+  return nullptr;
+}
+
+/* See dictionary.h.  */
+
+struct symbol *
+mdict_iter_match_next (const lookup_name_info &name,
+		       struct mdict_iterator *miterator)
+{
+  /* Search the current dictionary.  */
+  struct symbol *result = dict_iter_match_next (name, &miterator->iterator);
+
+  if (result != nullptr)
+    return result;
+
+  /* The current dictionary had no matches -- move to the next
+     dictionary, if any.  */
+  for (unsigned short idx = ++miterator->current_idx;
+       idx < miterator->mdict->n_allocated_dictionaries; ++idx)
+    {
+      result
+	= dict_iter_match_first (miterator->mdict->dictionaries[idx],
+				 name, &miterator->iterator);
+      if (result != nullptr)
+	{
+	  miterator->current_idx = idx;
+	  return result;
+	}
+    }
+
+  return nullptr;
+}
+
+/* See dictionary.h.  */
+
+int
+mdict_size (const struct multidictionary *mdict)
+{
+  int size = 0;
+
+  for (unsigned short idx = 0; idx < mdict->n_allocated_dictionaries; ++idx)
+    size += dict_size (mdict->dictionaries[idx]);
+
+  return size;
+}
+
+/* See dictionary.h.  */
+
+bool
+mdict_empty (const struct multidictionary *mdict)
+{
+  for (unsigned short idx = 0; idx < mdict->n_allocated_dictionaries; ++idx)
+    {
+      if (!dict_empty (mdict->dictionaries[idx]))
+	return false;
+    }
+
+  return true;
+}
diff --git a/gdb/dictionary.h b/gdb/dictionary.h
--- a/gdb/dictionary.h
+++ b/gdb/dictionary.h
@@ -113,6 +113,21 @@ struct dict_iterator
   struct symbol *current;
 };
 
+/* The multi-language dictionary iterator.  Like dict_iterator above,
+   these contents should be considered private.  */
+
+struct mdict_iterator
+{
+  /* The multidictionary with whcih this iterator is associated.  */
+  const struct multidictionary *mdict;
+
+  /* The iterator used to iterate through individual dictionaries.  */
+  struct dict_iterator iterator;
+
+  /* The current index of the dictionary being iterated over.  */
+  unsigned short current_idx;
+};
+
 /* Initialize ITERATOR to point at the first symbol in DICT, and
    return that first symbol, or NULL if DICT is empty.  */