00db10
commit 5aad5f617892e75d91d4c8fb7594ff35b610c042
00db10
Author: Carlos O'Donell <carlos@redhat.com>
00db10
Date:   Tue Jun 5 23:55:17 2018 -0400
00db10
00db10
    Improve DST handling (Bug 23102, Bug 21942, Bug 18018, Bug 23259).
00db10
    
00db10
    This commit improves DST handling significantly in the following
00db10
    ways: firstly is_dst () is overhauled to correctly process DST
00db10
    sequences that would be accepted given the ELF gABI.  This means that
00db10
    we actually now accept slightly more sequences than before.  Now we
00db10
    accept $ORIGIN$ORIGIN, but in the past we accepted only $ORIGIN\0 or
00db10
    $ORIGIN/..., but this kind of behaviour results in unexpected
00db10
    and uninterpreted DST sequences being used as literal search paths
00db10
    leading to security defects.  Therefore the first step in correcting
00db10
    this defect is making is_dst () properly account for all DSTs
00db10
    and making the function context free in the sense that it counts
00db10
    DSTs without knowledge of path, or AT_SECURE.  Next, _dl_dst_count ()
00db10
    is also simplified to count all DSTs regardless of context.
00db10
    Then in _dl_dst_substitute () we reintroduce context-dependent
00db10
    processing for such things as AT_SECURE handling.  At the level of
00db10
    _dl_dst_substitute we can have access to things like the true start
00db10
    of the string sequence to validate $ORIGIN-based paths rooted in
00db10
    trusted directories.  Lastly, we tighten up the accepted sequences
00db10
    in AT_SECURE, and avoid leaving known unexpanded DSTs, this is
00db10
    noted in the NEWS entry.
00db10
    
00db10
    Verified with a sequence of 68 tests on x86_64 that cover
00db10
    non-AT_SECURE and AT_SECURE testing using a sysroot (requires root
00db10
    to run).  The tests cover cases for bug 23102, bug 21942, bug 18018,
00db10
    and bug 23259.  These tests are not yet appropriate for the glibc
00db10
    regression testsuite, but with the upcoming test-in-container testing
00db10
    framework it should be possible to include these tests upstream soon.
00db10
    
00db10
    See the mailing list for the tests:
00db10
    https://www.sourceware.org/ml/libc-alpha/2018-06/msg00251.html
00db10
00db10
Index: glibc-2.17-c758a686/elf/dl-deps.c
00db10
===================================================================
00db10
--- glibc-2.17-c758a686.orig/elf/dl-deps.c
00db10
+++ glibc-2.17-c758a686/elf/dl-deps.c
00db10
@@ -101,7 +101,7 @@ struct list
00db10
   ({									      \
00db10
     const char *__str = (str);						      \
00db10
     const char *__result = __str;					      \
00db10
-    size_t __dst_cnt = DL_DST_COUNT (__str);				      \
00db10
+    size_t __dst_cnt = _dl_dst_count (__str);				      \
00db10
 									      \
00db10
     if (__dst_cnt != 0)							      \
00db10
       {									      \
00db10
Index: glibc-2.17-c758a686/elf/dl-dst.h
00db10
===================================================================
00db10
--- glibc-2.17-c758a686.orig/elf/dl-dst.h
00db10
+++ glibc-2.17-c758a686/elf/dl-dst.h
00db10
@@ -18,19 +18,6 @@
00db10
 
00db10
 #include "trusted-dirs.h"
00db10
 
00db10
-/* Determine the number of DST elements in the name.  Only if IS_PATH is
00db10
-   nonzero paths are recognized (i.e., multiple, ':' separated filenames).  */
00db10
-#define DL_DST_COUNT(name) \
00db10
-  ({									      \
00db10
-    size_t __cnt = 0;							      \
00db10
-    const char *__sf = strchr (name, '$');				      \
00db10
-									      \
00db10
-    if (__builtin_expect (__sf != NULL, 0))				      \
00db10
-      __cnt = _dl_dst_count (__sf);					      \
00db10
-									      \
00db10
-    __cnt; })
00db10
-
00db10
-
00db10
 #ifdef SHARED
00db10
 # define IS_RTLD(l) (l) == &GL(dl_rtld_map)
00db10
 #else
00db10
Index: glibc-2.17-c758a686/elf/dl-load.c
00db10
===================================================================
00db10
--- glibc-2.17-c758a686.orig/elf/dl-load.c
00db10
+++ glibc-2.17-c758a686/elf/dl-load.c
00db10
@@ -174,12 +174,6 @@ is_trusted_path_normalize (const char *p
00db10
   if (len == 0)
00db10
     return false;
00db10
 
00db10
-  if (*path == ':')
00db10
-    {
00db10
-      ++path;
00db10
-      --len;
00db10
-    }
00db10
-
00db10
   char *npath = (char *) alloca (len + 2);
00db10
   char *wnp = npath;
00db10
   while (*path != '\0')
00db10
@@ -230,120 +224,172 @@ is_trusted_path_normalize (const char *p
00db10
   return false;
00db10
 }
00db10
 
00db10
+/* Given a substring starting at INPUT, just after the DST '$' start
00db10
+   token, determine if INPUT contains DST token REF, following the
00db10
+   ELF gABI rules for DSTs:
00db10
+
00db10
+   * Longest possible sequence using the rules (greedy).
00db10
+
00db10
+   * Must start with a $ (enforced by caller).
00db10
+
00db10
+   * Must follow $ with one underscore or ASCII [A-Za-z] (caller
00db10
+     follows these rules for REF) or '{' (start curly quoted name).
00db10
+
00db10
+   * Must follow first two characters with zero or more [A-Za-z0-9_]
00db10
+     (enforced by caller) or '}' (end curly quoted name).
00db10
 
00db10
+   If the sequence is a DST matching REF then the length of the DST
00db10
+   (excluding the $ sign but including curly braces, if any) is
00db10
+   returned, otherwise 0.  */
00db10
 static size_t
00db10
-is_dst (const char *start, const char *name, const char *str, int secure)
00db10
+is_dst (const char *input, const char *ref)
00db10
 {
00db10
-  size_t len;
00db10
   bool is_curly = false;
00db10
 
00db10
-  if (name[0] == '{')
00db10
+  /* Is a ${...} input sequence?  */
00db10
+  if (input[0] == '{')
00db10
     {
00db10
       is_curly = true;
00db10
-      ++name;
00db10
+      ++input;
00db10
     }
00db10
 
00db10
-  len = 0;
00db10
-  while (name[len] == str[len] && name[len] != '\0')
00db10
-    ++len;
00db10
-
00db10
-  if (is_curly)
00db10
-    {
00db10
-      if (name[len] != '}')
00db10
-	return 0;
00db10
-
00db10
-      /* Point again at the beginning of the name.  */
00db10
-      --name;
00db10
-      /* Skip over closing curly brace and adjust for the --name.  */
00db10
-      len += 2;
00db10
-    }
00db10
-  else if (name[len] != '\0' && name[len] != '/')
00db10
-    return 0;
00db10
-
00db10
-  if (__builtin_expect (secure, 0)
00db10
-      && ((name[len] != '\0' && name[len] != '/')
00db10
-	  || (name != start + 1)))
00db10
+  /* Check for matching name, following closing curly brace (if
00db10
+     required), or trailing characters which are part of an
00db10
+     identifier.  */
00db10
+  size_t rlen = strlen (ref);
00db10
+  if (strncmp (input, ref, rlen) != 0
00db10
+      || (is_curly && input[rlen] != '}')
00db10
+      || ((input[rlen] >= 'A' && input[rlen] <= 'Z')
00db10
+	  || (input[rlen] >= 'a' && input[rlen] <= 'z')
00db10
+	  || (input[rlen] >= '0' && input[rlen] <= '9')
00db10
+	  || (input[rlen] == '_')))
00db10
     return 0;
00db10
 
00db10
-  return len;
00db10
+  if (is_curly)
00db10
+    /* Count the two curly braces.  */
00db10
+    return rlen + 2;
00db10
+  else
00db10
+    return rlen;
00db10
 }
00db10
 
00db10
-
00db10
+/* INPUT is the start of a DST sequence at the first '$' occurrence.
00db10
+   If there is a DST we call into _dl_dst_count to count the number of
00db10
+   DSTs.  We count all known DSTs regardless of __libc_enable_secure;
00db10
+   the caller is responsible for enforcing the security of the
00db10
+   substitution rules (usually _dl_dst_substitute).  */
00db10
 size_t
00db10
-_dl_dst_count (const char *name)
00db10
+_dl_dst_count (const char *input)
00db10
 {
00db10
-  const char *const start = name;
00db10
   size_t cnt = 0;
00db10
 
00db10
+  input = strchr (input, '$');
00db10
+
00db10
+  /* Most likely there is no DST.  */
00db10
+  if (__glibc_likely (input == NULL))
00db10
+    return 0;
00db10
+
00db10
   do
00db10
     {
00db10
       size_t len;
00db10
 
00db10
-      /* $ORIGIN is not expanded for SUID/GUID programs (except if it
00db10
-	 is $ORIGIN alone) and it must always appear first in path.  */
00db10
-      ++name;
00db10
-      if ((len = is_dst (start, name, "ORIGIN", INTUSE(__libc_enable_secure))) != 0
00db10
-	  || (len = is_dst (start, name, "PLATFORM", 0)) != 0
00db10
-	  || (len = is_dst (start, name, "LIB", 0)) != 0)
00db10
+      ++input;
00db10
+      /* All DSTs must follow ELF gABI rules, see is_dst ().  */
00db10
+      if ((len = is_dst (input, "ORIGIN")) != 0
00db10
+	  || (len = is_dst (input, "PLATFORM")) != 0
00db10
+	  || (len = is_dst (input, "LIB")) != 0)
00db10
 	++cnt;
00db10
 
00db10
-      name = strchr (name + len, '$');
00db10
+      /* There may be more than one DST in the input.  */
00db10
+      input = strchr (input + len, '$');
00db10
     }
00db10
-  while (name != NULL);
00db10
+  while (input != NULL);
00db10
 
00db10
   return cnt;
00db10
 }
00db10
 
00db10
-
00db10
+/* Process INPUT for DSTs and store in RESULT using the information
00db10
+   from link map L to resolve the DSTs. This function only handles one
00db10
+   path at a time and does not handle colon-separated path lists (see
00db10
+   fillin_rpath ()).  Lastly the size of result in bytes should be at
00db10
+   least equal to the value returned by DL_DST_REQUIRED.  Note that it
00db10
+   is possible for a DT_NEEDED, DT_AUXILIARY, and DT_FILTER entries to
00db10
+   have colons, but we treat those as literal colons here, not as path
00db10
+   list delimeters.  */
00db10
 char *
00db10
-_dl_dst_substitute (struct link_map *l, const char *name, char *result)
00db10
+_dl_dst_substitute (struct link_map *l, const char *input, char *result)
00db10
 {
00db10
-  const char *const start = name;
00db10
-
00db10
-  /* Now fill the result path.  While copying over the string we keep
00db10
-     track of the start of the last path element.  When we come accross
00db10
-     a DST we copy over the value or (if the value is not available)
00db10
-     leave the entire path element out.  */
00db10
+  /* Copy character-by-character from input into the working pointer
00db10
+     looking for any DSTs.  We track the start of input and if we are
00db10
+     going to check for trusted paths, all of which are part of $ORIGIN
00db10
+     handling in SUID/SGID cases (see below).  In some cases, like when
00db10
+     a DST cannot be replaced, we may set result to an empty string and
00db10
+     return.  */
00db10
   char *wp = result;
00db10
-  char *last_elem = result;
00db10
+  const char *start = input;
00db10
   bool check_for_trusted = false;
00db10
 
00db10
   do
00db10
     {
00db10
-      if (__builtin_expect (*name == '$', 0))
00db10
+      if (__glibc_unlikely (*input == '$'))
00db10
 	{
00db10
 	  const char *repl = NULL;
00db10
 	  size_t len;
00db10
 
00db10
-	  ++name;
00db10
-	  if ((len = is_dst (start, name, "ORIGIN", INTUSE(__libc_enable_secure))) != 0)
00db10
+	  ++input;
00db10
+	  if ((len = is_dst (input, "ORIGIN")) != 0)
00db10
 	    {
00db10
-#ifndef SHARED
00db10
-	      if (l == NULL)
00db10
-		repl = _dl_get_origin ();
00db10
+	      /* For SUID/GUID programs we normally ignore the path with
00db10
+		 $ORIGIN in DT_RUNPATH, or DT_RPATH.  However, there is
00db10
+		 one exception to this rule, and it is:
00db10
+
00db10
+		   * $ORIGIN appears as the first path element, and is
00db10
+		     the only string in the path or is immediately
00db10
+		     followed by a path separator and the rest of the
00db10
+		     path.
00db10
+
00db10
+		   * The path is rooted in a trusted directory.
00db10
+
00db10
+		 This exception allows such programs to reference
00db10
+		 shared libraries in subdirectories of trusted
00db10
+		 directories.  The use case is one of general
00db10
+		 organization and deployment flexibility.
00db10
+		 Trusted directories are usually such paths as "/lib64"
00db10
+		 or "/usr/lib64", and the usual RPATHs take the form of
00db10
+		 [$ORIGIN/../$LIB/somedir].  */
00db10
+	      if (__glibc_unlikely (__libc_enable_secure)
00db10
+		  && !(input == start + 1
00db10
+		       && (input[len] == '\0' || input[len] == '/')))
00db10
+		repl = (const char *) -1;
00db10
 	      else
00db10
+		{
00db10
+#ifndef SHARED
00db10
+		  if (l == NULL)
00db10
+		    repl = _dl_get_origin ();
00db10
+		  else
00db10
 #endif
00db10
-		repl = l->l_origin;
00db10
+		    repl = l->l_origin;
00db10
+		}
00db10
 
00db10
 	      check_for_trusted = (INTUSE(__libc_enable_secure)
00db10
 				   && l->l_type == lt_executable);
00db10
 	    }
00db10
-	  else if ((len = is_dst (start, name, "PLATFORM", 0)) != 0)
00db10
+	  else if ((len = is_dst (input, "PLATFORM")) != 0)
00db10
 	    repl = GLRO(dl_platform);
00db10
-	  else if ((len = is_dst (start, name, "LIB", 0)) != 0)
00db10
+	  else if ((len = is_dst (input, "LIB")) != 0)
00db10
 	    repl = DL_DST_LIB;
00db10
 
00db10
 	  if (repl != NULL && repl != (const char *) -1)
00db10
 	    {
00db10
 	      wp = __stpcpy (wp, repl);
00db10
-	      name += len;
00db10
+	      input += len;
00db10
 	    }
00db10
-	  else if (len > 1)
00db10
+	  else if (len != 0)
00db10
 	    {
00db10
-	      /* We cannot use this path element, the value of the
00db10
-		 replacement is unknown.  */
00db10
-	      wp = last_elem;
00db10
-	      break;
00db10
+	      /* We found a valid DST that we know about, but we could
00db10
+	         not find a replacement value for it, therefore we
00db10
+		 cannot use this path and discard it.  */
00db10
+	      *result = '\0';
00db10
+	      return result;
00db10
 	    }
00db10
 	  else
00db10
 	    /* No DST we recognize.  */
00db10
@@ -351,16 +397,26 @@ _dl_dst_substitute (struct link_map *l,
00db10
 	}
00db10
       else
00db10
 	{
00db10
-	  *wp++ = *name++;
00db10
+	  *wp++ = *input++;
00db10
 	}
00db10
     }
00db10
-  while (*name != '\0');
00db10
+  while (*input != '\0');
00db10
 
00db10
   /* In SUID/SGID programs, after $ORIGIN expansion the normalized
00db10
-     path must be rooted in one of the trusted directories.  */
00db10
-  if (__builtin_expect (check_for_trusted, false)
00db10
-      && !is_trusted_path_normalize (last_elem, wp - last_elem))
00db10
-    wp = last_elem;
00db10
+     path must be rooted in one of the trusted directories.  The $LIB
00db10
+     and $PLATFORM DST cannot in any way be manipulated by the caller
00db10
+     because they are fixed values that are set by the dynamic loader
00db10
+     and therefore any paths using just $LIB or $PLATFORM need not be
00db10
+     checked for trust, the authors of the binaries themselves are
00db10
+     trusted to have designed this correctly.  Only $ORIGIN is tested in
00db10
+     this way because it may be manipulated in some ways with hard
00db10
+     links.  */
00db10
+  if (__glibc_unlikely (check_for_trusted)
00db10
+      && !is_trusted_path_normalize (result, wp - result))
00db10
+    {
00db10
+      *result = '\0';
00db10
+      return result;
00db10
+    }
00db10
 
00db10
   *wp = '\0';
00db10
 
00db10
@@ -368,13 +424,13 @@ _dl_dst_substitute (struct link_map *l,
00db10
 }
00db10
 
00db10
 
00db10
-/* Return copy of argument with all recognized dynamic string tokens
00db10
-   ($ORIGIN and $PLATFORM for now) replaced.  On some platforms it
00db10
-   might not be possible to determine the path from which the object
00db10
-   belonging to the map is loaded.  In this case the path element
00db10
-   containing $ORIGIN is left out.  */
00db10
+/* Return a malloc allocated copy of INPUT with all recognized DSTs
00db10
+   replaced. On some platforms it might not be possible to determine the
00db10
+   path from which the object belonging to the map is loaded.  In this
00db10
+   case the path containing the DST is left out.  On error NULL
00db10
+   is returned.  */
00db10
 static char *
00db10
-expand_dynamic_string_token (struct link_map *l, const char *s)
00db10
+expand_dynamic_string_token (struct link_map *l, const char *input)
00db10
 {
00db10
   /* We make two runs over the string.  First we determine how large the
00db10
      resulting string is and then we copy it over.  Since this is no
00db10
@@ -384,22 +440,22 @@ expand_dynamic_string_token (struct link
00db10
   size_t total;
00db10
   char *result;
00db10
 
00db10
-  /* Determine the number of DST elements.  */
00db10
-  cnt = DL_DST_COUNT (s);
00db10
+  /* Determine the number of DSTs.  */
00db10
+  cnt = _dl_dst_count (input);
00db10
 
00db10
   /* If we do not have to replace anything simply copy the string.  */
00db10
   if (__builtin_expect (cnt, 0) == 0)
00db10
-    return local_strdup (s);
00db10
+    return local_strdup (input);
00db10
 
00db10
   /* Determine the length of the substituted string.  */
00db10
-  total = DL_DST_REQUIRED (l, s, strlen (s), cnt);
00db10
+  total = DL_DST_REQUIRED (l, input, strlen (input), cnt);
00db10
 
00db10
   /* Allocate the necessary memory.  */
00db10
   result = (char *) malloc (total + 1);
00db10
   if (result == NULL)
00db10
     return NULL;
00db10
 
00db10
-  return _dl_dst_substitute (l, s, result);
00db10
+  return _dl_dst_substitute (l, input, result);
00db10
 }
00db10
 
00db10