c6d234
commit 5aad5f617892e75d91d4c8fb7594ff35b610c042
c6d234
Author: Carlos O'Donell <carlos@redhat.com>
c6d234
Date:   Tue Jun 5 23:55:17 2018 -0400
c6d234
c6d234
    Improve DST handling (Bug 23102, Bug 21942, Bug 18018, Bug 23259).
c6d234
    
c6d234
    This commit improves DST handling significantly in the following
c6d234
    ways: firstly is_dst () is overhauled to correctly process DST
c6d234
    sequences that would be accepted given the ELF gABI.  This means that
c6d234
    we actually now accept slightly more sequences than before.  Now we
c6d234
    accept $ORIGIN$ORIGIN, but in the past we accepted only $ORIGIN\0 or
c6d234
    $ORIGIN/..., but this kind of behaviour results in unexpected
c6d234
    and uninterpreted DST sequences being used as literal search paths
c6d234
    leading to security defects.  Therefore the first step in correcting
c6d234
    this defect is making is_dst () properly account for all DSTs
c6d234
    and making the function context free in the sense that it counts
c6d234
    DSTs without knowledge of path, or AT_SECURE.  Next, _dl_dst_count ()
c6d234
    is also simplified to count all DSTs regardless of context.
c6d234
    Then in _dl_dst_substitute () we reintroduce context-dependent
c6d234
    processing for such things as AT_SECURE handling.  At the level of
c6d234
    _dl_dst_substitute we can have access to things like the true start
c6d234
    of the string sequence to validate $ORIGIN-based paths rooted in
c6d234
    trusted directories.  Lastly, we tighten up the accepted sequences
c6d234
    in AT_SECURE, and avoid leaving known unexpanded DSTs, this is
c6d234
    noted in the NEWS entry.
c6d234
    
c6d234
    Verified with a sequence of 68 tests on x86_64 that cover
c6d234
    non-AT_SECURE and AT_SECURE testing using a sysroot (requires root
c6d234
    to run).  The tests cover cases for bug 23102, bug 21942, bug 18018,
c6d234
    and bug 23259.  These tests are not yet appropriate for the glibc
c6d234
    regression testsuite, but with the upcoming test-in-container testing
c6d234
    framework it should be possible to include these tests upstream soon.
c6d234
    
c6d234
    See the mailing list for the tests:
c6d234
    https://www.sourceware.org/ml/libc-alpha/2018-06/msg00251.html
c6d234
c6d234
Index: glibc-2.17-c758a686/elf/dl-deps.c
c6d234
===================================================================
c6d234
--- glibc-2.17-c758a686.orig/elf/dl-deps.c
c6d234
+++ glibc-2.17-c758a686/elf/dl-deps.c
c6d234
@@ -101,7 +101,7 @@ struct list
c6d234
   ({									      \
c6d234
     const char *__str = (str);						      \
c6d234
     const char *__result = __str;					      \
c6d234
-    size_t __dst_cnt = DL_DST_COUNT (__str);				      \
c6d234
+    size_t __dst_cnt = _dl_dst_count (__str);				      \
c6d234
 									      \
c6d234
     if (__dst_cnt != 0)							      \
c6d234
       {									      \
c6d234
Index: glibc-2.17-c758a686/elf/dl-dst.h
c6d234
===================================================================
c6d234
--- glibc-2.17-c758a686.orig/elf/dl-dst.h
c6d234
+++ glibc-2.17-c758a686/elf/dl-dst.h
c6d234
@@ -18,19 +18,6 @@
c6d234
 
c6d234
 #include "trusted-dirs.h"
c6d234
 
c6d234
-/* Determine the number of DST elements in the name.  Only if IS_PATH is
c6d234
-   nonzero paths are recognized (i.e., multiple, ':' separated filenames).  */
c6d234
-#define DL_DST_COUNT(name) \
c6d234
-  ({									      \
c6d234
-    size_t __cnt = 0;							      \
c6d234
-    const char *__sf = strchr (name, '$');				      \
c6d234
-									      \
c6d234
-    if (__builtin_expect (__sf != NULL, 0))				      \
c6d234
-      __cnt = _dl_dst_count (__sf);					      \
c6d234
-									      \
c6d234
-    __cnt; })
c6d234
-
c6d234
-
c6d234
 #ifdef SHARED
c6d234
 # define IS_RTLD(l) (l) == &GL(dl_rtld_map)
c6d234
 #else
c6d234
Index: glibc-2.17-c758a686/elf/dl-load.c
c6d234
===================================================================
c6d234
--- glibc-2.17-c758a686.orig/elf/dl-load.c
c6d234
+++ glibc-2.17-c758a686/elf/dl-load.c
c6d234
@@ -174,12 +174,6 @@ is_trusted_path_normalize (const char *p
c6d234
   if (len == 0)
c6d234
     return false;
c6d234
 
c6d234
-  if (*path == ':')
c6d234
-    {
c6d234
-      ++path;
c6d234
-      --len;
c6d234
-    }
c6d234
-
c6d234
   char *npath = (char *) alloca (len + 2);
c6d234
   char *wnp = npath;
c6d234
   while (*path != '\0')
c6d234
@@ -230,120 +224,172 @@ is_trusted_path_normalize (const char *p
c6d234
   return false;
c6d234
 }
c6d234
 
c6d234
+/* Given a substring starting at INPUT, just after the DST '$' start
c6d234
+   token, determine if INPUT contains DST token REF, following the
c6d234
+   ELF gABI rules for DSTs:
c6d234
+
c6d234
+   * Longest possible sequence using the rules (greedy).
c6d234
+
c6d234
+   * Must start with a $ (enforced by caller).
c6d234
+
c6d234
+   * Must follow $ with one underscore or ASCII [A-Za-z] (caller
c6d234
+     follows these rules for REF) or '{' (start curly quoted name).
c6d234
+
c6d234
+   * Must follow first two characters with zero or more [A-Za-z0-9_]
c6d234
+     (enforced by caller) or '}' (end curly quoted name).
c6d234
 
c6d234
+   If the sequence is a DST matching REF then the length of the DST
c6d234
+   (excluding the $ sign but including curly braces, if any) is
c6d234
+   returned, otherwise 0.  */
c6d234
 static size_t
c6d234
-is_dst (const char *start, const char *name, const char *str, int secure)
c6d234
+is_dst (const char *input, const char *ref)
c6d234
 {
c6d234
-  size_t len;
c6d234
   bool is_curly = false;
c6d234
 
c6d234
-  if (name[0] == '{')
c6d234
+  /* Is a ${...} input sequence?  */
c6d234
+  if (input[0] == '{')
c6d234
     {
c6d234
       is_curly = true;
c6d234
-      ++name;
c6d234
+      ++input;
c6d234
     }
c6d234
 
c6d234
-  len = 0;
c6d234
-  while (name[len] == str[len] && name[len] != '\0')
c6d234
-    ++len;
c6d234
-
c6d234
-  if (is_curly)
c6d234
-    {
c6d234
-      if (name[len] != '}')
c6d234
-	return 0;
c6d234
-
c6d234
-      /* Point again at the beginning of the name.  */
c6d234
-      --name;
c6d234
-      /* Skip over closing curly brace and adjust for the --name.  */
c6d234
-      len += 2;
c6d234
-    }
c6d234
-  else if (name[len] != '\0' && name[len] != '/')
c6d234
-    return 0;
c6d234
-
c6d234
-  if (__builtin_expect (secure, 0)
c6d234
-      && ((name[len] != '\0' && name[len] != '/')
c6d234
-	  || (name != start + 1)))
c6d234
+  /* Check for matching name, following closing curly brace (if
c6d234
+     required), or trailing characters which are part of an
c6d234
+     identifier.  */
c6d234
+  size_t rlen = strlen (ref);
c6d234
+  if (strncmp (input, ref, rlen) != 0
c6d234
+      || (is_curly && input[rlen] != '}')
c6d234
+      || ((input[rlen] >= 'A' && input[rlen] <= 'Z')
c6d234
+	  || (input[rlen] >= 'a' && input[rlen] <= 'z')
c6d234
+	  || (input[rlen] >= '0' && input[rlen] <= '9')
c6d234
+	  || (input[rlen] == '_')))
c6d234
     return 0;
c6d234
 
c6d234
-  return len;
c6d234
+  if (is_curly)
c6d234
+    /* Count the two curly braces.  */
c6d234
+    return rlen + 2;
c6d234
+  else
c6d234
+    return rlen;
c6d234
 }
c6d234
 
c6d234
-
c6d234
+/* INPUT is the start of a DST sequence at the first '$' occurrence.
c6d234
+   If there is a DST we call into _dl_dst_count to count the number of
c6d234
+   DSTs.  We count all known DSTs regardless of __libc_enable_secure;
c6d234
+   the caller is responsible for enforcing the security of the
c6d234
+   substitution rules (usually _dl_dst_substitute).  */
c6d234
 size_t
c6d234
-_dl_dst_count (const char *name)
c6d234
+_dl_dst_count (const char *input)
c6d234
 {
c6d234
-  const char *const start = name;
c6d234
   size_t cnt = 0;
c6d234
 
c6d234
+  input = strchr (input, '$');
c6d234
+
c6d234
+  /* Most likely there is no DST.  */
c6d234
+  if (__glibc_likely (input == NULL))
c6d234
+    return 0;
c6d234
+
c6d234
   do
c6d234
     {
c6d234
       size_t len;
c6d234
 
c6d234
-      /* $ORIGIN is not expanded for SUID/GUID programs (except if it
c6d234
-	 is $ORIGIN alone) and it must always appear first in path.  */
c6d234
-      ++name;
c6d234
-      if ((len = is_dst (start, name, "ORIGIN", INTUSE(__libc_enable_secure))) != 0
c6d234
-	  || (len = is_dst (start, name, "PLATFORM", 0)) != 0
c6d234
-	  || (len = is_dst (start, name, "LIB", 0)) != 0)
c6d234
+      ++input;
c6d234
+      /* All DSTs must follow ELF gABI rules, see is_dst ().  */
c6d234
+      if ((len = is_dst (input, "ORIGIN")) != 0
c6d234
+	  || (len = is_dst (input, "PLATFORM")) != 0
c6d234
+	  || (len = is_dst (input, "LIB")) != 0)
c6d234
 	++cnt;
c6d234
 
c6d234
-      name = strchr (name + len, '$');
c6d234
+      /* There may be more than one DST in the input.  */
c6d234
+      input = strchr (input + len, '$');
c6d234
     }
c6d234
-  while (name != NULL);
c6d234
+  while (input != NULL);
c6d234
 
c6d234
   return cnt;
c6d234
 }
c6d234
 
c6d234
-
c6d234
+/* Process INPUT for DSTs and store in RESULT using the information
c6d234
+   from link map L to resolve the DSTs. This function only handles one
c6d234
+   path at a time and does not handle colon-separated path lists (see
c6d234
+   fillin_rpath ()).  Lastly the size of result in bytes should be at
c6d234
+   least equal to the value returned by DL_DST_REQUIRED.  Note that it
c6d234
+   is possible for a DT_NEEDED, DT_AUXILIARY, and DT_FILTER entries to
c6d234
+   have colons, but we treat those as literal colons here, not as path
c6d234
+   list delimeters.  */
c6d234
 char *
c6d234
-_dl_dst_substitute (struct link_map *l, const char *name, char *result)
c6d234
+_dl_dst_substitute (struct link_map *l, const char *input, char *result)
c6d234
 {
c6d234
-  const char *const start = name;
c6d234
-
c6d234
-  /* Now fill the result path.  While copying over the string we keep
c6d234
-     track of the start of the last path element.  When we come accross
c6d234
-     a DST we copy over the value or (if the value is not available)
c6d234
-     leave the entire path element out.  */
c6d234
+  /* Copy character-by-character from input into the working pointer
c6d234
+     looking for any DSTs.  We track the start of input and if we are
c6d234
+     going to check for trusted paths, all of which are part of $ORIGIN
c6d234
+     handling in SUID/SGID cases (see below).  In some cases, like when
c6d234
+     a DST cannot be replaced, we may set result to an empty string and
c6d234
+     return.  */
c6d234
   char *wp = result;
c6d234
-  char *last_elem = result;
c6d234
+  const char *start = input;
c6d234
   bool check_for_trusted = false;
c6d234
 
c6d234
   do
c6d234
     {
c6d234
-      if (__builtin_expect (*name == '$', 0))
c6d234
+      if (__glibc_unlikely (*input == '$'))
c6d234
 	{
c6d234
 	  const char *repl = NULL;
c6d234
 	  size_t len;
c6d234
 
c6d234
-	  ++name;
c6d234
-	  if ((len = is_dst (start, name, "ORIGIN", INTUSE(__libc_enable_secure))) != 0)
c6d234
+	  ++input;
c6d234
+	  if ((len = is_dst (input, "ORIGIN")) != 0)
c6d234
 	    {
c6d234
-#ifndef SHARED
c6d234
-	      if (l == NULL)
c6d234
-		repl = _dl_get_origin ();
c6d234
+	      /* For SUID/GUID programs we normally ignore the path with
c6d234
+		 $ORIGIN in DT_RUNPATH, or DT_RPATH.  However, there is
c6d234
+		 one exception to this rule, and it is:
c6d234
+
c6d234
+		   * $ORIGIN appears as the first path element, and is
c6d234
+		     the only string in the path or is immediately
c6d234
+		     followed by a path separator and the rest of the
c6d234
+		     path.
c6d234
+
c6d234
+		   * The path is rooted in a trusted directory.
c6d234
+
c6d234
+		 This exception allows such programs to reference
c6d234
+		 shared libraries in subdirectories of trusted
c6d234
+		 directories.  The use case is one of general
c6d234
+		 organization and deployment flexibility.
c6d234
+		 Trusted directories are usually such paths as "/lib64"
c6d234
+		 or "/usr/lib64", and the usual RPATHs take the form of
c6d234
+		 [$ORIGIN/../$LIB/somedir].  */
c6d234
+	      if (__glibc_unlikely (__libc_enable_secure)
c6d234
+		  && !(input == start + 1
c6d234
+		       && (input[len] == '\0' || input[len] == '/')))
c6d234
+		repl = (const char *) -1;
c6d234
 	      else
c6d234
+		{
c6d234
+#ifndef SHARED
c6d234
+		  if (l == NULL)
c6d234
+		    repl = _dl_get_origin ();
c6d234
+		  else
c6d234
 #endif
c6d234
-		repl = l->l_origin;
c6d234
+		    repl = l->l_origin;
c6d234
+		}
c6d234
 
c6d234
 	      check_for_trusted = (INTUSE(__libc_enable_secure)
c6d234
 				   && l->l_type == lt_executable);
c6d234
 	    }
c6d234
-	  else if ((len = is_dst (start, name, "PLATFORM", 0)) != 0)
c6d234
+	  else if ((len = is_dst (input, "PLATFORM")) != 0)
c6d234
 	    repl = GLRO(dl_platform);
c6d234
-	  else if ((len = is_dst (start, name, "LIB", 0)) != 0)
c6d234
+	  else if ((len = is_dst (input, "LIB")) != 0)
c6d234
 	    repl = DL_DST_LIB;
c6d234
 
c6d234
 	  if (repl != NULL && repl != (const char *) -1)
c6d234
 	    {
c6d234
 	      wp = __stpcpy (wp, repl);
c6d234
-	      name += len;
c6d234
+	      input += len;
c6d234
 	    }
c6d234
-	  else if (len > 1)
c6d234
+	  else if (len != 0)
c6d234
 	    {
c6d234
-	      /* We cannot use this path element, the value of the
c6d234
-		 replacement is unknown.  */
c6d234
-	      wp = last_elem;
c6d234
-	      break;
c6d234
+	      /* We found a valid DST that we know about, but we could
c6d234
+	         not find a replacement value for it, therefore we
c6d234
+		 cannot use this path and discard it.  */
c6d234
+	      *result = '\0';
c6d234
+	      return result;
c6d234
 	    }
c6d234
 	  else
c6d234
 	    /* No DST we recognize.  */
c6d234
@@ -351,16 +397,26 @@ _dl_dst_substitute (struct link_map *l,
c6d234
 	}
c6d234
       else
c6d234
 	{
c6d234
-	  *wp++ = *name++;
c6d234
+	  *wp++ = *input++;
c6d234
 	}
c6d234
     }
c6d234
-  while (*name != '\0');
c6d234
+  while (*input != '\0');
c6d234
 
c6d234
   /* In SUID/SGID programs, after $ORIGIN expansion the normalized
c6d234
-     path must be rooted in one of the trusted directories.  */
c6d234
-  if (__builtin_expect (check_for_trusted, false)
c6d234
-      && !is_trusted_path_normalize (last_elem, wp - last_elem))
c6d234
-    wp = last_elem;
c6d234
+     path must be rooted in one of the trusted directories.  The $LIB
c6d234
+     and $PLATFORM DST cannot in any way be manipulated by the caller
c6d234
+     because they are fixed values that are set by the dynamic loader
c6d234
+     and therefore any paths using just $LIB or $PLATFORM need not be
c6d234
+     checked for trust, the authors of the binaries themselves are
c6d234
+     trusted to have designed this correctly.  Only $ORIGIN is tested in
c6d234
+     this way because it may be manipulated in some ways with hard
c6d234
+     links.  */
c6d234
+  if (__glibc_unlikely (check_for_trusted)
c6d234
+      && !is_trusted_path_normalize (result, wp - result))
c6d234
+    {
c6d234
+      *result = '\0';
c6d234
+      return result;
c6d234
+    }
c6d234
 
c6d234
   *wp = '\0';
c6d234
 
c6d234
@@ -368,13 +424,13 @@ _dl_dst_substitute (struct link_map *l,
c6d234
 }
c6d234
 
c6d234
 
c6d234
-/* Return copy of argument with all recognized dynamic string tokens
c6d234
-   ($ORIGIN and $PLATFORM for now) replaced.  On some platforms it
c6d234
-   might not be possible to determine the path from which the object
c6d234
-   belonging to the map is loaded.  In this case the path element
c6d234
-   containing $ORIGIN is left out.  */
c6d234
+/* Return a malloc allocated copy of INPUT with all recognized DSTs
c6d234
+   replaced. On some platforms it might not be possible to determine the
c6d234
+   path from which the object belonging to the map is loaded.  In this
c6d234
+   case the path containing the DST is left out.  On error NULL
c6d234
+   is returned.  */
c6d234
 static char *
c6d234
-expand_dynamic_string_token (struct link_map *l, const char *s)
c6d234
+expand_dynamic_string_token (struct link_map *l, const char *input)
c6d234
 {
c6d234
   /* We make two runs over the string.  First we determine how large the
c6d234
      resulting string is and then we copy it over.  Since this is no
c6d234
@@ -384,22 +440,22 @@ expand_dynamic_string_token (struct link
c6d234
   size_t total;
c6d234
   char *result;
c6d234
 
c6d234
-  /* Determine the number of DST elements.  */
c6d234
-  cnt = DL_DST_COUNT (s);
c6d234
+  /* Determine the number of DSTs.  */
c6d234
+  cnt = _dl_dst_count (input);
c6d234
 
c6d234
   /* If we do not have to replace anything simply copy the string.  */
c6d234
   if (__builtin_expect (cnt, 0) == 0)
c6d234
-    return local_strdup (s);
c6d234
+    return local_strdup (input);
c6d234
 
c6d234
   /* Determine the length of the substituted string.  */
c6d234
-  total = DL_DST_REQUIRED (l, s, strlen (s), cnt);
c6d234
+  total = DL_DST_REQUIRED (l, input, strlen (input), cnt);
c6d234
 
c6d234
   /* Allocate the necessary memory.  */
c6d234
   result = (char *) malloc (total + 1);
c6d234
   if (result == NULL)
c6d234
     return NULL;
c6d234
 
c6d234
-  return _dl_dst_substitute (l, s, result);
c6d234
+  return _dl_dst_substitute (l, input, result);
c6d234
 }
c6d234
 
c6d234