76b6d9
commit c6fad4fa149485a307207f707e5851216f190fc8
76b6d9
Author: Florian Weimer <fweimer@redhat.com>
76b6d9
Date:   Thu Mar 19 18:32:28 2020 -0300
76b6d9
76b6d9
    stdio: Remove memory leak from multibyte convertion [BZ#25691]
76b6d9
    
76b6d9
    This is an updated version of a previous patch [1] with the
76b6d9
    following changes:
76b6d9
    
76b6d9
      - Use compiler overflow builtins on done_add_func function.
76b6d9
      - Define the scratch +utstring_converted_wide_string using
76b6d9
        CHAR_T.
76b6d9
      - Added a testcase and mention the bug report.
76b6d9
    
76b6d9
    Both default and wide printf functions might leak memory when
76b6d9
    manipulate multibyte characters conversion depending of the size
76b6d9
    of the input (whether __libc_use_alloca trigger or not the fallback
76b6d9
    heap allocation).
76b6d9
    
76b6d9
    This patch fixes it by removing the extra memory allocation on
76b6d9
    string formatting with conversion parts.
76b6d9
    
76b6d9
    The testcase uses input argument size that trigger memory leaks
76b6d9
    on unpatched code (using a scratch buffer the threashold to use
76b6d9
    heap allocation is lower).
76b6d9
    
76b6d9
    Checked on x86_64-linux-gnu and i686-linux-gnu.
76b6d9
    
76b6d9
    Reviewed-by: Adhemerval Zanella  <adhemerval.zanella@linaro.org>
76b6d9
    
76b6d9
    [1] https://sourceware.org/pipermail/libc-alpha/2017-June/082098.html
76b6d9
    
76b6d9
    (cherry picked from commit 3cc4a8367c23582b7db14cf4e150e4068b7fd461)
76b6d9
76b6d9
diff --git a/stdio-common/vfprintf.c b/stdio-common/vfprintf.c
76b6d9
index ae412e4b8444aea2..dab56b6ba2c7bdbe 100644
76b6d9
--- a/stdio-common/vfprintf.c
76b6d9
+++ b/stdio-common/vfprintf.c
76b6d9
@@ -31,6 +31,7 @@
76b6d9
 #include <locale/localeinfo.h>
76b6d9
 #include <stdio.h>
76b6d9
 #include <scratch_buffer.h>
76b6d9
+#include <intprops.h>
76b6d9
 
76b6d9
 /* This code is shared between the standard stdio implementation found
76b6d9
    in GNU C library and the libio implementation originally found in
76b6d9
@@ -64,23 +65,40 @@
76b6d9
     } while (0)
76b6d9
 #define UNBUFFERED_P(S) ((S)->_flags & _IO_UNBUFFERED)
76b6d9
 
76b6d9
-#define done_add(val) \
76b6d9
-  do {									      \
76b6d9
-    unsigned int _val = val;						      \
76b6d9
-    assert ((unsigned int) done < (unsigned int) INT_MAX);		      \
76b6d9
-    if (__glibc_unlikely (INT_MAX - done < _val))			      \
76b6d9
-      {									      \
76b6d9
-	done = -1;							      \
76b6d9
-	 __set_errno (EOVERFLOW);					      \
76b6d9
-	goto all_done;							      \
76b6d9
-      }									      \
76b6d9
-    done += _val;							      \
76b6d9
-  } while (0)
76b6d9
+/* Add LENGTH to DONE.  Return the new value of DONE, or -1 on
76b6d9
+   overflow (and set errno accordingly).  */
76b6d9
+static inline int
76b6d9
+done_add_func (size_t length, int done)
76b6d9
+{
76b6d9
+  if (done < 0)
76b6d9
+    return done;
76b6d9
+  int ret;
76b6d9
+  if (INT_ADD_WRAPV (done, length, &ret))
76b6d9
+    {
76b6d9
+      __set_errno (EOVERFLOW);
76b6d9
+      return -1;
76b6d9
+    }
76b6d9
+  return ret;
76b6d9
+}
76b6d9
+
76b6d9
+#define done_add(val)							\
76b6d9
+  do									\
76b6d9
+    {									\
76b6d9
+      /* Ensure that VAL has a type similar to int.  */			\
76b6d9
+      _Static_assert (sizeof (val) == sizeof (int), "value int size");	\
76b6d9
+      _Static_assert ((__typeof__ (val)) -1 < 0, "value signed");	\
76b6d9
+      done = done_add_func ((val), done);				\
76b6d9
+      if (done < 0)							\
76b6d9
+	goto all_done;							\
76b6d9
+    }									\
76b6d9
+  while (0)
76b6d9
 
76b6d9
 #ifndef COMPILE_WPRINTF
76b6d9
 # define vfprintf	_IO_vfprintf_internal
76b6d9
 # define CHAR_T		char
76b6d9
+# define CHAR_T		char
76b6d9
 # define UCHAR_T	unsigned char
76b6d9
+# define OTHER_CHAR_T   wchar_t
76b6d9
 # define INT_T		int
76b6d9
 typedef const char *THOUSANDS_SEP_T;
76b6d9
 # define L_(Str)	Str
76b6d9
@@ -88,22 +106,10 @@ typedef const char *THOUSANDS_SEP_T;
76b6d9
 # define STR_LEN(Str)	strlen (Str)
76b6d9
 
76b6d9
 # define PUT(F, S, N)	_IO_sputn ((F), (S), (N))
76b6d9
-# define PAD(Padchar) \
76b6d9
-  do {									      \
76b6d9
-    if (width > 0)							      \
76b6d9
-      {									      \
76b6d9
-	ssize_t written = _IO_padn (s, (Padchar), width);		      \
76b6d9
-	if (__glibc_unlikely (written != width))			      \
76b6d9
-	  {								      \
76b6d9
-	    done = -1;							      \
76b6d9
-	    goto all_done;						      \
76b6d9
-	  }								      \
76b6d9
-	done_add (written);						      \
76b6d9
-      }									      \
76b6d9
-  } while (0)
76b6d9
 # define PUTC(C, F)	_IO_putc_unlocked (C, F)
76b6d9
 # define ORIENT		if (_IO_vtable_offset (s) == 0 && _IO_fwide (s, -1) != -1)\
76b6d9
 			  return -1
76b6d9
+# define CONVERT_FROM_OTHER_STRING __wcsrtombs
76b6d9
 #else
76b6d9
 # define vfprintf	_IO_vfwprintf
76b6d9
 # define CHAR_T		wchar_t
76b6d9
@@ -118,21 +124,11 @@ typedef wchar_t THOUSANDS_SEP_T;
76b6d9
 # include <_itowa.h>
76b6d9
 
76b6d9
 # define PUT(F, S, N)	_IO_sputn ((F), (S), (N))
76b6d9
-# define PAD(Padchar) \
76b6d9
-  do {									      \
76b6d9
-    if (width > 0)							      \
76b6d9
-      {									      \
76b6d9
-	ssize_t written = _IO_wpadn (s, (Padchar), width);		      \
76b6d9
-	if (__glibc_unlikely (written != width))			      \
76b6d9
-	  {								      \
76b6d9
-	    done = -1;							      \
76b6d9
-	    goto all_done;						      \
76b6d9
-	  }								      \
76b6d9
-	done_add (written);						      \
76b6d9
-      }									      \
76b6d9
-  } while (0)
76b6d9
 # define PUTC(C, F)	_IO_putwc_unlocked (C, F)
76b6d9
 # define ORIENT		if (_IO_fwide (s, 1) != 1) return -1
76b6d9
+# define CONVERT_FROM_OTHER_STRING __mbsrtowcs
76b6d9
+# define CHAR_T		wchar_t
76b6d9
+# define OTHER_CHAR_T   char
76b6d9
 
76b6d9
 # undef _itoa
76b6d9
 # define _itoa(Val, Buf, Base, Case) _itowa (Val, Buf, Base, Case)
76b6d9
@@ -141,6 +137,33 @@ typedef wchar_t THOUSANDS_SEP_T;
76b6d9
 # define EOF WEOF
76b6d9
 #endif
76b6d9
 
76b6d9
+static inline int
76b6d9
+pad_func (FILE *s, CHAR_T padchar, int width, int done)
76b6d9
+{
76b6d9
+  if (width > 0)
76b6d9
+    {
76b6d9
+      ssize_t written;
76b6d9
+#ifndef COMPILE_WPRINTF
76b6d9
+      written = _IO_padn (s, padchar, width);
76b6d9
+#else
76b6d9
+      written = _IO_wpadn (s, padchar, width);
76b6d9
+#endif
76b6d9
+      if (__glibc_unlikely (written != width))
76b6d9
+	return -1;
76b6d9
+      return done_add_func (width, done);
76b6d9
+    }
76b6d9
+  return done;
76b6d9
+}
76b6d9
+
76b6d9
+#define PAD(Padchar)							\
76b6d9
+  do									\
76b6d9
+    {									\
76b6d9
+      done = pad_func (s, (Padchar), width, done);			\
76b6d9
+      if (done < 0)							\
76b6d9
+	goto all_done;							\
76b6d9
+    }									\
76b6d9
+  while (0)
76b6d9
+
76b6d9
 #include "_i18n_number.h"
76b6d9
 
76b6d9
 /* Include the shared code for parsing the format string.  */
76b6d9
@@ -160,24 +183,115 @@ typedef wchar_t THOUSANDS_SEP_T;
76b6d9
     }									      \
76b6d9
   while (0)
76b6d9
 
76b6d9
-#define outstring(String, Len)						      \
76b6d9
-  do									      \
76b6d9
-    {									      \
76b6d9
-      assert ((size_t) done <= (size_t) INT_MAX);			      \
76b6d9
-      if ((size_t) PUT (s, (String), (Len)) != (size_t) (Len))		      \
76b6d9
-	{								      \
76b6d9
-	  done = -1;							      \
76b6d9
-	  goto all_done;						      \
76b6d9
-	}								      \
76b6d9
-      if (__glibc_unlikely (INT_MAX - done < (Len)))			      \
76b6d9
-      {									      \
76b6d9
-	done = -1;							      \
76b6d9
-	 __set_errno (EOVERFLOW);					      \
76b6d9
-	goto all_done;							      \
76b6d9
-      }									      \
76b6d9
-      done += (Len);							      \
76b6d9
-    }									      \
76b6d9
-  while (0)
76b6d9
+static inline int
76b6d9
+outstring_func (FILE *s, const UCHAR_T *string, size_t length, int done)
76b6d9
+{
76b6d9
+  assert ((size_t) done <= (size_t) INT_MAX);
76b6d9
+  if ((size_t) PUT (s, string, length) != (size_t) (length))
76b6d9
+    return -1;
76b6d9
+  return done_add_func (length, done);
76b6d9
+}
76b6d9
+
76b6d9
+#define outstring(String, Len)						\
76b6d9
+  do									\
76b6d9
+    {									\
76b6d9
+      const void *string_ = (String);					\
76b6d9
+      done = outstring_func (s, string_, (Len), done);			\
76b6d9
+      if (done < 0)							\
76b6d9
+	goto all_done;							\
76b6d9
+    }									\
76b6d9
+   while (0)
76b6d9
+
76b6d9
+/* Write the string SRC to S.  If PREC is non-negative, write at most
76b6d9
+   PREC bytes.  If LEFT is true, perform left justification.  */
76b6d9
+static int
76b6d9
+outstring_converted_wide_string (FILE *s, const OTHER_CHAR_T *src, int prec,
76b6d9
+				 int width, bool left, int done)
76b6d9
+{
76b6d9
+  /* Use a small buffer to combine processing of multiple characters.
76b6d9
+     CONVERT_FROM_OTHER_STRING expects the buffer size in (wide)
76b6d9
+     characters, and buf_length counts that.  */
76b6d9
+  enum { buf_length = 256 / sizeof (CHAR_T) };
76b6d9
+  CHAR_T buf[buf_length];
76b6d9
+  _Static_assert (sizeof (buf) > MB_LEN_MAX,
76b6d9
+		  "buffer is large enough for a single multi-byte character");
76b6d9
+
76b6d9
+  /* Add the initial padding if needed.  */
76b6d9
+  if (width > 0 && !left)
76b6d9
+    {
76b6d9
+      /* Make a first pass to find the output width, so that we can
76b6d9
+	 add the required padding.  */
76b6d9
+      mbstate_t mbstate = { 0 };
76b6d9
+      const OTHER_CHAR_T *src_copy = src;
76b6d9
+      size_t total_written;
76b6d9
+      if (prec < 0)
76b6d9
+	total_written = CONVERT_FROM_OTHER_STRING
76b6d9
+	  (NULL, &src_copy, 0, &mbstate);
76b6d9
+      else
76b6d9
+	{
76b6d9
+	  /* The source might not be null-terminated.  Enforce the
76b6d9
+	     limit manually, based on the output length.  */
76b6d9
+	  total_written = 0;
76b6d9
+	  size_t limit = prec;
76b6d9
+	  while (limit > 0 && src_copy != NULL)
76b6d9
+	    {
76b6d9
+	      size_t write_limit = buf_length;
76b6d9
+	      if (write_limit > limit)
76b6d9
+		write_limit = limit;
76b6d9
+	      size_t written = CONVERT_FROM_OTHER_STRING
76b6d9
+		(buf, &src_copy, write_limit, &mbstate);
76b6d9
+	      if (written == (size_t) -1)
76b6d9
+		return -1;
76b6d9
+	      if (written == 0)
76b6d9
+		break;
76b6d9
+	      total_written += written;
76b6d9
+	      limit -= written;
76b6d9
+	    }
76b6d9
+	}
76b6d9
+
76b6d9
+      /* Output initial padding.  */
76b6d9
+      if (total_written < width)
76b6d9
+	{
76b6d9
+	  done = pad_func (s, L_(' '), width - total_written, done);
76b6d9
+	  if (done < 0)
76b6d9
+	    return done;
76b6d9
+	}
76b6d9
+    }
76b6d9
+
76b6d9
+  /* Convert the input string, piece by piece.  */
76b6d9
+  size_t total_written = 0;
76b6d9
+  {
76b6d9
+    mbstate_t mbstate = { 0 };
76b6d9
+    /* If prec is negative, remaining is not decremented, otherwise,
76b6d9
+      it serves as the write limit.  */
76b6d9
+    size_t remaining = -1;
76b6d9
+    if (prec >= 0)
76b6d9
+      remaining = prec;
76b6d9
+    while (remaining > 0 && src != NULL)
76b6d9
+      {
76b6d9
+	size_t write_limit = buf_length;
76b6d9
+	if (remaining < write_limit)
76b6d9
+	  write_limit = remaining;
76b6d9
+	size_t written = CONVERT_FROM_OTHER_STRING
76b6d9
+	  (buf, &src, write_limit, &mbstate);
76b6d9
+	if (written == (size_t) -1)
76b6d9
+	  return -1;
76b6d9
+	if (written == 0)
76b6d9
+	  break;
76b6d9
+	done = outstring_func (s, (const UCHAR_T *) buf, written, done);
76b6d9
+	if (done < 0)
76b6d9
+	  return done;
76b6d9
+	total_written += written;
76b6d9
+	if (prec >= 0)
76b6d9
+	  remaining -= written;
76b6d9
+      }
76b6d9
+  }
76b6d9
+
76b6d9
+  /* Add final padding.  */
76b6d9
+  if (width > 0 && left && total_written < width)
76b6d9
+    return pad_func (s, L_(' '), width - total_written, done);
76b6d9
+  return done;
76b6d9
+}
76b6d9
 
76b6d9
 /* For handling long_double and longlong we use the same flag.  If
76b6d9
    `long' and `long long' are effectively the same type define it to
76b6d9
@@ -975,7 +1089,6 @@ static const uint8_t jump_table[] =
76b6d9
     LABEL (form_string):						      \
76b6d9
       {									      \
76b6d9
 	size_t len;							      \
76b6d9
-	int string_malloced;						      \
76b6d9
 									      \
76b6d9
 	/* The string argument could in fact be `char *' or `wchar_t *'.      \
76b6d9
 	   But this should not make a difference here.  */		      \
76b6d9
@@ -987,7 +1100,6 @@ static const uint8_t jump_table[] =
76b6d9
 	/* Entry point for printing other strings.  */			      \
76b6d9
       LABEL (print_string):						      \
76b6d9
 									      \
76b6d9
-	string_malloced = 0;						      \
76b6d9
 	if (string == NULL)						      \
76b6d9
 	  {								      \
76b6d9
 	    /* Write "(null)" if there's space.  */			      \
76b6d9
@@ -1004,41 +1116,12 @@ static const uint8_t jump_table[] =
76b6d9
 	  }								      \
76b6d9
 	else if (!is_long && spec != L_('S'))				      \
76b6d9
 	  {								      \
76b6d9
-	    /* This is complicated.  We have to transform the multibyte	      \
76b6d9
-	       string into a wide character string.  */			      \
76b6d9
-	    const char *mbs = (const char *) string;			      \
76b6d9
-	    mbstate_t mbstate;						      \
76b6d9
-									      \
76b6d9
-	    len = prec != -1 ? __strnlen (mbs, (size_t) prec) : strlen (mbs); \
76b6d9
-									      \
76b6d9
-	    /* Allocate dynamically an array which definitely is long	      \
76b6d9
-	       enough for the wide character version.  Each byte in the	      \
76b6d9
-	       multi-byte string can produce at most one wide character.  */  \
76b6d9
-	    if (__glibc_unlikely (len > SIZE_MAX / sizeof (wchar_t)))	      \
76b6d9
-	      {								      \
76b6d9
-		__set_errno (EOVERFLOW);				      \
76b6d9
-		done = -1;						      \
76b6d9
-		goto all_done;						      \
76b6d9
-	      }								      \
76b6d9
-	    else if (__libc_use_alloca (len * sizeof (wchar_t)))	      \
76b6d9
-	      string = (CHAR_T *) alloca (len * sizeof (wchar_t));	      \
76b6d9
-	    else if ((string = (CHAR_T *) malloc (len * sizeof (wchar_t)))    \
76b6d9
-		     == NULL)						      \
76b6d9
-	      {								      \
76b6d9
-		done = -1;						      \
76b6d9
-		goto all_done;						      \
76b6d9
-	      }								      \
76b6d9
-	    else							      \
76b6d9
-	      string_malloced = 1;					      \
76b6d9
-									      \
76b6d9
-	    memset (&mbstate, '\0', sizeof (mbstate_t));		      \
76b6d9
-	    len = __mbsrtowcs (string, &mbs, len, &mbstate);		      \
76b6d9
-	    if (len == (size_t) -1)					      \
76b6d9
-	      {								      \
76b6d9
-		/* Illegal multibyte character.  */			      \
76b6d9
-		done = -1;						      \
76b6d9
-		goto all_done;						      \
76b6d9
-	      }								      \
76b6d9
+	    done = outstring_converted_wide_string			      \
76b6d9
+	      (s, (const char *) string, prec, width, left, done);	      \
76b6d9
+	    if (done < 0)						      \
76b6d9
+	      goto all_done;						      \
76b6d9
+	    /* The padding has already been written.  */		      \
76b6d9
+	    break;							      \
76b6d9
 	  }								      \
76b6d9
 	else								      \
76b6d9
 	  {								      \
76b6d9
@@ -1061,8 +1144,6 @@ static const uint8_t jump_table[] =
76b6d9
 	outstring (string, len);					      \
76b6d9
 	if (left)							      \
76b6d9
 	  PAD (L' ');							      \
76b6d9
-	if (__glibc_unlikely (string_malloced))				      \
76b6d9
-	  free (string);						      \
76b6d9
       }									      \
76b6d9
       break;
76b6d9
 #else
76b6d9
@@ -1111,7 +1192,6 @@ static const uint8_t jump_table[] =
76b6d9
     LABEL (form_string):						      \
76b6d9
       {									      \
76b6d9
 	size_t len;							      \
76b6d9
-	int string_malloced;						      \
76b6d9
 									      \
76b6d9
 	/* The string argument could in fact be `char *' or `wchar_t *'.      \
76b6d9
 	   But this should not make a difference here.  */		      \
76b6d9
@@ -1123,7 +1203,6 @@ static const uint8_t jump_table[] =
76b6d9
 	/* Entry point for printing other strings.  */			      \
76b6d9
       LABEL (print_string):						      \
76b6d9
 									      \
76b6d9
-	string_malloced = 0;						      \
76b6d9
 	if (string == NULL)						      \
76b6d9
 	  {								      \
76b6d9
 	    /* Write "(null)" if there's space.  */			      \
76b6d9
@@ -1149,51 +1228,12 @@ static const uint8_t jump_table[] =
76b6d9
 	  }								      \
76b6d9
 	else								      \
76b6d9
 	  {								      \
76b6d9
-	    const wchar_t *s2 = (const wchar_t *) string;		      \
76b6d9
-	    mbstate_t mbstate;						      \
76b6d9
-									      \
76b6d9
-	    memset (&mbstate, '\0', sizeof (mbstate_t));		      \
76b6d9
-									      \
76b6d9
-	    if (prec >= 0)						      \
76b6d9
-	      {								      \
76b6d9
-		/* The string `s2' might not be NUL terminated.  */	      \
76b6d9
-		if (__libc_use_alloca (prec))				      \
76b6d9
-		  string = (char *) alloca (prec);			      \
76b6d9
-		else if ((string = (char *) malloc (prec)) == NULL)	      \
76b6d9
-		  {							      \
76b6d9
-		    done = -1;						      \
76b6d9
-		    goto all_done;					      \
76b6d9
-		  }							      \
76b6d9
-		else							      \
76b6d9
-		  string_malloced = 1;					      \
76b6d9
-		len = __wcsrtombs (string, &s2, prec, &mbstate);	      \
76b6d9
-	      }								      \
76b6d9
-	    else							      \
76b6d9
-	      {								      \
76b6d9
-		len = __wcsrtombs (NULL, &s2, 0, &mbstate);		      \
76b6d9
-		if (len != (size_t) -1)					      \
76b6d9
-		  {							      \
76b6d9
-		    assert (__mbsinit (&mbstate));			      \
76b6d9
-		    s2 = (const wchar_t *) string;			      \
76b6d9
-		    if (__libc_use_alloca (len + 1))			      \
76b6d9
-		      string = (char *) alloca (len + 1);		      \
76b6d9
-		    else if ((string = (char *) malloc (len + 1)) == NULL)    \
76b6d9
-		      {							      \
76b6d9
-			done = -1;					      \
76b6d9
-			goto all_done;					      \
76b6d9
-		      }							      \
76b6d9
-		    else						      \
76b6d9
-		      string_malloced = 1;				      \
76b6d9
-		    (void) __wcsrtombs (string, &s2, len + 1, &mbstate);      \
76b6d9
-		  }							      \
76b6d9
-	      }								      \
76b6d9
-									      \
76b6d9
-	    if (len == (size_t) -1)					      \
76b6d9
-	      {								      \
76b6d9
-		/* Illegal wide-character string.  */			      \
76b6d9
-		done = -1;						      \
76b6d9
-		goto all_done;						      \
76b6d9
-	      }								      \
76b6d9
+	    done = outstring_converted_wide_string			      \
76b6d9
+	      (s, (const wchar_t *) string, prec, width, left, done);	      \
76b6d9
+	    if (done < 0)						      \
76b6d9
+	      goto all_done;						      \
76b6d9
+	    /* The padding has already been written.  */		      \
76b6d9
+	    break;							      \
76b6d9
 	  }								      \
76b6d9
 									      \
76b6d9
 	if ((width -= len) < 0)						      \
76b6d9
@@ -1207,8 +1247,6 @@ static const uint8_t jump_table[] =
76b6d9
 	outstring (string, len);					      \
76b6d9
 	if (left)							      \
76b6d9
 	  PAD (' ');							      \
76b6d9
-	if (__glibc_unlikely (string_malloced))			              \
76b6d9
-	  free (string);						      \
76b6d9
       }									      \
76b6d9
       break;
76b6d9
 #endif