Karsten Hopp 81c43e
To: vim-dev@vim.org
Karsten Hopp 81c43e
Subject: Patch 7.2.312
Karsten Hopp 81c43e
Fcc: outbox
Karsten Hopp 81c43e
From: Bram Moolenaar <Bram@moolenaar.net>
Karsten Hopp 81c43e
Mime-Version: 1.0
Karsten Hopp 81c43e
Content-Type: text/plain; charset=UTF-8
Karsten Hopp 81c43e
Content-Transfer-Encoding: 8bit
Karsten Hopp 81c43e
------------
Karsten Hopp 81c43e
Karsten Hopp 81c43e
Patch 7.2.312
Karsten Hopp 81c43e
Problem:    iconv() returns an invalid character sequence when conversion
Karsten Hopp 81c43e
	    fails.  It should return an empty string. (Yongwei Wu)
Karsten Hopp 81c43e
Solution:   Be more strict about invalid characters in the input.
Karsten Hopp 81c43e
Files:	    src/mbyte.c
Karsten Hopp 81c43e
Karsten Hopp 81c43e
Karsten Hopp 81c43e
*** ../vim-7.2.311/src/mbyte.c	2009-06-16 15:23:07.000000000 +0200
Karsten Hopp 81c43e
--- src/mbyte.c	2009-11-25 16:10:44.000000000 +0100
Karsten Hopp 81c43e
***************
Karsten Hopp 81c43e
*** 133,154 ****
Karsten Hopp 81c43e
  static int dbcs_ptr2cells_len __ARGS((char_u *p, int size));
Karsten Hopp 81c43e
  static int dbcs_ptr2char __ARGS((char_u *p));
Karsten Hopp 81c43e
  
Karsten Hopp 81c43e
! /* Lookup table to quickly get the length in bytes of a UTF-8 character from
Karsten Hopp 81c43e
!  * the first byte of a UTF-8 string.  Bytes which are illegal when used as the
Karsten Hopp 81c43e
!  * first byte have a one, because these will be used separately. */
Karsten Hopp 81c43e
  static char utf8len_tab[256] =
Karsten Hopp 81c43e
  {
Karsten Hopp 81c43e
      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
Karsten Hopp 81c43e
      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
Karsten Hopp 81c43e
      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
Karsten Hopp 81c43e
      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
Karsten Hopp 81c43e
!     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /*bogus*/
Karsten Hopp 81c43e
!     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /*bogus*/
Karsten Hopp 81c43e
      2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
Karsten Hopp 81c43e
      3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1,
Karsten Hopp 81c43e
  };
Karsten Hopp 81c43e
  
Karsten Hopp 81c43e
  /*
Karsten Hopp 81c43e
   * XIM often causes trouble.  Define XIM_DEBUG to get a log of XIM callbacks
Karsten Hopp 81c43e
   * in the "xim.log" file.
Karsten Hopp 81c43e
   */
Karsten Hopp 81c43e
--- 133,172 ----
Karsten Hopp 81c43e
  static int dbcs_ptr2cells_len __ARGS((char_u *p, int size));
Karsten Hopp 81c43e
  static int dbcs_ptr2char __ARGS((char_u *p));
Karsten Hopp 81c43e
  
Karsten Hopp 81c43e
! /*
Karsten Hopp 81c43e
!  * Lookup table to quickly get the length in bytes of a UTF-8 character from
Karsten Hopp 81c43e
!  * the first byte of a UTF-8 string.
Karsten Hopp 81c43e
!  * Bytes which are illegal when used as the first byte have a 1.
Karsten Hopp 81c43e
!  * The NUL byte has length 1.
Karsten Hopp 81c43e
!  */
Karsten Hopp 81c43e
  static char utf8len_tab[256] =
Karsten Hopp 81c43e
  {
Karsten Hopp 81c43e
      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
Karsten Hopp 81c43e
      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
Karsten Hopp 81c43e
      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
Karsten Hopp 81c43e
      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
Karsten Hopp 81c43e
!     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
Karsten Hopp 81c43e
!     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
Karsten Hopp 81c43e
      2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
Karsten Hopp 81c43e
      3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1,
Karsten Hopp 81c43e
  };
Karsten Hopp 81c43e
  
Karsten Hopp 81c43e
  /*
Karsten Hopp 81c43e
+  * Like utf8len_tab above, but using a zero for illegal lead bytes.
Karsten Hopp 81c43e
+  */
Karsten Hopp 81c43e
+ static char utf8len_tab_zero[256] =
Karsten Hopp 81c43e
+ {
Karsten Hopp 81c43e
+     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
Karsten Hopp 81c43e
+     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
Karsten Hopp 81c43e
+     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
Karsten Hopp 81c43e
+     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
Karsten Hopp 81c43e
+     0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
Karsten Hopp 81c43e
+     0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
Karsten Hopp 81c43e
+     2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
Karsten Hopp 81c43e
+     3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,0,0,
Karsten Hopp 81c43e
+ };
Karsten Hopp 81c43e
+ 
Karsten Hopp 81c43e
+ /*
Karsten Hopp 81c43e
   * XIM often causes trouble.  Define XIM_DEBUG to get a log of XIM callbacks
Karsten Hopp 81c43e
   * in the "xim.log" file.
Karsten Hopp 81c43e
   */
Karsten Hopp 81c43e
***************
Karsten Hopp 81c43e
*** 1352,1358 ****
Karsten Hopp 81c43e
      if (size > 0 && *p >= 0x80)
Karsten Hopp 81c43e
      {
Karsten Hopp 81c43e
  	if (utf_ptr2len_len(p, size) < utf8len_tab[*p])
Karsten Hopp 81c43e
! 	    return 1;
Karsten Hopp 81c43e
  	c = utf_ptr2char(p);
Karsten Hopp 81c43e
  	/* An illegal byte is displayed as <xx>. */
Karsten Hopp 81c43e
  	if (utf_ptr2len(p) == 1 || c == NUL)
Karsten Hopp 81c43e
--- 1370,1376 ----
Karsten Hopp 81c43e
      if (size > 0 && *p >= 0x80)
Karsten Hopp 81c43e
      {
Karsten Hopp 81c43e
  	if (utf_ptr2len_len(p, size) < utf8len_tab[*p])
Karsten Hopp 81c43e
! 	    return 1;  /* truncated */
Karsten Hopp 81c43e
  	c = utf_ptr2char(p);
Karsten Hopp 81c43e
  	/* An illegal byte is displayed as <xx>. */
Karsten Hopp 81c43e
  	if (utf_ptr2len(p) == 1 || c == NUL)
Karsten Hopp 81c43e
***************
Karsten Hopp 81c43e
*** 1473,1479 ****
Karsten Hopp 81c43e
      if (p[0] < 0x80)	/* be quick for ASCII */
Karsten Hopp 81c43e
  	return p[0];
Karsten Hopp 81c43e
  
Karsten Hopp 81c43e
!     len = utf8len_tab[p[0]];
Karsten Hopp 81c43e
      if (len > 1 && (p[1] & 0xc0) == 0x80)
Karsten Hopp 81c43e
      {
Karsten Hopp 81c43e
  	if (len == 2)
Karsten Hopp 81c43e
--- 1491,1497 ----
Karsten Hopp 81c43e
      if (p[0] < 0x80)	/* be quick for ASCII */
Karsten Hopp 81c43e
  	return p[0];
Karsten Hopp 81c43e
  
Karsten Hopp 81c43e
!     len = utf8len_tab_zero[p[0]];
Karsten Hopp 81c43e
      if (len > 1 && (p[1] & 0xc0) == 0x80)
Karsten Hopp 81c43e
      {
Karsten Hopp 81c43e
  	if (len == 2)
Karsten Hopp 81c43e
***************
Karsten Hopp 81c43e
*** 1723,1728 ****
Karsten Hopp 81c43e
--- 1741,1747 ----
Karsten Hopp 81c43e
  /*
Karsten Hopp 81c43e
   * Return length of UTF-8 character, obtained from the first byte.
Karsten Hopp 81c43e
   * "b" must be between 0 and 255!
Karsten Hopp 81c43e
+  * Returns 1 for an invalid first byte value.
Karsten Hopp 81c43e
   */
Karsten Hopp 81c43e
      int
Karsten Hopp 81c43e
  utf_byte2len(b)
Karsten Hopp 81c43e
***************
Karsten Hopp 81c43e
*** 1737,1742 ****
Karsten Hopp 81c43e
--- 1756,1762 ----
Karsten Hopp 81c43e
   * Returns 1 for "".
Karsten Hopp 81c43e
   * Returns 1 for an illegal byte sequence (also in incomplete byte seq.).
Karsten Hopp 81c43e
   * Returns number > "size" for an incomplete byte sequence.
Karsten Hopp 81c43e
+  * Never returns zero.
Karsten Hopp 81c43e
   */
Karsten Hopp 81c43e
      int
Karsten Hopp 81c43e
  utf_ptr2len_len(p, size)
Karsten Hopp 81c43e
***************
Karsten Hopp 81c43e
*** 1747,1757 ****
Karsten Hopp 81c43e
      int		i;
Karsten Hopp 81c43e
      int		m;
Karsten Hopp 81c43e
  
Karsten Hopp 81c43e
!     if (*p == NUL)
Karsten Hopp 81c43e
! 	return 1;
Karsten Hopp 81c43e
!     m = len = utf8len_tab[*p];
Karsten Hopp 81c43e
      if (len > size)
Karsten Hopp 81c43e
  	m = size;	/* incomplete byte sequence. */
Karsten Hopp 81c43e
      for (i = 1; i < m; ++i)
Karsten Hopp 81c43e
  	if ((p[i] & 0xc0) != 0x80)
Karsten Hopp 81c43e
  	    return 1;
Karsten Hopp 81c43e
--- 1767,1779 ----
Karsten Hopp 81c43e
      int		i;
Karsten Hopp 81c43e
      int		m;
Karsten Hopp 81c43e
  
Karsten Hopp 81c43e
!     len = utf8len_tab[*p];
Karsten Hopp 81c43e
!     if (len == 1)
Karsten Hopp 81c43e
! 	return 1;	/* NUL, ascii or illegal lead byte */
Karsten Hopp 81c43e
      if (len > size)
Karsten Hopp 81c43e
  	m = size;	/* incomplete byte sequence. */
Karsten Hopp 81c43e
+     else
Karsten Hopp 81c43e
+ 	m = len;
Karsten Hopp 81c43e
      for (i = 1; i < m; ++i)
Karsten Hopp 81c43e
  	if ((p[i] & 0xc0) != 0x80)
Karsten Hopp 81c43e
  	    return 1;
Karsten Hopp 81c43e
***************
Karsten Hopp 81c43e
*** 2505,2510 ****
Karsten Hopp 81c43e
--- 2527,2533 ----
Karsten Hopp 81c43e
  /*
Karsten Hopp 81c43e
   * mb_head_off() function pointer.
Karsten Hopp 81c43e
   * Return offset from "p" to the first byte of the character it points into.
Karsten Hopp 81c43e
+  * If "p" points to the NUL at the end of the string return 0.
Karsten Hopp 81c43e
   * Returns 0 when already at the first byte of a character.
Karsten Hopp 81c43e
   */
Karsten Hopp 81c43e
      int
Karsten Hopp 81c43e
***************
Karsten Hopp 81c43e
*** 2524,2530 ****
Karsten Hopp 81c43e
  
Karsten Hopp 81c43e
      /* It can't be a trailing byte when not using DBCS, at the start of the
Karsten Hopp 81c43e
       * string or the previous byte can't start a double-byte. */
Karsten Hopp 81c43e
!     if (p <= base || MB_BYTE2LEN(p[-1]) == 1)
Karsten Hopp 81c43e
  	return 0;
Karsten Hopp 81c43e
  
Karsten Hopp 81c43e
      /* This is slow: need to start at the base and go forward until the
Karsten Hopp 81c43e
--- 2547,2553 ----
Karsten Hopp 81c43e
  
Karsten Hopp 81c43e
      /* It can't be a trailing byte when not using DBCS, at the start of the
Karsten Hopp 81c43e
       * string or the previous byte can't start a double-byte. */
Karsten Hopp 81c43e
!     if (p <= base || MB_BYTE2LEN(p[-1]) == 1 || *p == NUL)
Karsten Hopp 81c43e
  	return 0;
Karsten Hopp 81c43e
  
Karsten Hopp 81c43e
      /* This is slow: need to start at the base and go forward until the
Karsten Hopp 81c43e
***************
Karsten Hopp 81c43e
*** 2552,2558 ****
Karsten Hopp 81c43e
       * lead byte in the current cell. */
Karsten Hopp 81c43e
      if (p <= base
Karsten Hopp 81c43e
  	    || (enc_dbcs == DBCS_JPNU && p[-1] == 0x8e)
Karsten Hopp 81c43e
! 	    || MB_BYTE2LEN(p[-1]) == 1)
Karsten Hopp 81c43e
  	return 0;
Karsten Hopp 81c43e
  
Karsten Hopp 81c43e
      /* This is slow: need to start at the base and go forward until the
Karsten Hopp 81c43e
--- 2575,2582 ----
Karsten Hopp 81c43e
       * lead byte in the current cell. */
Karsten Hopp 81c43e
      if (p <= base
Karsten Hopp 81c43e
  	    || (enc_dbcs == DBCS_JPNU && p[-1] == 0x8e)
Karsten Hopp 81c43e
! 	    || MB_BYTE2LEN(p[-1]) == 1
Karsten Hopp 81c43e
! 	    || *p == NUL)
Karsten Hopp 81c43e
  	return 0;
Karsten Hopp 81c43e
  
Karsten Hopp 81c43e
      /* This is slow: need to start at the base and go forward until the
Karsten Hopp 81c43e
***************
Karsten Hopp 81c43e
*** 2578,2583 ****
Karsten Hopp 81c43e
--- 2602,2608 ----
Karsten Hopp 81c43e
      char_u	*q;
Karsten Hopp 81c43e
      char_u	*s;
Karsten Hopp 81c43e
      int		c;
Karsten Hopp 81c43e
+     int		len;
Karsten Hopp 81c43e
  #ifdef FEAT_ARABIC
Karsten Hopp 81c43e
      char_u	*j;
Karsten Hopp 81c43e
  #endif
Karsten Hopp 81c43e
***************
Karsten Hopp 81c43e
*** 2597,2604 ****
Karsten Hopp 81c43e
  	    --q;
Karsten Hopp 81c43e
  	/* Check for illegal sequence. Do allow an illegal byte after where we
Karsten Hopp 81c43e
  	 * started. */
Karsten Hopp 81c43e
! 	if (utf8len_tab[*q] != (int)(s - q + 1)
Karsten Hopp 81c43e
! 				       && utf8len_tab[*q] != (int)(p - q + 1))
Karsten Hopp 81c43e
  	    return 0;
Karsten Hopp 81c43e
  
Karsten Hopp 81c43e
  	if (q <= base)
Karsten Hopp 81c43e
--- 2622,2629 ----
Karsten Hopp 81c43e
  	    --q;
Karsten Hopp 81c43e
  	/* Check for illegal sequence. Do allow an illegal byte after where we
Karsten Hopp 81c43e
  	 * started. */
Karsten Hopp 81c43e
! 	len = utf8len_tab[*q];
Karsten Hopp 81c43e
! 	if (len != (int)(s - q + 1) && len != (int)(p - q + 1))
Karsten Hopp 81c43e
  	    return 0;
Karsten Hopp 81c43e
  
Karsten Hopp 81c43e
  	if (q <= base)
Karsten Hopp 81c43e
***************
Karsten Hopp 81c43e
*** 2810,2818 ****
Karsten Hopp 81c43e
  
Karsten Hopp 81c43e
      while (end == NULL ? *p != NUL : p < end)
Karsten Hopp 81c43e
      {
Karsten Hopp 81c43e
! 	if ((*p & 0xc0) == 0x80)
Karsten Hopp 81c43e
  	    return FALSE;	/* invalid lead byte */
Karsten Hopp 81c43e
- 	l = utf8len_tab[*p];
Karsten Hopp 81c43e
  	if (end != NULL && p + l > end)
Karsten Hopp 81c43e
  	    return FALSE;	/* incomplete byte sequence */
Karsten Hopp 81c43e
  	++p;
Karsten Hopp 81c43e
--- 2835,2843 ----
Karsten Hopp 81c43e
  
Karsten Hopp 81c43e
      while (end == NULL ? *p != NUL : p < end)
Karsten Hopp 81c43e
      {
Karsten Hopp 81c43e
! 	l = utf8len_tab_zero[*p];
Karsten Hopp 81c43e
! 	if (l == 0)
Karsten Hopp 81c43e
  	    return FALSE;	/* invalid lead byte */
Karsten Hopp 81c43e
  	if (end != NULL && p + l > end)
Karsten Hopp 81c43e
  	    return FALSE;	/* incomplete byte sequence */
Karsten Hopp 81c43e
  	++p;
Karsten Hopp 81c43e
***************
Karsten Hopp 81c43e
*** 6117,6128 ****
Karsten Hopp 81c43e
  	    d = retval;
Karsten Hopp 81c43e
  	    for (i = 0; i < len; ++i)
Karsten Hopp 81c43e
  	    {
Karsten Hopp 81c43e
! 		l = utf_ptr2len(ptr + i);
Karsten Hopp 81c43e
  		if (l == 0)
Karsten Hopp 81c43e
  		    *d++ = NUL;
Karsten Hopp 81c43e
  		else if (l == 1)
Karsten Hopp 81c43e
  		{
Karsten Hopp 81c43e
! 		    if (unconvlenp != NULL && utf8len_tab[ptr[i]] > len - i)
Karsten Hopp 81c43e
  		    {
Karsten Hopp 81c43e
  			/* Incomplete sequence at the end. */
Karsten Hopp 81c43e
  			*unconvlenp = len - i;
Karsten Hopp 81c43e
--- 6142,6161 ----
Karsten Hopp 81c43e
  	    d = retval;
Karsten Hopp 81c43e
  	    for (i = 0; i < len; ++i)
Karsten Hopp 81c43e
  	    {
Karsten Hopp 81c43e
! 		l = utf_ptr2len_len(ptr + i, len - i);
Karsten Hopp 81c43e
  		if (l == 0)
Karsten Hopp 81c43e
  		    *d++ = NUL;
Karsten Hopp 81c43e
  		else if (l == 1)
Karsten Hopp 81c43e
  		{
Karsten Hopp 81c43e
! 		    int l_w = utf8len_tab_zero[ptr[i]];
Karsten Hopp 81c43e
! 
Karsten Hopp 81c43e
! 		    if (l_w == 0)
Karsten Hopp 81c43e
! 		    {
Karsten Hopp 81c43e
! 			/* Illegal utf-8 byte cannot be converted */
Karsten Hopp 81c43e
! 			vim_free(retval);
Karsten Hopp 81c43e
! 			return NULL;
Karsten Hopp 81c43e
! 		    }
Karsten Hopp 81c43e
! 		    if (unconvlenp != NULL && l_w > len - i)
Karsten Hopp 81c43e
  		    {
Karsten Hopp 81c43e
  			/* Incomplete sequence at the end. */
Karsten Hopp 81c43e
  			*unconvlenp = len - i;
Karsten Hopp 81c43e
*** ../vim-7.2.311/src/version.c	2009-12-02 13:32:10.000000000 +0100
Karsten Hopp 81c43e
--- src/version.c	2009-12-02 15:00:23.000000000 +0100
Karsten Hopp 81c43e
***************
Karsten Hopp 81c43e
*** 683,684 ****
Karsten Hopp 81c43e
--- 683,686 ----
Karsten Hopp 81c43e
  {   /* Add new patch number below this line */
Karsten Hopp 81c43e
+ /**/
Karsten Hopp 81c43e
+     312,
Karsten Hopp 81c43e
  /**/
Karsten Hopp 81c43e
Karsten Hopp 81c43e
-- 
Karsten Hopp 81c43e
hundred-and-one symptoms of being an internet addict:
Karsten Hopp 81c43e
6. You refuse to go to a vacation spot with no electricity and no phone lines.
Karsten Hopp 81c43e
Karsten Hopp 81c43e
 /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net   \\\
Karsten Hopp 81c43e
///        sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\
Karsten Hopp 81c43e
\\\        download, build and distribute -- http://www.A-A-P.org        ///
Karsten Hopp 81c43e
 \\\            help me help AIDS victims -- http://ICCF-Holland.org    ///