Blob Blame History Raw
To: vim-dev@vim.org
Subject: Patch 7.1.310
Fcc: outbox
From: Bram Moolenaar <Bram@moolenaar.net>
Mime-Version: 1.0
Content-Type: text/plain; charset=ISO-8859-1
Content-Transfer-Encoding: 8bit
------------

Patch 7.1.310
Problem:    An incomplete UTF-8 byte sequence at the end of the file is not
	    detected.  Memory that was never written is accessed.
Solution:   Check whether the last bytes in the buffer form a valid UTF-8
	    character. (mostly by Ben Schmidt)
	    Also fix the wrong line number that was reported for the error.
Files:	    src/fileio.c


*** ../vim-7.1.309/src/fileio.c	Wed May  7 19:05:55 2008
--- src/fileio.c	Wed Jun  4 18:28:48 2008
***************
*** 1288,1299 ****
  #ifdef FEAT_MBYTE
  		    else if (conv_restlen > 0)
  		    {
! 			/* Reached end-of-file but some trailing bytes could
! 			 * not be converted.  Truncated file? */
! 			if (conv_error == 0)
! 			    conv_error = linecnt;
! 			if (bad_char_behavior != BAD_DROP)
  			{
  			    fio_flags = 0;	/* don't convert this */
  # ifdef USE_ICONV
  			    if (iconv_fd != (iconv_t)-1)
--- 1288,1336 ----
  #ifdef FEAT_MBYTE
  		    else if (conv_restlen > 0)
  		    {
! 			/*
! 			 * Reached end-of-file but some trailing bytes could
! 			 * not be converted.  Truncated file?
! 			 */
! 
! 			/* When we did a conversion report an error. */
! 			if (fio_flags != 0
! # ifdef USE_ICONV
! 				|| iconv_fd != (iconv_t)-1
! # endif
! 			   )
  			{
+ 			    if (conv_error == 0)
+ 				conv_error = curbuf->b_ml.ml_line_count
+ 								- linecnt + 1;
+ 			}
+ 			/* Remember the first linenr with an illegal byte */
+ 			else if (illegal_byte == 0)
+ 			    illegal_byte = curbuf->b_ml.ml_line_count
+ 								- linecnt + 1;
+ 			if (bad_char_behavior == BAD_DROP)
+ 			{
+ 			    *(ptr - conv_restlen) = NUL;
+ 			    conv_restlen = 0;
+ 			}
+ 			else
+ 			{
+ 			    /* Replace the trailing bytes with the replacement
+ 			     * character if we were converting; if we weren't,
+ 			     * leave the UTF8 checking code to do it, as it
+ 			     * works slightly differently. */
+ 			    if (bad_char_behavior != BAD_KEEP && (fio_flags != 0
+ # ifdef USE_ICONV
+ 				    || iconv_fd != (iconv_t)-1
+ # endif
+ 			       ))
+ 			    {
+ 				while (conv_restlen > 0)
+ 				{
+ 				    *(--ptr) = bad_char_behavior;
+ 				    --conv_restlen;
+ 				}
+ 			    }
  			    fio_flags = 0;	/* don't convert this */
  # ifdef USE_ICONV
  			    if (iconv_fd != (iconv_t)-1)
***************
*** 1302,1321 ****
  				iconv_fd = (iconv_t)-1;
  			    }
  # endif
- 			    if (bad_char_behavior == BAD_KEEP)
- 			    {
- 				/* Keep the trailing bytes as-is. */
- 				size = conv_restlen;
- 				ptr -= conv_restlen;
- 			    }
- 			    else
- 			    {
- 				/* Replace the trailing bytes with the
- 				 * replacement character. */
- 				size = 1;
- 				*--ptr = bad_char_behavior;
- 			    }
- 			    conv_restlen = 0;
  			}
  		    }
  #endif
--- 1339,1344 ----
***************
*** 1397,1402 ****
--- 1420,1430 ----
  		    goto retry;
  		}
  	    }
+ 
+ 	    /* Include not converted bytes. */
+ 	    ptr -= conv_restlen;
+ 	    size += conv_restlen;
+ 	    conv_restlen = 0;
  #endif
  	    /*
  	     * Break here for a read error or end-of-file.
***************
*** 1406,1416 ****
  
  #ifdef FEAT_MBYTE
  
- 	    /* Include not converted bytes. */
- 	    ptr -= conv_restlen;
- 	    size += conv_restlen;
- 	    conv_restlen = 0;
- 
  # ifdef USE_ICONV
  	    if (iconv_fd != (iconv_t)-1)
  	    {
--- 1434,1439 ----
***************
*** 1872,1883 ****
  		size = (long)((ptr + real_size) - dest);
  		ptr = dest;
  	    }
! 	    else if (enc_utf8 && conv_error == 0 && !curbuf->b_p_bin)
  	    {
! 		/* Reading UTF-8: Check if the bytes are valid UTF-8.
! 		 * Need to start before "ptr" when part of the character was
! 		 * read in the previous read() call. */
! 		for (p = ptr - utf_head_off(buffer, ptr); ; ++p)
  		{
  		    int	 todo = (int)((ptr + size) - p);
  		    int	 l;
--- 1895,1906 ----
  		size = (long)((ptr + real_size) - dest);
  		ptr = dest;
  	    }
! 	    else if (enc_utf8 && !curbuf->b_p_bin)
  	    {
! 		int  incomplete_tail = FALSE;
! 
! 		/* Reading UTF-8: Check if the bytes are valid UTF-8. */
! 		for (p = ptr; ; ++p)
  		{
  		    int	 todo = (int)((ptr + size) - p);
  		    int	 l;
***************
*** 1891,1933 ****
  			 * read() will get the next bytes, we'll check it
  			 * then. */
  			l = utf_ptr2len_len(p, todo);
! 			if (l > todo)
  			{
! 			    /* Incomplete byte sequence, the next read()
! 			     * should get them and check the bytes. */
! 			    p += todo;
! 			    break;
  			}
! 			if (l == 1)
  			{
  			    /* Illegal byte.  If we can try another encoding
! 			     * do that. */
! 			    if (can_retry)
  				break;
- 
- 			    /* Remember the first linenr with an illegal byte */
- 			    if (illegal_byte == 0)
- 				illegal_byte = readfile_linenr(linecnt, ptr, p);
  # ifdef USE_ICONV
  			    /* When we did a conversion report an error. */
  			    if (iconv_fd != (iconv_t)-1 && conv_error == 0)
  				conv_error = readfile_linenr(linecnt, ptr, p);
  # endif
  
  			    /* Drop, keep or replace the bad byte. */
  			    if (bad_char_behavior == BAD_DROP)
  			    {
! 				mch_memmove(p, p+1, todo - 1);
  				--p;
  				--size;
  			    }
  			    else if (bad_char_behavior != BAD_KEEP)
  				*p = bad_char_behavior;
  			}
! 			p += l - 1;
  		    }
  		}
! 		if (p < ptr + size)
  		{
  		    /* Detected a UTF-8 error. */
  rewind_retry:
--- 1914,1969 ----
  			 * read() will get the next bytes, we'll check it
  			 * then. */
  			l = utf_ptr2len_len(p, todo);
! 			if (l > todo && !incomplete_tail)
  			{
! 			    /* Avoid retrying with a different encoding when
! 			     * a truncated file is more likely, or attempting
! 			     * to read the rest of an incomplete sequence when
! 			     * we have already done so. */
! 			    if (p > ptr || filesize > 0)
! 				incomplete_tail = TRUE;
! 			    /* Incomplete byte sequence, move it to conv_rest[]
! 			     * and try to read the rest of it, unless we've
! 			     * already done so. */
! 			    if (p > ptr)
! 			    {
! 				conv_restlen = todo;
! 				mch_memmove(conv_rest, p, conv_restlen);
! 				size -= conv_restlen;
! 				break;
! 			    }
  			}
! 			if (l == 1 || l > todo)
  			{
  			    /* Illegal byte.  If we can try another encoding
! 			     * do that, unless at EOF where a truncated
! 			     * file is more likely than a conversion error. */
! 			    if (can_retry && !incomplete_tail)
  				break;
  # ifdef USE_ICONV
  			    /* When we did a conversion report an error. */
  			    if (iconv_fd != (iconv_t)-1 && conv_error == 0)
  				conv_error = readfile_linenr(linecnt, ptr, p);
  # endif
+ 			    /* Remember the first linenr with an illegal byte */
+ 			    if (conv_error == 0 && illegal_byte == 0)
+ 				illegal_byte = readfile_linenr(linecnt, ptr, p);
  
  			    /* Drop, keep or replace the bad byte. */
  			    if (bad_char_behavior == BAD_DROP)
  			    {
! 				mch_memmove(p, p + 1, todo - 1);
  				--p;
  				--size;
  			    }
  			    else if (bad_char_behavior != BAD_KEEP)
  				*p = bad_char_behavior;
  			}
! 			else
! 			    p += l - 1;
  		    }
  		}
! 		if (p < ptr + size && !incomplete_tail)
  		{
  		    /* Detected a UTF-8 error. */
  rewind_retry:
*** ../vim-7.1.309/src/version.c	Wed Jun  4 15:27:43 2008
--- src/version.c	Wed Jun  4 19:35:16 2008
***************
*** 668,669 ****
--- 673,676 ----
  {   /* Add new patch number below this line */
+ /**/
+     310,
  /**/

-- 
Normal people believe that if it ain't broke, don't fix it.  Engineers believe
that if it ain't broke, it doesn't have enough features yet.
				(Scott Adams - The Dilbert principle)

 /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net   \\\
///        sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\
\\\        download, build and distribute -- http://www.A-A-P.org        ///
 \\\            help me help AIDS victims -- http://ICCF-Holland.org    ///