Karsten Hopp e1217a
To: vim-dev@vim.org
Karsten Hopp e1217a
Subject: Patch 7.1.310
Karsten Hopp e1217a
Fcc: outbox
Karsten Hopp e1217a
From: Bram Moolenaar <Bram@moolenaar.net>
Karsten Hopp e1217a
Mime-Version: 1.0
Karsten Hopp e1217a
Content-Type: text/plain; charset=ISO-8859-1
Karsten Hopp e1217a
Content-Transfer-Encoding: 8bit
Karsten Hopp e1217a
------------
Karsten Hopp e1217a
Karsten Hopp e1217a
Patch 7.1.310
Karsten Hopp e1217a
Problem:    Incomplete utf-8 byte sequence at end of the file is not detected.
Karsten Hopp e1217a
	    Accessing memory that wasn't written.
Karsten Hopp e1217a
Solution:   Check the last bytes in the buffer for being a valid utf-8
Karsten Hopp e1217a
	    character. (mostly by Ben Schmidt)
Karsten Hopp e1217a
	    Also fix that the reported line number of the error was wrong.
Karsten Hopp e1217a
Files:	    src/fileio.c
Karsten Hopp e1217a
Karsten Hopp e1217a
Karsten Hopp e1217a
*** ../vim-7.1.309/src/fileio.c	Wed May  7 19:05:55 2008
Karsten Hopp e1217a
--- src/fileio.c	Wed Jun  4 18:28:48 2008
Karsten Hopp e1217a
***************
Karsten Hopp e1217a
*** 1288,1299 ****
Karsten Hopp e1217a
  #ifdef FEAT_MBYTE
Karsten Hopp e1217a
  		    else if (conv_restlen > 0)
Karsten Hopp e1217a
  		    {
Karsten Hopp e1217a
! 			/* Reached end-of-file but some trailing bytes could
Karsten Hopp e1217a
! 			 * not be converted.  Truncated file? */
Karsten Hopp e1217a
! 			if (conv_error == 0)
Karsten Hopp e1217a
! 			    conv_error = linecnt;
Karsten Hopp e1217a
! 			if (bad_char_behavior != BAD_DROP)
Karsten Hopp e1217a
  			{
Karsten Hopp e1217a
  			    fio_flags = 0;	/* don't convert this */
Karsten Hopp e1217a
  # ifdef USE_ICONV
Karsten Hopp e1217a
  			    if (iconv_fd != (iconv_t)-1)
Karsten Hopp e1217a
--- 1288,1336 ----
Karsten Hopp e1217a
  #ifdef FEAT_MBYTE
Karsten Hopp e1217a
  		    else if (conv_restlen > 0)
Karsten Hopp e1217a
  		    {
Karsten Hopp e1217a
! 			/*
Karsten Hopp e1217a
! 			 * Reached end-of-file but some trailing bytes could
Karsten Hopp e1217a
! 			 * not be converted.  Truncated file?
Karsten Hopp e1217a
! 			 */
Karsten Hopp e1217a
! 
Karsten Hopp e1217a
! 			/* When we did a conversion report an error. */
Karsten Hopp e1217a
! 			if (fio_flags != 0
Karsten Hopp e1217a
! # ifdef USE_ICONV
Karsten Hopp e1217a
! 				|| iconv_fd != (iconv_t)-1
Karsten Hopp e1217a
! # endif
Karsten Hopp e1217a
! 			   )
Karsten Hopp e1217a
  			{
Karsten Hopp e1217a
+ 			    if (conv_error == 0)
Karsten Hopp e1217a
+ 				conv_error = curbuf->b_ml.ml_line_count
Karsten Hopp e1217a
+ 								- linecnt + 1;
Karsten Hopp e1217a
+ 			}
Karsten Hopp e1217a
+ 			/* Remember the first linenr with an illegal byte */
Karsten Hopp e1217a
+ 			else if (illegal_byte == 0)
Karsten Hopp e1217a
+ 			    illegal_byte = curbuf->b_ml.ml_line_count
Karsten Hopp e1217a
+ 								- linecnt + 1;
Karsten Hopp e1217a
+ 			if (bad_char_behavior == BAD_DROP)
Karsten Hopp e1217a
+ 			{
Karsten Hopp e1217a
+ 			    *(ptr - conv_restlen) = NUL;
Karsten Hopp e1217a
+ 			    conv_restlen = 0;
Karsten Hopp e1217a
+ 			}
Karsten Hopp e1217a
+ 			else
Karsten Hopp e1217a
+ 			{
Karsten Hopp e1217a
+ 			    /* Replace the trailing bytes with the replacement
Karsten Hopp e1217a
+ 			     * character if we were converting; if we weren't,
Karsten Hopp e1217a
+ 			     * leave the UTF8 checking code to do it, as it
Karsten Hopp e1217a
+ 			     * works slightly differently. */
Karsten Hopp e1217a
+ 			    if (bad_char_behavior != BAD_KEEP && (fio_flags != 0
Karsten Hopp e1217a
+ # ifdef USE_ICONV
Karsten Hopp e1217a
+ 				    || iconv_fd != (iconv_t)-1
Karsten Hopp e1217a
+ # endif
Karsten Hopp e1217a
+ 			       ))
Karsten Hopp e1217a
+ 			    {
Karsten Hopp e1217a
+ 				while (conv_restlen > 0)
Karsten Hopp e1217a
+ 				{
Karsten Hopp e1217a
+ 				    *(--ptr) = bad_char_behavior;
Karsten Hopp e1217a
+ 				    --conv_restlen;
Karsten Hopp e1217a
+ 				}
Karsten Hopp e1217a
+ 			    }
Karsten Hopp e1217a
  			    fio_flags = 0;	/* don't convert this */
Karsten Hopp e1217a
  # ifdef USE_ICONV
Karsten Hopp e1217a
  			    if (iconv_fd != (iconv_t)-1)
Karsten Hopp e1217a
***************
Karsten Hopp e1217a
*** 1302,1321 ****
Karsten Hopp e1217a
  				iconv_fd = (iconv_t)-1;
Karsten Hopp e1217a
  			    }
Karsten Hopp e1217a
  # endif
Karsten Hopp e1217a
- 			    if (bad_char_behavior == BAD_KEEP)
Karsten Hopp e1217a
- 			    {
Karsten Hopp e1217a
- 				/* Keep the trailing bytes as-is. */
Karsten Hopp e1217a
- 				size = conv_restlen;
Karsten Hopp e1217a
- 				ptr -= conv_restlen;
Karsten Hopp e1217a
- 			    }
Karsten Hopp e1217a
- 			    else
Karsten Hopp e1217a
- 			    {
Karsten Hopp e1217a
- 				/* Replace the trailing bytes with the
Karsten Hopp e1217a
- 				 * replacement character. */
Karsten Hopp e1217a
- 				size = 1;
Karsten Hopp e1217a
- 				*--ptr = bad_char_behavior;
Karsten Hopp e1217a
- 			    }
Karsten Hopp e1217a
- 			    conv_restlen = 0;
Karsten Hopp e1217a
  			}
Karsten Hopp e1217a
  		    }
Karsten Hopp e1217a
  #endif
Karsten Hopp e1217a
--- 1339,1344 ----
Karsten Hopp e1217a
***************
Karsten Hopp e1217a
*** 1397,1402 ****
Karsten Hopp e1217a
--- 1420,1430 ----
Karsten Hopp e1217a
  		    goto retry;
Karsten Hopp e1217a
  		}
Karsten Hopp e1217a
  	    }
Karsten Hopp e1217a
+ 
Karsten Hopp e1217a
+ 	    /* Include not converted bytes. */
Karsten Hopp e1217a
+ 	    ptr -= conv_restlen;
Karsten Hopp e1217a
+ 	    size += conv_restlen;
Karsten Hopp e1217a
+ 	    conv_restlen = 0;
Karsten Hopp e1217a
  #endif
Karsten Hopp e1217a
  	    /*
Karsten Hopp e1217a
  	     * Break here for a read error or end-of-file.
Karsten Hopp e1217a
***************
Karsten Hopp e1217a
*** 1406,1416 ****
Karsten Hopp e1217a
  
Karsten Hopp e1217a
  #ifdef FEAT_MBYTE
Karsten Hopp e1217a
  
Karsten Hopp e1217a
- 	    /* Include not converted bytes. */
Karsten Hopp e1217a
- 	    ptr -= conv_restlen;
Karsten Hopp e1217a
- 	    size += conv_restlen;
Karsten Hopp e1217a
- 	    conv_restlen = 0;
Karsten Hopp e1217a
- 
Karsten Hopp e1217a
  # ifdef USE_ICONV
Karsten Hopp e1217a
  	    if (iconv_fd != (iconv_t)-1)
Karsten Hopp e1217a
  	    {
Karsten Hopp e1217a
--- 1434,1439 ----
Karsten Hopp e1217a
***************
Karsten Hopp e1217a
*** 1872,1883 ****
Karsten Hopp e1217a
  		size = (long)((ptr + real_size) - dest);
Karsten Hopp e1217a
  		ptr = dest;
Karsten Hopp e1217a
  	    }
Karsten Hopp e1217a
! 	    else if (enc_utf8 && conv_error == 0 && !curbuf->b_p_bin)
Karsten Hopp e1217a
  	    {
Karsten Hopp e1217a
! 		/* Reading UTF-8: Check if the bytes are valid UTF-8.
Karsten Hopp e1217a
! 		 * Need to start before "ptr" when part of the character was
Karsten Hopp e1217a
! 		 * read in the previous read() call. */
Karsten Hopp e1217a
! 		for (p = ptr - utf_head_off(buffer, ptr); ; ++p)
Karsten Hopp e1217a
  		{
Karsten Hopp e1217a
  		    int	 todo = (int)((ptr + size) - p);
Karsten Hopp e1217a
  		    int	 l;
Karsten Hopp e1217a
--- 1895,1906 ----
Karsten Hopp e1217a
  		size = (long)((ptr + real_size) - dest);
Karsten Hopp e1217a
  		ptr = dest;
Karsten Hopp e1217a
  	    }
Karsten Hopp e1217a
! 	    else if (enc_utf8 && !curbuf->b_p_bin)
Karsten Hopp e1217a
  	    {
Karsten Hopp e1217a
! 		int  incomplete_tail = FALSE;
Karsten Hopp e1217a
! 
Karsten Hopp e1217a
! 		/* Reading UTF-8: Check if the bytes are valid UTF-8. */
Karsten Hopp e1217a
! 		for (p = ptr; ; ++p)
Karsten Hopp e1217a
  		{
Karsten Hopp e1217a
  		    int	 todo = (int)((ptr + size) - p);
Karsten Hopp e1217a
  		    int	 l;
Karsten Hopp e1217a
***************
Karsten Hopp e1217a
*** 1891,1933 ****
Karsten Hopp e1217a
  			 * read() will get the next bytes, we'll check it
Karsten Hopp e1217a
  			 * then. */
Karsten Hopp e1217a
  			l = utf_ptr2len_len(p, todo);
Karsten Hopp e1217a
! 			if (l > todo)
Karsten Hopp e1217a
  			{
Karsten Hopp e1217a
! 			    /* Incomplete byte sequence, the next read()
Karsten Hopp e1217a
! 			     * should get them and check the bytes. */
Karsten Hopp e1217a
! 			    p += todo;
Karsten Hopp e1217a
! 			    break;
Karsten Hopp e1217a
  			}
Karsten Hopp e1217a
! 			if (l == 1)
Karsten Hopp e1217a
  			{
Karsten Hopp e1217a
  			    /* Illegal byte.  If we can try another encoding
Karsten Hopp e1217a
! 			     * do that. */
Karsten Hopp e1217a
! 			    if (can_retry)
Karsten Hopp e1217a
  				break;
Karsten Hopp e1217a
- 
Karsten Hopp e1217a
- 			    /* Remember the first linenr with an illegal byte */
Karsten Hopp e1217a
- 			    if (illegal_byte == 0)
Karsten Hopp e1217a
- 				illegal_byte = readfile_linenr(linecnt, ptr, p);
Karsten Hopp e1217a
  # ifdef USE_ICONV
Karsten Hopp e1217a
  			    /* When we did a conversion report an error. */
Karsten Hopp e1217a
  			    if (iconv_fd != (iconv_t)-1 && conv_error == 0)
Karsten Hopp e1217a
  				conv_error = readfile_linenr(linecnt, ptr, p);
Karsten Hopp e1217a
  # endif
Karsten Hopp e1217a
  
Karsten Hopp e1217a
  			    /* Drop, keep or replace the bad byte. */
Karsten Hopp e1217a
  			    if (bad_char_behavior == BAD_DROP)
Karsten Hopp e1217a
  			    {
Karsten Hopp e1217a
! 				mch_memmove(p, p+1, todo - 1);
Karsten Hopp e1217a
  				--p;
Karsten Hopp e1217a
  				--size;
Karsten Hopp e1217a
  			    }
Karsten Hopp e1217a
  			    else if (bad_char_behavior != BAD_KEEP)
Karsten Hopp e1217a
  				*p = bad_char_behavior;
Karsten Hopp e1217a
  			}
Karsten Hopp e1217a
! 			p += l - 1;
Karsten Hopp e1217a
  		    }
Karsten Hopp e1217a
  		}
Karsten Hopp e1217a
! 		if (p < ptr + size)
Karsten Hopp e1217a
  		{
Karsten Hopp e1217a
  		    /* Detected a UTF-8 error. */
Karsten Hopp e1217a
  rewind_retry:
Karsten Hopp e1217a
--- 1914,1969 ----
Karsten Hopp e1217a
  			 * read() will get the next bytes, we'll check it
Karsten Hopp e1217a
  			 * then. */
Karsten Hopp e1217a
  			l = utf_ptr2len_len(p, todo);
Karsten Hopp e1217a
! 			if (l > todo && !incomplete_tail)
Karsten Hopp e1217a
  			{
Karsten Hopp e1217a
! 			    /* Avoid retrying with a different encoding when
Karsten Hopp e1217a
! 			     * a truncated file is more likely, or attempting
Karsten Hopp e1217a
! 			     * to read the rest of an incomplete sequence when
Karsten Hopp e1217a
! 			     * we have already done so. */
Karsten Hopp e1217a
! 			    if (p > ptr || filesize > 0)
Karsten Hopp e1217a
! 				incomplete_tail = TRUE;
Karsten Hopp e1217a
! 			    /* Incomplete byte sequence, move it to conv_rest[]
Karsten Hopp e1217a
! 			     * and try to read the rest of it, unless we've
Karsten Hopp e1217a
! 			     * already done so. */
Karsten Hopp e1217a
! 			    if (p > ptr)
Karsten Hopp e1217a
! 			    {
Karsten Hopp e1217a
! 				conv_restlen = todo;
Karsten Hopp e1217a
! 				mch_memmove(conv_rest, p, conv_restlen);
Karsten Hopp e1217a
! 				size -= conv_restlen;
Karsten Hopp e1217a
! 				break;
Karsten Hopp e1217a
! 			    }
Karsten Hopp e1217a
  			}
Karsten Hopp e1217a
! 			if (l == 1 || l > todo)
Karsten Hopp e1217a
  			{
Karsten Hopp e1217a
  			    /* Illegal byte.  If we can try another encoding
Karsten Hopp e1217a
! 			     * do that, unless at EOF where a truncated
Karsten Hopp e1217a
! 			     * file is more likely than a conversion error. */
Karsten Hopp e1217a
! 			    if (can_retry && !incomplete_tail)
Karsten Hopp e1217a
  				break;
Karsten Hopp e1217a
  # ifdef USE_ICONV
Karsten Hopp e1217a
  			    /* When we did a conversion report an error. */
Karsten Hopp e1217a
  			    if (iconv_fd != (iconv_t)-1 && conv_error == 0)
Karsten Hopp e1217a
  				conv_error = readfile_linenr(linecnt, ptr, p);
Karsten Hopp e1217a
  # endif
Karsten Hopp e1217a
+ 			    /* Remember the first linenr with an illegal byte */
Karsten Hopp e1217a
+ 			    if (conv_error == 0 && illegal_byte == 0)
Karsten Hopp e1217a
+ 				illegal_byte = readfile_linenr(linecnt, ptr, p);
Karsten Hopp e1217a
  
Karsten Hopp e1217a
  			    /* Drop, keep or replace the bad byte. */
Karsten Hopp e1217a
  			    if (bad_char_behavior == BAD_DROP)
Karsten Hopp e1217a
  			    {
Karsten Hopp e1217a
! 				mch_memmove(p, p + 1, todo - 1);
Karsten Hopp e1217a
  				--p;
Karsten Hopp e1217a
  				--size;
Karsten Hopp e1217a
  			    }
Karsten Hopp e1217a
  			    else if (bad_char_behavior != BAD_KEEP)
Karsten Hopp e1217a
  				*p = bad_char_behavior;
Karsten Hopp e1217a
  			}
Karsten Hopp e1217a
! 			else
Karsten Hopp e1217a
! 			    p += l - 1;
Karsten Hopp e1217a
  		    }
Karsten Hopp e1217a
  		}
Karsten Hopp e1217a
! 		if (p < ptr + size && !incomplete_tail)
Karsten Hopp e1217a
  		{
Karsten Hopp e1217a
  		    /* Detected a UTF-8 error. */
Karsten Hopp e1217a
  rewind_retry:
Karsten Hopp e1217a
*** ../vim-7.1.309/src/version.c	Wed Jun  4 15:27:43 2008
Karsten Hopp e1217a
--- src/version.c	Wed Jun  4 19:35:16 2008
Karsten Hopp e1217a
***************
Karsten Hopp e1217a
*** 668,669 ****
Karsten Hopp e1217a
--- 673,676 ----
Karsten Hopp e1217a
  {   /* Add new patch number below this line */
Karsten Hopp e1217a
+ /**/
Karsten Hopp e1217a
+     310,
Karsten Hopp e1217a
  /**/
Karsten Hopp e1217a
Karsten Hopp e1217a
-- 
Karsten Hopp e1217a
Normal people believe that if it ain't broke, don't fix it.  Engineers believe
Karsten Hopp e1217a
that if it ain't broke, it doesn't have enough features yet.
Karsten Hopp e1217a
				(Scott Adams - The Dilbert principle)
Karsten Hopp e1217a
Karsten Hopp e1217a
 /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net   \\\
Karsten Hopp e1217a
///        sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\
Karsten Hopp e1217a
\\\        download, build and distribute -- http://www.A-A-P.org        ///
Karsten Hopp e1217a
 \\\            help me help AIDS victims -- http://ICCF-Holland.org    ///