From e1217a468beafb9f4bc89efdb830b64c8332534d Mon Sep 17 00:00:00 2001
From: Karsten Hopp <karsten@fedoraproject.org>
Date: Jul 18 2008 11:34:51 +0000
Subject: - patchlevel 310


---

diff --git a/7.1.310 b/7.1.310
new file mode 100644
index 0000000..7877226
--- /dev/null
+++ b/7.1.310
@@ -0,0 +1,283 @@
+To: vim-dev@vim.org
+Subject: Patch 7.1.310
+Fcc: outbox
+From: Bram Moolenaar <Bram@moolenaar.net>
+Mime-Version: 1.0
+Content-Type: text/plain; charset=ISO-8859-1
+Content-Transfer-Encoding: 8bit
+------------
+
+Patch 7.1.310
+Problem:    Incomplete utf-8 byte sequence at end of the file is not detected.
+	    Accessing memory that wasn't written.
+Solution:   Check the last bytes in the buffer for being a valid utf-8
+	    character. (mostly by Ben Schmidt)
+	    Also fix the wrong line number that was reported for the error.
+Files:	    src/fileio.c
+
+
+*** ../vim-7.1.309/src/fileio.c	Wed May  7 19:05:55 2008
+--- src/fileio.c	Wed Jun  4 18:28:48 2008
+***************
+*** 1288,1299 ****
+  #ifdef FEAT_MBYTE
+  		    else if (conv_restlen > 0)
+  		    {
+! 			/* Reached end-of-file but some trailing bytes could
+! 			 * not be converted.  Truncated file? */
+! 			if (conv_error == 0)
+! 			    conv_error = linecnt;
+! 			if (bad_char_behavior != BAD_DROP)
+  			{
+  			    fio_flags = 0;	/* don't convert this */
+  # ifdef USE_ICONV
+  			    if (iconv_fd != (iconv_t)-1)
+--- 1288,1336 ----
+  #ifdef FEAT_MBYTE
+  		    else if (conv_restlen > 0)
+  		    {
+! 			/*
+! 			 * Reached end-of-file but some trailing bytes could
+! 			 * not be converted.  Truncated file?
+! 			 */
+! 
+! 			/* When we did a conversion report an error. */
+! 			if (fio_flags != 0
+! # ifdef USE_ICONV
+! 				|| iconv_fd != (iconv_t)-1
+! # endif
+! 			   )
+  			{
++ 			    if (conv_error == 0)
++ 				conv_error = curbuf->b_ml.ml_line_count
++ 								- linecnt + 1;
++ 			}
++ 			/* Remember the first linenr with an illegal byte */
++ 			else if (illegal_byte == 0)
++ 			    illegal_byte = curbuf->b_ml.ml_line_count
++ 								- linecnt + 1;
++ 			if (bad_char_behavior == BAD_DROP)
++ 			{
++ 			    *(ptr - conv_restlen) = NUL;
++ 			    conv_restlen = 0;
++ 			}
++ 			else
++ 			{
++ 			    /* Replace the trailing bytes with the replacement
++ 			     * character if we were converting; if we weren't,
++ 			     * leave the UTF8 checking code to do it, as it
++ 			     * works slightly differently. */
++ 			    if (bad_char_behavior != BAD_KEEP && (fio_flags != 0
++ # ifdef USE_ICONV
++ 				    || iconv_fd != (iconv_t)-1
++ # endif
++ 			       ))
++ 			    {
++ 				while (conv_restlen > 0)
++ 				{
++ 				    *(--ptr) = bad_char_behavior;
++ 				    --conv_restlen;
++ 				}
++ 			    }
+  			    fio_flags = 0;	/* don't convert this */
+  # ifdef USE_ICONV
+  			    if (iconv_fd != (iconv_t)-1)
+***************
+*** 1302,1321 ****
+  				iconv_fd = (iconv_t)-1;
+  			    }
+  # endif
+- 			    if (bad_char_behavior == BAD_KEEP)
+- 			    {
+- 				/* Keep the trailing bytes as-is. */
+- 				size = conv_restlen;
+- 				ptr -= conv_restlen;
+- 			    }
+- 			    else
+- 			    {
+- 				/* Replace the trailing bytes with the
+- 				 * replacement character. */
+- 				size = 1;
+- 				*--ptr = bad_char_behavior;
+- 			    }
+- 			    conv_restlen = 0;
+  			}
+  		    }
+  #endif
+--- 1339,1344 ----
+***************
+*** 1397,1402 ****
+--- 1420,1430 ----
+  		    goto retry;
+  		}
+  	    }
++ 
++ 	    /* Include not converted bytes. */
++ 	    ptr -= conv_restlen;
++ 	    size += conv_restlen;
++ 	    conv_restlen = 0;
+  #endif
+  	    /*
+  	     * Break here for a read error or end-of-file.
+***************
+*** 1406,1416 ****
+  
+  #ifdef FEAT_MBYTE
+  
+- 	    /* Include not converted bytes. */
+- 	    ptr -= conv_restlen;
+- 	    size += conv_restlen;
+- 	    conv_restlen = 0;
+- 
+  # ifdef USE_ICONV
+  	    if (iconv_fd != (iconv_t)-1)
+  	    {
+--- 1434,1439 ----
+***************
+*** 1872,1883 ****
+  		size = (long)((ptr + real_size) - dest);
+  		ptr = dest;
+  	    }
+! 	    else if (enc_utf8 && conv_error == 0 && !curbuf->b_p_bin)
+  	    {
+! 		/* Reading UTF-8: Check if the bytes are valid UTF-8.
+! 		 * Need to start before "ptr" when part of the character was
+! 		 * read in the previous read() call. */
+! 		for (p = ptr - utf_head_off(buffer, ptr); ; ++p)
+  		{
+  		    int	 todo = (int)((ptr + size) - p);
+  		    int	 l;
+--- 1895,1906 ----
+  		size = (long)((ptr + real_size) - dest);
+  		ptr = dest;
+  	    }
+! 	    else if (enc_utf8 && !curbuf->b_p_bin)
+  	    {
+! 		int  incomplete_tail = FALSE;
+! 
+! 		/* Reading UTF-8: Check if the bytes are valid UTF-8. */
+! 		for (p = ptr; ; ++p)
+  		{
+  		    int	 todo = (int)((ptr + size) - p);
+  		    int	 l;
+***************
+*** 1891,1933 ****
+  			 * read() will get the next bytes, we'll check it
+  			 * then. */
+  			l = utf_ptr2len_len(p, todo);
+! 			if (l > todo)
+  			{
+! 			    /* Incomplete byte sequence, the next read()
+! 			     * should get them and check the bytes. */
+! 			    p += todo;
+! 			    break;
+  			}
+! 			if (l == 1)
+  			{
+  			    /* Illegal byte.  If we can try another encoding
+! 			     * do that. */
+! 			    if (can_retry)
+  				break;
+- 
+- 			    /* Remember the first linenr with an illegal byte */
+- 			    if (illegal_byte == 0)
+- 				illegal_byte = readfile_linenr(linecnt, ptr, p);
+  # ifdef USE_ICONV
+  			    /* When we did a conversion report an error. */
+  			    if (iconv_fd != (iconv_t)-1 && conv_error == 0)
+  				conv_error = readfile_linenr(linecnt, ptr, p);
+  # endif
+  
+  			    /* Drop, keep or replace the bad byte. */
+  			    if (bad_char_behavior == BAD_DROP)
+  			    {
+! 				mch_memmove(p, p+1, todo - 1);
+  				--p;
+  				--size;
+  			    }
+  			    else if (bad_char_behavior != BAD_KEEP)
+  				*p = bad_char_behavior;
+  			}
+! 			p += l - 1;
+  		    }
+  		}
+! 		if (p < ptr + size)
+  		{
+  		    /* Detected a UTF-8 error. */
+  rewind_retry:
+--- 1914,1969 ----
+  			 * read() will get the next bytes, we'll check it
+  			 * then. */
+  			l = utf_ptr2len_len(p, todo);
+! 			if (l > todo && !incomplete_tail)
+  			{
+! 			    /* Avoid retrying with a different encoding when
+! 			     * a truncated file is more likely, or attempting
+! 			     * to read the rest of an incomplete sequence when
+! 			     * we have already done so. */
+! 			    if (p > ptr || filesize > 0)
+! 				incomplete_tail = TRUE;
+! 			    /* Incomplete byte sequence, move it to conv_rest[]
+! 			     * and try to read the rest of it, unless we've
+! 			     * already done so. */
+! 			    if (p > ptr)
+! 			    {
+! 				conv_restlen = todo;
+! 				mch_memmove(conv_rest, p, conv_restlen);
+! 				size -= conv_restlen;
+! 				break;
+! 			    }
+  			}
+! 			if (l == 1 || l > todo)
+  			{
+  			    /* Illegal byte.  If we can try another encoding
+! 			     * do that, unless at EOF where a truncated
+! 			     * file is more likely than a conversion error. */
+! 			    if (can_retry && !incomplete_tail)
+  				break;
+  # ifdef USE_ICONV
+  			    /* When we did a conversion report an error. */
+  			    if (iconv_fd != (iconv_t)-1 && conv_error == 0)
+  				conv_error = readfile_linenr(linecnt, ptr, p);
+  # endif
++ 			    /* Remember the first linenr with an illegal byte */
++ 			    if (conv_error == 0 && illegal_byte == 0)
++ 				illegal_byte = readfile_linenr(linecnt, ptr, p);
+  
+  			    /* Drop, keep or replace the bad byte. */
+  			    if (bad_char_behavior == BAD_DROP)
+  			    {
+! 				mch_memmove(p, p + 1, todo - 1);
+  				--p;
+  				--size;
+  			    }
+  			    else if (bad_char_behavior != BAD_KEEP)
+  				*p = bad_char_behavior;
+  			}
+! 			else
+! 			    p += l - 1;
+  		    }
+  		}
+! 		if (p < ptr + size && !incomplete_tail)
+  		{
+  		    /* Detected a UTF-8 error. */
+  rewind_retry:
+*** ../vim-7.1.309/src/version.c	Wed Jun  4 15:27:43 2008
+--- src/version.c	Wed Jun  4 19:35:16 2008
+***************
+*** 668,669 ****
+--- 673,676 ----
+  {   /* Add new patch number below this line */
++ /**/
++     310,
+  /**/
+
+-- 
+Normal people believe that if it ain't broke, don't fix it.  Engineers believe
+that if it ain't broke, it doesn't have enough features yet.
+				(Scott Adams - The Dilbert principle)
+
+ /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net   \\\
+///        sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\
+\\\        download, build and distribute -- http://www.A-A-P.org        ///
+ \\\            help me help AIDS victims -- http://ICCF-Holland.org    ///