To: vim-dev@vim.org
Subject: Patch 7.1.310
Fcc: outbox
From: Bram Moolenaar <Bram@moolenaar.net>
Mime-Version: 1.0
Content-Type: text/plain; charset=ISO-8859-1
Content-Transfer-Encoding: 8bit
------------
Patch 7.1.310
Problem: An incomplete UTF-8 byte sequence at the end of the file is not
detected. Memory that was never written is accessed.
Solution: Check whether the last bytes in the buffer form a valid UTF-8
character. (mostly by Ben Schmidt)
Also fix the reported line number of the error, which was wrong.
Files: src/fileio.c
*** ../vim-7.1.309/src/fileio.c Wed May 7 19:05:55 2008
--- src/fileio.c Wed Jun 4 18:28:48 2008
***************
*** 1288,1299 ****
#ifdef FEAT_MBYTE
else if (conv_restlen > 0)
{
! /* Reached end-of-file but some trailing bytes could
! * not be converted. Truncated file? */
! if (conv_error == 0)
! conv_error = linecnt;
! if (bad_char_behavior != BAD_DROP)
{
fio_flags = 0; /* don't convert this */
# ifdef USE_ICONV
if (iconv_fd != (iconv_t)-1)
--- 1288,1336 ----
#ifdef FEAT_MBYTE
else if (conv_restlen > 0)
{
! /*
! * Reached end-of-file but some trailing bytes could
! * not be converted. Truncated file?
! */
!
! /* When we did a conversion report an error. */
! if (fio_flags != 0
! # ifdef USE_ICONV
! || iconv_fd != (iconv_t)-1
! # endif
! )
{
+ if (conv_error == 0)
+ conv_error = curbuf->b_ml.ml_line_count
+ - linecnt + 1;
+ }
+ /* Remember the first linenr with an illegal byte */
+ else if (illegal_byte == 0)
+ illegal_byte = curbuf->b_ml.ml_line_count
+ - linecnt + 1;
+ if (bad_char_behavior == BAD_DROP)
+ {
+ *(ptr - conv_restlen) = NUL;
+ conv_restlen = 0;
+ }
+ else
+ {
+ /* Replace the trailing bytes with the replacement
+ * character if we were converting; if we weren't,
+ * leave the UTF8 checking code to do it, as it
+ * works slightly differently. */
+ if (bad_char_behavior != BAD_KEEP && (fio_flags != 0
+ # ifdef USE_ICONV
+ || iconv_fd != (iconv_t)-1
+ # endif
+ ))
+ {
+ while (conv_restlen > 0)
+ {
+ *(--ptr) = bad_char_behavior;
+ --conv_restlen;
+ }
+ }
fio_flags = 0; /* don't convert this */
# ifdef USE_ICONV
if (iconv_fd != (iconv_t)-1)
***************
*** 1302,1321 ****
iconv_fd = (iconv_t)-1;
}
# endif
- if (bad_char_behavior == BAD_KEEP)
- {
- /* Keep the trailing bytes as-is. */
- size = conv_restlen;
- ptr -= conv_restlen;
- }
- else
- {
- /* Replace the trailing bytes with the
- * replacement character. */
- size = 1;
- *--ptr = bad_char_behavior;
- }
- conv_restlen = 0;
}
}
#endif
--- 1339,1344 ----
***************
*** 1397,1402 ****
--- 1420,1430 ----
goto retry;
}
}
+
+ /* Include not converted bytes. */
+ ptr -= conv_restlen;
+ size += conv_restlen;
+ conv_restlen = 0;
#endif
/*
* Break here for a read error or end-of-file.
***************
*** 1406,1416 ****
#ifdef FEAT_MBYTE
- /* Include not converted bytes. */
- ptr -= conv_restlen;
- size += conv_restlen;
- conv_restlen = 0;
-
# ifdef USE_ICONV
if (iconv_fd != (iconv_t)-1)
{
--- 1434,1439 ----
***************
*** 1872,1883 ****
size = (long)((ptr + real_size) - dest);
ptr = dest;
}
! else if (enc_utf8 && conv_error == 0 && !curbuf->b_p_bin)
{
! /* Reading UTF-8: Check if the bytes are valid UTF-8.
! * Need to start before "ptr" when part of the character was
! * read in the previous read() call. */
! for (p = ptr - utf_head_off(buffer, ptr); ; ++p)
{
int todo = (int)((ptr + size) - p);
int l;
--- 1895,1906 ----
size = (long)((ptr + real_size) - dest);
ptr = dest;
}
! else if (enc_utf8 && !curbuf->b_p_bin)
{
! int incomplete_tail = FALSE;
!
! /* Reading UTF-8: Check if the bytes are valid UTF-8. */
! for (p = ptr; ; ++p)
{
int todo = (int)((ptr + size) - p);
int l;
***************
*** 1891,1933 ****
* read() will get the next bytes, we'll check it
* then. */
l = utf_ptr2len_len(p, todo);
! if (l > todo)
{
! /* Incomplete byte sequence, the next read()
! * should get them and check the bytes. */
! p += todo;
! break;
}
! if (l == 1)
{
/* Illegal byte. If we can try another encoding
! * do that. */
! if (can_retry)
break;
-
- /* Remember the first linenr with an illegal byte */
- if (illegal_byte == 0)
- illegal_byte = readfile_linenr(linecnt, ptr, p);
# ifdef USE_ICONV
/* When we did a conversion report an error. */
if (iconv_fd != (iconv_t)-1 && conv_error == 0)
conv_error = readfile_linenr(linecnt, ptr, p);
# endif
/* Drop, keep or replace the bad byte. */
if (bad_char_behavior == BAD_DROP)
{
! mch_memmove(p, p+1, todo - 1);
--p;
--size;
}
else if (bad_char_behavior != BAD_KEEP)
*p = bad_char_behavior;
}
! p += l - 1;
}
}
! if (p < ptr + size)
{
/* Detected a UTF-8 error. */
rewind_retry:
--- 1914,1969 ----
* read() will get the next bytes, we'll check it
* then. */
l = utf_ptr2len_len(p, todo);
! if (l > todo && !incomplete_tail)
{
! /* Avoid retrying with a different encoding when
! * a truncated file is more likely, or attempting
! * to read the rest of an incomplete sequence when
! * we have already done so. */
! if (p > ptr || filesize > 0)
! incomplete_tail = TRUE;
! /* Incomplete byte sequence, move it to conv_rest[]
! * and try to read the rest of it, unless we've
! * already done so. */
! if (p > ptr)
! {
! conv_restlen = todo;
! mch_memmove(conv_rest, p, conv_restlen);
! size -= conv_restlen;
! break;
! }
}
! if (l == 1 || l > todo)
{
/* Illegal byte. If we can try another encoding
! * do that, unless at EOF where a truncated
! * file is more likely than a conversion error. */
! if (can_retry && !incomplete_tail)
break;
# ifdef USE_ICONV
/* When we did a conversion report an error. */
if (iconv_fd != (iconv_t)-1 && conv_error == 0)
conv_error = readfile_linenr(linecnt, ptr, p);
# endif
+ /* Remember the first linenr with an illegal byte */
+ if (conv_error == 0 && illegal_byte == 0)
+ illegal_byte = readfile_linenr(linecnt, ptr, p);
/* Drop, keep or replace the bad byte. */
if (bad_char_behavior == BAD_DROP)
{
! mch_memmove(p, p + 1, todo - 1);
--p;
--size;
}
else if (bad_char_behavior != BAD_KEEP)
*p = bad_char_behavior;
}
! else
! p += l - 1;
}
}
! if (p < ptr + size && !incomplete_tail)
{
/* Detected a UTF-8 error. */
rewind_retry:
*** ../vim-7.1.309/src/version.c Wed Jun 4 15:27:43 2008
--- src/version.c Wed Jun 4 19:35:16 2008
***************
*** 668,669 ****
--- 673,676 ----
{ /* Add new patch number below this line */
+ /**/
+ 310,
/**/
--
Normal people believe that if it ain't broke, don't fix it. Engineers believe
that if it ain't broke, it doesn't have enough features yet.
(Scott Adams - The Dilbert principle)
/// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\
/// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\
\\\ download, build and distribute -- http://www.A-A-P.org ///
\\\ help me help AIDS victims -- http://ICCF-Holland.org ///