From e1217a468beafb9f4bc89efdb830b64c8332534d Mon Sep 17 00:00:00 2001 From: Karsten Hopp Date: Jul 18 2008 11:34:51 +0000 Subject: - patchlevel 310 --- diff --git a/7.1.310 b/7.1.310 new file mode 100644 index 0000000..7877226 --- /dev/null +++ b/7.1.310 @@ -0,0 +1,283 @@ +To: vim-dev@vim.org +Subject: Patch 7.1.310 +Fcc: outbox +From: Bram Moolenaar +Mime-Version: 1.0 +Content-Type: text/plain; charset=ISO-8859-1 +Content-Transfer-Encoding: 8bit +------------ + +Patch 7.1.310 +Problem: Incomplete utf-8 byte sequence at end of the file is not detected. + Accessing memory that wasn't written. +Solution: Check the last bytes in the buffer for being a valid utf-8 + character. (mostly by Ben Schmidt) + Also fix that the reported line number of the error was wrong. +Files: src/fileio.c + + +*** ../vim-7.1.309/src/fileio.c Wed May 7 19:05:55 2008 +--- src/fileio.c Wed Jun 4 18:28:48 2008 +*************** +*** 1288,1299 **** + #ifdef FEAT_MBYTE + else if (conv_restlen > 0) + { +! /* Reached end-of-file but some trailing bytes could +! * not be converted. Truncated file? */ +! if (conv_error == 0) +! conv_error = linecnt; +! if (bad_char_behavior != BAD_DROP) + { + fio_flags = 0; /* don't convert this */ + # ifdef USE_ICONV + if (iconv_fd != (iconv_t)-1) +--- 1288,1336 ---- + #ifdef FEAT_MBYTE + else if (conv_restlen > 0) + { +! /* +! * Reached end-of-file but some trailing bytes could +! * not be converted. Truncated file? +! */ +! +! /* When we did a conversion report an error. */ +! if (fio_flags != 0 +! # ifdef USE_ICONV +! || iconv_fd != (iconv_t)-1 +! # endif +! ) + { ++ if (conv_error == 0) ++ conv_error = curbuf->b_ml.ml_line_count ++ - linecnt + 1; ++ } ++ /* Remember the first linenr with an illegal byte */ ++ else if (illegal_byte == 0) ++ illegal_byte = curbuf->b_ml.ml_line_count ++ - linecnt + 1; ++ if (bad_char_behavior == BAD_DROP) ++ { ++ *(ptr - conv_restlen) = NUL; ++ conv_restlen = 0; ++ } ++ else ++ { ++ /* Replace the trailing bytes with the replacement ++ * character if we were converting; if we weren't, ++ * leave the UTF8 checking code to do it, as it ++ * works slightly differently. */ ++ if (bad_char_behavior != BAD_KEEP && (fio_flags != 0 ++ # ifdef USE_ICONV ++ || iconv_fd != (iconv_t)-1 ++ # endif ++ )) ++ { ++ while (conv_restlen > 0) ++ { ++ *(--ptr) = bad_char_behavior; ++ --conv_restlen; ++ } ++ } + fio_flags = 0; /* don't convert this */ + # ifdef USE_ICONV + if (iconv_fd != (iconv_t)-1) +*************** +*** 1302,1321 **** + iconv_fd = (iconv_t)-1; + } + # endif +- if (bad_char_behavior == BAD_KEEP) +- { +- /* Keep the trailing bytes as-is. */ +- size = conv_restlen; +- ptr -= conv_restlen; +- } +- else +- { +- /* Replace the trailing bytes with the +- * replacement character. */ +- size = 1; +- *--ptr = bad_char_behavior; +- } +- conv_restlen = 0; + } + } + #endif +--- 1339,1344 ---- +*************** +*** 1397,1402 **** +--- 1420,1430 ---- + goto retry; + } + } ++ ++ /* Include not converted bytes. */ ++ ptr -= conv_restlen; ++ size += conv_restlen; ++ conv_restlen = 0; + #endif + /* + * Break here for a read error or end-of-file. +*************** +*** 1406,1416 **** + + #ifdef FEAT_MBYTE + +- /* Include not converted bytes. */ +- ptr -= conv_restlen; +- size += conv_restlen; +- conv_restlen = 0; +- + # ifdef USE_ICONV + if (iconv_fd != (iconv_t)-1) + { +--- 1434,1439 ---- +*************** +*** 1872,1883 **** + size = (long)((ptr + real_size) - dest); + ptr = dest; + } +! else if (enc_utf8 && conv_error == 0 && !curbuf->b_p_bin) + { +! /* Reading UTF-8: Check if the bytes are valid UTF-8. +! * Need to start before "ptr" when part of the character was +! * read in the previous read() call. */ +! for (p = ptr - utf_head_off(buffer, ptr); ; ++p) + { + int todo = (int)((ptr + size) - p); + int l; +--- 1895,1906 ---- + size = (long)((ptr + real_size) - dest); + ptr = dest; + } +! else if (enc_utf8 && !curbuf->b_p_bin) + { +! int incomplete_tail = FALSE; +! +! /* Reading UTF-8: Check if the bytes are valid UTF-8. */ +! for (p = ptr; ; ++p) + { + int todo = (int)((ptr + size) - p); + int l; +*************** +*** 1891,1933 **** + * read() will get the next bytes, we'll check it + * then. */ + l = utf_ptr2len_len(p, todo); +! if (l > todo) + { +! /* Incomplete byte sequence, the next read() +! * should get them and check the bytes. */ +! p += todo; +! break; + } +! if (l == 1) + { + /* Illegal byte. If we can try another encoding +! * do that. */ +! if (can_retry) + break; +- +- /* Remember the first linenr with an illegal byte */ +- if (illegal_byte == 0) +- illegal_byte = readfile_linenr(linecnt, ptr, p); + # ifdef USE_ICONV + /* When we did a conversion report an error. */ + if (iconv_fd != (iconv_t)-1 && conv_error == 0) + conv_error = readfile_linenr(linecnt, ptr, p); + # endif + + /* Drop, keep or replace the bad byte. */ + if (bad_char_behavior == BAD_DROP) + { +! mch_memmove(p, p+1, todo - 1); + --p; + --size; + } + else if (bad_char_behavior != BAD_KEEP) + *p = bad_char_behavior; + } +! p += l - 1; + } + } +! if (p < ptr + size) + { + /* Detected a UTF-8 error. */ + rewind_retry: +--- 1914,1969 ---- + * read() will get the next bytes, we'll check it + * then. */ + l = utf_ptr2len_len(p, todo); +! if (l > todo && !incomplete_tail) + { +! /* Avoid retrying with a different encoding when +! * a truncated file is more likely, or attempting +! * to read the rest of an incomplete sequence when +! * we have already done so. */ +! if (p > ptr || filesize > 0) +! incomplete_tail = TRUE; +! /* Incomplete byte sequence, move it to conv_rest[] +! * and try to read the rest of it, unless we've +! * already done so. */ +! if (p > ptr) +! { +! conv_restlen = todo; +! mch_memmove(conv_rest, p, conv_restlen); +! size -= conv_restlen; +! break; +! } + } +! if (l == 1 || l > todo) + { + /* Illegal byte. If we can try another encoding +! * do that, unless at EOF where a truncated +! * file is more likely than a conversion error. */ +! if (can_retry && !incomplete_tail) + break; + # ifdef USE_ICONV + /* When we did a conversion report an error. */ + if (iconv_fd != (iconv_t)-1 && conv_error == 0) + conv_error = readfile_linenr(linecnt, ptr, p); + # endif ++ /* Remember the first linenr with an illegal byte */ ++ if (conv_error == 0 && illegal_byte == 0) ++ illegal_byte = readfile_linenr(linecnt, ptr, p); + + /* Drop, keep or replace the bad byte. */ + if (bad_char_behavior == BAD_DROP) + { +! mch_memmove(p, p + 1, todo - 1); + --p; + --size; + } + else if (bad_char_behavior != BAD_KEEP) + *p = bad_char_behavior; + } +! else +! p += l - 1; + } + } +! if (p < ptr + size && !incomplete_tail) + { + /* Detected a UTF-8 error. */ + rewind_retry: +*** ../vim-7.1.309/src/version.c Wed Jun 4 15:27:43 2008 +--- src/version.c Wed Jun 4 19:35:16 2008 +*************** +*** 668,669 **** +--- 673,676 ---- + { /* Add new patch number below this line */ ++ /**/ ++ 310, + /**/ + +-- +Normal people believe that if it ain't broke, don't fix it. Engineers believe +that if it ain't broke, it doesn't have enough features yet. + (Scott Adams - The Dilbert principle) + + /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\ +/// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\ +\\\ download, build and distribute -- http://www.A-A-P.org /// + \\\ help me help AIDS victims -- http://ICCF-Holland.org ///