| To: vim-dev@vim.org |
| Subject: Patch 7.1.310 |
| Fcc: outbox |
| From: Bram Moolenaar <Bram@moolenaar.net> |
| Mime-Version: 1.0 |
| Content-Type: text/plain; charset=ISO-8859-1 |
| Content-Transfer-Encoding: 8bit |
| |
| |
| Patch 7.1.310 |
| Problem: An incomplete utf-8 byte sequence at the end of the file is not |
| detected. Memory that wasn't written is accessed. |
| Solution: Check whether the last bytes in the buffer form a valid utf-8 |
| character. (mostly by Ben Schmidt) |
| Also fix the wrong line number that was reported for the error. |
| Files: src/fileio.c |
| |
| |
| |
| |
| |
| *** 1288,1299 **** |
| #ifdef FEAT_MBYTE |
| else if (conv_restlen > 0) |
| { |
| ! /* Reached end-of-file but some trailing bytes could |
| ! * not be converted. Truncated file? */ |
| ! if (conv_error == 0) |
| ! conv_error = linecnt; |
| ! if (bad_char_behavior != BAD_DROP) |
| { |
| fio_flags = 0; /* don't convert this */ |
| # ifdef USE_ICONV |
| if (iconv_fd != (iconv_t)-1) |
| --- 1288,1336 ---- |
| #ifdef FEAT_MBYTE |
| else if (conv_restlen > 0) |
| { |
| ! /* |
| ! * Reached end-of-file but some trailing bytes could |
| ! * not be converted. Truncated file? |
| ! */ |
| ! |
| ! /* When we did a conversion report an error. */ |
| ! if (fio_flags != 0 |
| ! # ifdef USE_ICONV |
| ! || iconv_fd != (iconv_t)-1 |
| ! # endif |
| ! ) |
| { |
| + if (conv_error == 0) |
| + conv_error = curbuf->b_ml.ml_line_count |
| + - linecnt + 1; |
| + } |
| + /* Remember the first linenr with an illegal byte */ |
| + else if (illegal_byte == 0) |
| + illegal_byte = curbuf->b_ml.ml_line_count |
| + - linecnt + 1; |
| + if (bad_char_behavior == BAD_DROP) |
| + { |
| + *(ptr - conv_restlen) = NUL; |
| + conv_restlen = 0; |
| + } |
| + else |
| + { |
| + /* Replace the trailing bytes with the replacement |
| + * character if we were converting; if we weren't, |
| + * leave the UTF8 checking code to do it, as it |
| + * works slightly differently. */ |
| + if (bad_char_behavior != BAD_KEEP && (fio_flags != 0 |
| + # ifdef USE_ICONV |
| + || iconv_fd != (iconv_t)-1 |
| + # endif |
| + )) |
| + { |
| + while (conv_restlen > 0) |
| + { |
| + *(--ptr) = bad_char_behavior; |
| + --conv_restlen; |
| + } |
| + } |
| fio_flags = 0; /* don't convert this */ |
| # ifdef USE_ICONV |
| if (iconv_fd != (iconv_t)-1) |
| |
| *** 1302,1321 **** |
| iconv_fd = (iconv_t)-1; |
| } |
| # endif |
| - if (bad_char_behavior == BAD_KEEP) |
| - { |
| - /* Keep the trailing bytes as-is. */ |
| - size = conv_restlen; |
| - ptr -= conv_restlen; |
| - } |
| - else |
| - { |
| - /* Replace the trailing bytes with the |
| - * replacement character. */ |
| - size = 1; |
| - *--ptr = bad_char_behavior; |
| - } |
| - conv_restlen = 0; |
| } |
| } |
| #endif |
| --- 1339,1344 ---- |
| |
| *** 1397,1402 **** |
| --- 1420,1430 ---- |
| goto retry; |
| } |
| } |
| + |
| + /* Include not converted bytes. */ |
| + ptr -= conv_restlen; |
| + size += conv_restlen; |
| + conv_restlen = 0; |
| #endif |
| /* |
| * Break here for a read error or end-of-file. |
| |
| *** 1406,1416 **** |
| |
| #ifdef FEAT_MBYTE |
| |
| - /* Include not converted bytes. */ |
| - ptr -= conv_restlen; |
| - size += conv_restlen; |
| - conv_restlen = 0; |
| - |
| # ifdef USE_ICONV |
| if (iconv_fd != (iconv_t)-1) |
| { |
| --- 1434,1439 ---- |
| |
| *** 1872,1883 **** |
| size = (long)((ptr + real_size) - dest); |
| ptr = dest; |
| } |
| ! else if (enc_utf8 && conv_error == 0 && !curbuf->b_p_bin) |
| { |
| ! /* Reading UTF-8: Check if the bytes are valid UTF-8. |
| ! * Need to start before "ptr" when part of the character was |
| ! * read in the previous read() call. */ |
| ! for (p = ptr - utf_head_off(buffer, ptr); ; ++p) |
| { |
| int todo = (int)((ptr + size) - p); |
| int l; |
| --- 1895,1906 ---- |
| size = (long)((ptr + real_size) - dest); |
| ptr = dest; |
| } |
| ! else if (enc_utf8 && !curbuf->b_p_bin) |
| { |
| ! int incomplete_tail = FALSE; |
| ! |
| ! /* Reading UTF-8: Check if the bytes are valid UTF-8. */ |
| ! for (p = ptr; ; ++p) |
| { |
| int todo = (int)((ptr + size) - p); |
| int l; |
| |
| *** 1891,1933 **** |
| * read() will get the next bytes, we'll check it |
| * then. */ |
| l = utf_ptr2len_len(p, todo); |
| ! if (l > todo) |
| { |
| ! /* Incomplete byte sequence, the next read() |
| ! * should get them and check the bytes. */ |
| ! p += todo; |
| ! break; |
| } |
| ! if (l == 1) |
| { |
| /* Illegal byte. If we can try another encoding |
| ! * do that. */ |
| ! if (can_retry) |
| break; |
| - |
| - /* Remember the first linenr with an illegal byte */ |
| - if (illegal_byte == 0) |
| - illegal_byte = readfile_linenr(linecnt, ptr, p); |
| # ifdef USE_ICONV |
| /* When we did a conversion report an error. */ |
| if (iconv_fd != (iconv_t)-1 && conv_error == 0) |
| conv_error = readfile_linenr(linecnt, ptr, p); |
| # endif |
| |
| /* Drop, keep or replace the bad byte. */ |
| if (bad_char_behavior == BAD_DROP) |
| { |
| ! mch_memmove(p, p+1, todo - 1); |
| --p; |
| --size; |
| } |
| else if (bad_char_behavior != BAD_KEEP) |
| *p = bad_char_behavior; |
| } |
| ! p += l - 1; |
| } |
| } |
| ! if (p < ptr + size) |
| { |
| /* Detected a UTF-8 error. */ |
| rewind_retry: |
| --- 1914,1969 ---- |
| * read() will get the next bytes, we'll check it |
| * then. */ |
| l = utf_ptr2len_len(p, todo); |
| ! if (l > todo && !incomplete_tail) |
| { |
| ! /* Avoid retrying with a different encoding when |
| ! * a truncated file is more likely, or attempting |
| ! * to read the rest of an incomplete sequence when |
| ! * we have already done so. */ |
| ! if (p > ptr || filesize > 0) |
| ! incomplete_tail = TRUE; |
| ! /* Incomplete byte sequence, move it to conv_rest[] |
| ! * and try to read the rest of it, unless we've |
| ! * already done so. */ |
| ! if (p > ptr) |
| ! { |
| ! conv_restlen = todo; |
| ! mch_memmove(conv_rest, p, conv_restlen); |
| ! size -= conv_restlen; |
| ! break; |
| ! } |
| } |
| ! if (l == 1 || l > todo) |
| { |
| /* Illegal byte. If we can try another encoding |
| ! * do that, unless at EOF where a truncated |
| ! * file is more likely than a conversion error. */ |
| ! if (can_retry && !incomplete_tail) |
| break; |
| # ifdef USE_ICONV |
| /* When we did a conversion report an error. */ |
| if (iconv_fd != (iconv_t)-1 && conv_error == 0) |
| conv_error = readfile_linenr(linecnt, ptr, p); |
| # endif |
| + /* Remember the first linenr with an illegal byte */ |
| + if (conv_error == 0 && illegal_byte == 0) |
| + illegal_byte = readfile_linenr(linecnt, ptr, p); |
| |
| /* Drop, keep or replace the bad byte. */ |
| if (bad_char_behavior == BAD_DROP) |
| { |
| ! mch_memmove(p, p + 1, todo - 1); |
| --p; |
| --size; |
| } |
| else if (bad_char_behavior != BAD_KEEP) |
| *p = bad_char_behavior; |
| } |
| ! else |
| ! p += l - 1; |
| } |
| } |
| ! if (p < ptr + size && !incomplete_tail) |
| { |
| /* Detected a UTF-8 error. */ |
| rewind_retry: |
| |
| |
| |
| *** 668,669 **** |
| --- 673,676 ---- |
| { /* Add new patch number below this line */ |
| + /**/ |
| + 310, |
| /**/ |
| |
| -- |
| Normal people believe that if it ain't broke, don't fix it. Engineers believe |
| that if it ain't broke, it doesn't have enough features yet. |
| (Scott Adams - The Dilbert principle) |
| |
| /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\ |
| /// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\ |
| \\\ download, build and distribute -- http://www.A-A-P.org /// |
| \\\ help me help AIDS victims -- http://ICCF-Holland.org /// |