To: vim_dev@googlegroups.com Subject: Patch 7.4.088 Fcc: outbox From: Bram Moolenaar Mime-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ------------ Patch 7.4.088 Problem: When spell checking is enabled Asian characters are always marked as error. Solution: When 'spelllang' contains "cjk" do not mark Asian characters as error. (Ken Takata) Files: runtime/doc/options.txt, runtime/doc/spell.txt, src/mbyte.c, src/option.c, src/spell.c, src/structs.h *** ../vim-7.4.087/runtime/doc/options.txt 2013-11-06 05:26:08.000000000 +0100 --- runtime/doc/options.txt 2013-11-12 04:00:51.000000000 +0100 *************** *** 6555,6560 **** --- 6555,6563 ---- region by listing them: "en_us,en_ca" supports both US and Canadian English, but not words specific for Australia, New Zealand or Great Britain. + If the name "cjk" is included East Asian characters are excluded from + spell checking. This is useful when editing text that also has Asian + words. *E757* As a special case the name of a .spl file can be given as-is. The first "_xx" in the name is removed and used as the region name *** ../vim-7.4.087/runtime/doc/spell.txt 2013-08-10 13:25:01.000000000 +0200 --- runtime/doc/spell.txt 2013-11-12 04:02:27.000000000 +0100 *************** *** 269,274 **** --- 269,281 ---- latin1 yi transliterated Yiddish utf-8 yi-tr transliterated Yiddish + *spell-cjk* + Chinese, Japanese and other East Asian characters are normally marked as + errors, because spell checking of these characters is not supported. If + 'spelllang' includes "cjk", these characters are not marked as errors. This + is useful when editing text with spell checking while some Asian words are + present. + SPELL FILES *spell-load* *** ../vim-7.4.087/src/mbyte.c 2013-07-05 20:07:21.000000000 +0200 --- src/mbyte.c 2013-11-12 03:55:50.000000000 +0100 *************** *** 947,954 **** { case 0x2121: /* ZENKAKU space */ return 0; ! case 0x2122: /* KU-TEN (Japanese comma) */ ! 
case 0x2123: /* TOU-TEN (Japanese period) */ case 0x2124: /* ZENKAKU comma */ case 0x2125: /* ZENKAKU period */ return 1; --- 947,954 ---- { case 0x2121: /* ZENKAKU space */ return 0; ! case 0x2122: /* TOU-TEN (Japanese comma) */ ! case 0x2123: /* KU-TEN (Japanese period) */ case 0x2124: /* ZENKAKU comma */ case 0x2125: /* ZENKAKU period */ return 1; *************** *** 2477,2485 **** /* sorted list of non-overlapping intervals */ static struct clinterval { ! unsigned short first; ! unsigned short last; ! unsigned short class; } classes[] = { {0x037e, 0x037e, 1}, /* Greek question mark */ --- 2477,2485 ---- /* sorted list of non-overlapping intervals */ static struct clinterval { ! unsigned int first; ! unsigned int last; ! unsigned int class; } classes[] = { {0x037e, 0x037e, 1}, /* Greek question mark */ *************** *** 2544,2549 **** --- 2544,2553 ---- {0xff1a, 0xff20, 1}, /* half/fullwidth ASCII */ {0xff3b, 0xff40, 1}, /* half/fullwidth ASCII */ {0xff5b, 0xff65, 1}, /* half/fullwidth ASCII */ + {0x20000, 0x2a6df, 0x4e00}, /* CJK Ideographs */ + {0x2a700, 0x2b73f, 0x4e00}, /* CJK Ideographs */ + {0x2b740, 0x2b81f, 0x4e00}, /* CJK Ideographs */ + {0x2f800, 0x2fa1f, 0x4e00}, /* CJK Ideographs */ }; int bot = 0; int top = sizeof(classes) / sizeof(struct clinterval) - 1; *************** *** 2563,2571 **** while (top >= bot) { mid = (bot + top) / 2; ! if (classes[mid].last < c) bot = mid + 1; ! else if (classes[mid].first > c) top = mid - 1; else return (int)classes[mid].class; --- 2567,2575 ---- while (top >= bot) { mid = (bot + top) / 2; ! if (classes[mid].last < (unsigned int)c) bot = mid + 1; ! 
else if (classes[mid].first > (unsigned int)c) top = mid - 1; else return (int)classes[mid].class; *** ../vim-7.4.087/src/option.c 2013-11-08 04:30:06.000000000 +0100 --- src/option.c 2013-11-12 04:34:46.000000000 +0100 *************** *** 7122,7127 **** --- 7122,7132 ---- if (varp == &(curwin->w_s->b_p_spl)) { char_u fname[200]; + char_u *q = curwin->w_s->b_p_spl; + + /* Skip the first name if it is "cjk". */ + if (STRNCMP(q, "cjk,", 4) == 0) + q += 4; /* * Source the spell/LANG.vim in 'runtimepath'. *************** *** 7129,7139 **** * Use the first name in 'spelllang' up to '_region' or * '.encoding'. */ ! for (p = curwin->w_s->b_p_spl; *p != NUL; ++p) if (vim_strchr((char_u *)"_.,", *p) != NULL) break; ! vim_snprintf((char *)fname, 200, "spell/%.*s.vim", ! (int)(p - curwin->w_s->b_p_spl), curwin->w_s->b_p_spl); source_runtime(fname, TRUE); } #endif --- 7134,7143 ---- * Use the first name in 'spelllang' up to '_region' or * '.encoding'. */ ! for (p = q; *p != NUL; ++p) if (vim_strchr((char_u *)"_.,", *p) != NULL) break; ! vim_snprintf((char *)fname, 200, "spell/%.*s.vim", (int)(p - q), q); source_runtime(fname, TRUE); } #endif *** ../vim-7.4.087/src/spell.c 2013-09-29 13:38:25.000000000 +0200 --- src/spell.c 2013-11-12 04:37:33.000000000 +0100 *************** *** 754,762 **** static void clear_spell_chartab __ARGS((spelltab_T *sp)); static int set_spell_finish __ARGS((spelltab_T *new_st)); static int spell_iswordp __ARGS((char_u *p, win_T *wp)); ! static int spell_iswordp_nmw __ARGS((char_u *p)); #ifdef FEAT_MBYTE ! static int spell_mb_isword_class __ARGS((int cl)); static int spell_iswordp_w __ARGS((int *p, win_T *wp)); #endif static int write_spell_prefcond __ARGS((FILE *fd, garray_T *gap)); --- 754,762 ---- static void clear_spell_chartab __ARGS((spelltab_T *sp)); static int set_spell_finish __ARGS((spelltab_T *new_st)); static int spell_iswordp __ARGS((char_u *p, win_T *wp)); ! static int spell_iswordp_nmw __ARGS((char_u *p, win_T *wp)); #ifdef FEAT_MBYTE ! 
static int spell_mb_isword_class __ARGS((int cl, win_T *wp)); static int spell_iswordp_w __ARGS((int *p, win_T *wp)); #endif static int write_spell_prefcond __ARGS((FILE *fd, garray_T *gap)); *************** *** 1149,1155 **** /* When we are at a non-word character there is no error, just * skip over the character (try looking for a word after it). */ ! else if (!spell_iswordp_nmw(ptr)) { if (capcol != NULL && wp->w_s->b_cap_prog != NULL) { --- 1149,1155 ---- /* When we are at a non-word character there is no error, just * skip over the character (try looking for a word after it). */ ! else if (!spell_iswordp_nmw(ptr, wp)) { if (capcol != NULL && wp->w_s->b_cap_prog != NULL) { *************** *** 1561,1567 **** * accept a no-caps word, even when the dictionary * word specifies ONECAP. */ mb_ptr_back(mip->mi_word, p); ! if (spell_iswordp_nmw(p) ? capflags == WF_ONECAP : (flags & WF_ONECAP) != 0 && capflags != WF_ONECAP) --- 1561,1567 ---- * accept a no-caps word, even when the dictionary * word specifies ONECAP. */ mb_ptr_back(mip->mi_word, p); ! if (spell_iswordp_nmw(p, mip->mi_win) ? capflags == WF_ONECAP : (flags & WF_ONECAP) != 0 && capflags != WF_ONECAP) *************** *** 4234,4240 **** if (spl_copy == NULL) goto theend; ! /* loop over comma separated language names. */ for (splp = spl_copy; *splp != NUL; ) { /* Get one language name. */ --- 4234,4242 ---- if (spl_copy == NULL) goto theend; ! wp->w_s->b_cjk = 0; ! ! /* Loop over comma separated language names. */ for (splp = spl_copy; *splp != NUL; ) { /* Get one language name. */ *************** *** 4242,4247 **** --- 4244,4255 ---- region = NULL; len = (int)STRLEN(lang); + if (STRCMP(lang, "cjk") == 0) + { + wp->w_s->b_cjk = 1; + continue; + } + /* If the name ends in ".spl" use it as the name of the spell file. * If there is a region name let "region" point to it and remove it * from the name. 
*/ *************** *** 4601,4607 **** int past_second = FALSE; /* past second word char */ /* find first letter */ ! for (p = word; !spell_iswordp_nmw(p); mb_ptr_adv(p)) if (end == NULL ? *p == NUL : p >= end) return 0; /* only non-word characters, illegal word */ #ifdef FEAT_MBYTE --- 4609,4615 ---- int past_second = FALSE; /* past second word char */ /* find first letter */ ! for (p = word; !spell_iswordp_nmw(p, curwin); mb_ptr_adv(p)) if (end == NULL ? *p == NUL : p >= end) return 0; /* only non-word characters, illegal word */ #ifdef FEAT_MBYTE *************** *** 4617,4623 **** * But a word with an upper char only at start is a ONECAP. */ for ( ; end == NULL ? *p != NUL : p < end; mb_ptr_adv(p)) ! if (spell_iswordp_nmw(p)) { c = PTR2CHAR(p); if (!SPELL_ISUPPER(c)) --- 4625,4631 ---- * But a word with an upper char only at start is a ONECAP. */ for ( ; end == NULL ? *p != NUL : p < end; mb_ptr_adv(p)) ! if (spell_iswordp_nmw(p, curwin)) { c = PTR2CHAR(p); if (!SPELL_ISUPPER(c)) *************** *** 9907,9913 **** c = mb_ptr2char(s); if (c > 255) ! return spell_mb_isword_class(mb_get_class(s)); return spelltab.st_isw[c]; } #endif --- 9915,9921 ---- c = mb_ptr2char(s); if (c > 255) ! return spell_mb_isword_class(mb_get_class(s), wp); return spelltab.st_isw[c]; } #endif *************** *** 9920,9927 **** * Unlike spell_iswordp() this doesn't check for "midword" characters. */ static int ! spell_iswordp_nmw(p) char_u *p; { #ifdef FEAT_MBYTE int c; --- 9928,9936 ---- * Unlike spell_iswordp() this doesn't check for "midword" characters. */ static int ! spell_iswordp_nmw(p, wp) char_u *p; + win_T *wp; { #ifdef FEAT_MBYTE int c; *************** *** 9930,9936 **** { c = mb_ptr2char(p); if (c > 255) ! return spell_mb_isword_class(mb_get_class(p)); return spelltab.st_isw[c]; } #endif --- 9939,9945 ---- { c = mb_ptr2char(p); if (c > 255) ! 
return spell_mb_isword_class(mb_get_class(p), wp); return spelltab.st_isw[c]; } #endif *************** *** 9942,9952 **** * Return TRUE if word class indicates a word character. * Only for characters above 255. * Unicode subscript and superscript are not considered word characters. */ static int ! spell_mb_isword_class(cl) ! int cl; { return cl >= 2 && cl != 0x2070 && cl != 0x2080; } --- 9951,9966 ---- * Return TRUE if word class indicates a word character. * Only for characters above 255. * Unicode subscript and superscript are not considered word characters. + * See also dbcs_class() and utf_class() in mbyte.c. */ static int ! spell_mb_isword_class(cl, wp) ! int cl; ! win_T *wp; { + if (wp->w_s->b_cjk) + /* East Asian characters are not considered word characters. */ + return cl == 2 || cl == 0x2800; return cl >= 2 && cl != 0x2070 && cl != 0x2080; } *************** *** 9971,9979 **** if (*s > 255) { if (enc_utf8) ! return spell_mb_isword_class(utf_class(*s)); if (enc_dbcs) ! return dbcs_class((unsigned)*s >> 8, *s & 0xff) >= 2; return 0; } return spelltab.st_isw[*s]; --- 9985,9994 ---- if (*s > 255) { if (enc_utf8) ! return spell_mb_isword_class(utf_class(*s), wp); if (enc_dbcs) ! return spell_mb_isword_class( ! dbcs_class((unsigned)*s >> 8, *s & 0xff), wp); return 0; } return spelltab.st_isw[*s]; *************** *** 10193,10205 **** line = ml_get_curline(); p = line + curwin->w_cursor.col; /* Backup to before start of word. */ ! while (p > line && spell_iswordp_nmw(p)) mb_ptr_back(line, p); /* Forward to start of word. */ ! while (*p != NUL && !spell_iswordp_nmw(p)) mb_ptr_adv(p); ! if (!spell_iswordp_nmw(p)) /* No word found. */ { beep_flush(); return; --- 10208,10220 ---- line = ml_get_curline(); p = line + curwin->w_cursor.col; /* Backup to before start of word. */ ! while (p > line && spell_iswordp_nmw(p, curwin)) mb_ptr_back(line, p); /* Forward to start of word. */ ! while (*p != NUL && !spell_iswordp_nmw(p, curwin)) mb_ptr_adv(p); ! 
if (!spell_iswordp_nmw(p, curwin)) /* No word found. */ { beep_flush(); return; *************** *** 10436,10442 **** for (;;) { mb_ptr_back(line, p); ! if (p == line || spell_iswordp_nmw(p)) break; if (vim_regexec(&regmatch, p, 0) && regmatch.endp[0] == line + endcol) --- 10451,10457 ---- for (;;) { mb_ptr_back(line, p); ! if (p == line || spell_iswordp_nmw(p, curwin)) break; if (vim_regexec(&regmatch, p, 0) && regmatch.endp[0] == line + endcol) *************** *** 11645,11651 **** /* When appending a compound word after a word character don't * use Onecap. */ ! if (p != NULL && spell_iswordp_nmw(p)) c &= ~WF_ONECAP; make_case_word(tword + sp->ts_splitoff, preword + sp->ts_prewordlen, c); --- 11660,11666 ---- /* When appending a compound word after a word character don't * use Onecap. */ ! if (p != NULL && spell_iswordp_nmw(p, curwin)) c &= ~WF_ONECAP; make_case_word(tword + sp->ts_splitoff, preword + sp->ts_prewordlen, c); *************** *** 11895,11901 **** * character when the word ends. But only when the * good word can end. */ if (((!try_compound && !spell_iswordp_nmw(fword ! + sp->ts_fidx)) || fword_ends) && fword[sp->ts_fidx] != NUL && goodword_ends) --- 11910,11917 ---- * character when the word ends. But only when the * good word can end. */ if (((!try_compound && !spell_iswordp_nmw(fword ! + sp->ts_fidx, ! curwin)) || fword_ends) && fword[sp->ts_fidx] != NUL && goodword_ends) *************** *** 14226,14232 **** } else { ! if (spell_iswordp_nmw(s)) *t++ = *s; ++s; } --- 14242,14248 ---- } else { ! if (spell_iswordp_nmw(s, curwin)) *t++ = *s; ++s; } *************** *** 14521,14527 **** else { did_white = FALSE; ! if (!spell_iswordp_nmw(t)) continue; } } --- 14537,14543 ---- else { did_white = FALSE; ! if (!spell_iswordp_nmw(t, curwin)) continue; } } *************** *** 16045,16051 **** for (p = line + startcol; p > line; ) { mb_ptr_back(line, p); ! 
if (spell_iswordp_nmw(p)) break; } --- 16061,16067 ---- for (p = line + startcol; p > line; ) { mb_ptr_back(line, p); ! if (spell_iswordp_nmw(p, curwin)) break; } *** ../vim-7.4.087/src/structs.h 2013-11-09 05:30:18.000000000 +0100 --- src/structs.h 2013-11-12 03:55:50.000000000 +0100 *************** *** 1310,1315 **** --- 1310,1318 ---- regprog_T *b_cap_prog; /* program for 'spellcapcheck' */ char_u *b_p_spf; /* 'spellfile' */ char_u *b_p_spl; /* 'spelllang' */ + # ifdef FEAT_MBYTE + int b_cjk; /* all CJK letters as OK */ + # endif #endif #if !defined(FEAT_SYN_HL) && !defined(FEAT_SPELL) int dummy; *** ../vim-7.4.087/src/version.c 2013-11-11 23:17:31.000000000 +0100 --- src/version.c 2013-11-12 03:59:03.000000000 +0100 *************** *** 740,741 **** --- 740,743 ---- { /* Add new patch number below this line */ + /**/ + 88, /**/ -- THEOREM: VI is perfect. PROOF: VI in roman numerals is 6. The natural numbers < 6 which divide 6 are 1, 2, and 3. 1+2+3 = 6. So 6 is a perfect number. Therefore, VI is perfect. QED -- Arthur Tateishi /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\ /// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\ \\\ an exciting new programming language -- http://www.Zimbu.org /// \\\ help me help AIDS victims -- http://ICCF-Holland.org ///