| To: vim_dev@googlegroups.com |
| Subject: Patch 7.4.088 |
| Fcc: outbox |
| From: Bram Moolenaar <Bram@moolenaar.net> |
| Mime-Version: 1.0 |
| Content-Type: text/plain; charset=UTF-8 |
| Content-Transfer-Encoding: 8bit |
| |
| |
| Patch 7.4.088 |
| Problem: When spell checking is enabled, Asian characters are always marked |
| as errors. |
| Solution: When 'spelllang' contains "cjk", do not mark Asian characters as |
| errors. (Ken Takata) |
| Files: runtime/doc/options.txt, runtime/doc/spell.txt, src/mbyte.c, |
| src/option.c, src/spell.c, src/structs.h |
| |
| |
| |
| |
| |
| *** 6555,6560 **** |
| --- 6555,6563 ---- |
| region by listing them: "en_us,en_ca" supports both US and Canadian |
| English, but not words specific for Australia, New Zealand or Great |
| Britain. |
| + If the name "cjk" is included East Asian characters are excluded from |
| + spell checking. This is useful when editing text that also has Asian |
| + words. |
| *E757* |
| As a special case the name of a .spl file can be given as-is. The |
| first "_xx" in the name is removed and used as the region name |
| |
| |
| |
| *** 269,274 **** |
| --- 269,281 ---- |
| latin1 yi transliterated Yiddish |
| utf-8 yi-tr transliterated Yiddish |
| |
| + *spell-cjk* |
| + Chinese, Japanese and other East Asian characters are normally marked as |
| + errors, because spell checking of these characters is not supported. If |
| + 'spelllang' includes "cjk", these characters are not marked as errors. This |
| + is useful when editing text with spell checking while some Asian words are |
| + present. |
| + |
| |
| SPELL FILES *spell-load* |
| |
| |
| |
| |
| *** 947,954 **** |
| { |
| case 0x2121: /* ZENKAKU space */ |
| return 0; |
| ! case 0x2122: /* KU-TEN (Japanese comma) */ |
| ! case 0x2123: /* TOU-TEN (Japanese period) */ |
| case 0x2124: /* ZENKAKU comma */ |
| case 0x2125: /* ZENKAKU period */ |
| return 1; |
| --- 947,954 ---- |
| { |
| case 0x2121: /* ZENKAKU space */ |
| return 0; |
| ! case 0x2122: /* TOU-TEN (Japanese comma) */ |
| ! case 0x2123: /* KU-TEN (Japanese period) */ |
| case 0x2124: /* ZENKAKU comma */ |
| case 0x2125: /* ZENKAKU period */ |
| return 1; |
| |
| *** 2477,2485 **** |
| /* sorted list of non-overlapping intervals */ |
| static struct clinterval |
| { |
| ! unsigned short first; |
| ! unsigned short last; |
| ! unsigned short class; |
| } classes[] = |
| { |
| {0x037e, 0x037e, 1}, /* Greek question mark */ |
| --- 2477,2485 ---- |
| /* sorted list of non-overlapping intervals */ |
| static struct clinterval |
| { |
| ! unsigned int first; |
| ! unsigned int last; |
| ! unsigned int class; |
| } classes[] = |
| { |
| {0x037e, 0x037e, 1}, /* Greek question mark */ |
| |
| *** 2544,2549 **** |
| --- 2544,2553 ---- |
| {0xff1a, 0xff20, 1}, /* half/fullwidth ASCII */ |
| {0xff3b, 0xff40, 1}, /* half/fullwidth ASCII */ |
| {0xff5b, 0xff65, 1}, /* half/fullwidth ASCII */ |
| + {0x20000, 0x2a6df, 0x4e00}, /* CJK Ideographs */ |
| + {0x2a700, 0x2b73f, 0x4e00}, /* CJK Ideographs */ |
| + {0x2b740, 0x2b81f, 0x4e00}, /* CJK Ideographs */ |
| + {0x2f800, 0x2fa1f, 0x4e00}, /* CJK Ideographs */ |
| }; |
| int bot = 0; |
| int top = sizeof(classes) / sizeof(struct clinterval) - 1; |
| |
| *** 2563,2571 **** |
| while (top >= bot) |
| { |
| mid = (bot + top) / 2; |
| ! if (classes[mid].last < c) |
| bot = mid + 1; |
| ! else if (classes[mid].first > c) |
| top = mid - 1; |
| else |
| return (int)classes[mid].class; |
| --- 2567,2575 ---- |
| while (top >= bot) |
| { |
| mid = (bot + top) / 2; |
| ! if (classes[mid].last < (unsigned int)c) |
| bot = mid + 1; |
| ! else if (classes[mid].first > (unsigned int)c) |
| top = mid - 1; |
| else |
| return (int)classes[mid].class; |
| |
| |
| |
| *** 7122,7127 **** |
| --- 7122,7132 ---- |
| if (varp == &(curwin->w_s->b_p_spl)) |
| { |
| char_u fname[200]; |
| + char_u *q = curwin->w_s->b_p_spl; |
| + |
| + /* Skip the first name if it is "cjk". */ |
| + if (STRNCMP(q, "cjk,", 4) == 0) |
| + q += 4; |
| |
| /* |
| * Source the spell/LANG.vim in 'runtimepath'. |
| |
| *** 7129,7139 **** |
| * Use the first name in 'spelllang' up to '_region' or |
| * '.encoding'. |
| */ |
| ! for (p = curwin->w_s->b_p_spl; *p != NUL; ++p) |
| if (vim_strchr((char_u *)"_.,", *p) != NULL) |
| break; |
| ! vim_snprintf((char *)fname, 200, "spell/%.*s.vim", |
| ! (int)(p - curwin->w_s->b_p_spl), curwin->w_s->b_p_spl); |
| source_runtime(fname, TRUE); |
| } |
| #endif |
| --- 7134,7143 ---- |
| * Use the first name in 'spelllang' up to '_region' or |
| * '.encoding'. |
| */ |
| ! for (p = q; *p != NUL; ++p) |
| if (vim_strchr((char_u *)"_.,", *p) != NULL) |
| break; |
| ! vim_snprintf((char *)fname, 200, "spell/%.*s.vim", (int)(p - q), q); |
| source_runtime(fname, TRUE); |
| } |
| #endif |
| |
| |
| |
| *** 754,762 **** |
| static void clear_spell_chartab __ARGS((spelltab_T *sp)); |
| static int set_spell_finish __ARGS((spelltab_T *new_st)); |
| static int spell_iswordp __ARGS((char_u *p, win_T *wp)); |
| ! static int spell_iswordp_nmw __ARGS((char_u *p)); |
| #ifdef FEAT_MBYTE |
| ! static int spell_mb_isword_class __ARGS((int cl)); |
| static int spell_iswordp_w __ARGS((int *p, win_T *wp)); |
| #endif |
| static int write_spell_prefcond __ARGS((FILE *fd, garray_T *gap)); |
| --- 754,762 ---- |
| static void clear_spell_chartab __ARGS((spelltab_T *sp)); |
| static int set_spell_finish __ARGS((spelltab_T *new_st)); |
| static int spell_iswordp __ARGS((char_u *p, win_T *wp)); |
| ! static int spell_iswordp_nmw __ARGS((char_u *p, win_T *wp)); |
| #ifdef FEAT_MBYTE |
| ! static int spell_mb_isword_class __ARGS((int cl, win_T *wp)); |
| static int spell_iswordp_w __ARGS((int *p, win_T *wp)); |
| #endif |
| static int write_spell_prefcond __ARGS((FILE *fd, garray_T *gap)); |
| |
| *** 1149,1155 **** |
| |
| /* When we are at a non-word character there is no error, just |
| * skip over the character (try looking for a word after it). */ |
| ! else if (!spell_iswordp_nmw(ptr)) |
| { |
| if (capcol != NULL && wp->w_s->b_cap_prog != NULL) |
| { |
| --- 1149,1155 ---- |
| |
| /* When we are at a non-word character there is no error, just |
| * skip over the character (try looking for a word after it). */ |
| ! else if (!spell_iswordp_nmw(ptr, wp)) |
| { |
| if (capcol != NULL && wp->w_s->b_cap_prog != NULL) |
| { |
| |
| *** 1561,1567 **** |
| * accept a no-caps word, even when the dictionary |
| * word specifies ONECAP. */ |
| mb_ptr_back(mip->mi_word, p); |
| ! if (spell_iswordp_nmw(p) |
| ? capflags == WF_ONECAP |
| : (flags & WF_ONECAP) != 0 |
| && capflags != WF_ONECAP) |
| --- 1561,1567 ---- |
| * accept a no-caps word, even when the dictionary |
| * word specifies ONECAP. */ |
| mb_ptr_back(mip->mi_word, p); |
| ! if (spell_iswordp_nmw(p, mip->mi_win) |
| ? capflags == WF_ONECAP |
| : (flags & WF_ONECAP) != 0 |
| && capflags != WF_ONECAP) |
| |
| *** 4234,4240 **** |
| if (spl_copy == NULL) |
| goto theend; |
| |
| ! /* loop over comma separated language names. */ |
| for (splp = spl_copy; *splp != NUL; ) |
| { |
| /* Get one language name. */ |
| --- 4234,4242 ---- |
| if (spl_copy == NULL) |
| goto theend; |
| |
| ! wp->w_s->b_cjk = 0; |
| ! |
| ! /* Loop over comma separated language names. */ |
| for (splp = spl_copy; *splp != NUL; ) |
| { |
| /* Get one language name. */ |
| |
| *** 4242,4247 **** |
| --- 4244,4255 ---- |
| region = NULL; |
| len = (int)STRLEN(lang); |
| |
| + if (STRCMP(lang, "cjk") == 0) |
| + { |
| + wp->w_s->b_cjk = 1; |
| + continue; |
| + } |
| + |
| /* If the name ends in ".spl" use it as the name of the spell file. |
| * If there is a region name let "region" point to it and remove it |
| * from the name. */ |
| |
| *** 4601,4607 **** |
| int past_second = FALSE; /* past second word char */ |
| |
| /* find first letter */ |
| ! for (p = word; !spell_iswordp_nmw(p); mb_ptr_adv(p)) |
| if (end == NULL ? *p == NUL : p >= end) |
| return 0; /* only non-word characters, illegal word */ |
| #ifdef FEAT_MBYTE |
| --- 4609,4615 ---- |
| int past_second = FALSE; /* past second word char */ |
| |
| /* find first letter */ |
| ! for (p = word; !spell_iswordp_nmw(p, curwin); mb_ptr_adv(p)) |
| if (end == NULL ? *p == NUL : p >= end) |
| return 0; /* only non-word characters, illegal word */ |
| #ifdef FEAT_MBYTE |
| |
| *** 4617,4623 **** |
| * But a word with an upper char only at start is a ONECAP. |
| */ |
| for ( ; end == NULL ? *p != NUL : p < end; mb_ptr_adv(p)) |
| ! if (spell_iswordp_nmw(p)) |
| { |
| c = PTR2CHAR(p); |
| if (!SPELL_ISUPPER(c)) |
| --- 4625,4631 ---- |
| * But a word with an upper char only at start is a ONECAP. |
| */ |
| for ( ; end == NULL ? *p != NUL : p < end; mb_ptr_adv(p)) |
| ! if (spell_iswordp_nmw(p, curwin)) |
| { |
| c = PTR2CHAR(p); |
| if (!SPELL_ISUPPER(c)) |
| |
| *** 9907,9913 **** |
| |
| c = mb_ptr2char(s); |
| if (c > 255) |
| ! return spell_mb_isword_class(mb_get_class(s)); |
| return spelltab.st_isw[c]; |
| } |
| #endif |
| --- 9915,9921 ---- |
| |
| c = mb_ptr2char(s); |
| if (c > 255) |
| ! return spell_mb_isword_class(mb_get_class(s), wp); |
| return spelltab.st_isw[c]; |
| } |
| #endif |
| |
| *** 9920,9927 **** |
| * Unlike spell_iswordp() this doesn't check for "midword" characters. |
| */ |
| static int |
| ! spell_iswordp_nmw(p) |
| char_u *p; |
| { |
| #ifdef FEAT_MBYTE |
| int c; |
| --- 9928,9936 ---- |
| * Unlike spell_iswordp() this doesn't check for "midword" characters. |
| */ |
| static int |
| ! spell_iswordp_nmw(p, wp) |
| char_u *p; |
| + win_T *wp; |
| { |
| #ifdef FEAT_MBYTE |
| int c; |
| |
| *** 9930,9936 **** |
| { |
| c = mb_ptr2char(p); |
| if (c > 255) |
| ! return spell_mb_isword_class(mb_get_class(p)); |
| return spelltab.st_isw[c]; |
| } |
| #endif |
| --- 9939,9945 ---- |
| { |
| c = mb_ptr2char(p); |
| if (c > 255) |
| ! return spell_mb_isword_class(mb_get_class(p), wp); |
| return spelltab.st_isw[c]; |
| } |
| #endif |
| |
| *** 9942,9952 **** |
| * Return TRUE if word class indicates a word character. |
| * Only for characters above 255. |
| * Unicode subscript and superscript are not considered word characters. |
| */ |
| static int |
| ! spell_mb_isword_class(cl) |
| ! int cl; |
| { |
| return cl >= 2 && cl != 0x2070 && cl != 0x2080; |
| } |
| |
| --- 9951,9966 ---- |
| * Return TRUE if word class indicates a word character. |
| * Only for characters above 255. |
| * Unicode subscript and superscript are not considered word characters. |
| + * See also dbcs_class() and utf_class() in mbyte.c. |
| */ |
| static int |
| ! spell_mb_isword_class(cl, wp) |
| ! int cl; |
| ! win_T *wp; |
| { |
| + if (wp->w_s->b_cjk) |
| + /* East Asian characters are not considered word characters. */ |
| + return cl == 2 || cl == 0x2800; |
| return cl >= 2 && cl != 0x2070 && cl != 0x2080; |
| } |
| |
| |
| *** 9971,9979 **** |
| if (*s > 255) |
| { |
| if (enc_utf8) |
| ! return spell_mb_isword_class(utf_class(*s)); |
| if (enc_dbcs) |
| ! return dbcs_class((unsigned)*s >> 8, *s & 0xff) >= 2; |
| return 0; |
| } |
| return spelltab.st_isw[*s]; |
| --- 9985,9994 ---- |
| if (*s > 255) |
| { |
| if (enc_utf8) |
| ! return spell_mb_isword_class(utf_class(*s), wp); |
| if (enc_dbcs) |
| ! return spell_mb_isword_class( |
| ! dbcs_class((unsigned)*s >> 8, *s & 0xff), wp); |
| return 0; |
| } |
| return spelltab.st_isw[*s]; |
| |
| *** 10193,10205 **** |
| line = ml_get_curline(); |
| p = line + curwin->w_cursor.col; |
| /* Backup to before start of word. */ |
| ! while (p > line && spell_iswordp_nmw(p)) |
| mb_ptr_back(line, p); |
| /* Forward to start of word. */ |
| ! while (*p != NUL && !spell_iswordp_nmw(p)) |
| mb_ptr_adv(p); |
| |
| ! if (!spell_iswordp_nmw(p)) /* No word found. */ |
| { |
| beep_flush(); |
| return; |
| --- 10208,10220 ---- |
| line = ml_get_curline(); |
| p = line + curwin->w_cursor.col; |
| /* Backup to before start of word. */ |
| ! while (p > line && spell_iswordp_nmw(p, curwin)) |
| mb_ptr_back(line, p); |
| /* Forward to start of word. */ |
| ! while (*p != NUL && !spell_iswordp_nmw(p, curwin)) |
| mb_ptr_adv(p); |
| |
| ! if (!spell_iswordp_nmw(p, curwin)) /* No word found. */ |
| { |
| beep_flush(); |
| return; |
| |
| *** 10436,10442 **** |
| for (;;) |
| { |
| mb_ptr_back(line, p); |
| ! if (p == line || spell_iswordp_nmw(p)) |
| break; |
| if (vim_regexec(&regmatch, p, 0) |
| && regmatch.endp[0] == line + endcol) |
| --- 10451,10457 ---- |
| for (;;) |
| { |
| mb_ptr_back(line, p); |
| ! if (p == line || spell_iswordp_nmw(p, curwin)) |
| break; |
| if (vim_regexec(&regmatch, p, 0) |
| && regmatch.endp[0] == line + endcol) |
| |
| *** 11645,11651 **** |
| |
| /* When appending a compound word after a word character don't |
| * use Onecap. */ |
| ! if (p != NULL && spell_iswordp_nmw(p)) |
| c &= ~WF_ONECAP; |
| make_case_word(tword + sp->ts_splitoff, |
| preword + sp->ts_prewordlen, c); |
| --- 11660,11666 ---- |
| |
| /* When appending a compound word after a word character don't |
| * use Onecap. */ |
| ! if (p != NULL && spell_iswordp_nmw(p, curwin)) |
| c &= ~WF_ONECAP; |
| make_case_word(tword + sp->ts_splitoff, |
| preword + sp->ts_prewordlen, c); |
| |
| *** 11895,11901 **** |
| * character when the word ends. But only when the |
| * good word can end. */ |
| if (((!try_compound && !spell_iswordp_nmw(fword |
| ! + sp->ts_fidx)) |
| || fword_ends) |
| && fword[sp->ts_fidx] != NUL |
| && goodword_ends) |
| --- 11910,11917 ---- |
| * character when the word ends. But only when the |
| * good word can end. */ |
| if (((!try_compound && !spell_iswordp_nmw(fword |
| ! + sp->ts_fidx, |
| ! curwin)) |
| || fword_ends) |
| && fword[sp->ts_fidx] != NUL |
| && goodword_ends) |
| |
| *** 14226,14232 **** |
| } |
| else |
| { |
| ! if (spell_iswordp_nmw(s)) |
| *t++ = *s; |
| ++s; |
| } |
| --- 14242,14248 ---- |
| } |
| else |
| { |
| ! if (spell_iswordp_nmw(s, curwin)) |
| *t++ = *s; |
| ++s; |
| } |
| |
| *** 14521,14527 **** |
| else |
| { |
| did_white = FALSE; |
| ! if (!spell_iswordp_nmw(t)) |
| continue; |
| } |
| } |
| --- 14537,14543 ---- |
| else |
| { |
| did_white = FALSE; |
| ! if (!spell_iswordp_nmw(t, curwin)) |
| continue; |
| } |
| } |
| |
| *** 16045,16051 **** |
| for (p = line + startcol; p > line; ) |
| { |
| mb_ptr_back(line, p); |
| ! if (spell_iswordp_nmw(p)) |
| break; |
| } |
| |
| --- 16061,16067 ---- |
| for (p = line + startcol; p > line; ) |
| { |
| mb_ptr_back(line, p); |
| ! if (spell_iswordp_nmw(p, curwin)) |
| break; |
| } |
| |
| |
| |
| |
| *** 1310,1315 **** |
| --- 1310,1318 ---- |
| regprog_T *b_cap_prog; /* program for 'spellcapcheck' */ |
| char_u *b_p_spf; /* 'spellfile' */ |
| char_u *b_p_spl; /* 'spelllang' */ |
| + # ifdef FEAT_MBYTE |
| + int b_cjk; /* all CJK letters as OK */ |
| + # endif |
| #endif |
| #if !defined(FEAT_SYN_HL) && !defined(FEAT_SPELL) |
| int dummy; |
| |
| |
| |
| *** 740,741 **** |
| --- 740,743 ---- |
| { /* Add new patch number below this line */ |
| + /**/ |
| + 88, |
| /**/ |
| |
| -- |
| THEOREM: VI is perfect. |
| PROOF: VI in roman numerals is 6. The natural numbers < 6 which divide 6 are |
| 1, 2, and 3. 1+2+3 = 6. So 6 is a perfect number. Therefore, VI is perfect. |
| QED |
| -- Arthur Tateishi |
| |
| /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\ |
| /// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\ |
| \\\ an exciting new programming language -- http://www.Zimbu.org /// |
| \\\ help me help AIDS victims -- http://ICCF-Holland.org /// |