From a241a9b727f03afe62a161a2662a0f1192fd523a Mon Sep 17 00:00:00 2001 From: Marek Polacek Date: Wed, 6 Oct 2021 14:33:59 -0400 Subject: [PATCH] cpp: Implement -Wbidirectional=[none|unpaired|any] This patch implements -Wbidirectional=[none|unpaired|any] to warn about possibly dangerous bidirectional characters. gcc/c-family/ChangeLog: * c.opt (Wbidirectional, Wbidirectional=): New option. gcc/ChangeLog: * doc/invoke.texi: Document -Wbidirectional. libcpp/ChangeLog: * include/cpplib.h (enum cpp_bidirectional_level): New. (struct cpp_options): Add cpp_warn_bidirectional. (enum cpp_warning_reason): Add CPP_W_BIDIRECTIONAL. * init.c (cpp_create_reader): Set cpp_warn_bidirectional. * lex.c (bidi): New namespace. (get_bidi_utf8): New function. (get_bidi_ucn): Likewise. (maybe_warn_bidi_on_close): Likewise. (maybe_warn_bidi_on_char): Likewise. (_cpp_skip_block_comment): Implement warning about bidirectional characters. (skip_line_comment): Likewise. (forms_identifier_p): Likewise. (lex_identifier): Likewise. (lex_string): Likewise. (lex_raw_string): Likewise. gcc/testsuite/ChangeLog: * c-c++-common/Wbidirectional-1.c: New test. * c-c++-common/Wbidirectional-2.c: New test. * c-c++-common/Wbidirectional-3.c: New test. * c-c++-common/Wbidirectional-4.c: New test. * c-c++-common/Wbidirectional-5.c: New test. * c-c++-common/Wbidirectional-6.c: New test. * c-c++-common/Wbidirectional-7.c: New test. * c-c++-common/Wbidirectional-8.c: New test. * c-c++-common/Wbidirectional-9.c: New test. * c-c++-common/Wbidirectional-10.c: New test. * c-c++-common/Wbidirectional-11.c: New test. * c-c++-common/Wbidirectional-12.c: New test. * c-c++-common/Wbidirectional-13.c: New test. 
--- gcc/c-family/c.opt | 24 ++ gcc/doc/invoke.texi | 19 +- gcc/testsuite/c-c++-common/Wbidirectional-1.c | 11 + .../c-c++-common/Wbidirectional-10.c | 27 ++ .../c-c++-common/Wbidirectional-11.c | 12 + .../c-c++-common/Wbidirectional-12.c | 18 + .../c-c++-common/Wbidirectional-13.c | 16 + gcc/testsuite/c-c++-common/Wbidirectional-2.c | 8 + gcc/testsuite/c-c++-common/Wbidirectional-3.c | 10 + gcc/testsuite/c-c++-common/Wbidirectional-4.c | 165 ++++++++ gcc/testsuite/c-c++-common/Wbidirectional-5.c | 165 ++++++++ gcc/testsuite/c-c++-common/Wbidirectional-6.c | 154 +++++++ gcc/testsuite/c-c++-common/Wbidirectional-7.c | 8 + gcc/testsuite/c-c++-common/Wbidirectional-8.c | 12 + gcc/testsuite/c-c++-common/Wbidirectional-9.c | 28 ++ libcpp/include/cpplib.h | 18 +- libcpp/init.c | 1 + libcpp/lex.c | 391 +++++++++++++++++- 18 files changed, 1072 insertions(+), 15 deletions(-) create mode 100644 gcc/testsuite/c-c++-common/Wbidirectional-1.c create mode 100644 gcc/testsuite/c-c++-common/Wbidirectional-10.c create mode 100644 gcc/testsuite/c-c++-common/Wbidirectional-11.c create mode 100644 gcc/testsuite/c-c++-common/Wbidirectional-12.c create mode 100644 gcc/testsuite/c-c++-common/Wbidirectional-13.c create mode 100644 gcc/testsuite/c-c++-common/Wbidirectional-2.c create mode 100644 gcc/testsuite/c-c++-common/Wbidirectional-3.c create mode 100644 gcc/testsuite/c-c++-common/Wbidirectional-4.c create mode 100644 gcc/testsuite/c-c++-common/Wbidirectional-5.c create mode 100644 gcc/testsuite/c-c++-common/Wbidirectional-6.c create mode 100644 gcc/testsuite/c-c++-common/Wbidirectional-7.c create mode 100644 gcc/testsuite/c-c++-common/Wbidirectional-8.c create mode 100644 gcc/testsuite/c-c++-common/Wbidirectional-9.c diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt index 06457ac739e..09391824676 100644 --- a/gcc/c-family/c.opt +++ b/gcc/c-family/c.opt @@ -374,6 +374,30 @@ Wbad-function-cast C ObjC Var(warn_bad_function_cast) Warning Warn about casting functions to incompatible 
types. +Wbidirectional +C ObjC C++ ObjC++ Warning Alias(Wbidirectional=,any,none) +; + +Wbidirectional= +C ObjC C++ ObjC++ RejectNegative Joined Warning CPP(cpp_warn_bidirectional) CppReason(CPP_W_BIDIRECTIONAL) Var(warn_bidirectional) Init(bidirectional_unpaired) Enum(cpp_bidirectional_level) +-Wbidirectional=[none|unpaired|any] Warn about UTF-8 bidirectional characters. + +; Required for these enum values. +SourceInclude +cpplib.h + +Enum +Name(cpp_bidirectional_level) Type(int) UnknownError(argument %qs to %<-Wbidirectional%> not recognized) + +EnumValue +Enum(cpp_bidirectional_level) String(none) Value(bidirectional_none) + +EnumValue +Enum(cpp_bidirectional_level) String(unpaired) Value(bidirectional_unpaired) + +EnumValue +Enum(cpp_bidirectional_level) String(any) Value(bidirectional_any) + Wbool-compare C ObjC C++ ObjC++ Var(warn_bool_compare) Warning LangEnabledBy(C ObjC C++ ObjC++,Wall) Warn about boolean expression compared with an integer value different from true/false. diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index b64ec18ae46..e32858ce767 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -304,7 +304,9 @@ Objective-C and Objective-C++ Dialects}. -Warith-conversion @gol -Warray-bounds -Warray-bounds=@var{n} @gol -Wno-attributes -Wattribute-alias=@var{n} -Wno-attribute-alias @gol --Wno-attribute-warning -Wbool-compare -Wbool-operation @gol +-Wno-attribute-warning @gol +-Wbidirectional=@r{[}none@r{|}unpaired@r{|}any@r{]} @gol +-Wbool-compare -Wbool-operation @gol -Wno-builtin-declaration-mismatch @gol -Wno-builtin-macro-redefined -Wc90-c99-compat -Wc99-c11-compat @gol -Wc11-c2x-compat @gol @@ -6758,6 +6760,21 @@ Attributes considered include @code{allo This is the default. You can disable these warnings with either @option{-Wno-attribute-alias} or @option{-Wattribute-alias=0}. 
+@item -Wbidirectional=@r{[}none@r{|}unpaired@r{|}any@r{]} +@opindex Wbidirectional= +@opindex Wbidirectional +@opindex Wno-bidirectional +Warn about UTF-8 bidirectional characters. Such characters can change +left-to-right writing direction into right-to-left (and vice versa), +which can cause confusion between the logical order and visual order. +This may be dangerous; for instance, it may seem that a piece of code +is not commented out, whereas it in fact is. + +There are three levels of warning supported by GCC@. The default is +@option{-Wbidirectional=unpaired}, which warns about improperly terminated +bidi contexts. @option{-Wbidirectional=none} turns the warning off. +@option{-Wbidirectional=any} warns about any use of bidirectional characters. + @item -Wbool-compare @opindex Wno-bool-compare @opindex Wbool-compare diff --git a/gcc/testsuite/c-c++-common/Wbidirectional-1.c b/gcc/testsuite/c-c++-common/Wbidirectional-1.c new file mode 100644 index 00000000000..750de81fdd8 --- /dev/null +++ b/gcc/testsuite/c-c++-common/Wbidirectional-1.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ + +int main() { + int isAdmin = 0; + /*‮ } ⁦if (isAdmin)⁩ ⁦ begin admins only */ +/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */ + __builtin_printf("You are an admin.\n"); + /* end admins only ‮ { ⁦*/ +/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */ + return 0; +} diff --git a/gcc/testsuite/c-c++-common/Wbidirectional-10.c b/gcc/testsuite/c-c++-common/Wbidirectional-10.c new file mode 100644 index 00000000000..cd4abeeefbd --- /dev/null +++ b/gcc/testsuite/c-c++-common/Wbidirectional-10.c @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-options "-Wbidirectional=unpaired" } */ +/* More nesting testing. 
*/ + +/* RLE‫ LRI⁦ PDF‬ PDI⁩*/ +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +int LRE_\u202a_PDF_\u202c; +int LRE_\u202a_PDF_\u202c_LRE_\u202a_PDF_\u202c; +int LRE_\u202a_LRI_\u2066_PDF_\u202c_PDI_\u2069; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +int RLE_\u202b_RLI_\u2067_PDF_\u202c_PDI_\u2069; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +int RLE_\u202b_RLI_\u2067_PDI_\u2069_PDF_\u202c; +int FSI_\u2068_LRO_\u202d_PDI_\u2069_PDF_\u202c; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +int FSI_\u2068; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +int FSI_\u2068_PDI_\u2069; +int FSI_\u2068_FSI_\u2068_PDI_\u2069; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069; +int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDF_\u202c; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_FSI_\u2068_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ diff --git a/gcc/testsuite/c-c++-common/Wbidirectional-11.c b/gcc/testsuite/c-c++-common/Wbidirectional-11.c new file mode 100644 index 00000000000..43d699acc64 --- /dev/null +++ b/gcc/testsuite/c-c++-common/Wbidirectional-11.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-Wbidirectional=unpaired" } */ +/* Test that we warn when mixing UCN and UTF-8. 
*/ + +int LRE_‪_PDF_\u202c; +/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */ +int LRE_\u202a_PDF_‬_; +/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */ +const char *s1 = "LRE_‪_PDF_\u202c"; +/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */ +const char *s2 = "LRE_\u202a_PDF_‬"; +/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */ diff --git a/gcc/testsuite/c-c++-common/Wbidirectional-12.c b/gcc/testsuite/c-c++-common/Wbidirectional-12.c new file mode 100644 index 00000000000..20d1566401a --- /dev/null +++ b/gcc/testsuite/c-c++-common/Wbidirectional-12.c @@ -0,0 +1,18 @@ +/* { dg-do compile { target { c || c++11 } } } */ +/* { dg-options "-Wbidirectional=any" } */ +/* Test raw strings. */ + +const char *s1 = R"(a b c LRE‪ 1 2 3 PDF‬ x y z)"; +/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ +const char *s2 = R"(a b c RLE‫ 1 2 3 PDF‬ x y z)"; +/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ +const char *s3 = R"(a b c LRO‭ 1 2 3 PDF‬ x y z)"; +/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ +const char *s4 = R"(a b c RLO‮ 1 2 3 PDF‬ x y z)"; +/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ +const char *s7 = R"(a b c FSI⁨ 1 2 3 PDI⁩ x y) z"; +/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */ +const char *s8 = R"(a b c PDI⁩ x y )z"; +/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */ +const char *s9 = R"(a b c PDF‬ x y z)"; +/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */ diff --git a/gcc/testsuite/c-c++-common/Wbidirectional-13.c b/gcc/testsuite/c-c++-common/Wbidirectional-13.c new file mode 100644 index 00000000000..08010e3b37b --- /dev/null +++ b/gcc/testsuite/c-c++-common/Wbidirectional-13.c @@ -0,0 +1,16 @@ +/* { dg-do compile { target { c || c++11 } } } */ +/* { dg-options "-Wbidirectional=unpaired" } */ +/* Test raw strings. 
*/ + +const char *s1 = R"(a b c LRE‪ 1 2 3)"; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +const char *s2 = R"(a b c RLE‫ 1 2 3)"; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +const char *s3 = R"(a b c LRO‭ 1 2 3)"; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +const char *s4 = R"(a b c FSI⁨ 1 2 3)"; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +const char *s5 = R"(a b c LRI⁦ 1 2 3)"; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +const char *s6 = R"(a b c RLI⁧ 1 2 3)"; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ diff --git a/gcc/testsuite/c-c++-common/Wbidirectional-2.c b/gcc/testsuite/c-c++-common/Wbidirectional-2.c new file mode 100644 index 00000000000..4e04202e058 --- /dev/null +++ b/gcc/testsuite/c-c++-common/Wbidirectional-2.c @@ -0,0 +1,8 @@ +/* { dg-do compile } */ + +int main() { + /* Say hello; newline⁧/*/ return 0 ; +/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */ + __builtin_printf("Hello world.\n"); + return 0; +} diff --git a/gcc/testsuite/c-c++-common/Wbidirectional-3.c b/gcc/testsuite/c-c++-common/Wbidirectional-3.c new file mode 100644 index 00000000000..921300e94e0 --- /dev/null +++ b/gcc/testsuite/c-c++-common/Wbidirectional-3.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ + +int main() { + const char* access_level = "user"; + if (__builtin_strcmp(access_level, "user‮ ⁦// Check if admin⁩ ⁦")) { +/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */ + __builtin_printf("You are an admin.\n"); + } + return 0; +} diff --git a/gcc/testsuite/c-c++-common/Wbidirectional-4.c b/gcc/testsuite/c-c++-common/Wbidirectional-4.c new file mode 100644 index 00000000000..e6638aecc6a --- /dev/null +++ b/gcc/testsuite/c-c++-common/Wbidirectional-4.c @@ -0,0 +1,165 @@ +/* { dg-do compile } */ +/* { dg-options "-Wbidirectional=any -Wno-multichar -Wno-overflow" } */ +/* Test all bidi chars in various contexts (identifiers, comments, + string literals, character constants), both 
UCN and UTF-8. The bidi + chars here are properly terminated, except for the character constants. */ + +/* a b c LRE‪ 1 2 3 PDF‬ x y z */ +/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ +/* a b c RLE‫ 1 2 3 PDF‬ x y z */ +/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ +/* a b c LRO‭ 1 2 3 PDF‬ x y z */ +/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ +/* a b c RLO‮ 1 2 3 PDF‬ x y z */ +/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ +/* a b c LRI⁦ 1 2 3 PDI⁩ x y z */ +/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */ +/* a b c RLI⁧ 1 2 3 PDI⁩ x y */ +/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */ +/* a b c FSI⁨ 1 2 3 PDI⁩ x y z */ +/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */ + +/* Same but C++ comments instead. */ +// a b c LRE‪ 1 2 3 PDF‬ x y z +/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ +// a b c RLE‫ 1 2 3 PDF‬ x y z +/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ +// a b c LRO‭ 1 2 3 PDF‬ x y z +/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ +// a b c RLO‮ 1 2 3 PDF‬ x y z +/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ +// a b c LRI⁦ 1 2 3 PDI⁩ x y z +/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */ +// a b c RLI⁧ 1 2 3 PDI⁩ x y +/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */ +// a b c FSI⁨ 1 2 3 PDI⁩ x y z +/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */ + +/* Here we're closing an unopened context, warn when =any. 
*/ +/* a b c PDI⁩ x y z */ +/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */ +/* a b c PDF‬ x y z */ +/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */ +// a b c PDI⁩ x y z +/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */ +// a b c PDF‬ x y z +/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */ + +void +g1 () +{ + const char *s1 = "a b c LRE‪ 1 2 3 PDF‬ x y z"; +/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ + const char *s2 = "a b c RLE‫ 1 2 3 PDF‬ x y z"; +/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ + const char *s3 = "a b c LRO‭ 1 2 3 PDF‬ x y z"; +/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ + const char *s4 = "a b c RLO‮ 1 2 3 PDF‬ x y z"; +/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ + const char *s5 = "a b c LRI⁦ 1 2 3 PDI⁩ x y z"; +/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */ + const char *s6 = "a b c RLI⁧ 1 2 3 PDI⁩ x y z"; +/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */ + const char *s7 = "a b c FSI⁨ 1 2 3 PDI⁩ x y z"; +/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */ + const char *s8 = "a b c PDI⁩ x y z"; +/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */ + const char *s9 = "a b c PDF‬ x y z"; +/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */ + + const char *s10 = "a b c LRE\u202a 1 2 3 PDF\u202c x y z"; +/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ + const char *s11 = "a b c LRE\u202A 1 2 3 PDF\u202c x y z"; +/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ + const char *s12 = "a b c RLE\u202b 1 2 3 PDF\u202c x y z"; +/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ + const char *s13 = "a b c RLE\u202B 1 2 3 PDF\u202c x y z"; +/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ + const char *s14 = "a b c LRO\u202d 1 2 3 PDF\u202c x y z"; +/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ + const char *s15 = "a b c LRO\u202D 1 2 3 PDF\u202c x y z"; +/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ + const 
char *s16 = "a b c RLO\u202e 1 2 3 PDF\u202c x y z"; +/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ + const char *s17 = "a b c RLO\u202E 1 2 3 PDF\u202c x y z"; +/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ + const char *s18 = "a b c LRI\u2066 1 2 3 PDI\u2069 x y z"; +/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */ + const char *s19 = "a b c RLI\u2067 1 2 3 PDI\u2069 x y z"; +/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */ + const char *s20 = "a b c FSI\u2068 1 2 3 PDI\u2069 x y z"; +/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */ +} + +void +g2 () +{ + const char c1 = '\u202a'; +/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ + const char c2 = '\u202A'; +/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ + const char c3 = '\u202b'; +/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ + const char c4 = '\u202B'; +/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ + const char c5 = '\u202d'; +/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ + const char c6 = '\u202D'; +/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ + const char c7 = '\u202e'; +/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ + const char c8 = '\u202E'; +/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ + const char c9 = '\u2066'; +/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */ + const char c10 = '\u2067'; +/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */ + const char c11 = '\u2068'; +/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */ +} + +int a‪b‬c; +/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ +int a‫b‬c; +/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ +int a‭b‬c; +/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ +int a‮b‬c; +/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ +int a⁦b⁩c; +/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */ +int a⁧b⁩c; +/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */ +int a⁨b⁩c; +/* { dg-warning "U\\+2068" 
"" { target *-*-* } .-1 } */ +int A‬X; +/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */ +int A\u202cY; +/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */ +int A\u202CY2; +/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */ + +int d\u202ae\u202cf; +/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ +int d\u202Ae\u202cf2; +/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ +int d\u202be\u202cf; +/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ +int d\u202Be\u202cf2; +/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ +int d\u202de\u202cf; +/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ +int d\u202De\u202cf2; +/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ +int d\u202ee\u202cf; +/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ +int d\u202Ee\u202cf2; +/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ +int d\u2066e\u2069f; +/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */ +int d\u2067e\u2069f; +/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */ +int d\u2068e\u2069f; +/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */ +int X\u2069; +/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */ diff --git a/gcc/testsuite/c-c++-common/Wbidirectional-5.c b/gcc/testsuite/c-c++-common/Wbidirectional-5.c new file mode 100644 index 00000000000..45d3402c941 --- /dev/null +++ b/gcc/testsuite/c-c++-common/Wbidirectional-5.c @@ -0,0 +1,165 @@ +/* { dg-do compile } */ +/* { dg-options "-Wbidirectional=unpaired -Wno-multichar -Wno-overflow" } */ +/* Test all bidi chars in various contexts (identifiers, comments, + string literals, character constants), both UCN and UTF-8. The bidi + chars here are properly terminated, except for the character constants. 
*/ + +/* a b c LRE‪ 1 2 3 PDF‬ x y z */ +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ +/* a b c RLE‫ 1 2 3 PDF‬ x y z */ +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ +/* a b c LRO‭ 1 2 3 PDF‬ x y z */ +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ +/* a b c RLO‮ 1 2 3 PDF‬ x y z */ +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ +/* a b c LRI⁦ 1 2 3 PDI⁩ x y z */ +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ +/* a b c RLI⁧ 1 2 3 PDI⁩ x y */ +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ +/* a b c FSI⁨ 1 2 3 PDI⁩ x y z */ +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ + +/* Same but C++ comments instead. */ +// a b c LRE‪ 1 2 3 PDF‬ x y z +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ +// a b c RLE‫ 1 2 3 PDF‬ x y z +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ +// a b c LRO‭ 1 2 3 PDF‬ x y z +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ +// a b c RLO‮ 1 2 3 PDF‬ x y z +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ +// a b c LRI⁦ 1 2 3 PDI⁩ x y z +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ +// a b c RLI⁧ 1 2 3 PDI⁩ x y +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ +// a b c FSI⁨ 1 2 3 PDI⁩ x y z +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ + +/* Here we're closing an unopened context, warn when =any. 
*/ +/* a b c PDI⁩ x y z */ +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ +/* a b c PDF‬ x y z */ +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ +// a b c PDI⁩ x y z +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ +// a b c PDF‬ x y z +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ + +void +g1 () +{ + const char *s1 = "a b c LRE‪ 1 2 3 PDF‬ x y z"; +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ + const char *s2 = "a b c RLE‫ 1 2 3 PDF‬ x y z"; +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ + const char *s3 = "a b c LRO‭ 1 2 3 PDF‬ x y z"; +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ + const char *s4 = "a b c RLO‮ 1 2 3 PDF‬ x y z"; +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ + const char *s5 = "a b c LRI⁦ 1 2 3 PDI⁩ x y z"; +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ + const char *s6 = "a b c RLI⁧ 1 2 3 PDI⁩ x y z"; +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ + const char *s7 = "a b c FSI⁨ 1 2 3 PDI⁩ x y z"; +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ + const char *s8 = "a b c PDI⁩ x y z"; +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ + const char *s9 = "a b c PDF‬ x y z"; +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ + + const char *s10 = "a b c LRE\u202a 1 2 3 PDF\u202c x y z"; +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ + const char *s11 = "a b c LRE\u202A 1 2 3 PDF\u202c x y z"; +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ + const char *s12 = "a b c RLE\u202b 1 2 3 PDF\u202c x y z"; +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ + const char *s13 = "a b c RLE\u202B 1 2 3 PDF\u202c x y z"; +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ + const char *s14 = "a b c LRO\u202d 1 2 3 PDF\u202c x y z"; +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ + const char *s15 = "a b c LRO\u202D 1 2 3 PDF\u202c x y z"; +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ + const char *s16 = "a b c RLO\u202e 1 2 3 
PDF\u202c x y z"; +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ + const char *s17 = "a b c RLO\u202E 1 2 3 PDF\u202c x y z"; +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ + const char *s18 = "a b c LRI\u2066 1 2 3 PDI\u2069 x y z"; +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ + const char *s19 = "a b c RLI\u2067 1 2 3 PDI\u2069 x y z"; +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ + const char *s20 = "a b c FSI\u2068 1 2 3 PDI\u2069 x y z"; +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ +} + +void +g2 () +{ + const char c1 = '\u202a'; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ + const char c2 = '\u202A'; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ + const char c3 = '\u202b'; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ + const char c4 = '\u202B'; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ + const char c5 = '\u202d'; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ + const char c6 = '\u202D'; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ + const char c7 = '\u202e'; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ + const char c8 = '\u202E'; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ + const char c9 = '\u2066'; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ + const char c10 = '\u2067'; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ + const char c11 = '\u2068'; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +} + +int a‪b‬c; +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ +int a‫b‬c; +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ +int a‭b‬c; +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ +int a‮b‬c; +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ +int a⁦b⁩c; +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ +int a⁧b⁩c; +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ +int a⁨b⁩c; +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ +int A‬X; +/* { dg-bogus 
"unpaired" "" { target *-*-* } .-1 } */ +int A\u202cY; +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ +int A\u202CY2; +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ + +int d\u202ae\u202cf; +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ +int d\u202Ae\u202cf2; +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ +int d\u202be\u202cf; +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ +int d\u202Be\u202cf2; +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ +int d\u202de\u202cf; +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ +int d\u202De\u202cf2; +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ +int d\u202ee\u202cf; +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ +int d\u202Ee\u202cf2; +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ +int d\u2066e\u2069f; +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ +int d\u2067e\u2069f; +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ +int d\u2068e\u2069f; +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ +int X\u2069; +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ diff --git a/gcc/testsuite/c-c++-common/Wbidirectional-6.c b/gcc/testsuite/c-c++-common/Wbidirectional-6.c new file mode 100644 index 00000000000..1be017f828d --- /dev/null +++ b/gcc/testsuite/c-c++-common/Wbidirectional-6.c @@ -0,0 +1,154 @@ +/* { dg-do compile } */ +/* { dg-options "-Wbidirectional=unpaired" } */ +/* Test nesting of bidi chars in various contexts. 
*/ + +/* Terminated by the wrong char: */ +/* a b c LRE‪ 1 2 3 PDI⁩ x y z */ +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +/* a b c RLE‫ 1 2 3 PDI⁩ x y z*/ +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +/* a b c LRO‭ 1 2 3 PDI⁩ x y z */ +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +/* a b c RLO‮ 1 2 3 PDI⁩ x y z */ +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +/* a b c LRI⁦ 1 2 3 PDF‬ x y z */ +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +/* a b c RLI⁧ 1 2 3 PDF‬ x y z */ +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +/* a b c FSI⁨ 1 2 3 PDF‬ x y z*/ +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ + +/* LRE‪ PDF‬ */ +/* LRE‪ LRE‪ PDF‬ PDF‬ */ +/* PDF‬ LRE‪ PDF‬ */ +/* LRE‪ PDF‬ LRE‪ PDF‬ */ +/* LRE‪ LRE‪ PDF‬ */ +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +/* PDF‬ LRE‪ */ +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ + +// a b c LRE‪ 1 2 3 PDI⁩ x y z +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +// a b c RLE‫ 1 2 3 PDI⁩ x y z*/ +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +// a b c LRO‭ 1 2 3 PDI⁩ x y z +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +// a b c RLO‮ 1 2 3 PDI⁩ x y z +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +// a b c LRI⁦ 1 2 3 PDF‬ x y z +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +// a b c RLI⁧ 1 2 3 PDF‬ x y z +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +// a b c FSI⁨ 1 2 3 PDF‬ x y z +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ + +// LRE‪ PDF‬ +// LRE‪ LRE‪ PDF‬ PDF‬ +// PDF‬ LRE‪ PDF‬ +// LRE‪ PDF‬ LRE‪ PDF‬ +// LRE‪ LRE‪ PDF‬ +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +// PDF‬ LRE‪ +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ + +void +g1 () +{ + const char *s1 = "a b c LRE‪ 1 2 3 PDI⁩ x y z"; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ + const char *s2 = "a b c LRE\u202a 1 2 3 PDI\u2069 x y z"; +/* { dg-warning 
"unpaired" "" { target *-*-* } .-1 } */ + const char *s3 = "a b c RLE‫ 1 2 3 PDI⁩ x y "; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ + const char *s4 = "a b c RLE\u202b 1 2 3 PDI\u2069 x y z"; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ + const char *s5 = "a b c LRO‭ 1 2 3 PDI⁩ x y z"; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ + const char *s6 = "a b c LRO\u202d 1 2 3 PDI\u2069 x y z"; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ + const char *s7 = "a b c RLO‮ 1 2 3 PDI⁩ x y z"; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ + const char *s8 = "a b c RLO\u202e 1 2 3 PDI\u2069 x y z"; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ + const char *s9 = "a b c LRI⁦ 1 2 3 PDF‬ x y z"; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ + const char *s10 = "a b c LRI\u2066 1 2 3 PDF\u202c x y z"; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ + const char *s11 = "a b c RLI⁧ 1 2 3 PDF‬ x y z\ + "; +/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ + const char *s12 = "a b c RLI\u2067 1 2 3 PDF\u202c x y z"; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ + const char *s13 = "a b c FSI⁨ 1 2 3 PDF‬ x y z"; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ + const char *s14 = "a b c FSI\u2068 1 2 3 PDF\u202c x y z"; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ + const char *s15 = "PDF‬ LRE‪"; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ + const char *s16 = "PDF\u202c LRE\u202a"; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ + const char *s17 = "LRE‪ PDF‬"; + const char *s18 = "LRE\u202a PDF\u202c"; + const char *s19 = "LRE‪ LRE‪ PDF‬ PDF‬"; + const char *s20 = "LRE\u202a LRE\u202a PDF\u202c PDF\u202c"; + const char *s21 = "PDF‬ LRE‪ PDF‬"; + const char *s22 = "PDF\u202c LRE\u202a PDF\u202c"; + const char *s23 = "LRE‪ LRE‪ PDF‬"; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ + const char *s24 = "LRE\u202a LRE\u202a 
PDF\u202c"; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ + const char *s25 = "PDF‬ LRE‪"; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ + const char *s26 = "PDF\u202c LRE\u202a"; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ + const char *s27 = "PDF‬ LRE\u202a"; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ + const char *s28 = "PDF\u202c LRE‪"; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +} + +int aLRE‪bPDI⁩; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +int A\u202aB\u2069C; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +int aRLE‫bPDI⁩; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +int a\u202bB\u2069c; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +int aLRO‭bPDI⁩; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +int a\u202db\u2069c2; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +int aRLO‮bPDI⁩; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +int a\u202eb\u2069; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +int aLRI⁦bPDF‬; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +int a\u2066b\u202c; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +int aRLI⁧bPDF‬c +; +/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ +int a\u2067b\u202c; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +int aFSI⁨bPDF‬; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +int a\u2068b\u202c; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +int aFSI⁨bPD\u202C; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +int aFSI\u2068bPDF‬_; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +int aLRE‪bPDF‬b; +int A\u202aB\u202c; +int a_LRE‪_LRE‪_b_PDF‬_PDF‬; +int A\u202aA\u202aB\u202cB\u202c; +int aPDF‬bLREadPDF‬; +int a_\u202C_\u202a_\u202c; +int a_LRE‪_b_PDF‬_c_LRE‪_PDF‬; +int a_\u202a_\u202c_\u202a_\u202c_; +int a_LRE‪_b_PDF‬_c_LRE‪; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +int 
a_\u202a_\u202c_\u202a_; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ diff --git a/gcc/testsuite/c-c++-common/Wbidirectional-7.c b/gcc/testsuite/c-c++-common/Wbidirectional-7.c new file mode 100644 index 00000000000..f0f7b3ca14a --- /dev/null +++ b/gcc/testsuite/c-c++-common/Wbidirectional-7.c @@ -0,0 +1,8 @@ +/* { dg-do compile } */ +/* { dg-options "-Wbidirectional=any" } */ +/* Test we ignore UCNs in comments. */ + +// a b c \u202a 1 2 3 +// a b c \u202A 1 2 3 +/* a b c \u202a 1 2 3 */ +/* a b c \u202A 1 2 3 */ diff --git a/gcc/testsuite/c-c++-common/Wbidirectional-8.c b/gcc/testsuite/c-c++-common/Wbidirectional-8.c new file mode 100644 index 00000000000..c7d02193131 --- /dev/null +++ b/gcc/testsuite/c-c++-common/Wbidirectional-8.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-Wbidirectional=any" } */ +/* Test \u vs \U. */ + +int a_\u202A; +/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ +int a_\u202a_2; +/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ +int a_\U0000202A_3; +/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ +int a_\U0000202a_4; +/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ diff --git a/gcc/testsuite/c-c++-common/Wbidirectional-9.c b/gcc/testsuite/c-c++-common/Wbidirectional-9.c new file mode 100644 index 00000000000..d029209babb --- /dev/null +++ b/gcc/testsuite/c-c++-common/Wbidirectional-9.c @@ -0,0 +1,28 @@ +/* { dg-do compile } */ +/* { dg-options "-Wbidirectional=unpaired" } */ +/* Test that we properly separate bidi contexts (comment/identifier/character + constant/string literal). 
*/ + +/* LRE ->‪<- */ int pdf_\u202c_1; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +/* RLE ->‫<- */ int pdf_\u202c_2; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +/* LRO ->‭<- */ int pdf_\u202c_3; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +/* RLO ->‮<- */ int pdf_\u202c_4; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +/* LRI ->⁦<-*/ int pdi_\u2069_1; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +/* RLI ->⁧<- */ int pdi_\u2069_12; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +/* FSI ->⁨<- */ int pdi_\u2069_3; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ + +const char *s1 = "LRE\u202a"; /* PDF ->‬<- */ +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +/* LRE ->‪<- */ const char *s2 = "PDF\u202c"; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +const char *s3 = "LRE\u202a"; int pdf_\u202c_5; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +int lre_\u202a; const char *s4 = "PDF\u202c"; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h index 6e2fcb6b1f2..e48d13c4ee1 100644 --- a/libcpp/include/cpplib.h +++ b/libcpp/include/cpplib.h @@ -308,6 +308,17 @@ enum cpp_normalize_level { normalized_none }; +/* The possible bidirectional characters checking levels, from least + restrictive to most. */ +enum cpp_bidirectional_level { + /* No checking. */ + bidirectional_none, + /* Only detect unpaired uses of bidirectional characters. */ + bidirectional_unpaired, + /* Detect any use of bidirectional characters. */ + bidirectional_any +}; + /* This structure is nested inside struct cpp_reader, and carries all the options visible to the command line. */ struct cpp_options @@ -518,6 +529,10 @@ struct cpp_options /* True if warn about differences between C++98 and C++11. */ bool cpp_warn_cxx11_compat; + /* Nonzero if bidirectional characters checking is on. See enum + cpp_bidirectional_level.
*/ + unsigned char cpp_warn_bidirectional; + /* Dependency generation. */ struct { @@ -616,7 +631,8 @@ enum cpp_warning_reason { CPP_W_C90_C99_COMPAT, CPP_W_C11_C2X_COMPAT, CPP_W_CXX11_COMPAT, - CPP_W_EXPANSION_TO_DEFINED + CPP_W_EXPANSION_TO_DEFINED, + CPP_W_BIDIRECTIONAL }; /* Callback for header lookup for HEADER, which is the name of a diff --git a/libcpp/init.c b/libcpp/init.c index 5a424e23553..f9a8f5f088f 100644 --- a/libcpp/init.c +++ b/libcpp/init.c @@ -223,6 +223,7 @@ cpp_create_reader (enum c_lang lang, cpp_hash_table *table, = ENABLE_CANONICAL_SYSTEM_HEADERS; CPP_OPTION (pfile, ext_numeric_literals) = 1; CPP_OPTION (pfile, warn_date_time) = 0; + CPP_OPTION (pfile, cpp_warn_bidirectional) = bidirectional_unpaired; /* Default CPP arithmetic to something sensible for the host for the benefit of dumb users like fix-header. */ diff --git a/libcpp/lex.c b/libcpp/lex.c index 8e3ef096bbe..d9c39a4105f 100644 --- a/libcpp/lex.c +++ b/libcpp/lex.c @@ -1164,6 +1164,284 @@ _cpp_process_line_notes (cpp_reader *pfi } } +namespace bidi { + enum kind { + NONE, LRE, RLE, LRO, RLO, LRI, RLI, FSI, PDF, PDI + }; + + /* All the UTF-8 encodings of bidi characters start with E2. */ + const uchar utf8_start = 0xe2; + + /* A vector holding currently open bidi contexts. We use a char for + each context, its LSB is 1 if it represents a PDF context, 0 if it + represents a PDI context. The next bit is 1 if this context was opened + by a bidi character written as a UCN, and 0 when it was UTF-8. */ + semi_embedded_vec <unsigned char, 16> vec; + + /* Close the whole comment/identifier/string literal/character constant + context. */ + void on_close () + { + vec.truncate (0); + } + + /* Pop the last element in the vector. */ + void pop () + { + unsigned int len = vec.count (); + gcc_checking_assert (len > 0); + vec.truncate (len - 1); + } + + /* Return which context is currently opened. */ + kind current_ctx () + { + unsigned int len = vec.count (); + if (len == 0) + return NONE; + return (vec[len - 1] & 1) ?
PDF : PDI; + } + + /* Return true if the current context comes from a UCN origin, that is, + the bidi char which started this bidi context was written as a UCN. */ + bool current_ctx_ucn_p () + { + unsigned int len = vec.count (); + gcc_checking_assert (len > 0); + return (vec[len - 1] >> 1) & 1; + } + + /* We've read a bidi char, update the current vector as necessary. */ + void on_char (kind k, bool ucn_p) + { + switch (k) + { + case LRE: + case RLE: + case LRO: + case RLO: + vec.push (ucn_p ? 3u : 1u); + break; + case LRI: + case RLI: + case FSI: + vec.push (ucn_p ? 2u : 0u); + break; + case PDF: + if (current_ctx () == PDF) + pop (); + break; + case PDI: + if (current_ctx () == PDI) + pop (); + break; + case NONE: + break; + default: + abort (); + } + } + + /* Return a descriptive string for K. */ + const char *to_str (kind k) + { + switch (k) + { + case LRE: + return "U+202A (LEFT-TO-RIGHT EMBEDDING)"; + case RLE: + return "U+202B (RIGHT-TO-LEFT EMBEDDING)"; + case LRO: + return "U+202D (LEFT-TO-RIGHT OVERRIDE)"; + case RLO: + return "U+202E (RIGHT-TO-LEFT OVERRIDE)"; + case LRI: + return "U+2066 (LEFT-TO-RIGHT ISOLATE)"; + case RLI: + return "U+2067 (RIGHT-TO-LEFT ISOLATE)"; + case FSI: + return "U+2068 (FIRST STRONG ISOLATE)"; + case PDF: + return "U+202C (POP DIRECTIONAL FORMATTING)"; + case PDI: + return "U+2069 (POP DIRECTIONAL ISOLATE)"; + default: + abort (); + } + } +} + +/* Parse a sequence of 3 bytes starting with P and return its bidi code.
*/ + +static bidi::kind +get_bidi_utf8 (const unsigned char *const p) +{ + gcc_checking_assert (p[0] == bidi::utf8_start); + + if (p[1] == 0x80) + switch (p[2]) + { + case 0xaa: + return bidi::LRE; + case 0xab: + return bidi::RLE; + case 0xac: + return bidi::PDF; + case 0xad: + return bidi::LRO; + case 0xae: + return bidi::RLO; + default: + break; + } + else if (p[1] == 0x81) + switch (p[2]) + { + case 0xa6: + return bidi::LRI; + case 0xa7: + return bidi::RLI; + case 0xa8: + return bidi::FSI; + case 0xa9: + return bidi::PDI; + default: + break; + } + + return bidi::NONE; +} + +/* Parse a UCN where P points just past \u or \U and return its bidi code. */ + +static bidi::kind +get_bidi_ucn (const unsigned char *p, bool is_U) +{ + /* 6.4.3 Universal Character Names + \u hex-quad + \U hex-quad hex-quad + where \unnnn means \U0000nnnn. */ + + if (is_U) + { + if (p[0] != '0' || p[1] != '0' || p[2] != '0' || p[3] != '0') + return bidi::NONE; + /* Skip 4B so we can treat \u and \U the same below. */ + p += 4; + } + + /* All code points we are looking for start with 20xx. */ + if (p[0] != '2' || p[1] != '0') + return bidi::NONE; + else if (p[2] == '2') + switch (p[3]) + { + case 'a': + case 'A': + return bidi::LRE; + case 'b': + case 'B': + return bidi::RLE; + case 'c': + case 'C': + return bidi::PDF; + case 'd': + case 'D': + return bidi::LRO; + case 'e': + case 'E': + return bidi::RLO; + default: + break; + } + else if (p[2] == '6') + switch (p[3]) + { + case '6': + return bidi::LRI; + case '7': + return bidi::RLI; + case '8': + return bidi::FSI; + case '9': + return bidi::PDI; + default: + break; + } + + return bidi::NONE; +} + +/* We're closing a bidi context, that is, we've encountered a newline, + are closing a C-style comment, or are at the end of a string literal, + character constant, or identifier. Warn if this context was not + properly terminated by a PDI or PDF. P points to the last character + in this context. 
*/ + +static void +maybe_warn_bidi_on_close (cpp_reader *pfile, const uchar *p) +{ + if (CPP_OPTION (pfile, cpp_warn_bidirectional) == bidirectional_unpaired + && bidi::vec.count () > 0) + { + const location_t loc + = linemap_position_for_column (pfile->line_table, + CPP_BUF_COLUMN (pfile->buffer, p)); + cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0, + "unpaired UTF-8 bidirectional character " + "detected"); + } + /* We're done with this context. */ + bidi::on_close (); +} + +/* We're at the beginning or in the middle of an identifier/comment/string + literal/character constant. Warn if we've encountered a bidi character. + KIND says which bidi character it was; P points to it in the character + stream. UCN_P is true iff this bidi character was written as a UCN. */ + +static void +maybe_warn_bidi_on_char (cpp_reader *pfile, const uchar *p, bidi::kind kind, + bool ucn_p) +{ + if (__builtin_expect (kind == bidi::NONE, 1)) + return; + + const unsigned char warn_bidi = CPP_OPTION (pfile, cpp_warn_bidirectional); + + if (warn_bidi != bidirectional_none) + { + const location_t loc + = linemap_position_for_column (pfile->line_table, + CPP_BUF_COLUMN (pfile->buffer, p)); + /* It seems excessive to warn about a PDI/PDF that is closing + an opened context because we've already warned about the + opening character. Except warn when we have a UCN x UTF-8 + mismatch. 
*/ + if (kind == bidi::current_ctx ()) + { + if (warn_bidi == bidirectional_unpaired + && bidi::current_ctx_ucn_p () != ucn_p) + cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0, + "UTF-8 vs UCN mismatch when closing " + "a context by \"%s\"", bidi::to_str (kind)); + } + else if (warn_bidi == bidirectional_any) + { + if (kind == bidi::PDF || kind == bidi::PDI) + cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0, + "\"%s\" is closing an unopened context", + bidi::to_str (kind)); + else + cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0, + "found problematic Unicode character \"%s\"", + bidi::to_str (kind)); + } + } + /* Push or pop the open bidi contexts as this character dictates. */ + bidi::on_char (kind, ucn_p); +} + /* Skip a C-style block comment. We find the end of the comment by seeing if an asterisk is before every '/' we encounter. Returns nonzero if comment terminated by EOF, zero otherwise. @@ -1175,7 +1453,8 @@ _cpp_skip_block_comment (cpp_reader *pfi cpp_buffer *buffer = pfile->buffer; const uchar *cur = buffer->cur; uchar c; - + const bool warn_bidi_p = (CPP_OPTION (pfile, cpp_warn_bidirectional) + != bidirectional_none); cur++; if (*cur == '/') cur++; @@ -1189,7 +1468,11 @@ _cpp_skip_block_comment (cpp_reader *pfi if (c == '/') { if (cur[-2] == '*') - break; + { + if (warn_bidi_p) + maybe_warn_bidi_on_close (pfile, cur); + break; + } /* Warn about potential nested comments, but not if the '/' comes immediately before the true comment delimiter. @@ -1208,6 +1491,8 @@ _cpp_skip_block_comment (cpp_reader *pfi { unsigned int cols; buffer->cur = cur - 1; + if (warn_bidi_p) + maybe_warn_bidi_on_close (pfile, cur); _cpp_process_line_notes (pfile, true); if (buffer->next_line >= buffer->rlimit) return true; @@ -1218,6 +1503,13 @@ _cpp_skip_block_comment (cpp_reader *pfi cur = buffer->cur; } + /* If this is a beginning of a UTF-8 encoding, it might be + a bidirectional character.
*/ + else if (__builtin_expect (c == bidi::utf8_start, 0) && warn_bidi_p) + { + bidi::kind kind = get_bidi_utf8 (cur - 1); + maybe_warn_bidi_on_char (pfile, cur - 1, kind, /*ucn_p=*/false); + } } buffer->cur = cur; @@ -1233,9 +1525,32 @@ skip_line_comment (cpp_reader *pfile) { cpp_buffer *buffer = pfile->buffer; location_t orig_line = pfile->line_table->highest_line; + const bool warn_bidi_p = (CPP_OPTION (pfile, cpp_warn_bidirectional) + != bidirectional_none); - while (*buffer->cur != '\n') - buffer->cur++; + if (!warn_bidi_p) + while (*buffer->cur != '\n') + buffer->cur++; + else + { + while (*buffer->cur != '\n' + && *buffer->cur != bidi::utf8_start) + buffer->cur++; + if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0)) + { + while (*buffer->cur != '\n') + { + if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0)) + { + bidi::kind kind = get_bidi_utf8 (buffer->cur); + maybe_warn_bidi_on_char (pfile, buffer->cur, kind, + /*ucn_p=*/false); + } + buffer->cur++; + } + maybe_warn_bidi_on_close (pfile, buffer->cur); + } + } _cpp_process_line_notes (pfile, true); return orig_line != pfile->line_table->highest_line; @@ -1317,11 +1632,14 @@ static const cppchar_t utf8_signifier = /* Returns TRUE if the sequence starting at buffer->cur is valid in an identifier. FIRST is TRUE if this starts an identifier.
*/ + static bool forms_identifier_p (cpp_reader *pfile, int first, struct normalize_state *state) { cpp_buffer *buffer = pfile->buffer; + const bool warn_bidi_p = (CPP_OPTION (pfile, cpp_warn_bidirectional) + != bidirectional_none); if (*buffer->cur == '$') { @@ -1344,6 +1662,13 @@ forms_identifier_p (cpp_reader *pfile, i cppchar_t s; if (*buffer->cur >= utf8_signifier) { + if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0) + && warn_bidi_p) + { + bidi::kind kind = get_bidi_utf8 (buffer->cur); + maybe_warn_bidi_on_char (pfile, buffer->cur, kind, + /*ucn_p=*/false); + } if (_cpp_valid_utf8 (pfile, &buffer->cur, buffer->rlimit, 1 + !first, state, &s)) return true; @@ -1352,6 +1677,13 @@ forms_identifier_p (cpp_reader *pfile, i && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U')) { buffer->cur += 2; + if (warn_bidi_p) + { + bidi::kind kind = get_bidi_ucn (buffer->cur, + buffer->cur[-1] == 'U'); + maybe_warn_bidi_on_char (pfile, buffer->cur, kind, + /*ucn_p=*/true); + } if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first, state, &s, NULL, NULL)) return true; @@ -1460,6 +1792,8 @@ lex_identifier (cpp_reader *pfile, const const uchar *cur; unsigned int len; unsigned int hash = HT_HASHSTEP (0, *base); + const bool warn_bidi_p = (CPP_OPTION (pfile, cpp_warn_bidirectional) + != bidirectional_none); cur = pfile->buffer->cur; if (! starts_ucn) @@ -1476,13 +1810,17 @@ lex_identifier (cpp_reader *pfile, const { /* Slower version for identifiers containing UCNs or extended chars (including $). 
*/ - do { - while (ISIDNUM (*pfile->buffer->cur)) - { - NORMALIZE_STATE_UPDATE_IDNUM (nst, *pfile->buffer->cur); - pfile->buffer->cur++; - } - } while (forms_identifier_p (pfile, false, nst)); + do + { + while (ISIDNUM (*pfile->buffer->cur)) + { + NORMALIZE_STATE_UPDATE_IDNUM (nst, *pfile->buffer->cur); + pfile->buffer->cur++; + } + } + while (forms_identifier_p (pfile, false, nst)); + if (warn_bidi_p) + maybe_warn_bidi_on_close (pfile, pfile->buffer->cur); result = _cpp_interpret_identifier (pfile, base, pfile->buffer->cur - base); *spelling = cpp_lookup (pfile, base, pfile->buffer->cur - base); @@ -1684,6 +2022,8 @@ lex_raw_string (cpp_reader *pfile, cpp_t _cpp_buff *first_buff = NULL, *last_buff = NULL; size_t raw_prefix_start; _cpp_line_note *note = &pfile->buffer->notes[pfile->buffer->cur_note]; + const bool warn_bidi_p = (CPP_OPTION (pfile, cpp_warn_bidirectional) + != bidirectional_none); type = (*base == 'L' ? CPP_WSTRING : *base == 'U' ? CPP_STRING32 : @@ -1920,8 +2260,16 @@ lex_raw_string (cpp_reader *pfile, cpp_t cur = base = pfile->buffer->cur; note = &pfile->buffer->notes[pfile->buffer->cur_note]; } + else if (__builtin_expect ((unsigned char) c == bidi::utf8_start, 0) + && warn_bidi_p) + maybe_warn_bidi_on_char (pfile, cur - 1, + get_bidi_utf8 (cur - 1), + /*ucn_p=*/false); } + if (warn_bidi_p) + maybe_warn_bidi_on_close (pfile, cur); + if (CPP_OPTION (pfile, user_literals)) { /* If a string format macro, say from inttypes.h, is placed touching @@ -2016,15 +2364,28 @@ lex_string (cpp_reader *pfile, cpp_token else terminator = '>', type = CPP_HEADER_NAME; + const bool warn_bidi_p = (CPP_OPTION (pfile, cpp_warn_bidirectional) + != bidirectional_none); for (;;) { cppchar_t c = *cur++; /* In #include-style directives, terminators are not escapable. 
*/ if (c == '\\' && !pfile->state.angled_headers && *cur != '\n') - cur++; + { + if ((cur[0] == 'u' || cur[0] == 'U') && warn_bidi_p) + { + bidi::kind kind = get_bidi_ucn (cur + 1, cur[0] == 'U'); + maybe_warn_bidi_on_char (pfile, cur, kind, /*ucn_p=*/true); + } + cur++; + } else if (c == terminator) - break; + { + if (warn_bidi_p) + maybe_warn_bidi_on_close (pfile, cur - 1); + break; + } else if (c == '\n') { cur--; @@ -2041,6 +2402,11 @@ lex_string (cpp_reader *pfile, cpp_token } else if (c == '\0') saw_NUL = true; + else if (__builtin_expect (c == bidi::utf8_start, 0) && warn_bidi_p) + { + bidi::kind kind = get_bidi_utf8 (cur - 1); + maybe_warn_bidi_on_char (pfile, cur - 1, kind, /*ucn_p=*/false); + } } if (saw_NUL && !pfile->state.skipping) base-commit: b0b1d8d5d90d7c499e2733e8d01ba8b73217f332 -- 2.31.1