From 3de30fba48743f27a02a79a85fbacb1952ee7e6d Mon Sep 17 00:00:00 2001 From: CentOS Sources Date: Jan 11 2022 18:31:24 +0000 Subject: import gcc-11.2.1-6.1.el9 --- diff --git a/SOURCES/gcc11-Wbidi-chars.patch b/SOURCES/gcc11-Wbidi-chars.patch new file mode 100644 index 0000000..c2b4211 --- /dev/null +++ b/SOURCES/gcc11-Wbidi-chars.patch @@ -0,0 +1,1721 @@ +commit 51c500269bf53749b107807d84271385fad35628 +Author: Marek Polacek +Date: Wed Oct 6 14:33:59 2021 -0400 + + libcpp: Implement -Wbidi-chars for CVE-2021-42574 [PR103026] + + From a link below: + "An issue was discovered in the Bidirectional Algorithm in the Unicode + Specification through 14.0. It permits the visual reordering of + characters via control sequences, which can be used to craft source code + that renders different logic than the logical ordering of tokens + ingested by compilers and interpreters. Adversaries can leverage this to + encode source code for compilers accepting Unicode such that targeted + vulnerabilities are introduced invisibly to human reviewers." + + More info: + https://nvd.nist.gov/vuln/detail/CVE-2021-42574 + https://trojansource.codes/ + + This is not a compiler bug. However, to mitigate the problem, this patch + implements -Wbidi-chars=[none|unpaired|any] to warn about possibly + misleading Unicode bidirectional control characters the preprocessor may + encounter. + + The default is =unpaired, which warns about improperly terminated + bidirectional control characters; e.g. a LRE without its corresponding PDF. + The level =any warns about any use of bidirectional control characters. + + This patch handles both UCNs and UTF-8 characters. UCNs designating + bidi characters in identifiers are accepted since r204886. Then r217144 + enabled -fextended-identifiers by default. Extended characters in C/C++ + identifiers have been accepted since r275979. However, this patch still + warns about mixing UTF-8 and UCN bidi characters; there seems to be no + good reason to allow mixing them. + + We warn in different contexts: comments (both C and C++-style), string + literals, character constants, and identifiers. Expectedly, UCNs are ignored + in comments and raw string literals. The bidirectional control characters + can nest so this patch handles that as well. + + I have not included nor tested this at all with Fortran (which also has + string literals and line comments). + + Dave M. posted patches improving diagnostic involving Unicode characters. + This patch does not make use of this new infrastructure yet. + + PR preprocessor/103026 + + gcc/c-family/ChangeLog: + + * c.opt (Wbidi-chars, Wbidi-chars=): New option. + + gcc/ChangeLog: + + * doc/invoke.texi: Document -Wbidi-chars. + + libcpp/ChangeLog: + + * include/cpplib.h (enum cpp_bidirectional_level): New. + (struct cpp_options): Add cpp_warn_bidirectional. + (enum cpp_warning_reason): Add CPP_W_BIDIRECTIONAL. + * internal.h (struct cpp_reader): Add warn_bidi_p member + function. + * init.c (cpp_create_reader): Set cpp_warn_bidirectional. + * lex.c (bidi): New namespace. + (get_bidi_utf8): New function. + (get_bidi_ucn): Likewise. + (maybe_warn_bidi_on_close): Likewise. + (maybe_warn_bidi_on_char): Likewise. + (_cpp_skip_block_comment): Implement warning about bidirectional + control characters. + (skip_line_comment): Likewise. + (forms_identifier_p): Likewise. + (lex_identifier): Likewise. + (lex_string): Likewise. + (lex_raw_string): Likewise. + + gcc/testsuite/ChangeLog: + + * c-c++-common/Wbidi-chars-1.c: New test. + * c-c++-common/Wbidi-chars-2.c: New test. + * c-c++-common/Wbidi-chars-3.c: New test. + * c-c++-common/Wbidi-chars-4.c: New test. + * c-c++-common/Wbidi-chars-5.c: New test. + * c-c++-common/Wbidi-chars-6.c: New test. + * c-c++-common/Wbidi-chars-7.c: New test. + * c-c++-common/Wbidi-chars-8.c: New test. + * c-c++-common/Wbidi-chars-9.c: New test. + * c-c++-common/Wbidi-chars-10.c: New test. + * c-c++-common/Wbidi-chars-11.c: New test. + * c-c++-common/Wbidi-chars-12.c: New test. + * c-c++-common/Wbidi-chars-13.c: New test. + * c-c++-common/Wbidi-chars-14.c: New test. + * c-c++-common/Wbidi-chars-15.c: New test. + * c-c++-common/Wbidi-chars-16.c: New test. + * c-c++-common/Wbidi-chars-17.c: New test. + +diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt +index 8a4cd634f77..3976fc368db 100644 +--- a/gcc/c-family/c.opt ++++ b/gcc/c-family/c.opt +@@ -374,6 +374,30 @@ Wbad-function-cast + C ObjC Var(warn_bad_function_cast) Warning + Warn about casting functions to incompatible types. + ++Wbidi-chars ++C ObjC C++ ObjC++ Warning Alias(Wbidi-chars=,any,none) ++; ++ ++Wbidi-chars= ++C ObjC C++ ObjC++ RejectNegative Joined Warning CPP(cpp_warn_bidirectional) CppReason(CPP_W_BIDIRECTIONAL) Var(warn_bidirectional) Init(bidirectional_unpaired) Enum(cpp_bidirectional_level) ++-Wbidi-chars=[none|unpaired|any] Warn about UTF-8 bidirectional control characters. ++ ++; Required for these enum values. ++SourceInclude ++cpplib.h ++ ++Enum ++Name(cpp_bidirectional_level) Type(int) UnknownError(argument %qs to %<-Wbidi-chars%> not recognized) ++ ++EnumValue ++Enum(cpp_bidirectional_level) String(none) Value(bidirectional_none) ++ ++EnumValue ++Enum(cpp_bidirectional_level) String(unpaired) Value(bidirectional_unpaired) ++ ++EnumValue ++Enum(cpp_bidirectional_level) String(any) Value(bidirectional_any) ++ + Wbool-compare + C ObjC C++ ObjC++ Var(warn_bool_compare) Warning LangEnabledBy(C ObjC C++ ObjC++,Wall) + Warn about boolean expression compared with an integer value different from true/false. +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index 6070288856c..a22758d18ee 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -325,7 +325,9 @@ Objective-C and Objective-C++ Dialects}. + -Warith-conversion @gol + -Warray-bounds -Warray-bounds=@var{n} @gol + -Wno-attributes -Wattribute-alias=@var{n} -Wno-attribute-alias @gol +--Wno-attribute-warning -Wbool-compare -Wbool-operation @gol ++-Wno-attribute-warning @gol ++-Wbidi-chars=@r{[}none@r{|}unpaired@r{|}any@r{]} @gol ++-Wbool-compare -Wbool-operation @gol + -Wno-builtin-declaration-mismatch @gol + -Wno-builtin-macro-redefined -Wc90-c99-compat -Wc99-c11-compat @gol + -Wc11-c2x-compat @gol +@@ -7557,6 +7559,23 @@ Attributes considered include @code{alloc_align}, @code{alloc_size}, + This is the default. You can disable these warnings with either + @option{-Wno-attribute-alias} or @option{-Wattribute-alias=0}. + ++@item -Wbidi-chars=@r{[}none@r{|}unpaired@r{|}any@r{]} ++@opindex Wbidi-chars= ++@opindex Wbidi-chars ++@opindex Wno-bidi-chars ++Warn about possibly misleading UTF-8 bidirectional control characters in ++comments, string literals, character constants, and identifiers. Such ++characters can change left-to-right writing direction into right-to-left ++(and vice versa), which can cause confusion between the logical order and ++visual order. This may be dangerous; for instance, it may seem that a piece ++of code is not commented out, whereas it in fact is. ++ ++There are three levels of warning supported by GCC@. The default is ++@option{-Wbidi-chars=unpaired}, which warns about improperly terminated ++bidi contexts. @option{-Wbidi-chars=none} turns the warning off. ++@option{-Wbidi-chars=any} warns about any use of bidirectional control ++characters. ++ + @item -Wbool-compare + @opindex Wno-bool-compare + @opindex Wbool-compare +diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-1.c b/gcc/testsuite/c-c++-common/Wbidi-chars-1.c +new file mode 100644 +index 00000000000..34f5ac19271 +--- /dev/null ++++ b/gcc/testsuite/c-c++-common/Wbidi-chars-1.c +@@ -0,0 +1,12 @@ ++/* PR preprocessor/103026 */ ++/* { dg-do compile } */ ++ ++int main() { ++ int isAdmin = 0; ++ /*‮ } ⁦if (isAdmin)⁩ ⁦ begin admins only */ ++/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */ ++ __builtin_printf("You are an admin.\n"); ++ /* end admins only ‮ { ⁦*/ ++/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */ ++ return 0; ++} +diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-10.c b/gcc/testsuite/c-c++-common/Wbidi-chars-10.c +new file mode 100644 +index 00000000000..3f851b69e65 +--- /dev/null ++++ b/gcc/testsuite/c-c++-common/Wbidi-chars-10.c +@@ -0,0 +1,27 @@ ++/* PR preprocessor/103026 */ ++/* { dg-do compile } */ ++/* { dg-options "-Wbidi-chars=unpaired" } */ ++/* More nesting testing. */ ++ ++/* RLE‫ LRI⁦ PDF‬ PDI⁩*/ ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++int LRE_\u202a_PDF_\u202c; ++int LRE_\u202a_PDF_\u202c_LRE_\u202a_PDF_\u202c; ++int LRE_\u202a_LRI_\u2066_PDF_\u202c_PDI_\u2069; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++int RLE_\u202b_RLI_\u2067_PDF_\u202c_PDI_\u2069; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++int RLE_\u202b_RLI_\u2067_PDI_\u2069_PDF_\u202c; ++int FSI_\u2068_LRO_\u202d_PDI_\u2069_PDF_\u202c; ++int FSI_\u2068; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++int FSI_\u2068_PDI_\u2069; ++int FSI_\u2068_FSI_\u2068_PDI_\u2069; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069; ++int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDF_\u202c; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_FSI_\u2068_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-11.c b/gcc/testsuite/c-c++-common/Wbidi-chars-11.c +new file mode 100644 +index 00000000000..270ce2368a9 +--- /dev/null ++++ b/gcc/testsuite/c-c++-common/Wbidi-chars-11.c +@@ -0,0 +1,13 @@ ++/* PR preprocessor/103026 */ ++/* { dg-do compile } */ ++/* { dg-options "-Wbidi-chars=unpaired" } */ ++/* Test that we warn when mixing UCN and UTF-8. */ ++ ++int LRE_‪_PDF_\u202c; ++/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */ ++int LRE_\u202a_PDF_‬_; ++/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */ ++const char *s1 = "LRE_‪_PDF_\u202c"; ++/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */ ++const char *s2 = "LRE_\u202a_PDF_‬"; ++/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */ +diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-12.c b/gcc/testsuite/c-c++-common/Wbidi-chars-12.c +new file mode 100644 +index 00000000000..b07eec1da91 +--- /dev/null ++++ b/gcc/testsuite/c-c++-common/Wbidi-chars-12.c +@@ -0,0 +1,19 @@ ++/* PR preprocessor/103026 */ ++/* { dg-do compile { target { c || c++11 } } } */ ++/* { dg-options "-Wbidi-chars=any" } */ ++/* Test raw strings. */ ++ ++const char *s1 = R"(a b c LRE‪ 1 2 3 PDF‬ x y z)"; ++/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ ++const char *s2 = R"(a b c RLE‫ 1 2 3 PDF‬ x y z)"; ++/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ ++const char *s3 = R"(a b c LRO‭ 1 2 3 PDF‬ x y z)"; ++/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ ++const char *s4 = R"(a b c RLO‮ 1 2 3 PDF‬ x y z)"; ++/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ ++const char *s7 = R"(a b c FSI⁨ 1 2 3 PDI⁩ x y) z"; ++/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */ ++const char *s8 = R"(a b c PDI⁩ x y )z"; ++/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */ ++const char *s9 = R"(a b c PDF‬ x y z)"; ++/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */ +diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-13.c b/gcc/testsuite/c-c++-common/Wbidi-chars-13.c +new file mode 100644 +index 00000000000..b2dd9fde752 +--- /dev/null ++++ b/gcc/testsuite/c-c++-common/Wbidi-chars-13.c +@@ -0,0 +1,17 @@ ++/* PR preprocessor/103026 */ ++/* { dg-do compile { target { c || c++11 } } } */ ++/* { dg-options "-Wbidi-chars=unpaired" } */ ++/* Test raw strings. */ ++ ++const char *s1 = R"(a b c LRE‪ 1 2 3)"; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++const char *s2 = R"(a b c RLE‫ 1 2 3)"; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++const char *s3 = R"(a b c LRO‭ 1 2 3)"; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++const char *s4 = R"(a b c FSI⁨ 1 2 3)"; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++const char *s5 = R"(a b c LRI⁦ 1 2 3)"; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++const char *s6 = R"(a b c RLI⁧ 1 2 3)"; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-14.c b/gcc/testsuite/c-c++-common/Wbidi-chars-14.c +new file mode 100644 +index 00000000000..ba5f75d9553 +--- /dev/null ++++ b/gcc/testsuite/c-c++-common/Wbidi-chars-14.c +@@ -0,0 +1,38 @@ ++/* PR preprocessor/103026 */ ++/* { dg-do compile } */ ++/* { dg-options "-Wbidi-chars=unpaired" } */ ++/* Test PDI handling, which also pops any subsequent LREs, RLEs, LROs, ++ or RLOs. */ ++ ++/* LRI_⁦_LRI_⁦_RLE_‫_RLE_‫_RLE_‫_PDI_⁩*/ ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++// LRI_⁦_RLE_‫_RLE_‫_RLE_‫_PDI_⁩ ++// LRI_⁦_RLO_‮_RLE_‫_RLE_‫_PDI_⁩ ++// LRI_⁦_RLO_‮_RLE_‫_PDI_⁩ ++// FSI_⁨_RLO_‮_PDI_⁩ ++// FSI_⁨_FSI_⁨_RLO_‮_PDI_⁩ ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ ++int LRI_\u2066_LRI_\u2066_LRE_\u202a_LRE_\u202a_LRE_\u202a_PDI_\u2069; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++int LRI_\u2066_LRI_\u2066_LRE_\u202a_LRE_\u202a_LRE_\u202a_PDI_\u2069_PDI_\u2069; ++int LRI_\u2066_LRI_\u2066_LRI_\u2066_LRE_\u202a_LRE_\u202a_LRE_\u202a_PDI_\u2069_PDI_\u2069; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++int PDI_\u2069; ++int LRI_\u2066_PDI_\u2069; ++int RLI_\u2067_PDI_\u2069; ++int LRE_\u202a_LRI_\u2066_PDI_\u2069; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++int LRI_\u2066_LRE_\u202a_PDF_\u202c_PDI_\u2069; ++int LRI_\u2066_LRE_\u202a_LRE_\u202a_PDF_\u202c_PDI_\u2069; ++int RLI_\u2067_LRI_\u2066_LRE_\u202a_LRE_\u202a_PDF_\u202c_PDI_\u2069; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++int FSI_\u2068_LRI_\u2066_LRE_\u202a_LRE_\u202a_PDF_\u202c_PDI_\u2069; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++int RLO_\u202e_PDI_\u2069; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++int RLI_\u2067_PDI_\u2069_RLI_\u2067; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++int FSI_\u2068_PDF_\u202c_PDI_\u2069; ++int FSI_\u2068_FSI_\u2068_PDF_\u202c_PDI_\u2069; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-15.c b/gcc/testsuite/c-c++-common/Wbidi-chars-15.c +new file mode 100644 +index 00000000000..a0ce8ff5e2c +--- /dev/null ++++ b/gcc/testsuite/c-c++-common/Wbidi-chars-15.c +@@ -0,0 +1,59 @@ ++/* PR preprocessor/103026 */ ++/* { dg-do compile } */ ++/* { dg-options "-Wbidi-chars=unpaired" } */ ++/* Test unpaired bidi control chars in multiline comments. */ ++ ++/* ++ * LRE‪ end ++ */ ++/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ ++/* ++ * RLE‫ end ++ */ ++/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ ++/* ++ * LRO‭ end ++ */ ++/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ ++/* ++ * RLO‮ end ++ */ ++/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ ++/* ++ * LRI⁦ end ++ */ ++/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ ++/* ++ * RLI⁧ end ++ */ ++/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ ++/* ++ * FSI⁨ end ++ */ ++/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ ++/* LRE‪ ++ PDF‬ */ ++/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ ++/* FSI⁨ ++ PDI⁩ */ ++/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ ++ ++/* LRE<‪> ++ * ++ */ ++/* { dg-warning "unpaired" "" { target *-*-* } .-3 } */ ++ ++/* ++ * LRE<‪> ++ */ ++/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ ++ ++/* ++ * ++ * LRE<‪> */ ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ ++/* RLI<⁧> */ /* PDI<⁩> */ ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++/* LRE<‪> */ /* PDF<‬> */ ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-16.c b/gcc/testsuite/c-c++-common/Wbidi-chars-16.c +new file mode 100644 +index 00000000000..baa0159861c +--- /dev/null ++++ b/gcc/testsuite/c-c++-common/Wbidi-chars-16.c +@@ -0,0 +1,26 @@ ++/* PR preprocessor/103026 */ ++/* { dg-do compile } */ ++/* { dg-options "-Wbidi-chars=any" } */ ++/* Test LTR/RTL chars. */ ++ ++/* LTR<‎> */ ++/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */ ++// LTR<‎> ++/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */ ++/* RTL<‏> */ ++/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */ ++// RTL<‏> ++/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */ ++ ++const char *s1 = "LTR<‎>"; ++/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */ ++const char *s2 = "LTR\u200e"; ++/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */ ++const char *s3 = "LTR\u200E"; ++/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */ ++const char *s4 = "RTL<‏>"; ++/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */ ++const char *s5 = "RTL\u200f"; ++/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */ ++const char *s6 = "RTL\u200F"; ++/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */ +diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-17.c b/gcc/testsuite/c-c++-common/Wbidi-chars-17.c +new file mode 100644 +index 00000000000..07cb4321f96 +--- /dev/null ++++ b/gcc/testsuite/c-c++-common/Wbidi-chars-17.c +@@ -0,0 +1,30 @@ ++/* PR preprocessor/103026 */ ++/* { dg-do compile } */ ++/* { dg-options "-Wbidi-chars=unpaired" } */ ++/* Test LTR/RTL chars. */ ++ ++/* LTR<‎> */ ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++// LTR<‎> ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++/* RTL<‏> */ ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++// RTL<‏> ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++int ltr_\u200e; ++/* { dg-error "universal character " "" { target *-*-* } .-1 } */ ++int rtl_\u200f; ++/* { dg-error "universal character " "" { target *-*-* } .-1 } */ ++ ++const char *s1 = "LTR<‎>"; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++const char *s2 = "LTR\u200e"; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++const char *s3 = "LTR\u200E"; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++const char *s4 = "RTL<‏>"; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++const char *s5 = "RTL\u200f"; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++const char *s6 = "RTL\u200F"; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ +diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-2.c b/gcc/testsuite/c-c++-common/Wbidi-chars-2.c +new file mode 100644 +index 00000000000..2340374f276 +--- /dev/null ++++ b/gcc/testsuite/c-c++-common/Wbidi-chars-2.c +@@ -0,0 +1,9 @@ ++/* PR preprocessor/103026 */ ++/* { dg-do compile } */ ++ ++int main() { ++ /* Say hello; newline⁧/*/ return 0 ; ++/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */ ++ __builtin_printf("Hello world.\n"); ++ return 0; ++} +diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-3.c b/gcc/testsuite/c-c++-common/Wbidi-chars-3.c +new file mode 100644 +index 00000000000..9dc7edb6e64 +--- /dev/null ++++ b/gcc/testsuite/c-c++-common/Wbidi-chars-3.c +@@ -0,0 +1,11 @@ ++/* PR preprocessor/103026 */ ++/* { dg-do compile } */ ++ ++int main() { ++ const char* access_level = "user"; ++ if (__builtin_strcmp(access_level, "user‮ ⁦// Check if admin⁩ ⁦")) { ++/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */ ++ __builtin_printf("You are an admin.\n"); ++ } ++ return 0; ++} +diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-4.c b/gcc/testsuite/c-c++-common/Wbidi-chars-4.c +new file mode 100644 +index 00000000000..639e5c62e88 +--- /dev/null ++++ b/gcc/testsuite/c-c++-common/Wbidi-chars-4.c +@@ -0,0 +1,188 @@ ++/* PR preprocessor/103026 */ ++/* { dg-do compile } */ ++/* { dg-options "-Wbidi-chars=any -Wno-multichar -Wno-overflow" } */ ++/* Test all bidi chars in various contexts (identifiers, comments, ++ string literals, character constants), both UCN and UTF-8. The bidi ++ chars here are properly terminated, except for the character constants. */ ++ ++/* a b c LRE‪ 1 2 3 PDF‬ x y z */ ++/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ ++/* a b c RLE‫ 1 2 3 PDF‬ x y z */ ++/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ ++/* a b c LRO‭ 1 2 3 PDF‬ x y z */ ++/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ ++/* a b c RLO‮ 1 2 3 PDF‬ x y z */ ++/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ ++/* a b c LRI⁦ 1 2 3 PDI⁩ x y z */ ++/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */ ++/* a b c RLI⁧ 1 2 3 PDI⁩ x y */ ++/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */ ++/* a b c FSI⁨ 1 2 3 PDI⁩ x y z */ ++/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */ ++ ++/* Same but C++ comments instead. */ ++// a b c LRE‪ 1 2 3 PDF‬ x y z ++/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ ++// a b c RLE‫ 1 2 3 PDF‬ x y z ++/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ ++// a b c LRO‭ 1 2 3 PDF‬ x y z ++/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ ++// a b c RLO‮ 1 2 3 PDF‬ x y z ++/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ ++// a b c LRI⁦ 1 2 3 PDI⁩ x y z ++/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */ ++// a b c RLI⁧ 1 2 3 PDI⁩ x y ++/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */ ++// a b c FSI⁨ 1 2 3 PDI⁩ x y z ++/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */ ++ ++/* Here we're closing an unopened context, warn when =any. */ ++/* a b c PDI⁩ x y z */ ++/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */ ++/* a b c PDF‬ x y z */ ++/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */ ++// a b c PDI⁩ x y z ++/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */ ++// a b c PDF‬ x y z ++/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */ ++ ++/* Multiline comments. */ ++/* a b c PDI⁩ x y z ++ */ ++/* { dg-warning "U\\+2069" "" { target *-*-* } .-2 } */ ++/* a b c PDF‬ x y z ++ */ ++/* { dg-warning "U\\+202C" "" { target *-*-* } .-2 } */ ++/* first ++ a b c PDI⁩ x y z ++ */ ++/* { dg-warning "U\\+2069" "" { target *-*-* } .-2 } */ ++/* first ++ a b c PDF‬ x y z ++ */ ++/* { dg-warning "U\\+202C" "" { target *-*-* } .-2 } */ ++/* first ++ a b c PDI⁩ x y z */ ++/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */ ++/* first ++ a b c PDF‬ x y z */ ++/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */ ++ ++void ++g1 () ++{ ++ const char *s1 = "a b c LRE‪ 1 2 3 PDF‬ x y z"; ++/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ ++ const char *s2 = "a b c RLE‫ 1 2 3 PDF‬ x y z"; ++/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ ++ const char *s3 = "a b c LRO‭ 1 2 3 PDF‬ x y z"; ++/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ ++ const char *s4 = "a b c RLO‮ 1 2 3 PDF‬ x y z"; ++/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ ++ const char *s5 = "a b c LRI⁦ 1 2 3 PDI⁩ x y z"; ++/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */ ++ const char *s6 = "a b c RLI⁧ 1 2 3 PDI⁩ x y z"; ++/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */ ++ const char *s7 = "a b c FSI⁨ 1 2 3 PDI⁩ x y z"; ++/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */ ++ const char *s8 = "a b c PDI⁩ x y z"; ++/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */ ++ const char *s9 = "a b c PDF‬ x y z"; ++/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */ ++ ++ const char *s10 = "a b c LRE\u202a 1 2 3 PDF\u202c x y z"; ++/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ ++ const char *s11 = "a b c LRE\u202A 1 2 3 PDF\u202c x y z"; ++/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ ++ const char *s12 = "a b c RLE\u202b 1 2 3 PDF\u202c x y z"; ++/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ ++ const char *s13 = "a b c RLE\u202B 1 2 3 PDF\u202c x y z"; ++/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ ++ const char *s14 = "a b c LRO\u202d 1 2 3 PDF\u202c x y z"; ++/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ ++ const char *s15 = "a b c LRO\u202D 1 2 3 PDF\u202c x y z"; ++/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ ++ const char *s16 = "a b c RLO\u202e 1 2 3 PDF\u202c x y z"; ++/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ ++ const char *s17 = "a b c RLO\u202E 1 2 3 PDF\u202c x y z"; ++/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ ++ const char *s18 = "a b c LRI\u2066 1 2 3 PDI\u2069 x y z"; ++/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */ ++ const char *s19 = "a b c RLI\u2067 1 2 3 PDI\u2069 x y z"; ++/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */ ++ const char *s20 = "a b c FSI\u2068 1 2 3 PDI\u2069 x y z"; ++/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */ ++} ++ ++void ++g2 () ++{ ++ const char c1 = '\u202a'; ++/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ ++ const char c2 = '\u202A'; ++/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ ++ const char c3 = '\u202b'; ++/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ ++ const char c4 = '\u202B'; ++/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ ++ const char c5 = '\u202d'; ++/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ ++ const char c6 = '\u202D'; ++/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ ++ const char c7 = '\u202e'; ++/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ ++ const char c8 = '\u202E'; ++/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ ++ const char c9 = '\u2066'; ++/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */ ++ const char c10 = '\u2067'; ++/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */ ++ const char c11 = '\u2068'; ++/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */ ++} ++ ++int a‪b‬c; ++/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ ++int a‫b‬c; ++/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ ++int a‭b‬c; ++/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ ++int a‮b‬c; ++/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ ++int a⁦b⁩c; ++/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */ ++int a⁧b⁩c; ++/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */ ++int a⁨b⁩c; ++/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */ ++int A‬X; ++/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */ ++int A\u202cY; ++/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */ ++int A\u202CY2; ++/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */ ++ ++int d\u202ae\u202cf; ++/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ ++int d\u202Ae\u202cf2; ++/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ ++int d\u202be\u202cf; ++/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ ++int d\u202Be\u202cf2; ++/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ ++int d\u202de\u202cf; ++/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ ++int d\u202De\u202cf2; ++/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ ++int d\u202ee\u202cf; ++/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ ++int d\u202Ee\u202cf2; ++/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ ++int d\u2066e\u2069f; ++/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */ ++int d\u2067e\u2069f; ++/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */ ++int d\u2068e\u2069f; ++/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */ ++int X\u2069; ++/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */ +diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-5.c b/gcc/testsuite/c-c++-common/Wbidi-chars-5.c +new file mode 100644 +index 00000000000..68cb053144b +--- /dev/null ++++ b/gcc/testsuite/c-c++-common/Wbidi-chars-5.c +@@ -0,0 +1,188 @@ ++/* PR preprocessor/103026 */ ++/* { dg-do compile } */ ++/* { dg-options "-Wbidi-chars=unpaired -Wno-multichar -Wno-overflow" } */ ++/* Test all bidi chars in various contexts (identifiers, comments, ++ string literals, character constants), both UCN and UTF-8. The bidi ++ chars here are properly terminated, except for the character constants. */ ++ ++/* a b c LRE‪ 1 2 3 PDF‬ x y z */ ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++/* a b c RLE‫ 1 2 3 PDF‬ x y z */ ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++/* a b c LRO‭ 1 2 3 PDF‬ x y z */ ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++/* a b c RLO‮ 1 2 3 PDF‬ x y z */ ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++/* a b c LRI⁦ 1 2 3 PDI⁩ x y z */ ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++/* a b c RLI⁧ 1 2 3 PDI⁩ x y */ ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++/* a b c FSI⁨ 1 2 3 PDI⁩ x y z */ ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++ ++/* Same but C++ comments instead. */ ++// a b c LRE‪ 1 2 3 PDF‬ x y z ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++// a b c RLE‫ 1 2 3 PDF‬ x y z ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++// a b c LRO‭ 1 2 3 PDF‬ x y z ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++// a b c RLO‮ 1 2 3 PDF‬ x y z ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++// a b c LRI⁦ 1 2 3 PDI⁩ x y z ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++// a b c RLI⁧ 1 2 3 PDI⁩ x y ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++// a b c FSI⁨ 1 2 3 PDI⁩ x y z ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++ ++/* Here we're closing an unopened context, warn when =any. */ ++/* a b c PDI⁩ x y z */ ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++/* a b c PDF‬ x y z */ ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++// a b c PDI⁩ x y z ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++// a b c PDF‬ x y z ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++ ++/* Multiline comments. */ ++/* a b c PDI⁩ x y z ++ */ ++/* { dg-bogus "unpaired" "" { target *-*-* } .-2 } */ ++/* a b c PDF‬ x y z ++ */ ++/* { dg-bogus "unpaired" "" { target *-*-* } .-2 } */ ++/* first ++ a b c PDI⁩ x y z ++ */ ++/* { dg-bogus "unpaired" "" { target *-*-* } .-2 } */ ++/* first ++ a b c PDF‬ x y z ++ */ ++/* { dg-bogus "unpaired" "" { target *-*-* } .-2 } */ ++/* first ++ a b c PDI⁩ x y z */ ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++/* first ++ a b c PDF‬ x y z */ ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++ ++void ++g1 () ++{ ++ const char *s1 = "a b c LRE‪ 1 2 3 PDF‬ x y z"; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s2 = "a b c RLE‫ 1 2 3 PDF‬ x y z"; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s3 = "a b c LRO‭ 1 2 3 PDF‬ x y z"; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s4 = "a b c RLO‮ 1 2 3 PDF‬ x y z"; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s5 = "a b c LRI⁦ 1 2 3 PDI⁩ x y z"; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s6 = "a b c RLI⁧ 1 2 3 PDI⁩ x y z"; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s7 = "a b c FSI⁨ 1 2 3 PDI⁩ x y z"; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s8 = "a b c PDI⁩ x y z"; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s9 = "a b c PDF‬ x y z"; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++ ++ const char *s10 = "a b c LRE\u202a 1 2 3 PDF\u202c x y z"; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s11 = "a b c LRE\u202A 1 2 3 PDF\u202c x y z"; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s12 = "a b c RLE\u202b 1 2 3 PDF\u202c x y z"; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s13 = "a b c RLE\u202B 1 2 3 PDF\u202c x y z"; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s14 = "a b c LRO\u202d 1 2 3 PDF\u202c x y z"; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s15 = "a b c LRO\u202D 1 2 3 PDF\u202c x y z"; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s16 = "a b c RLO\u202e 1 2 3 PDF\u202c x y z"; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s17 = "a b c RLO\u202E 1 2 3 PDF\u202c x y z"; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s18 = "a b c LRI\u2066 1 2 3 PDI\u2069 x y z"; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s19 = "a b c RLI\u2067 1 2 3 PDI\u2069 x y z"; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s20 = "a b c FSI\u2068 1 2 3 PDI\u2069 x y z"; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++} ++ ++void ++g2 () ++{ ++ const char c1 = '\u202a'; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ const char c2 = '\u202A'; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ const char c3 = '\u202b'; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ const char c4 = '\u202B'; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ const char c5 = '\u202d'; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ const char c6 = '\u202D'; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ const char c7 = '\u202e'; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ const char c8 = '\u202E'; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ const char c9 = '\u2066'; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ const char c10 = '\u2067'; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ const char c11 = '\u2068'; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++} ++ ++int a‪b‬c; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++int a‫b‬c; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++int a‭b‬c; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++int a‮b‬c; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++int a⁦b⁩c; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++int a⁧b⁩c; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++int a⁨b⁩c; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++int A‬X; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++int A\u202cY; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++int A\u202CY2; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++ ++int d\u202ae\u202cf; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++int d\u202Ae\u202cf2; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++int d\u202be\u202cf; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++int d\u202Be\u202cf2; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++int d\u202de\u202cf; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++int d\u202De\u202cf2; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++int d\u202ee\u202cf; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++int d\u202Ee\u202cf2; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++int d\u2066e\u2069f; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++int d\u2067e\u2069f; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++int d\u2068e\u2069f; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++int X\u2069; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ +diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-6.c b/gcc/testsuite/c-c++-common/Wbidi-chars-6.c +new file mode 100644 +index 00000000000..0ce6fff2dee +--- /dev/null ++++ b/gcc/testsuite/c-c++-common/Wbidi-chars-6.c +@@ -0,0 +1,155 @@ ++/* PR preprocessor/103026 */ ++/* { dg-do compile } */ ++/* { dg-options "-Wbidi-chars=unpaired" } */ ++/* Test nesting of bidi chars in various contexts. */ ++ ++/* Terminated by the wrong char: */ ++/* a b c LRE‪ 1 2 3 PDI⁩ x y z */ ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++/* a b c RLE‫ 1 2 3 PDI⁩ x y z*/ ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++/* a b c LRO‭ 1 2 3 PDI⁩ x y z */ ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++/* a b c RLO‮ 1 2 3 PDI⁩ x y z */ ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++/* a b c LRI⁦ 1 2 3 PDF‬ x y z */ ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++/* a b c RLI⁧ 1 2 3 PDF‬ x y z */ ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++/* a b c FSI⁨ 1 2 3 PDF‬ x y z*/ ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ ++/* LRE‪ PDF‬ */ ++/* LRE‪ LRE‪ PDF‬ PDF‬ */ ++/* PDF‬ LRE‪ PDF‬ */ ++/* LRE‪ PDF‬ LRE‪ PDF‬ */ ++/* LRE‪ LRE‪ PDF‬ */ ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++/* PDF‬ LRE‪ */ ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ ++// a b c LRE‪ 1 2 3 PDI⁩ x y z ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++// a b c RLE‫ 1 2 3 PDI⁩ x y z*/ ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++// a b c LRO‭ 1 2 3 PDI⁩ x y z ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++// a b c RLO‮ 1 2 3 PDI⁩ x y z ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++// a b c LRI⁦ 1 2 3 PDF‬ x y z ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++// a b c RLI⁧ 1 2 3 PDF‬ x y z ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++// a b c FSI⁨ 1 2 3 PDF‬ x y z ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ ++// LRE‪ PDF‬ ++// LRE‪ LRE‪ PDF‬ PDF‬ ++// PDF‬ LRE‪ PDF‬ ++// LRE‪ PDF‬ LRE‪ PDF‬ ++// LRE‪ LRE‪ PDF‬ ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++// PDF‬ LRE‪ ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ ++void ++g1 () ++{ ++ const char *s1 = "a b c LRE‪ 1 2 3 PDI⁩ x y z"; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s2 = "a b c LRE\u202a 1 2 3 PDI\u2069 x y z"; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s3 = "a b c RLE‫ 1 2 3 PDI⁩ x y "; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s4 = "a b c RLE\u202b 1 2 3 PDI\u2069 x y z"; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s5 = "a b c LRO‭ 1 2 3 PDI⁩ x y z"; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s6 = "a b c LRO\u202d 1 2 3 PDI\u2069 x y z"; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s7 = "a b c RLO‮ 1 2 3 PDI⁩ x y z"; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s8 = "a b c RLO\u202e 1 2 3 PDI\u2069 x y z"; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s9 = "a b c LRI⁦ 1 2 3 PDF‬ x y z"; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s10 = "a b c LRI\u2066 1 2 3 PDF\u202c x y z"; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s11 = "a b c RLI⁧ 1 2 3 PDF‬ x y z\ ++ "; ++/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ ++ const char *s12 = "a b c RLI\u2067 1 2 3 PDF\u202c x y z"; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s13 = "a b c FSI⁨ 1 2 3 PDF‬ x y z"; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s14 = "a b c FSI\u2068 1 2 3 PDF\u202c x y z"; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s15 = "PDF‬ LRE‪"; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s16 = "PDF\u202c LRE\u202a"; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s17 = "LRE‪ PDF‬"; ++ const char *s18 = "LRE\u202a PDF\u202c"; ++ const char *s19 = "LRE‪ LRE‪ PDF‬ PDF‬"; ++ const char *s20 = "LRE\u202a LRE\u202a PDF\u202c PDF\u202c"; ++ const char *s21 = "PDF‬ LRE‪ PDF‬"; ++ const char *s22 = "PDF\u202c LRE\u202a PDF\u202c"; ++ const char *s23 = "LRE‪ LRE‪ PDF‬"; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s24 = "LRE\u202a LRE\u202a PDF\u202c"; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s25 = "PDF‬ LRE‪"; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s26 = "PDF\u202c LRE\u202a"; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s27 = "PDF‬ LRE\u202a"; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s28 = "PDF\u202c LRE‪"; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++} ++ ++int aLRE‪bPDI⁩; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++int A\u202aB\u2069C; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++int aRLE‫bPDI⁩; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++int a\u202bB\u2069c; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++int aLRO‭bPDI⁩; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++int a\u202db\u2069c2; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++int aRLO‮bPDI⁩; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++int a\u202eb\u2069; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++int aLRI⁦bPDF‬; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++int a\u2066b\u202c; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++int aRLI⁧bPDF‬c ++; ++/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ ++int a\u2067b\u202c; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++int aFSI⁨bPDF‬; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++int a\u2068b\u202c; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++int aFSI⁨bPD\u202C; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++int aFSI\u2068bPDF‬_; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++int aLRE‪bPDF‬b; ++int A\u202aB\u202c; ++int a_LRE‪_LRE‪_b_PDF‬_PDF‬; ++int A\u202aA\u202aB\u202cB\u202c; ++int aPDF‬bLREadPDF‬; ++int a_\u202C_\u202a_\u202c; ++int a_LRE‪_b_PDF‬_c_LRE‪_PDF‬; ++int a_\u202a_\u202c_\u202a_\u202c_; ++int a_LRE‪_b_PDF‬_c_LRE‪; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++int a_\u202a_\u202c_\u202a_; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-7.c b/gcc/testsuite/c-c++-common/Wbidi-chars-7.c +new file mode 100644 +index 00000000000..d012d420ec0 +--- /dev/null ++++ b/gcc/testsuite/c-c++-common/Wbidi-chars-7.c +@@ -0,0 +1,9 @@ ++/* PR preprocessor/103026 */ ++/* { dg-do compile } */ ++/* { dg-options "-Wbidi-chars=any" } */ ++/* Test we ignore UCNs in comments. */ ++ ++// a b c \u202a 1 2 3 ++// a b c \u202A 1 2 3 ++/* a b c \u202a 1 2 3 */ ++/* a b c \u202A 1 2 3 */ +diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-8.c b/gcc/testsuite/c-c++-common/Wbidi-chars-8.c +new file mode 100644 +index 00000000000..4f54c5092ec +--- /dev/null ++++ b/gcc/testsuite/c-c++-common/Wbidi-chars-8.c +@@ -0,0 +1,13 @@ ++/* PR preprocessor/103026 */ ++/* { dg-do compile } */ ++/* { dg-options "-Wbidi-chars=any" } */ ++/* Test \u vs \U. */ ++ ++int a_\u202A; ++/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ ++int a_\u202a_2; ++/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ ++int a_\U0000202A_3; ++/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ ++int a_\U0000202a_4; ++/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ +diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-9.c b/gcc/testsuite/c-c++-common/Wbidi-chars-9.c +new file mode 100644 +index 00000000000..e2af1b1ca97 +--- /dev/null ++++ b/gcc/testsuite/c-c++-common/Wbidi-chars-9.c +@@ -0,0 +1,29 @@ ++/* PR preprocessor/103026 */ ++/* { dg-do compile } */ ++/* { dg-options "-Wbidi-chars=unpaired" } */ ++/* Test that we properly separate bidi contexts (comment/identifier/character ++ constant/string literal). */ ++ ++/* LRE ->‪<- */ int pdf_\u202c_1; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++/* RLE ->‫<- */ int pdf_\u202c_2; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++/* LRO ->‭<- */ int pdf_\u202c_3; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++/* RLO ->‮<- */ int pdf_\u202c_4; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++/* LRI ->⁦<-*/ int pdi_\u2069_1; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++/* RLI ->⁧<- */ int pdi_\u2069_12; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++/* FSI ->⁨<- */ int pdi_\u2069_3; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ ++const char *s1 = "LRE\u202a"; /* PDF ->‬<- */ ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++/* LRE ->‪<- */ const char *s2 = "PDF\u202c"; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++const char *s3 = "LRE\u202a"; int pdf_\u202c_5; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++int lre_\u202a; const char *s4 = "PDF\u202c"; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h +index 176f8c5bbce..112b9c24751 100644 +--- a/libcpp/include/cpplib.h ++++ b/libcpp/include/cpplib.h +@@ -319,6 +319,17 @@ enum cpp_main_search + CMS_system, /* Search the system INCLUDE path. */ + }; + ++/* The possible bidirectional control characters checking levels, from least ++ restrictive to most. */ ++enum cpp_bidirectional_level { ++ /* No checking. */ ++ bidirectional_none, ++ /* Only detect unpaired uses of bidirectional control characters. */ ++ bidirectional_unpaired, ++ /* Detect any use of bidirectional control characters. */ ++ bidirectional_any ++}; ++ + /* This structure is nested inside struct cpp_reader, and + carries all the options visible to the command line. */ + struct cpp_options +@@ -539,6 +550,10 @@ struct cpp_options + /* True if warn about differences between C++98 and C++11. */ + bool cpp_warn_cxx11_compat; + ++ /* Nonzero if bidirectional control characters checking is on. See enum ++ cpp_bidirectional_level. */ ++ unsigned char cpp_warn_bidirectional; ++ + /* Dependency generation. */ + struct + { +@@ -643,7 +658,8 @@ enum cpp_warning_reason { + CPP_W_C90_C99_COMPAT, + CPP_W_C11_C2X_COMPAT, + CPP_W_CXX11_COMPAT, +- CPP_W_EXPANSION_TO_DEFINED ++ CPP_W_EXPANSION_TO_DEFINED, ++ CPP_W_BIDIRECTIONAL + }; + + /* Callback for header lookup for HEADER, which is the name of a +diff --git a/libcpp/init.c b/libcpp/init.c +index 5a424e23553..f9a8f5f088f 100644 +--- a/libcpp/init.c ++++ b/libcpp/init.c +@@ -223,6 +223,7 @@ cpp_create_reader (enum c_lang lang, cpp_hash_table *table, + = ENABLE_CANONICAL_SYSTEM_HEADERS; + CPP_OPTION (pfile, ext_numeric_literals) = 1; + CPP_OPTION (pfile, warn_date_time) = 0; ++ CPP_OPTION (pfile, cpp_warn_bidirectional) = bidirectional_unpaired; + + /* Default CPP arithmetic to something sensible for the host for the + benefit of dumb users like fix-header. */ +diff --git a/libcpp/internal.h b/libcpp/internal.h +index 8577cab6c83..0ce0246c5a2 100644 +--- a/libcpp/internal.h ++++ b/libcpp/internal.h +@@ -597,6 +597,13 @@ struct cpp_reader + /* Location identifying the main source file -- intended to be line + zero of said file. */ + location_t main_loc; ++ ++ /* Returns true iff we should warn about UTF-8 bidirectional control ++ characters. */ ++ bool warn_bidi_p () const ++ { ++ return CPP_OPTION (this, cpp_warn_bidirectional) != bidirectional_none; ++ } + }; + + /* Character classes. Based on the more primitive macros in safe-ctype.h. +diff --git a/libcpp/lex.c b/libcpp/lex.c +index fa2253d41c3..6a4fbce6030 100644 +--- a/libcpp/lex.c ++++ b/libcpp/lex.c +@@ -1164,6 +1164,324 @@ _cpp_process_line_notes (cpp_reader *pfile, int in_comment) + } + } + ++namespace bidi { ++ enum class kind { ++ NONE, LRE, RLE, LRO, RLO, LRI, RLI, FSI, PDF, PDI, LTR, RTL ++ }; ++ ++ /* All the UTF-8 encodings of bidi characters start with E2. */ ++ constexpr uchar utf8_start = 0xe2; ++ ++ /* A vector holding currently open bidi contexts. We use a char for ++ each context, its LSB is 1 if it represents a PDF context, 0 if it ++ represents a PDI context. The next bit is 1 if this context was open ++ by a bidi character written as a UCN, and 0 when it was UTF-8. */ ++ semi_embedded_vec vec; ++ ++ /* Close the whole comment/identifier/string literal/character constant ++ context. */ ++ void on_close () ++ { ++ vec.truncate (0); ++ } ++ ++ /* Pop the last element in the vector. */ ++ void pop () ++ { ++ unsigned int len = vec.count (); ++ gcc_checking_assert (len > 0); ++ vec.truncate (len - 1); ++ } ++ ++ /* Return the context of the Ith element. */ ++ kind ctx_at (unsigned int i) ++ { ++ return (vec[i] & 1) ? kind::PDF : kind::PDI; ++ } ++ ++ /* Return which context is currently opened. */ ++ kind current_ctx () ++ { ++ unsigned int len = vec.count (); ++ if (len == 0) ++ return kind::NONE; ++ return ctx_at (len - 1); ++ } ++ ++ /* Return true if the current context comes from a UCN origin, that is, ++ the bidi char which started this bidi context was written as a UCN. */ ++ bool current_ctx_ucn_p () ++ { ++ unsigned int len = vec.count (); ++ gcc_checking_assert (len > 0); ++ return (vec[len - 1] >> 1) & 1; ++ } ++ ++ /* We've read a bidi char, update the current vector as necessary. */ ++ void on_char (kind k, bool ucn_p) ++ { ++ switch (k) ++ { ++ case kind::LRE: ++ case kind::RLE: ++ case kind::LRO: ++ case kind::RLO: ++ vec.push (ucn_p ? 3u : 1u); ++ break; ++ case kind::LRI: ++ case kind::RLI: ++ case kind::FSI: ++ vec.push (ucn_p ? 2u : 0u); ++ break; ++ /* PDF terminates the scope of the last LRE, RLE, LRO, or RLO ++ whose scope has not yet been terminated. */ ++ case kind::PDF: ++ if (current_ctx () == kind::PDF) ++ pop (); ++ break; ++ /* PDI terminates the scope of the last LRI, RLI, or FSI whose ++ scope has not yet been terminated, as well as the scopes of ++ any subsequent LREs, RLEs, LROs, or RLOs whose scopes have not ++ yet been terminated. */ ++ case kind::PDI: ++ for (int i = vec.count () - 1; i >= 0; --i) ++ if (ctx_at (i) == kind::PDI) ++ { ++ vec.truncate (i); ++ break; ++ } ++ break; ++ case kind::LTR: ++ case kind::RTL: ++ /* These aren't popped by a PDF/PDI. */ ++ break; ++ [[likely]] case kind::NONE: ++ break; ++ default: ++ abort (); ++ } ++ } ++ ++ /* Return a descriptive string for K. */ ++ const char *to_str (kind k) ++ { ++ switch (k) ++ { ++ case kind::LRE: ++ return "U+202A (LEFT-TO-RIGHT EMBEDDING)"; ++ case kind::RLE: ++ return "U+202B (RIGHT-TO-LEFT EMBEDDING)"; ++ case kind::LRO: ++ return "U+202D (LEFT-TO-RIGHT OVERRIDE)"; ++ case kind::RLO: ++ return "U+202E (RIGHT-TO-LEFT OVERRIDE)"; ++ case kind::LRI: ++ return "U+2066 (LEFT-TO-RIGHT ISOLATE)"; ++ case kind::RLI: ++ return "U+2067 (RIGHT-TO-LEFT ISOLATE)"; ++ case kind::FSI: ++ return "U+2068 (FIRST STRONG ISOLATE)"; ++ case kind::PDF: ++ return "U+202C (POP DIRECTIONAL FORMATTING)"; ++ case kind::PDI: ++ return "U+2069 (POP DIRECTIONAL ISOLATE)"; ++ case kind::LTR: ++ return "U+200E (LEFT-TO-RIGHT MARK)"; ++ case kind::RTL: ++ return "U+200F (RIGHT-TO-LEFT MARK)"; ++ default: ++ abort (); ++ } ++ } ++} ++ ++/* Parse a sequence of 3 bytes starting with P and return its bidi code. */ ++ ++static bidi::kind ++get_bidi_utf8 (const unsigned char *const p) ++{ ++ gcc_checking_assert (p[0] == bidi::utf8_start); ++ ++ if (p[1] == 0x80) ++ switch (p[2]) ++ { ++ case 0xaa: ++ return bidi::kind::LRE; ++ case 0xab: ++ return bidi::kind::RLE; ++ case 0xac: ++ return bidi::kind::PDF; ++ case 0xad: ++ return bidi::kind::LRO; ++ case 0xae: ++ return bidi::kind::RLO; ++ case 0x8e: ++ return bidi::kind::LTR; ++ case 0x8f: ++ return bidi::kind::RTL; ++ default: ++ break; ++ } ++ else if (p[1] == 0x81) ++ switch (p[2]) ++ { ++ case 0xa6: ++ return bidi::kind::LRI; ++ case 0xa7: ++ return bidi::kind::RLI; ++ case 0xa8: ++ return bidi::kind::FSI; ++ case 0xa9: ++ return bidi::kind::PDI; ++ default: ++ break; ++ } ++ ++ return bidi::kind::NONE; ++} ++ ++/* Parse a UCN where P points just past \u or \U and return its bidi code. */ ++ ++static bidi::kind ++get_bidi_ucn (const unsigned char *p, bool is_U) ++{ ++ /* 6.4.3 Universal Character Names ++ \u hex-quad ++ \U hex-quad hex-quad ++ where \unnnn means \U0000nnnn. */ ++ ++ if (is_U) ++ { ++ if (p[0] != '0' || p[1] != '0' || p[2] != '0' || p[3] != '0') ++ return bidi::kind::NONE; ++ /* Skip 4B so we can treat \u and \U the same below. */ ++ p += 4; ++ } ++ ++ /* All code points we are looking for start with 20xx. */ ++ if (p[0] != '2' || p[1] != '0') ++ return bidi::kind::NONE; ++ else if (p[2] == '2') ++ switch (p[3]) ++ { ++ case 'a': ++ case 'A': ++ return bidi::kind::LRE; ++ case 'b': ++ case 'B': ++ return bidi::kind::RLE; ++ case 'c': ++ case 'C': ++ return bidi::kind::PDF; ++ case 'd': ++ case 'D': ++ return bidi::kind::LRO; ++ case 'e': ++ case 'E': ++ return bidi::kind::RLO; ++ default: ++ break; ++ } ++ else if (p[2] == '6') ++ switch (p[3]) ++ { ++ case '6': ++ return bidi::kind::LRI; ++ case '7': ++ return bidi::kind::RLI; ++ case '8': ++ return bidi::kind::FSI; ++ case '9': ++ return bidi::kind::PDI; ++ default: ++ break; ++ } ++ else if (p[2] == '0') ++ switch (p[3]) ++ { ++ case 'e': ++ case 'E': ++ return bidi::kind::LTR; ++ case 'f': ++ case 'F': ++ return bidi::kind::RTL; ++ default: ++ break; ++ } ++ ++ return bidi::kind::NONE; ++} ++ ++/* We're closing a bidi context, that is, we've encountered a newline, ++ are closing a C-style comment, or are at the end of a string literal, ++ character constant, or identifier. Warn if this context was not ++ properly terminated by a PDI or PDF. P points to the last character ++ in this context. */ ++ ++static void ++maybe_warn_bidi_on_close (cpp_reader *pfile, const uchar *p) ++{ ++ if (CPP_OPTION (pfile, cpp_warn_bidirectional) == bidirectional_unpaired ++ && bidi::vec.count () > 0) ++ { ++ const location_t loc ++ = linemap_position_for_column (pfile->line_table, ++ CPP_BUF_COLUMN (pfile->buffer, p)); ++ cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0, ++ "unpaired UTF-8 bidirectional control character " ++ "detected"); ++ } ++ /* We're done with this context. */ ++ bidi::on_close (); ++} ++ ++/* We're at the beginning or in the middle of an identifier/comment/string ++ literal/character constant. Warn if we've encountered a bidi character. ++ KIND says which bidi character it was; P points to it in the character ++ stream. UCN_P is true iff this bidi character was written as a UCN. */ ++ ++static void ++maybe_warn_bidi_on_char (cpp_reader *pfile, const uchar *p, bidi::kind kind, ++ bool ucn_p) ++{ ++ if (__builtin_expect (kind == bidi::kind::NONE, 1)) ++ return; ++ ++ const auto warn_bidi = CPP_OPTION (pfile, cpp_warn_bidirectional); ++ ++ if (warn_bidi != bidirectional_none) ++ { ++ const location_t loc ++ = linemap_position_for_column (pfile->line_table, ++ CPP_BUF_COLUMN (pfile->buffer, p)); ++ /* It seems excessive to warn about a PDI/PDF that is closing ++ an opened context because we've already warned about the ++ opening character. Except warn when we have a UCN x UTF-8 ++ mismatch. */ ++ if (kind == bidi::current_ctx ()) ++ { ++ if (warn_bidi == bidirectional_unpaired ++ && bidi::current_ctx_ucn_p () != ucn_p) ++ cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0, ++ "UTF-8 vs UCN mismatch when closing " ++ "a context by \"%s\"", bidi::to_str (kind)); ++ } ++ else if (warn_bidi == bidirectional_any) ++ { ++ if (kind == bidi::kind::PDF || kind == bidi::kind::PDI) ++ cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0, ++ "\"%s\" is closing an unopened context", ++ bidi::to_str (kind)); ++ else ++ cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0, ++ "found problematic Unicode character \"%s\"", ++ bidi::to_str (kind)); ++ } ++ } ++ /* We're done with this context. */ ++ bidi::on_char (kind, ucn_p); ++} ++ + /* Skip a C-style block comment. We find the end of the comment by + seeing if an asterisk is before every '/' we encounter. Returns + nonzero if comment terminated by EOF, zero otherwise. +@@ -1175,6 +1493,7 @@ _cpp_skip_block_comment (cpp_reader *pfile) + cpp_buffer *buffer = pfile->buffer; + const uchar *cur = buffer->cur; + uchar c; ++ const bool warn_bidi_p = pfile->warn_bidi_p (); + + cur++; + if (*cur == '/') +@@ -1189,7 +1508,11 @@ _cpp_skip_block_comment (cpp_reader *pfile) + if (c == '/') + { + if (cur[-2] == '*') +- break; ++ { ++ if (warn_bidi_p) ++ maybe_warn_bidi_on_close (pfile, cur); ++ break; ++ } + + /* Warn about potential nested comments, but not if the '/' + comes immediately before the true comment delimiter. +@@ -1208,6 +1531,8 @@ _cpp_skip_block_comment (cpp_reader *pfile) + { + unsigned int cols; + buffer->cur = cur - 1; ++ if (warn_bidi_p) ++ maybe_warn_bidi_on_close (pfile, cur); + _cpp_process_line_notes (pfile, true); + if (buffer->next_line >= buffer->rlimit) + return true; +@@ -1218,6 +1543,13 @@ _cpp_skip_block_comment (cpp_reader *pfile) + + cur = buffer->cur; + } ++ /* If this is a beginning of a UTF-8 encoding, it might be ++ a bidirectional control character. */ ++ else if (__builtin_expect (c == bidi::utf8_start, 0) && warn_bidi_p) ++ { ++ bidi::kind kind = get_bidi_utf8 (cur - 1); ++ maybe_warn_bidi_on_char (pfile, cur, kind, /*ucn_p=*/false); ++ } + } + + buffer->cur = cur; +@@ -1233,9 +1565,31 @@ skip_line_comment (cpp_reader *pfile) + { + cpp_buffer *buffer = pfile->buffer; + location_t orig_line = pfile->line_table->highest_line; ++ const bool warn_bidi_p = pfile->warn_bidi_p (); + +- while (*buffer->cur != '\n') +- buffer->cur++; ++ if (!warn_bidi_p) ++ while (*buffer->cur != '\n') ++ buffer->cur++; ++ else ++ { ++ while (*buffer->cur != '\n' ++ && *buffer->cur != bidi::utf8_start) ++ buffer->cur++; ++ if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0)) ++ { ++ while (*buffer->cur != '\n') ++ { ++ if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0)) ++ { ++ bidi::kind kind = get_bidi_utf8 (buffer->cur); ++ maybe_warn_bidi_on_char (pfile, buffer->cur, kind, ++ /*ucn_p=*/false); ++ } ++ buffer->cur++; ++ } ++ maybe_warn_bidi_on_close (pfile, buffer->cur); ++ } ++ } + + _cpp_process_line_notes (pfile, true); + return orig_line != pfile->line_table->highest_line; +@@ -1346,11 +1700,13 @@ static const cppchar_t utf8_signifier = 0xC0; + + /* Returns TRUE if the sequence starting at buffer->cur is valid in + an identifier. FIRST is TRUE if this starts an identifier. */ ++ + static bool + forms_identifier_p (cpp_reader *pfile, int first, + struct normalize_state *state) + { + cpp_buffer *buffer = pfile->buffer; ++ const bool warn_bidi_p = pfile->warn_bidi_p (); + + if (*buffer->cur == '$') + { +@@ -1373,6 +1729,13 @@ forms_identifier_p (cpp_reader *pfile, int first, + cppchar_t s; + if (*buffer->cur >= utf8_signifier) + { ++ if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0) ++ && warn_bidi_p) ++ { ++ bidi::kind kind = get_bidi_utf8 (buffer->cur); ++ maybe_warn_bidi_on_char (pfile, buffer->cur, kind, ++ /*ucn_p=*/false); ++ } + if (_cpp_valid_utf8 (pfile, &buffer->cur, buffer->rlimit, 1 + !first, + state, &s)) + return true; +@@ -1381,6 +1744,13 @@ forms_identifier_p (cpp_reader *pfile, int first, + && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U')) + { + buffer->cur += 2; ++ if (warn_bidi_p) ++ { ++ bidi::kind kind = get_bidi_ucn (buffer->cur, ++ buffer->cur[-1] == 'U'); ++ maybe_warn_bidi_on_char (pfile, buffer->cur, kind, ++ /*ucn_p=*/true); ++ } + if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first, + state, &s, NULL, NULL)) + return true; +@@ -1489,6 +1859,7 @@ lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn, + const uchar *cur; + unsigned int len; + unsigned int hash = HT_HASHSTEP (0, *base); ++ const bool warn_bidi_p = pfile->warn_bidi_p (); + + cur = pfile->buffer->cur; + if (! starts_ucn) +@@ -1512,6 +1883,8 @@ lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn, + pfile->buffer->cur++; + } + } while (forms_identifier_p (pfile, false, nst)); ++ if (warn_bidi_p) ++ maybe_warn_bidi_on_close (pfile, pfile->buffer->cur); + result = _cpp_interpret_identifier (pfile, base, + pfile->buffer->cur - base); + *spelling = cpp_lookup (pfile, base, pfile->buffer->cur - base); +@@ -1758,6 +2131,7 @@ static void + lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base) + { + const uchar *pos = base; ++ const bool warn_bidi_p = pfile->warn_bidi_p (); + + /* 'tis a pity this information isn't passed down from the lexer's + initial categorization of the token. */ +@@ -1994,8 +2368,15 @@ lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base) + pos = base = pfile->buffer->cur; + note = &pfile->buffer->notes[pfile->buffer->cur_note]; + } ++ else if (__builtin_expect ((unsigned char) c == bidi::utf8_start, 0) ++ && warn_bidi_p) ++ maybe_warn_bidi_on_char (pfile, pos - 1, get_bidi_utf8 (pos - 1), ++ /*ucn_p=*/false); + } + ++ if (warn_bidi_p) ++ maybe_warn_bidi_on_close (pfile, pos); ++ + if (CPP_OPTION (pfile, user_literals)) + { + /* If a string format macro, say from inttypes.h, is placed touching +@@ -2090,15 +2471,27 @@ lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base) + else + terminator = '>', type = CPP_HEADER_NAME; + ++ const bool warn_bidi_p = pfile->warn_bidi_p (); + for (;;) + { + cppchar_t c = *cur++; + + /* In #include-style directives, terminators are not escapable. */ + if (c == '\\' && !pfile->state.angled_headers && *cur != '\n') +- cur++; ++ { ++ if ((cur[0] == 'u' || cur[0] == 'U') && warn_bidi_p) ++ { ++ bidi::kind kind = get_bidi_ucn (cur + 1, cur[0] == 'U'); ++ maybe_warn_bidi_on_char (pfile, cur, kind, /*ucn_p=*/true); ++ } ++ cur++; ++ } + else if (c == terminator) +- break; ++ { ++ if (warn_bidi_p) ++ maybe_warn_bidi_on_close (pfile, cur - 1); ++ break; ++ } + else if (c == '\n') + { + cur--; +@@ -2115,6 +2508,11 @@ lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base) + } + else if (c == '\0') + saw_NUL = true; ++ else if (__builtin_expect (c == bidi::utf8_start, 0) && warn_bidi_p) ++ { ++ bidi::kind kind = get_bidi_utf8 (cur - 1); ++ maybe_warn_bidi_on_char (pfile, cur - 1, kind, /*ucn_p=*/false); ++ } + } + + if (saw_NUL && !pfile->state.skipping) diff --git a/SPECS/gcc.spec b/SPECS/gcc.spec index 3c2964d..919bb56 100644 --- a/SPECS/gcc.spec +++ b/SPECS/gcc.spec @@ -118,7 +118,7 @@ Summary: Various compilers (C, C++, Objective-C, ...) Name: gcc Version: %{gcc_version} -Release: %{gcc_release}%{?dist} +Release: %{gcc_release}.1%{?dist} # libgcc, libgfortran, libgomp, libstdc++ and crtstuff have # GCC Runtime Exception. License: GPLv3+ and GPLv3+ with exceptions and GPLv2+ with exceptions and LGPLv2+ and BSD @@ -269,6 +269,7 @@ Patch14: gcc11-libgcc-link.patch Patch15: gcc11-pr101786.patch Patch16: gcc11-stringify-__VA_OPT__.patch Patch17: gcc11-pr102642.patch +Patch18: gcc11-Wbidi-chars.patch Patch100: gcc11-fortran-fdec-duplicates.patch Patch101: gcc11-fortran-flogical-as-integer.patch @@ -795,6 +796,7 @@ to NVidia PTX capable devices if available. %patch15 -p0 -b .pr101786~ %patch16 -p0 -b .stringify-__VA_OPT__~ %patch17 -p0 -b .pr102642~ +%patch18 -p1 -b .bidi~ %if 0%{?rhel} >= 9 %patch100 -p1 -b .fortran-fdec-duplicates~ @@ -3143,6 +3145,9 @@ end %endif %changelog +* Mon Nov 29 2021 Marek Polacek 11.2.1-6.1 +- add -Wbidi-chars patch (#2008393) + * Wed Oct 27 2021 Marek Polacek 11.2.1-6 - update from releases/gcc-11-branch (#1996858) - build target shared libraries with -Wl,-z,relro,-z,now