diff --git a/SOURCES/gcc8-Wbidi-chars.patch b/SOURCES/gcc8-Wbidi-chars.patch new file mode 100644 index 0000000..988defe --- /dev/null +++ b/SOURCES/gcc8-Wbidi-chars.patch @@ -0,0 +1,1644 @@ +commit 51c500269bf53749b107807d84271385fad35628 +Author: Marek Polacek +Date: Wed Oct 6 14:33:59 2021 -0400 + + libcpp: Implement -Wbidi-chars for CVE-2021-42574 [PR103026] + + From a link below: + "An issue was discovered in the Bidirectional Algorithm in the Unicode + Specification through 14.0. It permits the visual reordering of + characters via control sequences, which can be used to craft source code + that renders different logic than the logical ordering of tokens + ingested by compilers and interpreters. Adversaries can leverage this to + encode source code for compilers accepting Unicode such that targeted + vulnerabilities are introduced invisibly to human reviewers." + + More info: + https://nvd.nist.gov/vuln/detail/CVE-2021-42574 + https://trojansource.codes/ + + This is not a compiler bug. However, to mitigate the problem, this patch + implements -Wbidi-chars=[none|unpaired|any] to warn about possibly + misleading Unicode bidirectional control characters the preprocessor may + encounter. + + The default is =unpaired, which warns about improperly terminated + bidirectional control characters; e.g. a LRE without its corresponding PDF. + The level =any warns about any use of bidirectional control characters. + + This patch handles both UCNs and UTF-8 characters. UCNs designating + bidi characters in identifiers are accepted since r204886. Then r217144 + enabled -fextended-identifiers by default. Extended characters in C/C++ + identifiers have been accepted since r275979. However, this patch still + warns about mixing UTF-8 and UCN bidi characters; there seems to be no + good reason to allow mixing them. + + We warn in different contexts: comments (both C and C++-style), string + literals, character constants, and identifiers. 
Expectedly, UCNs are ignored + in comments and raw string literals. The bidirectional control characters + can nest so this patch handles that as well. + + I have not included nor tested this at all with Fortran (which also has + string literals and line comments). + + Dave M. posted patches improving diagnostic involving Unicode characters. + This patch does not make use of this new infrastructure yet. + + PR preprocessor/103026 + + gcc/c-family/ChangeLog: + + * c.opt (Wbidi-chars, Wbidi-chars=): New option. + + gcc/ChangeLog: + + * doc/invoke.texi: Document -Wbidi-chars. + + libcpp/ChangeLog: + + * include/cpplib.h (enum cpp_bidirectional_level): New. + (struct cpp_options): Add cpp_warn_bidirectional. + (enum cpp_warning_reason): Add CPP_W_BIDIRECTIONAL. + * internal.h (struct cpp_reader): Add warn_bidi_p member + function. + * init.c (cpp_create_reader): Set cpp_warn_bidirectional. + * lex.c (bidi): New namespace. + (get_bidi_utf8): New function. + (get_bidi_ucn): Likewise. + (maybe_warn_bidi_on_close): Likewise. + (maybe_warn_bidi_on_char): Likewise. + (_cpp_skip_block_comment): Implement warning about bidirectional + control characters. + (skip_line_comment): Likewise. + (forms_identifier_p): Likewise. + (lex_identifier): Likewise. + (lex_string): Likewise. + (lex_raw_string): Likewise. + + gcc/testsuite/ChangeLog: + + * c-c++-common/Wbidi-chars-1.c: New test. + * c-c++-common/Wbidi-chars-2.c: New test. + * c-c++-common/Wbidi-chars-3.c: New test. + * c-c++-common/Wbidi-chars-4.c: New test. + * c-c++-common/Wbidi-chars-5.c: New test. + * c-c++-common/Wbidi-chars-6.c: New test. + * c-c++-common/Wbidi-chars-7.c: New test. + * c-c++-common/Wbidi-chars-8.c: New test. + * c-c++-common/Wbidi-chars-9.c: New test. + * c-c++-common/Wbidi-chars-10.c: New test. + * c-c++-common/Wbidi-chars-11.c: New test. + * c-c++-common/Wbidi-chars-12.c: New test. + * c-c++-common/Wbidi-chars-13.c: New test. + * c-c++-common/Wbidi-chars-14.c: New test. 
+ * c-c++-common/Wbidi-chars-15.c: New test. + * c-c++-common/Wbidi-chars-16.c: New test. + * c-c++-common/Wbidi-chars-17.c: New test. + +diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt +index f591b39be5a..cf922812198 100644 +--- a/gcc/c-family/c.opt ++++ b/gcc/c-family/c.opt +@@ -334,6 +334,30 @@ Wbad-function-cast + C ObjC Var(warn_bad_function_cast) Warning + Warn about casting functions to incompatible types. + ++Wbidi-chars ++C ObjC C++ ObjC++ Warning Alias(Wbidi-chars=,any,none) ++; ++ ++Wbidi-chars= ++C ObjC C++ ObjC++ RejectNegative Joined Warning CPP(cpp_warn_bidirectional) CppReason(CPP_W_BIDIRECTIONAL) Var(warn_bidirectional) Init(bidirectional_unpaired) Enum(cpp_bidirectional_level) ++-Wbidi-chars=[none|unpaired|any] Warn about UTF-8 bidirectional control characters. ++ ++; Required for these enum values. ++SourceInclude ++cpplib.h ++ ++Enum ++Name(cpp_bidirectional_level) Type(int) UnknownError(argument %qs to %<-Wbidi-chars%> not recognized) ++ ++EnumValue ++Enum(cpp_bidirectional_level) String(none) Value(bidirectional_none) ++ ++EnumValue ++Enum(cpp_bidirectional_level) String(unpaired) Value(bidirectional_unpaired) ++ ++EnumValue ++Enum(cpp_bidirectional_level) String(any) Value(bidirectional_any) ++ + Wbool-compare + C ObjC C++ ObjC++ Var(warn_bool_compare) Warning LangEnabledBy(C ObjC C++ ObjC++,Wall) + Warn about boolean expression compared with an integer value different from true/false. +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index 78ca7738df2..cc85c53aede 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -264,7 +264,8 @@ Objective-C and Objective-C++ Dialects}. 
+ -Walloc-zero -Walloc-size-larger-than=@var{n} + -Walloca -Walloca-larger-than=@var{n} @gol + -Wno-aggressive-loop-optimizations -Warray-bounds -Warray-bounds=@var{n} @gol +--Wno-attributes -Wbool-compare -Wbool-operation @gol ++-Wno-attributes -Wbidi-chars=@r{[}none@r{|}unpaired@r{|}any@r{]} @gol ++-Wbool-compare -Wbool-operation @gol + -Wno-builtin-declaration-mismatch @gol + -Wno-builtin-macro-redefined -Wc90-c99-compat -Wc99-c11-compat @gol + -Wc++-compat -Wc++11-compat -Wc++14-compat @gol +@@ -5606,6 +5607,23 @@ Warn about declarations using the @code{alias} and similar attributes whose + target is incompatible with the type of the alias. @xref{Function Attributes, + ,Declaring Attributes of Functions}. + ++@item -Wbidi-chars=@r{[}none@r{|}unpaired@r{|}any@r{]} ++@opindex Wbidi-chars= ++@opindex Wbidi-chars ++@opindex Wno-bidi-chars ++Warn about possibly misleading UTF-8 bidirectional control characters in ++comments, string literals, character constants, and identifiers. Such ++characters can change left-to-right writing direction into right-to-left ++(and vice versa), which can cause confusion between the logical order and ++visual order. This may be dangerous; for instance, it may seem that a piece ++of code is not commented out, whereas it in fact is. ++ ++There are three levels of warning supported by GCC@. The default is ++@option{-Wbidi-chars=unpaired}, which warns about improperly terminated ++bidi contexts. @option{-Wbidi-chars=none} turns the warning off. ++@option{-Wbidi-chars=any} warns about any use of bidirectional control ++characters. 
++ + @item -Wbool-compare + @opindex Wno-bool-compare + @opindex Wbool-compare +diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-1.c b/gcc/testsuite/c-c++-common/Wbidi-chars-1.c +new file mode 100644 +index 00000000000..34f5ac19271 +--- /dev/null ++++ b/gcc/testsuite/c-c++-common/Wbidi-chars-1.c +@@ -0,0 +1,12 @@ ++/* PR preprocessor/103026 */ ++/* { dg-do compile } */ ++ ++int main() { ++ int isAdmin = 0; ++ /*‮ } ⁦if (isAdmin)⁩ ⁦ begin admins only */ ++/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */ ++ __builtin_printf("You are an admin.\n"); ++ /* end admins only ‮ { ⁦*/ ++/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */ ++ return 0; ++} +diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-10.c b/gcc/testsuite/c-c++-common/Wbidi-chars-10.c +new file mode 100644 +index 00000000000..3f851b69e65 +--- /dev/null ++++ b/gcc/testsuite/c-c++-common/Wbidi-chars-10.c +@@ -0,0 +1,27 @@ ++/* PR preprocessor/103026 */ ++/* { dg-do compile } */ ++/* { dg-options "-Wbidi-chars=unpaired" } */ ++/* More nesting testing. 
*/ ++ ++/* RLE‫ LRI⁦ PDF‬ PDI⁩*/ ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++int LRE_\u202a_PDF_\u202c; ++int LRE_\u202a_PDF_\u202c_LRE_\u202a_PDF_\u202c; ++int LRE_\u202a_LRI_\u2066_PDF_\u202c_PDI_\u2069; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++int RLE_\u202b_RLI_\u2067_PDF_\u202c_PDI_\u2069; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++int RLE_\u202b_RLI_\u2067_PDI_\u2069_PDF_\u202c; ++int FSI_\u2068_LRO_\u202d_PDI_\u2069_PDF_\u202c; ++int FSI_\u2068; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++int FSI_\u2068_PDI_\u2069; ++int FSI_\u2068_FSI_\u2068_PDI_\u2069; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069; ++int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDF_\u202c; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_FSI_\u2068_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-11.c b/gcc/testsuite/c-c++-common/Wbidi-chars-11.c +new file mode 100644 +index 00000000000..44d044d82de +--- /dev/null ++++ b/gcc/testsuite/c-c++-common/Wbidi-chars-11.c +@@ -0,0 +1,9 @@ ++/* PR preprocessor/103026 */ ++/* { dg-do compile } */ ++/* { dg-options "-Wbidi-chars=unpaired" } */ ++/* Test that we warn when mixing UCN and UTF-8. 
*/ ++ ++const char *s1 = "LRE_‪_PDF_\u202c"; ++/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */ ++const char *s2 = "LRE_\u202a_PDF_‬"; ++/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */ +diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-12.c b/gcc/testsuite/c-c++-common/Wbidi-chars-12.c +new file mode 100644 +index 00000000000..b07eec1da91 +--- /dev/null ++++ b/gcc/testsuite/c-c++-common/Wbidi-chars-12.c +@@ -0,0 +1,19 @@ ++/* PR preprocessor/103026 */ ++/* { dg-do compile { target { c || c++11 } } } */ ++/* { dg-options "-Wbidi-chars=any" } */ ++/* Test raw strings. */ ++ ++const char *s1 = R"(a b c LRE‪ 1 2 3 PDF‬ x y z)"; ++/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ ++const char *s2 = R"(a b c RLE‫ 1 2 3 PDF‬ x y z)"; ++/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ ++const char *s3 = R"(a b c LRO‭ 1 2 3 PDF‬ x y z)"; ++/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ ++const char *s4 = R"(a b c RLO‮ 1 2 3 PDF‬ x y z)"; ++/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ ++const char *s7 = R"(a b c FSI⁨ 1 2 3 PDI⁩ x y) z"; ++/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */ ++const char *s8 = R"(a b c PDI⁩ x y )z"; ++/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */ ++const char *s9 = R"(a b c PDF‬ x y z)"; ++/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */ +diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-13.c b/gcc/testsuite/c-c++-common/Wbidi-chars-13.c +new file mode 100644 +index 00000000000..b2dd9fde752 +--- /dev/null ++++ b/gcc/testsuite/c-c++-common/Wbidi-chars-13.c +@@ -0,0 +1,17 @@ ++/* PR preprocessor/103026 */ ++/* { dg-do compile { target { c || c++11 } } } */ ++/* { dg-options "-Wbidi-chars=unpaired" } */ ++/* Test raw strings. 
*/ ++ ++const char *s1 = R"(a b c LRE‪ 1 2 3)"; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++const char *s2 = R"(a b c RLE‫ 1 2 3)"; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++const char *s3 = R"(a b c LRO‭ 1 2 3)"; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++const char *s4 = R"(a b c FSI⁨ 1 2 3)"; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++const char *s5 = R"(a b c LRI⁦ 1 2 3)"; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++const char *s6 = R"(a b c RLI⁧ 1 2 3)"; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-14.c b/gcc/testsuite/c-c++-common/Wbidi-chars-14.c +new file mode 100644 +index 00000000000..ba5f75d9553 +--- /dev/null ++++ b/gcc/testsuite/c-c++-common/Wbidi-chars-14.c +@@ -0,0 +1,38 @@ ++/* PR preprocessor/103026 */ ++/* { dg-do compile } */ ++/* { dg-options "-Wbidi-chars=unpaired" } */ ++/* Test PDI handling, which also pops any subsequent LREs, RLEs, LROs, ++ or RLOs. 
*/ ++ ++/* LRI_⁦_LRI_⁦_RLE_‫_RLE_‫_RLE_‫_PDI_⁩*/ ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++// LRI_⁦_RLE_‫_RLE_‫_RLE_‫_PDI_⁩ ++// LRI_⁦_RLO_‮_RLE_‫_RLE_‫_PDI_⁩ ++// LRI_⁦_RLO_‮_RLE_‫_PDI_⁩ ++// FSI_⁨_RLO_‮_PDI_⁩ ++// FSI_⁨_FSI_⁨_RLO_‮_PDI_⁩ ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ ++int LRI_\u2066_LRI_\u2066_LRE_\u202a_LRE_\u202a_LRE_\u202a_PDI_\u2069; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++int LRI_\u2066_LRI_\u2066_LRE_\u202a_LRE_\u202a_LRE_\u202a_PDI_\u2069_PDI_\u2069; ++int LRI_\u2066_LRI_\u2066_LRI_\u2066_LRE_\u202a_LRE_\u202a_LRE_\u202a_PDI_\u2069_PDI_\u2069; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++int PDI_\u2069; ++int LRI_\u2066_PDI_\u2069; ++int RLI_\u2067_PDI_\u2069; ++int LRE_\u202a_LRI_\u2066_PDI_\u2069; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++int LRI_\u2066_LRE_\u202a_PDF_\u202c_PDI_\u2069; ++int LRI_\u2066_LRE_\u202a_LRE_\u202a_PDF_\u202c_PDI_\u2069; ++int RLI_\u2067_LRI_\u2066_LRE_\u202a_LRE_\u202a_PDF_\u202c_PDI_\u2069; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++int FSI_\u2068_LRI_\u2066_LRE_\u202a_LRE_\u202a_PDF_\u202c_PDI_\u2069; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++int RLO_\u202e_PDI_\u2069; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++int RLI_\u2067_PDI_\u2069_RLI_\u2067; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++int FSI_\u2068_PDF_\u202c_PDI_\u2069; ++int FSI_\u2068_FSI_\u2068_PDF_\u202c_PDI_\u2069; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-15.c b/gcc/testsuite/c-c++-common/Wbidi-chars-15.c +new file mode 100644 +index 00000000000..a0ce8ff5e2c +--- /dev/null ++++ b/gcc/testsuite/c-c++-common/Wbidi-chars-15.c +@@ -0,0 +1,59 @@ ++/* PR preprocessor/103026 */ ++/* { dg-do compile } */ ++/* { dg-options "-Wbidi-chars=unpaired" } */ ++/* Test unpaired bidi control chars in multiline comments. 
*/ ++ ++/* ++ * LRE‪ end ++ */ ++/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ ++/* ++ * RLE‫ end ++ */ ++/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ ++/* ++ * LRO‭ end ++ */ ++/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ ++/* ++ * RLO‮ end ++ */ ++/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ ++/* ++ * LRI⁦ end ++ */ ++/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ ++/* ++ * RLI⁧ end ++ */ ++/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ ++/* ++ * FSI⁨ end ++ */ ++/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ ++/* LRE‪ ++ PDF‬ */ ++/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ ++/* FSI⁨ ++ PDI⁩ */ ++/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ ++ ++/* LRE<‪> ++ * ++ */ ++/* { dg-warning "unpaired" "" { target *-*-* } .-3 } */ ++ ++/* ++ * LRE<‪> ++ */ ++/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ ++ ++/* ++ * ++ * LRE<‪> */ ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ ++/* RLI<⁧> */ /* PDI<⁩> */ ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++/* LRE<‪> */ /* PDF<‬> */ ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-16.c b/gcc/testsuite/c-c++-common/Wbidi-chars-16.c +new file mode 100644 +index 00000000000..baa0159861c +--- /dev/null ++++ b/gcc/testsuite/c-c++-common/Wbidi-chars-16.c +@@ -0,0 +1,26 @@ ++/* PR preprocessor/103026 */ ++/* { dg-do compile } */ ++/* { dg-options "-Wbidi-chars=any" } */ ++/* Test LTR/RTL chars. 
*/ ++ ++/* LTR<‎> */ ++/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */ ++// LTR<‎> ++/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */ ++/* RTL<‏> */ ++/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */ ++// RTL<‏> ++/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */ ++ ++const char *s1 = "LTR<‎>"; ++/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */ ++const char *s2 = "LTR\u200e"; ++/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */ ++const char *s3 = "LTR\u200E"; ++/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */ ++const char *s4 = "RTL<‏>"; ++/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */ ++const char *s5 = "RTL\u200f"; ++/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */ ++const char *s6 = "RTL\u200F"; ++/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */ +diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-17.c b/gcc/testsuite/c-c++-common/Wbidi-chars-17.c +new file mode 100644 +index 00000000000..07cb4321f96 +--- /dev/null ++++ b/gcc/testsuite/c-c++-common/Wbidi-chars-17.c +@@ -0,0 +1,30 @@ ++/* PR preprocessor/103026 */ ++/* { dg-do compile } */ ++/* { dg-options "-Wbidi-chars=unpaired" } */ ++/* Test LTR/RTL chars. 
*/ ++ ++/* LTR<‎> */ ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++// LTR<‎> ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++/* RTL<‏> */ ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++// RTL<‏> ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++int ltr_\u200e; ++/* { dg-error "universal character " "" { target *-*-* } .-1 } */ ++int rtl_\u200f; ++/* { dg-error "universal character " "" { target *-*-* } .-1 } */ ++ ++const char *s1 = "LTR<‎>"; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++const char *s2 = "LTR\u200e"; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++const char *s3 = "LTR\u200E"; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++const char *s4 = "RTL<‏>"; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++const char *s5 = "RTL\u200f"; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++const char *s6 = "RTL\u200F"; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ +diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-2.c b/gcc/testsuite/c-c++-common/Wbidi-chars-2.c +new file mode 100644 +index 00000000000..2340374f276 +--- /dev/null ++++ b/gcc/testsuite/c-c++-common/Wbidi-chars-2.c +@@ -0,0 +1,9 @@ ++/* PR preprocessor/103026 */ ++/* { dg-do compile } */ ++ ++int main() { ++ /* Say hello; newline⁧/*/ return 0 ; ++/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */ ++ __builtin_printf("Hello world.\n"); ++ return 0; ++} +diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-3.c b/gcc/testsuite/c-c++-common/Wbidi-chars-3.c +new file mode 100644 +index 00000000000..9dc7edb6e64 +--- /dev/null ++++ b/gcc/testsuite/c-c++-common/Wbidi-chars-3.c +@@ -0,0 +1,11 @@ ++/* PR preprocessor/103026 */ ++/* { dg-do compile } */ ++ ++int main() { ++ const char* access_level = "user"; ++ if (__builtin_strcmp(access_level, "user‮ ⁦// Check if admin⁩ ⁦")) { ++/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */ ++ __builtin_printf("You are an admin.\n"); ++ } ++ return 
0; ++} +diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-4.c b/gcc/testsuite/c-c++-common/Wbidi-chars-4.c +new file mode 100644 +index 00000000000..49f856b9bfe +--- /dev/null ++++ b/gcc/testsuite/c-c++-common/Wbidi-chars-4.c +@@ -0,0 +1,172 @@ ++/* PR preprocessor/103026 */ ++/* { dg-do compile } */ ++/* { dg-options "-Wbidi-chars=any -Wno-multichar -Wno-overflow" } */ ++/* Test all bidi chars in various contexts (identifiers, comments, ++ string literals, character constants), both UCN and UTF-8. The bidi ++ chars here are properly terminated, except for the character constants. */ ++ ++/* a b c LRE‪ 1 2 3 PDF‬ x y z */ ++/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ ++/* a b c RLE‫ 1 2 3 PDF‬ x y z */ ++/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ ++/* a b c LRO‭ 1 2 3 PDF‬ x y z */ ++/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ ++/* a b c RLO‮ 1 2 3 PDF‬ x y z */ ++/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ ++/* a b c LRI⁦ 1 2 3 PDI⁩ x y z */ ++/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */ ++/* a b c RLI⁧ 1 2 3 PDI⁩ x y */ ++/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */ ++/* a b c FSI⁨ 1 2 3 PDI⁩ x y z */ ++/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */ ++ ++/* Same but C++ comments instead. */ ++// a b c LRE‪ 1 2 3 PDF‬ x y z ++/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ ++// a b c RLE‫ 1 2 3 PDF‬ x y z ++/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ ++// a b c LRO‭ 1 2 3 PDF‬ x y z ++/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ ++// a b c RLO‮ 1 2 3 PDF‬ x y z ++/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ ++// a b c LRI⁦ 1 2 3 PDI⁩ x y z ++/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */ ++// a b c RLI⁧ 1 2 3 PDI⁩ x y ++/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */ ++// a b c FSI⁨ 1 2 3 PDI⁩ x y z ++/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */ ++ ++/* Here we're closing an unopened context, warn when =any. 
*/ ++/* a b c PDI⁩ x y z */ ++/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */ ++/* a b c PDF‬ x y z */ ++/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */ ++// a b c PDI⁩ x y z ++/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */ ++// a b c PDF‬ x y z ++/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */ ++ ++/* Multiline comments. */ ++/* a b c PDI⁩ x y z ++ */ ++/* { dg-warning "U\\+2069" "" { target *-*-* } .-2 } */ ++/* a b c PDF‬ x y z ++ */ ++/* { dg-warning "U\\+202C" "" { target *-*-* } .-2 } */ ++/* first ++ a b c PDI⁩ x y z ++ */ ++/* { dg-warning "U\\+2069" "" { target *-*-* } .-2 } */ ++/* first ++ a b c PDF‬ x y z ++ */ ++/* { dg-warning "U\\+202C" "" { target *-*-* } .-2 } */ ++/* first ++ a b c PDI⁩ x y z */ ++/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */ ++/* first ++ a b c PDF‬ x y z */ ++/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */ ++ ++void ++g1 () ++{ ++ const char *s1 = "a b c LRE‪ 1 2 3 PDF‬ x y z"; ++/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ ++ const char *s2 = "a b c RLE‫ 1 2 3 PDF‬ x y z"; ++/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ ++ const char *s3 = "a b c LRO‭ 1 2 3 PDF‬ x y z"; ++/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ ++ const char *s4 = "a b c RLO‮ 1 2 3 PDF‬ x y z"; ++/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ ++ const char *s5 = "a b c LRI⁦ 1 2 3 PDI⁩ x y z"; ++/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */ ++ const char *s6 = "a b c RLI⁧ 1 2 3 PDI⁩ x y z"; ++/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */ ++ const char *s7 = "a b c FSI⁨ 1 2 3 PDI⁩ x y z"; ++/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */ ++ const char *s8 = "a b c PDI⁩ x y z"; ++/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */ ++ const char *s9 = "a b c PDF‬ x y z"; ++/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */ ++ ++ const char *s10 = "a b c LRE\u202a 1 2 3 PDF\u202c x y z"; ++/* { dg-warning "U\\+202A" "" { target *-*-* 
} .-1 } */ ++ const char *s11 = "a b c LRE\u202A 1 2 3 PDF\u202c x y z"; ++/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ ++ const char *s12 = "a b c RLE\u202b 1 2 3 PDF\u202c x y z"; ++/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ ++ const char *s13 = "a b c RLE\u202B 1 2 3 PDF\u202c x y z"; ++/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ ++ const char *s14 = "a b c LRO\u202d 1 2 3 PDF\u202c x y z"; ++/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ ++ const char *s15 = "a b c LRO\u202D 1 2 3 PDF\u202c x y z"; ++/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ ++ const char *s16 = "a b c RLO\u202e 1 2 3 PDF\u202c x y z"; ++/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ ++ const char *s17 = "a b c RLO\u202E 1 2 3 PDF\u202c x y z"; ++/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ ++ const char *s18 = "a b c LRI\u2066 1 2 3 PDI\u2069 x y z"; ++/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */ ++ const char *s19 = "a b c RLI\u2067 1 2 3 PDI\u2069 x y z"; ++/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */ ++ const char *s20 = "a b c FSI\u2068 1 2 3 PDI\u2069 x y z"; ++/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */ ++} ++ ++void ++g2 () ++{ ++ const char c1 = '\u202a'; ++/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ ++ const char c2 = '\u202A'; ++/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ ++ const char c3 = '\u202b'; ++/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ ++ const char c4 = '\u202B'; ++/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ ++ const char c5 = '\u202d'; ++/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ ++ const char c6 = '\u202D'; ++/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ ++ const char c7 = '\u202e'; ++/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ ++ const char c8 = '\u202E'; ++/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ ++ const char c9 = '\u2066'; ++/* { dg-warning "U\\+2066" "" { 
target *-*-* } .-1 } */ ++ const char c10 = '\u2067'; ++/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */ ++ const char c11 = '\u2068'; ++/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */ ++} ++ ++int A\u202cY; ++/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */ ++int A\u202CY2; ++/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */ ++ ++int d\u202ae\u202cf; ++/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ ++int d\u202Ae\u202cf2; ++/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ ++int d\u202be\u202cf; ++/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ ++int d\u202Be\u202cf2; ++/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ ++int d\u202de\u202cf; ++/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ ++int d\u202De\u202cf2; ++/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ ++int d\u202ee\u202cf; ++/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ ++int d\u202Ee\u202cf2; ++/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ ++int d\u2066e\u2069f; ++/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */ ++int d\u2067e\u2069f; ++/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */ ++int d\u2068e\u2069f; ++/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */ ++int X\u2069; ++/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */ +diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-5.c b/gcc/testsuite/c-c++-common/Wbidi-chars-5.c +new file mode 100644 +index 00000000000..f5776806c79 +--- /dev/null ++++ b/gcc/testsuite/c-c++-common/Wbidi-chars-5.c +@@ -0,0 +1,172 @@ ++/* PR preprocessor/103026 */ ++/* { dg-do compile } */ ++/* { dg-options "-Wbidi-chars=unpaired -Wno-multichar -Wno-overflow" } */ ++/* Test all bidi chars in various contexts (identifiers, comments, ++ string literals, character constants), both UCN and UTF-8. The bidi ++ chars here are properly terminated, except for the character constants. 
*/ ++ ++/* a b c LRE‪ 1 2 3 PDF‬ x y z */ ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++/* a b c RLE‫ 1 2 3 PDF‬ x y z */ ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++/* a b c LRO‭ 1 2 3 PDF‬ x y z */ ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++/* a b c RLO‮ 1 2 3 PDF‬ x y z */ ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++/* a b c LRI⁦ 1 2 3 PDI⁩ x y z */ ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++/* a b c RLI⁧ 1 2 3 PDI⁩ x y */ ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++/* a b c FSI⁨ 1 2 3 PDI⁩ x y z */ ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++ ++/* Same but C++ comments instead. */ ++// a b c LRE‪ 1 2 3 PDF‬ x y z ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++// a b c RLE‫ 1 2 3 PDF‬ x y z ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++// a b c LRO‭ 1 2 3 PDF‬ x y z ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++// a b c RLO‮ 1 2 3 PDF‬ x y z ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++// a b c LRI⁦ 1 2 3 PDI⁩ x y z ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++// a b c RLI⁧ 1 2 3 PDI⁩ x y ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++// a b c FSI⁨ 1 2 3 PDI⁩ x y z ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++ ++/* Here we're closing an unopened context, warn when =any. */ ++/* a b c PDI⁩ x y z */ ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++/* a b c PDF‬ x y z */ ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++// a b c PDI⁩ x y z ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++// a b c PDF‬ x y z ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++ ++/* Multiline comments. 
*/ ++/* a b c PDI⁩ x y z ++ */ ++/* { dg-bogus "unpaired" "" { target *-*-* } .-2 } */ ++/* a b c PDF‬ x y z ++ */ ++/* { dg-bogus "unpaired" "" { target *-*-* } .-2 } */ ++/* first ++ a b c PDI⁩ x y z ++ */ ++/* { dg-bogus "unpaired" "" { target *-*-* } .-2 } */ ++/* first ++ a b c PDF‬ x y z ++ */ ++/* { dg-bogus "unpaired" "" { target *-*-* } .-2 } */ ++/* first ++ a b c PDI⁩ x y z */ ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++/* first ++ a b c PDF‬ x y z */ ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++ ++void ++g1 () ++{ ++ const char *s1 = "a b c LRE‪ 1 2 3 PDF‬ x y z"; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s2 = "a b c RLE‫ 1 2 3 PDF‬ x y z"; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s3 = "a b c LRO‭ 1 2 3 PDF‬ x y z"; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s4 = "a b c RLO‮ 1 2 3 PDF‬ x y z"; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s5 = "a b c LRI⁦ 1 2 3 PDI⁩ x y z"; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s6 = "a b c RLI⁧ 1 2 3 PDI⁩ x y z"; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s7 = "a b c FSI⁨ 1 2 3 PDI⁩ x y z"; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s8 = "a b c PDI⁩ x y z"; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s9 = "a b c PDF‬ x y z"; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++ ++ const char *s10 = "a b c LRE\u202a 1 2 3 PDF\u202c x y z"; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s11 = "a b c LRE\u202A 1 2 3 PDF\u202c x y z"; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s12 = "a b c RLE\u202b 1 2 3 PDF\u202c x y z"; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s13 = "a b c RLE\u202B 1 2 3 PDF\u202c x y z"; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s14 = "a 
b c LRO\u202d 1 2 3 PDF\u202c x y z"; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s15 = "a b c LRO\u202D 1 2 3 PDF\u202c x y z"; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s16 = "a b c RLO\u202e 1 2 3 PDF\u202c x y z"; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s17 = "a b c RLO\u202E 1 2 3 PDF\u202c x y z"; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s18 = "a b c LRI\u2066 1 2 3 PDI\u2069 x y z"; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s19 = "a b c RLI\u2067 1 2 3 PDI\u2069 x y z"; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s20 = "a b c FSI\u2068 1 2 3 PDI\u2069 x y z"; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++} ++ ++void ++g2 () ++{ ++ const char c1 = '\u202a'; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ const char c2 = '\u202A'; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ const char c3 = '\u202b'; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ const char c4 = '\u202B'; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ const char c5 = '\u202d'; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ const char c6 = '\u202D'; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ const char c7 = '\u202e'; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ const char c8 = '\u202E'; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ const char c9 = '\u2066'; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ const char c10 = '\u2067'; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ const char c11 = '\u2068'; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++} ++ ++int A\u202cY; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++int A\u202CY2; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++ ++int d\u202ae\u202cf; ++/* { dg-bogus "unpaired" "" { 
target *-*-* } .-1 } */ ++int d\u202Ae\u202cf2; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++int d\u202be\u202cf; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++int d\u202Be\u202cf2; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++int d\u202de\u202cf; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++int d\u202De\u202cf2; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++int d\u202ee\u202cf; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++int d\u202Ee\u202cf2; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++int d\u2066e\u2069f; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++int d\u2067e\u2069f; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++int d\u2068e\u2069f; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ ++int X\u2069; ++/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ +diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-6.c b/gcc/testsuite/c-c++-common/Wbidi-chars-6.c +new file mode 100644 +index 00000000000..a65d6faf60e +--- /dev/null ++++ b/gcc/testsuite/c-c++-common/Wbidi-chars-6.c +@@ -0,0 +1,130 @@ ++/* PR preprocessor/103026 */ ++/* { dg-do compile } */ ++/* { dg-options "-Wbidi-chars=unpaired" } */ ++/* Test nesting of bidi chars in various contexts. 
*/ ++ ++/* Terminated by the wrong char: */ ++/* a b c LRE‪ 1 2 3 PDI⁩ x y z */ ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++/* a b c RLE‫ 1 2 3 PDI⁩ x y z*/ ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++/* a b c LRO‭ 1 2 3 PDI⁩ x y z */ ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++/* a b c RLO‮ 1 2 3 PDI⁩ x y z */ ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++/* a b c LRI⁦ 1 2 3 PDF‬ x y z */ ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++/* a b c RLI⁧ 1 2 3 PDF‬ x y z */ ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++/* a b c FSI⁨ 1 2 3 PDF‬ x y z*/ ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ ++/* LRE‪ PDF‬ */ ++/* LRE‪ LRE‪ PDF‬ PDF‬ */ ++/* PDF‬ LRE‪ PDF‬ */ ++/* LRE‪ PDF‬ LRE‪ PDF‬ */ ++/* LRE‪ LRE‪ PDF‬ */ ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++/* PDF‬ LRE‪ */ ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ ++// a b c LRE‪ 1 2 3 PDI⁩ x y z ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++// a b c RLE‫ 1 2 3 PDI⁩ x y z*/ ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++// a b c LRO‭ 1 2 3 PDI⁩ x y z ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++// a b c RLO‮ 1 2 3 PDI⁩ x y z ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++// a b c LRI⁦ 1 2 3 PDF‬ x y z ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++// a b c RLI⁧ 1 2 3 PDF‬ x y z ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++// a b c FSI⁨ 1 2 3 PDF‬ x y z ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ ++// LRE‪ PDF‬ ++// LRE‪ LRE‪ PDF‬ PDF‬ ++// PDF‬ LRE‪ PDF‬ ++// LRE‪ PDF‬ LRE‪ PDF‬ ++// LRE‪ LRE‪ PDF‬ ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++// PDF‬ LRE‪ ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ ++void ++g1 () ++{ ++ const char *s1 = "a b c LRE‪ 1 2 3 PDI⁩ x y z"; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s2 = "a b c 
LRE\u202a 1 2 3 PDI\u2069 x y z"; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s3 = "a b c RLE‫ 1 2 3 PDI⁩ x y "; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s4 = "a b c RLE\u202b 1 2 3 PDI\u2069 x y z"; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s5 = "a b c LRO‭ 1 2 3 PDI⁩ x y z"; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s6 = "a b c LRO\u202d 1 2 3 PDI\u2069 x y z"; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s7 = "a b c RLO‮ 1 2 3 PDI⁩ x y z"; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s8 = "a b c RLO\u202e 1 2 3 PDI\u2069 x y z"; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s9 = "a b c LRI⁦ 1 2 3 PDF‬ x y z"; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s10 = "a b c LRI\u2066 1 2 3 PDF\u202c x y z"; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s11 = "a b c RLI⁧ 1 2 3 PDF‬ x y z\ ++ "; ++/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ ++ const char *s12 = "a b c RLI\u2067 1 2 3 PDF\u202c x y z"; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s13 = "a b c FSI⁨ 1 2 3 PDF‬ x y z"; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s14 = "a b c FSI\u2068 1 2 3 PDF\u202c x y z"; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s15 = "PDF‬ LRE‪"; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s16 = "PDF\u202c LRE\u202a"; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s17 = "LRE‪ PDF‬"; ++ const char *s18 = "LRE\u202a PDF\u202c"; ++ const char *s19 = "LRE‪ LRE‪ PDF‬ PDF‬"; ++ const char *s20 = "LRE\u202a LRE\u202a PDF\u202c PDF\u202c"; ++ const char *s21 = "PDF‬ LRE‪ PDF‬"; ++ const char *s22 = "PDF\u202c LRE\u202a PDF\u202c"; ++ const char *s23 = "LRE‪ LRE‪ PDF‬"; ++/* { dg-warning 
"unpaired" "" { target *-*-* } .-1 } */ ++ const char *s24 = "LRE\u202a LRE\u202a PDF\u202c"; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s25 = "PDF‬ LRE‪"; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s26 = "PDF\u202c LRE\u202a"; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s27 = "PDF‬ LRE\u202a"; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ const char *s28 = "PDF\u202c LRE‪"; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++} ++ ++int A\u202aB\u2069C; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++int a\u202bB\u2069c; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++int a\u202db\u2069c2; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++int a\u202eb\u2069; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++int a\u2066b\u202c; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++int a\u2067b\u202c; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++int a\u2068b\u202c; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++int A\u202aB\u202c; ++int A\u202aA\u202aB\u202cB\u202c; ++int a_\u202C_\u202a_\u202c; ++int a_\u202a_\u202c_\u202a_\u202c_; ++int a_\u202a_\u202c_\u202a_; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-7.c b/gcc/testsuite/c-c++-common/Wbidi-chars-7.c +new file mode 100644 +index 00000000000..d012d420ec0 +--- /dev/null ++++ b/gcc/testsuite/c-c++-common/Wbidi-chars-7.c +@@ -0,0 +1,9 @@ ++/* PR preprocessor/103026 */ ++/* { dg-do compile } */ ++/* { dg-options "-Wbidi-chars=any" } */ ++/* Test we ignore UCNs in comments. 
*/ ++ ++// a b c \u202a 1 2 3 ++// a b c \u202A 1 2 3 ++/* a b c \u202a 1 2 3 */ ++/* a b c \u202A 1 2 3 */ +diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-8.c b/gcc/testsuite/c-c++-common/Wbidi-chars-8.c +new file mode 100644 +index 00000000000..4f54c5092ec +--- /dev/null ++++ b/gcc/testsuite/c-c++-common/Wbidi-chars-8.c +@@ -0,0 +1,13 @@ ++/* PR preprocessor/103026 */ ++/* { dg-do compile } */ ++/* { dg-options "-Wbidi-chars=any" } */ ++/* Test \u vs \U. */ ++ ++int a_\u202A; ++/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ ++int a_\u202a_2; ++/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ ++int a_\U0000202A_3; ++/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ ++int a_\U0000202a_4; ++/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ +diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-9.c b/gcc/testsuite/c-c++-common/Wbidi-chars-9.c +new file mode 100644 +index 00000000000..e2af1b1ca97 +--- /dev/null ++++ b/gcc/testsuite/c-c++-common/Wbidi-chars-9.c +@@ -0,0 +1,29 @@ ++/* PR preprocessor/103026 */ ++/* { dg-do compile } */ ++/* { dg-options "-Wbidi-chars=unpaired" } */ ++/* Test that we properly separate bidi contexts (comment/identifier/character ++ constant/string literal). 
*/ ++ ++/* LRE ->‪<- */ int pdf_\u202c_1; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++/* RLE ->‫<- */ int pdf_\u202c_2; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++/* LRO ->‭<- */ int pdf_\u202c_3; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++/* RLO ->‮<- */ int pdf_\u202c_4; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++/* LRI ->⁦<-*/ int pdi_\u2069_1; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++/* RLI ->⁧<- */ int pdi_\u2069_12; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++/* FSI ->⁨<- */ int pdi_\u2069_3; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++ ++const char *s1 = "LRE\u202a"; /* PDF ->‬<- */ ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++/* LRE ->‪<- */ const char *s2 = "PDF\u202c"; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++const char *s3 = "LRE\u202a"; int pdf_\u202c_5; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ ++int lre_\u202a; const char *s4 = "PDF\u202c"; ++/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h +index 3ad52d5e01e..e0dcb7f0529 100644 +--- a/libcpp/include/cpplib.h ++++ b/libcpp/include/cpplib.h +@@ -305,6 +305,17 @@ enum cpp_normalize_level { + normalized_none + }; + ++/* The possible bidirectional control characters checking levels, from least ++ restrictive to most. */ ++enum cpp_bidirectional_level { ++ /* No checking. */ ++ bidirectional_none, ++ /* Only detect unpaired uses of bidirectional control characters. */ ++ bidirectional_unpaired, ++ /* Detect any use of bidirectional control characters. */ ++ bidirectional_any ++}; ++ + /* This structure is nested inside struct cpp_reader, and + carries all the options visible to the command line. */ + struct cpp_options +@@ -506,6 +517,10 @@ struct cpp_options + /* True if warn about differences between C++98 and C++11. 
*/ + bool cpp_warn_cxx11_compat; + ++ /* Nonzero if bidirectional control characters checking is on. See enum ++ cpp_bidirectional_level. */ ++ unsigned char cpp_warn_bidirectional; ++ + /* Dependency generation. */ + struct + { +@@ -1063,7 +1078,8 @@ enum { + CPP_W_PEDANTIC, + CPP_W_C90_C99_COMPAT, + CPP_W_CXX11_COMPAT, +- CPP_W_EXPANSION_TO_DEFINED ++ CPP_W_EXPANSION_TO_DEFINED, ++ CPP_W_BIDIRECTIONAL + }; + + /* Output a diagnostic of some kind. */ +diff --git a/libcpp/init.c b/libcpp/init.c +index ca3fbaa5c05..5c15da82ff8 100644 +--- a/libcpp/init.c ++++ b/libcpp/init.c +@@ -208,6 +208,7 @@ cpp_create_reader (enum c_lang lang, cpp_hash_table *table, + = ENABLE_CANONICAL_SYSTEM_HEADERS; + CPP_OPTION (pfile, ext_numeric_literals) = 1; + CPP_OPTION (pfile, warn_date_time) = 0; ++ CPP_OPTION (pfile, cpp_warn_bidirectional) = bidirectional_unpaired; + + /* Default CPP arithmetic to something sensible for the host for the + benefit of dumb users like fix-header. */ +diff --git a/libcpp/internal.h b/libcpp/internal.h +index 4f74f995cec..53b4c0f4af7 100644 +--- a/libcpp/internal.h ++++ b/libcpp/internal.h +@@ -576,6 +576,13 @@ struct cpp_reader + /* If non-null, the lexer will use this location for the next token + instead of getting a location from the linemap. */ + source_location *forced_token_location_p; ++ ++ /* Returns true iff we should warn about bidirectional control ++ characters (whether written as UTF-8 or as UCNs). */ ++ bool warn_bidi_p () const ++ { ++ return CPP_OPTION (this, cpp_warn_bidirectional) != bidirectional_none; ++ } + }; + + /* Character classes. Based on the more primitive macros in safe-ctype.h.
+diff --git a/libcpp/lex.c b/libcpp/lex.c +index a408f912c5c..ea7f75e842e 100644 +--- a/libcpp/lex.c ++++ b/libcpp/lex.c +@@ -1164,6 +1164,324 @@ _cpp_process_line_notes (cpp_reader *pfile, int in_comment) + } + } + ++namespace bidi { ++ enum kind { ++ NONE, LRE, RLE, LRO, RLO, LRI, RLI, FSI, PDF, PDI, LTR, RTL ++ }; ++ ++ /* All the UTF-8 encodings of bidi characters start with E2. */ ++ const uchar utf8_start = 0xe2; ++ ++ /* A vector holding currently open bidi contexts. We use a char for ++ each context, its LSB is 1 if it represents a PDF context, 0 if it ++ represents a PDI context. The next bit is 1 if this context was open ++ by a bidi character written as a UCN, and 0 when it was UTF-8. */ ++ semi_embedded_vec vec; ++ ++ /* Close the whole comment/identifier/string literal/character constant ++ context. */ ++ void on_close () ++ { ++ vec.truncate (0); ++ } ++ ++ /* Pop the last element in the vector. */ ++ void pop () ++ { ++ unsigned int len = vec.count (); ++ gcc_checking_assert (len > 0); ++ vec.truncate (len - 1); ++ } ++ ++ /* Return the context of the Ith element. */ ++ kind ctx_at (unsigned int i) ++ { ++ return (vec[i] & 1) ? PDF : PDI; ++ } ++ ++ /* Return which context is currently opened. */ ++ kind current_ctx () ++ { ++ unsigned int len = vec.count (); ++ if (len == 0) ++ return NONE; ++ return ctx_at (len - 1); ++ } ++ ++ /* Return true if the current context comes from a UCN origin, that is, ++ the bidi char which started this bidi context was written as a UCN. */ ++ bool current_ctx_ucn_p () ++ { ++ unsigned int len = vec.count (); ++ gcc_checking_assert (len > 0); ++ return (vec[len - 1] >> 1) & 1; ++ } ++ ++ /* We've read a bidi char, update the current vector as necessary. */ ++ void on_char (kind k, bool ucn_p) ++ { ++ switch (k) ++ { ++ case LRE: ++ case RLE: ++ case LRO: ++ case RLO: ++ vec.push (ucn_p ? 3u : 1u); ++ break; ++ case LRI: ++ case RLI: ++ case FSI: ++ vec.push (ucn_p ? 
2u : 0u); ++ break; ++ /* PDF terminates the scope of the last LRE, RLE, LRO, or RLO ++ whose scope has not yet been terminated. */ ++ case PDF: ++ if (current_ctx () == PDF) ++ pop (); ++ break; ++ /* PDI terminates the scope of the last LRI, RLI, or FSI whose ++ scope has not yet been terminated, as well as the scopes of ++ any subsequent LREs, RLEs, LROs, or RLOs whose scopes have not ++ yet been terminated. */ ++ case PDI: ++ for (int i = vec.count () - 1; i >= 0; --i) ++ if (ctx_at (i) == PDI) ++ { ++ vec.truncate (i); ++ break; ++ } ++ break; ++ case LTR: ++ case RTL: ++ /* These aren't popped by a PDF/PDI. */ ++ break; ++ [[likely]] case NONE: ++ break; ++ default: ++ abort (); ++ } ++ } ++ ++ /* Return a descriptive string for K. */ ++ const char *to_str (kind k) ++ { ++ switch (k) ++ { ++ case LRE: ++ return "U+202A (LEFT-TO-RIGHT EMBEDDING)"; ++ case RLE: ++ return "U+202B (RIGHT-TO-LEFT EMBEDDING)"; ++ case LRO: ++ return "U+202D (LEFT-TO-RIGHT OVERRIDE)"; ++ case RLO: ++ return "U+202E (RIGHT-TO-LEFT OVERRIDE)"; ++ case LRI: ++ return "U+2066 (LEFT-TO-RIGHT ISOLATE)"; ++ case RLI: ++ return "U+2067 (RIGHT-TO-LEFT ISOLATE)"; ++ case FSI: ++ return "U+2068 (FIRST STRONG ISOLATE)"; ++ case PDF: ++ return "U+202C (POP DIRECTIONAL FORMATTING)"; ++ case PDI: ++ return "U+2069 (POP DIRECTIONAL ISOLATE)"; ++ case LTR: ++ return "U+200E (LEFT-TO-RIGHT MARK)"; ++ case RTL: ++ return "U+200F (RIGHT-TO-LEFT MARK)"; ++ default: ++ abort (); ++ } ++ } ++} ++ ++/* Parse a sequence of 3 bytes starting with P and return its bidi code. 
*/ ++ ++static bidi::kind ++get_bidi_utf8 (const unsigned char *const p) ++{ ++ gcc_checking_assert (p[0] == bidi::utf8_start); ++ ++ if (p[1] == 0x80) ++ switch (p[2]) ++ { ++ case 0xaa: ++ return bidi::LRE; ++ case 0xab: ++ return bidi::RLE; ++ case 0xac: ++ return bidi::PDF; ++ case 0xad: ++ return bidi::LRO; ++ case 0xae: ++ return bidi::RLO; ++ case 0x8e: ++ return bidi::LTR; ++ case 0x8f: ++ return bidi::RTL; ++ default: ++ break; ++ } ++ else if (p[1] == 0x81) ++ switch (p[2]) ++ { ++ case 0xa6: ++ return bidi::LRI; ++ case 0xa7: ++ return bidi::RLI; ++ case 0xa8: ++ return bidi::FSI; ++ case 0xa9: ++ return bidi::PDI; ++ default: ++ break; ++ } ++ ++ return bidi::NONE; ++} ++ ++/* Parse a UCN where P points just past \u or \U and return its bidi code. */ ++ ++static bidi::kind ++get_bidi_ucn (const unsigned char *p, bool is_U) ++{ ++ /* 6.4.3 Universal Character Names ++ \u hex-quad ++ \U hex-quad hex-quad ++ where \unnnn means \U0000nnnn. */ ++ ++ if (is_U) ++ { ++ if (p[0] != '0' || p[1] != '0' || p[2] != '0' || p[3] != '0') ++ return bidi::NONE; ++ /* Skip 4B so we can treat \u and \U the same below. */ ++ p += 4; ++ } ++ ++ /* All code points we are looking for start with 20xx. 
*/ ++ if (p[0] != '2' || p[1] != '0') ++ return bidi::NONE; ++ else if (p[2] == '2') ++ switch (p[3]) ++ { ++ case 'a': ++ case 'A': ++ return bidi::LRE; ++ case 'b': ++ case 'B': ++ return bidi::RLE; ++ case 'c': ++ case 'C': ++ return bidi::PDF; ++ case 'd': ++ case 'D': ++ return bidi::LRO; ++ case 'e': ++ case 'E': ++ return bidi::RLO; ++ default: ++ break; ++ } ++ else if (p[2] == '6') ++ switch (p[3]) ++ { ++ case '6': ++ return bidi::LRI; ++ case '7': ++ return bidi::RLI; ++ case '8': ++ return bidi::FSI; ++ case '9': ++ return bidi::PDI; ++ default: ++ break; ++ } ++ else if (p[2] == '0') ++ switch (p[3]) ++ { ++ case 'e': ++ case 'E': ++ return bidi::LTR; ++ case 'f': ++ case 'F': ++ return bidi::RTL; ++ default: ++ break; ++ } ++ ++ return bidi::NONE; ++} ++ ++/* We're closing a bidi context, that is, we've encountered a newline, ++ are closing a C-style comment, or are at the end of a string literal, ++ character constant, or identifier. Warn if this context was not ++ properly terminated by a PDI or PDF. P points to the last character ++ in this context. */ ++ ++static void ++maybe_warn_bidi_on_close (cpp_reader *pfile, const uchar *p) ++{ ++ if (CPP_OPTION (pfile, cpp_warn_bidirectional) == bidirectional_unpaired ++ && bidi::vec.count () > 0) ++ { ++ const source_location loc ++ = linemap_position_for_column (pfile->line_table, ++ CPP_BUF_COLUMN (pfile->buffer, p)); ++ cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0, ++ "unpaired UTF-8 bidirectional control character " ++ "detected"); ++ } ++ /* We're done with this context. */ ++ bidi::on_close (); ++} ++ ++/* We're at the beginning or in the middle of an identifier/comment/string ++ literal/character constant. Warn if we've encountered a bidi character. ++ KIND says which bidi character it was; P points to it in the character ++ stream. UCN_P is true iff this bidi character was written as a UCN. 
*/ ++ ++static void ++maybe_warn_bidi_on_char (cpp_reader *pfile, const uchar *p, bidi::kind kind, ++ bool ucn_p) ++{ ++ if (__builtin_expect (kind == bidi::NONE, 1)) ++ return; ++ ++ const unsigned char warn_bidi = CPP_OPTION (pfile, cpp_warn_bidirectional); ++ ++ if (warn_bidi != bidirectional_none) ++ { ++ const source_location loc ++ = linemap_position_for_column (pfile->line_table, ++ CPP_BUF_COLUMN (pfile->buffer, p)); ++ /* It seems excessive to warn about a PDI/PDF that is closing ++ an opened context because we've already warned about the ++ opening character. Except warn when we have a UCN x UTF-8 ++ mismatch. */ ++ if (kind == bidi::current_ctx ()) ++ { ++ if (warn_bidi == bidirectional_unpaired ++ && bidi::current_ctx_ucn_p () != ucn_p) ++ cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0, ++ "UTF-8 vs UCN mismatch when closing " ++ "a context by \"%s\"", bidi::to_str (kind)); ++ } ++ else if (warn_bidi == bidirectional_any) ++ { ++ if (kind == bidi::PDF || kind == bidi::PDI) ++ cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0, ++ "\"%s\" is closing an unopened context", ++ bidi::to_str (kind)); ++ else ++ cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0, ++ "found problematic Unicode character \"%s\"", ++ bidi::to_str (kind)); ++ } ++ } ++ /* Record this character's effect on the open bidi contexts. */ ++ bidi::on_char (kind, ucn_p); ++} ++ + /* Skip a C-style block comment. We find the end of the comment by + seeing if an asterisk is before every '/' we encounter. Returns + nonzero if comment terminated by EOF, zero otherwise.
+@@ -1175,6 +1493,7 @@ _cpp_skip_block_comment (cpp_reader *pfile) + cpp_buffer *buffer = pfile->buffer; + const uchar *cur = buffer->cur; + uchar c; ++ const bool warn_bidi_p = pfile->warn_bidi_p (); + + cur++; + if (*cur == '/') +@@ -1189,7 +1508,11 @@ _cpp_skip_block_comment (cpp_reader *pfile) + if (c == '/') + { + if (cur[-2] == '*') +- break; ++ { ++ if (warn_bidi_p) ++ maybe_warn_bidi_on_close (pfile, cur); ++ break; ++ } + + /* Warn about potential nested comments, but not if the '/' + comes immediately before the true comment delimiter. +@@ -1208,6 +1531,8 @@ _cpp_skip_block_comment (cpp_reader *pfile) + { + unsigned int cols; + buffer->cur = cur - 1; ++ if (warn_bidi_p) ++ maybe_warn_bidi_on_close (pfile, cur); + _cpp_process_line_notes (pfile, true); + if (buffer->next_line >= buffer->rlimit) + return true; +@@ -1218,6 +1543,13 @@ _cpp_skip_block_comment (cpp_reader *pfile) + + cur = buffer->cur; + } ++ /* If this is a beginning of a UTF-8 encoding, it might be ++ a bidirectional control character. 
*/ ++ else if (__builtin_expect (c == bidi::utf8_start, 0) && warn_bidi_p) ++ { ++ bidi::kind kind = get_bidi_utf8 (cur - 1); ++ maybe_warn_bidi_on_char (pfile, cur, kind, /*ucn_p=*/false); ++ } + } + + buffer->cur = cur; +@@ -1233,9 +1565,31 @@ skip_line_comment (cpp_reader *pfile) + { + cpp_buffer *buffer = pfile->buffer; + source_location orig_line = pfile->line_table->highest_line; ++ const bool warn_bidi_p = pfile->warn_bidi_p (); + +- while (*buffer->cur != '\n') +- buffer->cur++; ++ if (!warn_bidi_p) ++ while (*buffer->cur != '\n') ++ buffer->cur++; ++ else ++ { ++ while (*buffer->cur != '\n' ++ && *buffer->cur != bidi::utf8_start) ++ buffer->cur++; ++ if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0)) ++ { ++ while (*buffer->cur != '\n') ++ { ++ if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0)) ++ { ++ bidi::kind kind = get_bidi_utf8 (buffer->cur); ++ maybe_warn_bidi_on_char (pfile, buffer->cur, kind, ++ /*ucn_p=*/false); ++ } ++ buffer->cur++; ++ } ++ maybe_warn_bidi_on_close (pfile, buffer->cur); ++ } ++ } + + _cpp_process_line_notes (pfile, true); + return orig_line != pfile->line_table->highest_line; +@@ -1315,11 +1669,13 @@ warn_about_normalization (cpp_reader *pfile, + + /* Returns TRUE if the sequence starting at buffer->cur is invalid in + an identifier. FIRST is TRUE if this starts an identifier. 
*/ ++ + static bool + forms_identifier_p (cpp_reader *pfile, int first, + struct normalize_state *state) + { + cpp_buffer *buffer = pfile->buffer; ++ const bool warn_bidi_p = pfile->warn_bidi_p (); + + if (*buffer->cur == '$') + { +@@ -1343,6 +1699,12 @@ forms_identifier_p (cpp_reader *pfile, int first, + { + cppchar_t s; + buffer->cur += 2; ++ if (warn_bidi_p) ++ { ++ bidi::kind kind = get_bidi_ucn (buffer->cur, ++ buffer->cur[-1] == 'U'); ++ maybe_warn_bidi_on_char (pfile, buffer->cur, kind, /*ucn_p=*/true); ++ } + if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first, + state, &s, NULL, NULL)) + return true; +@@ -1450,6 +1812,7 @@ lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn, + const uchar *cur; + unsigned int len; + unsigned int hash = HT_HASHSTEP (0, *base); ++ const bool warn_bidi_p = pfile->warn_bidi_p (); + + cur = pfile->buffer->cur; + if (! starts_ucn) +@@ -1472,6 +1835,8 @@ lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn, + pfile->buffer->cur++; + } + } while (forms_identifier_p (pfile, false, nst)); ++ if (warn_bidi_p) ++ maybe_warn_bidi_on_close (pfile, pfile->buffer->cur); + result = _cpp_interpret_identifier (pfile, base, + pfile->buffer->cur - base); + *spelling = cpp_lookup (pfile, base, pfile->buffer->cur - base); +@@ -1673,6 +2038,7 @@ lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base, + _cpp_buff *first_buff = NULL, *last_buff = NULL; + size_t raw_prefix_start; + _cpp_line_note *note = &pfile->buffer->notes[pfile->buffer->cur_note]; ++ const bool warn_bidi_p = pfile->warn_bidi_p (); + + type = (*base == 'L' ? CPP_WSTRING : + *base == 'U' ? 
CPP_STRING32 : +@@ -1909,8 +2275,15 @@ lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base, + cur = base = pfile->buffer->cur; + note = &pfile->buffer->notes[pfile->buffer->cur_note]; + } ++ else if (__builtin_expect ((unsigned char) c == bidi::utf8_start, 0) ++ && warn_bidi_p) ++ maybe_warn_bidi_on_char (pfile, cur - 1, get_bidi_utf8 (cur - 1), ++ /*ucn_p=*/false); + } + ++ if (warn_bidi_p) ++ maybe_warn_bidi_on_close (pfile, cur); ++ + if (CPP_OPTION (pfile, user_literals)) + { + /* If a string format macro, say from inttypes.h, is placed touching +@@ -2005,15 +2378,27 @@ lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base) + else + terminator = '>', type = CPP_HEADER_NAME; + ++ const bool warn_bidi_p = pfile->warn_bidi_p (); + for (;;) + { + cppchar_t c = *cur++; + + /* In #include-style directives, terminators are not escapable. */ + if (c == '\\' && !pfile->state.angled_headers && *cur != '\n') +- cur++; ++ { ++ if ((cur[0] == 'u' || cur[0] == 'U') && warn_bidi_p) ++ { ++ bidi::kind kind = get_bidi_ucn (cur + 1, cur[0] == 'U'); ++ maybe_warn_bidi_on_char (pfile, cur, kind, /*ucn_p=*/true); ++ } ++ cur++; ++ } + else if (c == terminator) +- break; ++ { ++ if (warn_bidi_p) ++ maybe_warn_bidi_on_close (pfile, cur - 1); ++ break; ++ } + else if (c == '\n') + { + cur--; +@@ -2030,6 +2415,11 @@ lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base) + } + else if (c == '\0') + saw_NUL = true; ++ else if (__builtin_expect (c == bidi::utf8_start, 0) && warn_bidi_p) ++ { ++ bidi::kind kind = get_bidi_utf8 (cur - 1); ++ maybe_warn_bidi_on_char (pfile, cur - 1, kind, /*ucn_p=*/false); ++ } + } + + if (saw_NUL && !pfile->state.skipping) diff --git a/SOURCES/gcc8-aarch64-mtune-neoverse-512tvb.patch b/SOURCES/gcc8-aarch64-mtune-neoverse-512tvb.patch new file mode 100644 index 0000000..af0a049 --- /dev/null +++ b/SOURCES/gcc8-aarch64-mtune-neoverse-512tvb.patch @@ -0,0 +1,105 @@ +From 9c108bb84d3a2447dac730c455df658be0a2c751 Mon 
Sep 17 00:00:00 2001 +From: Richard Sandiford +Date: Tue, 17 Aug 2021 15:15:27 +0100 +Subject: [PATCH] aarch64: Add -mtune=neoverse-512tvb +To: gcc-patches@gcc.gnu.org + +This patch adds an option to tune for Neoverse cores that have +a total vector bandwidth of 512 bits (4x128 for Advanced SIMD +and a vector-length-dependent equivalent for SVE). This is intended +to be a compromise between tuning aggressively for a single core like +Neoverse V1 (which can be too narrow) and tuning for AArch64 cores +in general (which can be too wide). + +-mcpu=neoverse-512tvb is equivalent to -mcpu=neoverse-v1 +-mtune=neoverse-512tvb. + +gcc/ + * doc/invoke.texi: Document -mtune=neoverse-512tvb and + -mcpu=neoverse-512tvb. + * config/aarch64/aarch64-cores.def (neoverse-512tvb): New entry. + * config/aarch64/aarch64-tune.md: Regenerate. + +(cherry picked from commit 048039c49b96875144f67e7789fdea54abf7710b) +--- + gcc/config/aarch64/aarch64-cores.def | 1 + + gcc/config/aarch64/aarch64-tune.md | 2 +- + gcc/doc/invoke.texi | 25 ++++++++++++++++++++++--- + 3 files changed, 24 insertions(+), 4 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def +index dfb839c01cc..f348d31e22e 100644 +--- a/gcc/config/aarch64/aarch64-cores.def ++++ b/gcc/config/aarch64/aarch64-cores.def +@@ -99,6 +99,7 @@ AARCH64_CORE("saphira", saphira, falkor, 8_3A, AARCH64_FL_FOR_ARCH8_3 + /* ARM ('A') cores. 
*/ + AARCH64_CORE("zeus", zeus, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_SVE | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1) + AARCH64_CORE("neoverse-v1", neoversev1, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_SVE | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1) ++AARCH64_CORE("neoverse-512tvb", neoverse512tvb, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_SVE | AARCH64_FL_RNG, neoversev1, INVALID_IMP, INVALID_CORE, -1) + + /* Armv8.5-A Architecture Processors. */ + AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_F16 | AARCH64_FL_SVE | AARCH64_FL_RNG, neoversen2, 0x41, 0xd49, -1) +diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md +index 2d7c9aa4740..09b76480f0b 100644 +--- a/gcc/config/aarch64/aarch64-tune.md ++++ b/gcc/config/aarch64/aarch64-tune.md +@@ -1,5 +1,5 @@ + ;; -*- buffer-read-only: t -*- + ;; Generated automatically by gentune.sh from aarch64-cores.def + (define_attr "tune" +- "cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,xgene1,falkor,qdf24xx,exynosm1,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,ares,neoversen1,saphira,zeus,neoversev1,neoversen2,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55" ++ "cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,xgene1,falkor,qdf24xx,exynosm1,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,ares,neoversen1,saphira,zeus,neoversev1,neoverse512tvb,neoversen2,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55" + (const (symbol_ref "((enum attr_tune) aarch64_tune)"))) +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index 78ca7738df2..68fda03281a 100644 +--- 
a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -14772,9 +14772,9 @@ performance of the code. Permissible values for this option are: + @samp{generic}, @samp{cortex-a35}, @samp{cortex-a53}, @samp{cortex-a55}, + @samp{cortex-a57}, @samp{cortex-a72}, @samp{cortex-a73}, @samp{cortex-a75}, + @samp{cortex-a76}, @samp{ares}, @samp{neoverse-n1}, @samp{neoverse-n2}, +-@samp{neoverse-v1}, @samp{zeus}, @samp{exynos-m1}, @samp{falkor}, +-@samp{qdf24xx}, @samp{saphira}, @samp{xgene1}, @samp{vulcan}, @samp{thunderx}, +-@samp{thunderxt88}, @samp{thunderxt88p1}, @samp{thunderxt81}, ++@samp{neoverse-v1}, @samp{zeus}, @samp{neoverse-512tvb}, @samp{exynos-m1}, ++@samp{falkor}, @samp{qdf24xx}, @samp{saphira}, @samp{xgene1}, @samp{vulcan}, ++@samp{thunderx}, @samp{thunderxt88}, @samp{thunderxt88p1}, @samp{thunderxt81}, + @samp{thunderxt83}, @samp{thunderx2t99}, @samp{cortex-a57.cortex-a53}, + @samp{cortex-a72.cortex-a53}, @samp{cortex-a73.cortex-a35}, + @samp{cortex-a73.cortex-a53}, @samp{cortex-a75.cortex-a55}, +@@ -14785,6 +14785,15 @@ The values @samp{cortex-a57.cortex-a53}, @samp{cortex-a72.cortex-a53}, + @samp{cortex-a75.cortex-a55} specify that GCC should tune for a + big.LITTLE system. + ++The value @samp{neoverse-512tvb} specifies that GCC should tune ++for Neoverse cores that (a) implement SVE and (b) have a total vector ++bandwidth of 512 bits per cycle. In other words, the option tells GCC to ++tune for Neoverse cores that can execute 4 128-bit Advanced SIMD arithmetic ++instructions a cycle and that can execute an equivalent number of SVE ++arithmetic instructions per cycle (2 for 256-bit SVE, 4 for 128-bit SVE). ++This is more general than tuning for a specific core like Neoverse V1 ++but is more specific than the default tuning described below. ++ + Additionally on native AArch64 GNU/Linux systems the value + @samp{native} tunes performance to the host system. This option has no effect + if the compiler is unable to recognize the processor of the host system. 
+@@ -14814,6 +14823,16 @@ by @option{-mtune}). Where this option is used in conjunction + with @option{-march} or @option{-mtune}, those options take precedence + over the appropriate part of this option. + ++@option{-mcpu=neoverse-512tvb} is special in that it does not refer ++to a specific core, but instead refers to all Neoverse cores that ++(a) implement SVE and (b) have a total vector bandwidth of 512 bits ++a cycle. Unless overridden by @option{-march}, ++@option{-mcpu=neoverse-512tvb} generates code that can run on a ++Neoverse V1 core, since Neoverse V1 is the first Neoverse core with ++these properties. Unless overridden by @option{-mtune}, ++@option{-mcpu=neoverse-512tvb} tunes code in the same way as for ++@option{-mtune=neoverse-512tvb}. ++ + @item -moverride=@var{string} + @opindex moverride + Override tuning decisions made by the back-end in response to a +-- +2.25.1 + diff --git a/SOURCES/gcc8-add-Wbidirectional.patch b/SOURCES/gcc8-add-Wbidirectional.patch deleted file mode 100644 index b907335..0000000 --- a/SOURCES/gcc8-add-Wbidirectional.patch +++ /dev/null @@ -1,1263 +0,0 @@ -diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt -index f591b39be5a..3e9a97f4ae2 100644 ---- a/gcc/c-family/c.opt -+++ b/gcc/c-family/c.opt -@@ -334,6 +334,30 @@ Wbad-function-cast - C ObjC Var(warn_bad_function_cast) Warning - Warn about casting functions to incompatible types. - -+Wbidirectional -+C ObjC C++ ObjC++ Warning Alias(Wbidirectional=,any,none) -+; -+ -+Wbidirectional= -+C ObjC C++ ObjC++ RejectNegative Joined Warning CPP(cpp_warn_bidirectional) CppReason(CPP_W_BIDIRECTIONAL) Var(warn_bidirectional) Init(bidirectional_unpaired) Enum(cpp_bidirectional_level) -+-Wbidirectional=[none|unpaired|any] Warn about UTF-8 bidirectional characters. -+ -+; Required for these enum values. 
-+SourceInclude -+cpplib.h -+ -+Enum -+Name(cpp_bidirectional_level) Type(int) UnknownError(argument %qs to %<-Wbidirectional%> not recognized) -+ -+EnumValue -+Enum(cpp_bidirectional_level) String(none) Value(bidirectional_none) -+ -+EnumValue -+Enum(cpp_bidirectional_level) String(unpaired) Value(bidirectional_unpaired) -+ -+EnumValue -+Enum(cpp_bidirectional_level) String(any) Value(bidirectional_any) -+ - Wbool-compare - C ObjC C++ ObjC++ Var(warn_bool_compare) Warning LangEnabledBy(C ObjC C++ ObjC++,Wall) - Warn about boolean expression compared with an integer value different from true/false. -diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi -index 78ca7738df2..7342dd2e142 100644 ---- a/gcc/doc/invoke.texi -+++ b/gcc/doc/invoke.texi -@@ -264,7 +264,9 @@ Objective-C and Objective-C++ Dialects}. - -Walloc-zero -Walloc-size-larger-than=@var{n} - -Walloca -Walloca-larger-than=@var{n} @gol - -Wno-aggressive-loop-optimizations -Warray-bounds -Warray-bounds=@var{n} @gol ---Wno-attributes -Wbool-compare -Wbool-operation @gol -+-Wno-attributes @gol -+-Wbidirectional=@r{[}none@r{|}unpaired@r{|}any@r{]} @gol -+-Wbool-compare -Wbool-operation @gol - -Wno-builtin-declaration-mismatch @gol - -Wno-builtin-macro-redefined -Wc90-c99-compat -Wc99-c11-compat @gol - -Wc++-compat -Wc++11-compat -Wc++14-compat @gol -@@ -5606,6 +5608,21 @@ Warn about declarations using the @code{alias} and similar attributes whose - target is incompatible with the type of the alias. @xref{Function Attributes, - ,Declaring Attributes of Functions}. - -+@item -Wbidirectional=@r{[}none@r{|}unpaired@r{|}any@r{]} -+@opindex Wbidirectional= -+@opindex Wbidirectional -+@opindex Wno-bidirectional -+Warn about UTF-8 bidirectional characters. Such characters can change -+left-to-right writing direction into right-to-left (and vice versa), -+which can cause confusion between the logical order and visual order. 
-+This may be dangerous; for instance, it may seem that a piece of code -+is not commented out, whereas it in fact is. -+ -+There are three levels of warning supported by GCC@. The default is -+@option{-Wbidirectional=unpaired}, which warns about improperly terminated -+bidi contexts. @option{-Wbidirectional=none} turns the warning off. -+@option{-Wbidirectional=any} warns about any use of bidirectional characters. -+ - @item -Wbool-compare - @opindex Wno-bool-compare - @opindex Wbool-compare -diff --git a/gcc/testsuite/c-c++-common/Wbidirectional-1.c b/gcc/testsuite/c-c++-common/Wbidirectional-1.c -new file mode 100644 -index 00000000000..750de81fdd8 ---- /dev/null -+++ b/gcc/testsuite/c-c++-common/Wbidirectional-1.c -@@ -0,0 +1,11 @@ -+/* { dg-do compile } */ -+ -+int main() { -+ int isAdmin = 0; -+ /*‮ } ⁦if (isAdmin)⁩ ⁦ begin admins only */ -+/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */ -+ __builtin_printf("You are an admin.\n"); -+ /* end admins only ‮ { ⁦*/ -+/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */ -+ return 0; -+} -diff --git a/gcc/testsuite/c-c++-common/Wbidirectional-10.c b/gcc/testsuite/c-c++-common/Wbidirectional-10.c -new file mode 100644 -index 00000000000..cd4abeeefbd ---- /dev/null -+++ b/gcc/testsuite/c-c++-common/Wbidirectional-10.c -@@ -0,0 +1,27 @@ -+/* { dg-do compile } */ -+/* { dg-options "-Wbidirectional=unpaired" } */ -+/* More nesting testing. 
*/ -+ -+/* RLE‫ LRI⁦ PDF‬ PDI⁩*/ -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+int LRE_\u202a_PDF_\u202c; -+int LRE_\u202a_PDF_\u202c_LRE_\u202a_PDF_\u202c; -+int LRE_\u202a_LRI_\u2066_PDF_\u202c_PDI_\u2069; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+int RLE_\u202b_RLI_\u2067_PDF_\u202c_PDI_\u2069; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+int RLE_\u202b_RLI_\u2067_PDI_\u2069_PDF_\u202c; -+int FSI_\u2068_LRO_\u202d_PDI_\u2069_PDF_\u202c; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+int FSI_\u2068; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+int FSI_\u2068_PDI_\u2069; -+int FSI_\u2068_FSI_\u2068_PDI_\u2069; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069; -+int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDF_\u202c; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_FSI_\u2068_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -diff --git a/gcc/testsuite/c-c++-common/Wbidirectional-11.c b/gcc/testsuite/c-c++-common/Wbidirectional-11.c -new file mode 100644 -index 00000000000..89ed6e8affc ---- /dev/null -+++ b/gcc/testsuite/c-c++-common/Wbidirectional-11.c -@@ -0,0 +1,8 @@ -+/* { dg-do compile } */ -+/* { dg-options "-Wbidirectional=unpaired" } */ -+/* Test that we warn when mixing UCN and UTF-8. 
*/ -+ -+const char *s1 = "LRE_‪_PDF_\u202c"; -+/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */ -+const char *s2 = "LRE_\u202a_PDF_‬"; -+/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */ -diff --git a/gcc/testsuite/c-c++-common/Wbidirectional-12.c b/gcc/testsuite/c-c++-common/Wbidirectional-12.c -new file mode 100644 -index 00000000000..20d1566401a ---- /dev/null -+++ b/gcc/testsuite/c-c++-common/Wbidirectional-12.c -@@ -0,0 +1,18 @@ -+/* { dg-do compile { target { c || c++11 } } } */ -+/* { dg-options "-Wbidirectional=any" } */ -+/* Test raw strings. */ -+ -+const char *s1 = R"(a b c LRE‪ 1 2 3 PDF‬ x y z)"; -+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ -+const char *s2 = R"(a b c RLE‫ 1 2 3 PDF‬ x y z)"; -+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ -+const char *s3 = R"(a b c LRO‭ 1 2 3 PDF‬ x y z)"; -+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ -+const char *s4 = R"(a b c RLO‮ 1 2 3 PDF‬ x y z)"; -+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ -+const char *s7 = R"(a b c FSI⁨ 1 2 3 PDI⁩ x y) z"; -+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */ -+const char *s8 = R"(a b c PDI⁩ x y )z"; -+/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */ -+const char *s9 = R"(a b c PDF‬ x y z)"; -+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */ -diff --git a/gcc/testsuite/c-c++-common/Wbidirectional-13.c b/gcc/testsuite/c-c++-common/Wbidirectional-13.c -new file mode 100644 -index 00000000000..08010e3b37b ---- /dev/null -+++ b/gcc/testsuite/c-c++-common/Wbidirectional-13.c -@@ -0,0 +1,16 @@ -+/* { dg-do compile { target { c || c++11 } } } */ -+/* { dg-options "-Wbidirectional=unpaired" } */ -+/* Test raw strings. 
*/ -+ -+const char *s1 = R"(a b c LRE‪ 1 2 3)"; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+const char *s2 = R"(a b c RLE‫ 1 2 3)"; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+const char *s3 = R"(a b c LRO‭ 1 2 3)"; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+const char *s4 = R"(a b c FSI⁨ 1 2 3)"; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+const char *s5 = R"(a b c LRI⁦ 1 2 3)"; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+const char *s6 = R"(a b c RLI⁧ 1 2 3)"; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -diff --git a/gcc/testsuite/c-c++-common/Wbidirectional-2.c b/gcc/testsuite/c-c++-common/Wbidirectional-2.c -new file mode 100644 -index 00000000000..4e04202e058 ---- /dev/null -+++ b/gcc/testsuite/c-c++-common/Wbidirectional-2.c -@@ -0,0 +1,8 @@ -+/* { dg-do compile } */ -+ -+int main() { -+ /* Say hello; newline⁧/*/ return 0 ; -+/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */ -+ __builtin_printf("Hello world.\n"); -+ return 0; -+} -diff --git a/gcc/testsuite/c-c++-common/Wbidirectional-3.c b/gcc/testsuite/c-c++-common/Wbidirectional-3.c -new file mode 100644 -index 00000000000..921300e94e0 ---- /dev/null -+++ b/gcc/testsuite/c-c++-common/Wbidirectional-3.c -@@ -0,0 +1,10 @@ -+/* { dg-do compile } */ -+ -+int main() { -+ const char* access_level = "user"; -+ if (__builtin_strcmp(access_level, "user‮ ⁦// Check if admin⁩ ⁦")) { -+/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */ -+ __builtin_printf("You are an admin.\n"); -+ } -+ return 0; -+} -diff --git a/gcc/testsuite/c-c++-common/Wbidirectional-4.c b/gcc/testsuite/c-c++-common/Wbidirectional-4.c -new file mode 100644 -index 00000000000..bdf334d9986 ---- /dev/null -+++ b/gcc/testsuite/c-c++-common/Wbidirectional-4.c -@@ -0,0 +1,149 @@ -+/* { dg-do compile } */ -+/* { dg-options "-Wbidirectional=any -Wno-multichar -Wno-overflow" } */ -+/* Test all bidi chars in various contexts (identifiers, 
comments, -+ string literals, character constants), both UCN and UTF-8. The bidi -+ chars here are properly terminated, except for the character constants. */ -+ -+/* a b c LRE‪ 1 2 3 PDF‬ x y z */ -+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ -+/* a b c RLE‫ 1 2 3 PDF‬ x y z */ -+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ -+/* a b c LRO‭ 1 2 3 PDF‬ x y z */ -+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ -+/* a b c RLO‮ 1 2 3 PDF‬ x y z */ -+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ -+/* a b c LRI⁦ 1 2 3 PDI⁩ x y z */ -+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */ -+/* a b c RLI⁧ 1 2 3 PDI⁩ x y */ -+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */ -+/* a b c FSI⁨ 1 2 3 PDI⁩ x y z */ -+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */ -+ -+/* Same but C++ comments instead. */ -+// a b c LRE‪ 1 2 3 PDF‬ x y z -+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ -+// a b c RLE‫ 1 2 3 PDF‬ x y z -+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ -+// a b c LRO‭ 1 2 3 PDF‬ x y z -+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ -+// a b c RLO‮ 1 2 3 PDF‬ x y z -+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ -+// a b c LRI⁦ 1 2 3 PDI⁩ x y z -+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */ -+// a b c RLI⁧ 1 2 3 PDI⁩ x y -+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */ -+// a b c FSI⁨ 1 2 3 PDI⁩ x y z -+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */ -+ -+/* Here we're closing an unopened context, warn when =any. 
*/ -+/* a b c PDI⁩ x y z */ -+/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */ -+/* a b c PDF‬ x y z */ -+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */ -+// a b c PDI⁩ x y z -+/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */ -+// a b c PDF‬ x y z -+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */ -+ -+void -+g1 () -+{ -+ const char *s1 = "a b c LRE‪ 1 2 3 PDF‬ x y z"; -+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ -+ const char *s2 = "a b c RLE‫ 1 2 3 PDF‬ x y z"; -+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ -+ const char *s3 = "a b c LRO‭ 1 2 3 PDF‬ x y z"; -+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ -+ const char *s4 = "a b c RLO‮ 1 2 3 PDF‬ x y z"; -+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ -+ const char *s5 = "a b c LRI⁦ 1 2 3 PDI⁩ x y z"; -+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */ -+ const char *s6 = "a b c RLI⁧ 1 2 3 PDI⁩ x y z"; -+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */ -+ const char *s7 = "a b c FSI⁨ 1 2 3 PDI⁩ x y z"; -+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */ -+ const char *s8 = "a b c PDI⁩ x y z"; -+/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */ -+ const char *s9 = "a b c PDF‬ x y z"; -+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */ -+ -+ const char *s10 = "a b c LRE\u202a 1 2 3 PDF\u202c x y z"; -+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ -+ const char *s11 = "a b c LRE\u202A 1 2 3 PDF\u202c x y z"; -+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ -+ const char *s12 = "a b c RLE\u202b 1 2 3 PDF\u202c x y z"; -+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ -+ const char *s13 = "a b c RLE\u202B 1 2 3 PDF\u202c x y z"; -+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ -+ const char *s14 = "a b c LRO\u202d 1 2 3 PDF\u202c x y z"; -+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ -+ const char *s15 = "a b c LRO\u202D 1 2 3 PDF\u202c x y z"; -+/* { dg-warning 
"U\\+202D" "" { target *-*-* } .-1 } */ -+ const char *s16 = "a b c RLO\u202e 1 2 3 PDF\u202c x y z"; -+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ -+ const char *s17 = "a b c RLO\u202E 1 2 3 PDF\u202c x y z"; -+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ -+ const char *s18 = "a b c LRI\u2066 1 2 3 PDI\u2069 x y z"; -+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */ -+ const char *s19 = "a b c RLI\u2067 1 2 3 PDI\u2069 x y z"; -+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */ -+ const char *s20 = "a b c FSI\u2068 1 2 3 PDI\u2069 x y z"; -+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */ -+} -+ -+void -+g2 () -+{ -+ const char c1 = '\u202a'; -+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ -+ const char c2 = '\u202A'; -+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ -+ const char c3 = '\u202b'; -+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ -+ const char c4 = '\u202B'; -+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ -+ const char c5 = '\u202d'; -+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ -+ const char c6 = '\u202D'; -+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ -+ const char c7 = '\u202e'; -+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ -+ const char c8 = '\u202E'; -+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ -+ const char c9 = '\u2066'; -+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */ -+ const char c10 = '\u2067'; -+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */ -+ const char c11 = '\u2068'; -+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */ -+} -+ -+int A\u202cY; -+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */ -+int A\u202CY2; -+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */ -+ -+int d\u202ae\u202cf; -+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ -+int d\u202Ae\u202cf2; -+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ -+int d\u202be\u202cf; -+/* { dg-warning "U\\+202B" 
"" { target *-*-* } .-1 } */ -+int d\u202Be\u202cf2; -+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ -+int d\u202de\u202cf; -+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ -+int d\u202De\u202cf2; -+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ -+int d\u202ee\u202cf; -+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ -+int d\u202Ee\u202cf2; -+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ -+int d\u2066e\u2069f; -+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */ -+int d\u2067e\u2069f; -+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */ -+int d\u2068e\u2069f; -+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */ -+int X\u2069; -+/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */ -diff --git a/gcc/testsuite/c-c++-common/Wbidirectional-5.c b/gcc/testsuite/c-c++-common/Wbidirectional-5.c -new file mode 100644 -index 00000000000..08b373a1bda ---- /dev/null -+++ b/gcc/testsuite/c-c++-common/Wbidirectional-5.c -@@ -0,0 +1,149 @@ -+/* { dg-do compile } */ -+/* { dg-options "-Wbidirectional=unpaired -Wno-multichar -Wno-overflow" } */ -+/* Test all bidi chars in various contexts (identifiers, comments, -+ string literals, character constants), both UCN and UTF-8. The bidi -+ chars here are properly terminated, except for the character constants. 
*/ -+ -+/* a b c LRE‪ 1 2 3 PDF‬ x y z */ -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+/* a b c RLE‫ 1 2 3 PDF‬ x y z */ -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+/* a b c LRO‭ 1 2 3 PDF‬ x y z */ -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+/* a b c RLO‮ 1 2 3 PDF‬ x y z */ -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+/* a b c LRI⁦ 1 2 3 PDI⁩ x y z */ -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+/* a b c RLI⁧ 1 2 3 PDI⁩ x y */ -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+/* a b c FSI⁨ 1 2 3 PDI⁩ x y z */ -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+ -+/* Same but C++ comments instead. */ -+// a b c LRE‪ 1 2 3 PDF‬ x y z -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+// a b c RLE‫ 1 2 3 PDF‬ x y z -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+// a b c LRO‭ 1 2 3 PDF‬ x y z -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+// a b c RLO‮ 1 2 3 PDF‬ x y z -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+// a b c LRI⁦ 1 2 3 PDI⁩ x y z -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+// a b c RLI⁧ 1 2 3 PDI⁩ x y -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+// a b c FSI⁨ 1 2 3 PDI⁩ x y z -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+ -+/* Here we're closing an unopened context, warn when =any. 
*/ -+/* a b c PDI⁩ x y z */ -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+/* a b c PDF‬ x y z */ -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+// a b c PDI⁩ x y z -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+// a b c PDF‬ x y z -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+ -+void -+g1 () -+{ -+ const char *s1 = "a b c LRE‪ 1 2 3 PDF‬ x y z"; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s2 = "a b c RLE‫ 1 2 3 PDF‬ x y z"; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s3 = "a b c LRO‭ 1 2 3 PDF‬ x y z"; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s4 = "a b c RLO‮ 1 2 3 PDF‬ x y z"; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s5 = "a b c LRI⁦ 1 2 3 PDI⁩ x y z"; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s6 = "a b c RLI⁧ 1 2 3 PDI⁩ x y z"; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s7 = "a b c FSI⁨ 1 2 3 PDI⁩ x y z"; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s8 = "a b c PDI⁩ x y z"; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s9 = "a b c PDF‬ x y z"; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+ -+ const char *s10 = "a b c LRE\u202a 1 2 3 PDF\u202c x y z"; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s11 = "a b c LRE\u202A 1 2 3 PDF\u202c x y z"; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s12 = "a b c RLE\u202b 1 2 3 PDF\u202c x y z"; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s13 = "a b c RLE\u202B 1 2 3 PDF\u202c x y z"; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s14 = "a b c LRO\u202d 1 2 3 PDF\u202c x y z"; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s15 = "a b c LRO\u202D 1 2 3 PDF\u202c x y z"; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+ 
const char *s16 = "a b c RLO\u202e 1 2 3 PDF\u202c x y z"; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s17 = "a b c RLO\u202E 1 2 3 PDF\u202c x y z"; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s18 = "a b c LRI\u2066 1 2 3 PDI\u2069 x y z"; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s19 = "a b c RLI\u2067 1 2 3 PDI\u2069 x y z"; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s20 = "a b c FSI\u2068 1 2 3 PDI\u2069 x y z"; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+} -+ -+void -+g2 () -+{ -+ const char c1 = '\u202a'; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ const char c2 = '\u202A'; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ const char c3 = '\u202b'; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ const char c4 = '\u202B'; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ const char c5 = '\u202d'; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ const char c6 = '\u202D'; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ const char c7 = '\u202e'; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ const char c8 = '\u202E'; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ const char c9 = '\u2066'; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ const char c10 = '\u2067'; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ const char c11 = '\u2068'; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+} -+ -+int A\u202cY; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+int A\u202CY2; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+ -+int d\u202ae\u202cf; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+int d\u202Ae\u202cf2; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+int d\u202be\u202cf; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+int d\u202Be\u202cf2; -+/* { 
dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+int d\u202de\u202cf; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+int d\u202De\u202cf2; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+int d\u202ee\u202cf; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+int d\u202Ee\u202cf2; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+int d\u2066e\u2069f; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+int d\u2067e\u2069f; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+int d\u2068e\u2069f; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+int X\u2069; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -diff --git a/gcc/testsuite/c-c++-common/Wbidirectional-6.c b/gcc/testsuite/c-c++-common/Wbidirectional-6.c -new file mode 100644 -index 00000000000..c0b37c3a130 ---- /dev/null -+++ b/gcc/testsuite/c-c++-common/Wbidirectional-6.c -@@ -0,0 +1,129 @@ -+/* { dg-do compile } */ -+/* { dg-options "-Wbidirectional=unpaired" } */ -+/* Test nesting of bidi chars in various contexts. 
*/ -+ -+/* Terminated by the wrong char: */ -+/* a b c LRE‪ 1 2 3 PDI⁩ x y z */ -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+/* a b c RLE‫ 1 2 3 PDI⁩ x y z*/ -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+/* a b c LRO‭ 1 2 3 PDI⁩ x y z */ -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+/* a b c RLO‮ 1 2 3 PDI⁩ x y z */ -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+/* a b c LRI⁦ 1 2 3 PDF‬ x y z */ -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+/* a b c RLI⁧ 1 2 3 PDF‬ x y z */ -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+/* a b c FSI⁨ 1 2 3 PDF‬ x y z*/ -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ -+/* LRE‪ PDF‬ */ -+/* LRE‪ LRE‪ PDF‬ PDF‬ */ -+/* PDF‬ LRE‪ PDF‬ */ -+/* LRE‪ PDF‬ LRE‪ PDF‬ */ -+/* LRE‪ LRE‪ PDF‬ */ -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+/* PDF‬ LRE‪ */ -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ -+// a b c LRE‪ 1 2 3 PDI⁩ x y z -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+// a b c RLE‫ 1 2 3 PDI⁩ x y z*/ -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+// a b c LRO‭ 1 2 3 PDI⁩ x y z -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+// a b c RLO‮ 1 2 3 PDI⁩ x y z -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+// a b c LRI⁦ 1 2 3 PDF‬ x y z -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+// a b c RLI⁧ 1 2 3 PDF‬ x y z -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+// a b c FSI⁨ 1 2 3 PDF‬ x y z -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ -+// LRE‪ PDF‬ -+// LRE‪ LRE‪ PDF‬ PDF‬ -+// PDF‬ LRE‪ PDF‬ -+// LRE‪ PDF‬ LRE‪ PDF‬ -+// LRE‪ LRE‪ PDF‬ -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+// PDF‬ LRE‪ -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ -+void -+g1 () -+{ -+ const char *s1 = "a b c LRE‪ 1 2 3 PDI⁩ x y z"; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s2 = "a b c 
LRE\u202a 1 2 3 PDI\u2069 x y z"; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s3 = "a b c RLE‫ 1 2 3 PDI⁩ x y "; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s4 = "a b c RLE\u202b 1 2 3 PDI\u2069 x y z"; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s5 = "a b c LRO‭ 1 2 3 PDI⁩ x y z"; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s6 = "a b c LRO\u202d 1 2 3 PDI\u2069 x y z"; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s7 = "a b c RLO‮ 1 2 3 PDI⁩ x y z"; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s8 = "a b c RLO\u202e 1 2 3 PDI\u2069 x y z"; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s9 = "a b c LRI⁦ 1 2 3 PDF‬ x y z"; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s10 = "a b c LRI\u2066 1 2 3 PDF\u202c x y z"; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s11 = "a b c RLI⁧ 1 2 3 PDF‬ x y z\ -+ "; -+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ -+ const char *s12 = "a b c RLI\u2067 1 2 3 PDF\u202c x y z"; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s13 = "a b c FSI⁨ 1 2 3 PDF‬ x y z"; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s14 = "a b c FSI\u2068 1 2 3 PDF\u202c x y z"; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s15 = "PDF‬ LRE‪"; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s16 = "PDF\u202c LRE\u202a"; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s17 = "LRE‪ PDF‬"; -+ const char *s18 = "LRE\u202a PDF\u202c"; -+ const char *s19 = "LRE‪ LRE‪ PDF‬ PDF‬"; -+ const char *s20 = "LRE\u202a LRE\u202a PDF\u202c PDF\u202c"; -+ const char *s21 = "PDF‬ LRE‪ PDF‬"; -+ const char *s22 = "PDF\u202c LRE\u202a PDF\u202c"; -+ const char *s23 = "LRE‪ LRE‪ PDF‬"; -+/* { dg-warning 
"unpaired" "" { target *-*-* } .-1 } */ -+ const char *s24 = "LRE\u202a LRE\u202a PDF\u202c"; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s25 = "PDF‬ LRE‪"; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s26 = "PDF\u202c LRE\u202a"; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s27 = "PDF‬ LRE\u202a"; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s28 = "PDF\u202c LRE‪"; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+} -+ -+int A\u202aB\u2069C; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+int a\u202bB\u2069c; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+int a\u202db\u2069c2; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+int a\u202eb\u2069; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+int a\u2066b\u202c; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+int a\u2067b\u202c; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+int a\u2068b\u202c; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+int A\u202aB\u202c; -+int A\u202aA\u202aB\u202cB\u202c; -+int a_\u202C_\u202a_\u202c; -+int a_\u202a_\u202c_\u202a_\u202c_; -+int a_\u202a_\u202c_\u202a_; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -diff --git a/gcc/testsuite/c-c++-common/Wbidirectional-7.c b/gcc/testsuite/c-c++-common/Wbidirectional-7.c -new file mode 100644 -index 00000000000..f0f7b3ca14a ---- /dev/null -+++ b/gcc/testsuite/c-c++-common/Wbidirectional-7.c -@@ -0,0 +1,8 @@ -+/* { dg-do compile } */ -+/* { dg-options "-Wbidirectional=any" } */ -+/* Test we ignore UCNs in comments. 
*/ -+ -+// a b c \u202a 1 2 3 -+// a b c \u202A 1 2 3 -+/* a b c \u202a 1 2 3 */ -+/* a b c \u202A 1 2 3 */ -diff --git a/gcc/testsuite/c-c++-common/Wbidirectional-8.c b/gcc/testsuite/c-c++-common/Wbidirectional-8.c -new file mode 100644 -index 00000000000..c7d02193131 ---- /dev/null -+++ b/gcc/testsuite/c-c++-common/Wbidirectional-8.c -@@ -0,0 +1,12 @@ -+/* { dg-do compile } */ -+/* { dg-options "-Wbidirectional=any" } */ -+/* Test \u vs \U. */ -+ -+int a_\u202A; -+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ -+int a_\u202a_2; -+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ -+int a_\U0000202A_3; -+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ -+int a_\U0000202a_4; -+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ -diff --git a/gcc/testsuite/c-c++-common/Wbidirectional-9.c b/gcc/testsuite/c-c++-common/Wbidirectional-9.c -new file mode 100644 -index 00000000000..d029209babb ---- /dev/null -+++ b/gcc/testsuite/c-c++-common/Wbidirectional-9.c -@@ -0,0 +1,28 @@ -+/* { dg-do compile } */ -+/* { dg-options "-Wbidirectional=unpaired" } */ -+/* Test that we properly separate bidi contexts (comment/identifier/character -+ constant/string literal). 
*/ -+ -+/* LRE ->‪<- */ int pdf_\u202c_1; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+/* RLE ->‫<- */ int pdf_\u202c_2; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+/* LRO ->‭<- */ int pdf_\u202c_3; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+/* RLO ->‮<- */ int pdf_\u202c_4; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+/* LRI ->⁦<-*/ int pdi_\u2069_1; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+/* RLI ->⁧<- */ int pdi_\u2069_12; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+/* FSI ->⁨<- */ int pdi_\u2069_3; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ -+const char *s1 = "LRE\u202a"; /* PDF ->‬<- */ -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+/* LRE ->‪<- */ const char *s2 = "PDF\u202c"; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+const char *s3 = "LRE\u202a"; int pdf_\u202c_5; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+int lre_\u202a; const char *s4 = "PDF\u202c"; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h -index 3ad52d5e01e..cecbefd59d4 100644 ---- a/libcpp/include/cpplib.h -+++ b/libcpp/include/cpplib.h -@@ -305,6 +305,17 @@ enum cpp_normalize_level { - normalized_none - }; - -+/* The possible bidirectional characters checking levels, from least -+ restrictive to most. */ -+enum cpp_bidirectional_level { -+ /* No checking. */ -+ bidirectional_none, -+ /* Only detect unpaired uses of bidirectional characters. */ -+ bidirectional_unpaired, -+ /* Detect any use of bidirectional characters. */ -+ bidirectional_any -+}; -+ - /* This structure is nested inside struct cpp_reader, and - carries all the options visible to the command line. */ - struct cpp_options -@@ -506,6 +517,10 @@ struct cpp_options - /* True if warn about differences between C++98 and C++11. 
*/ - bool cpp_warn_cxx11_compat; - -+ /* Nonzero of bidirectional characters checking is on. See enum -+ cpp_bidirectional_level. */ -+ unsigned char cpp_warn_bidirectional; -+ - /* Dependency generation. */ - struct - { -@@ -1063,7 +1078,8 @@ enum { - CPP_W_PEDANTIC, - CPP_W_C90_C99_COMPAT, - CPP_W_CXX11_COMPAT, -- CPP_W_EXPANSION_TO_DEFINED -+ CPP_W_EXPANSION_TO_DEFINED, -+ CPP_W_BIDIRECTIONAL - }; - - /* Output a diagnostic of some kind. */ -diff --git a/libcpp/init.c b/libcpp/init.c -index ca3fbaa5c05..5c15da82ff8 100644 ---- a/libcpp/init.c -+++ b/libcpp/init.c -@@ -208,6 +208,7 @@ cpp_create_reader (enum c_lang lang, cpp_hash_table *table, - = ENABLE_CANONICAL_SYSTEM_HEADERS; - CPP_OPTION (pfile, ext_numeric_literals) = 1; - CPP_OPTION (pfile, warn_date_time) = 0; -+ CPP_OPTION (pfile, cpp_warn_bidirectional) = bidirectional_unpaired; - - /* Default CPP arithmetic to something sensible for the host for the - benefit of dumb users like fix-header. */ -diff --git a/libcpp/lex.c b/libcpp/lex.c -index a408f912c5c..3ba0a7ba818 100644 ---- a/libcpp/lex.c -+++ b/libcpp/lex.c -@@ -1164,6 +1164,284 @@ _cpp_process_line_notes (cpp_reader *pfile, int in_comment) - } - } - -+namespace bidi { -+ enum kind { -+ NONE, LRE, RLE, LRO, RLO, LRI, RLI, FSI, PDF, PDI -+ }; -+ -+ /* All the UTF-8 encodings of bidi characters start with E2. */ -+ const uchar utf8_start = 0xe2; -+ -+ /* A vector holding currently open bidi contexts. We use a char for -+ each context, its LSB is 1 if it represents a PDF context, 0 if it -+ represents a PDI context. The next bit is 1 if this context was open -+ by a bidi character written as a UCN, and 0 when it was UTF-8. */ -+ semi_embedded_vec vec; -+ -+ /* Close the whole comment/identifier/string literal/character constant -+ context. */ -+ void on_close () -+ { -+ vec.truncate (0); -+ } -+ -+ /* Pop the last element in the vector. 
*/ -+ void pop () -+ { -+ unsigned int len = vec.count (); -+ gcc_checking_assert (len > 0); -+ vec.truncate (len - 1); -+ } -+ -+ /* Return which context is currently opened. */ -+ kind current_ctx () -+ { -+ unsigned int len = vec.count (); -+ if (len == 0) -+ return NONE; -+ return (vec[len - 1] & 1) ? PDF : PDI; -+ } -+ -+ /* Return true if the current context comes from a UCN origin, that is, -+ the bidi char which started this bidi context was written as a UCN. */ -+ bool current_ctx_ucn_p () -+ { -+ unsigned int len = vec.count (); -+ gcc_checking_assert (len > 0); -+ return (vec[len - 1] >> 1) & 1; -+ } -+ -+ /* We've read a bidi char, update the current vector as necessary. */ -+ void on_char (kind k, bool ucn_p) -+ { -+ switch (k) -+ { -+ case LRE: -+ case RLE: -+ case LRO: -+ case RLO: -+ vec.push (ucn_p ? 3u : 1u); -+ break; -+ case LRI: -+ case RLI: -+ case FSI: -+ vec.push (ucn_p ? 2u : 0u); -+ break; -+ case PDF: -+ if (current_ctx () == PDF) -+ pop (); -+ break; -+ case PDI: -+ if (current_ctx () == PDI) -+ pop (); -+ break; -+ [[likely]] case NONE: -+ break; -+ default: -+ abort (); -+ } -+ } -+ -+ /* Return a descriptive string for K. */ -+ const char *to_str (kind k) -+ { -+ switch (k) -+ { -+ case LRE: -+ return "U+202A (LEFT-TO-RIGHT EMBEDDING)"; -+ case RLE: -+ return "U+202B (RIGHT-TO-LEFT EMBEDDING)"; -+ case LRO: -+ return "U+202D (LEFT-TO-RIGHT OVERRIDE)"; -+ case RLO: -+ return "U+202E (RIGHT-TO-LEFT OVERRIDE)"; -+ case LRI: -+ return "U+2066 (LEFT-TO-RIGHT ISOLATE)"; -+ case RLI: -+ return "U+2067 (RIGHT-TO-LEFT ISOLATE)"; -+ case FSI: -+ return "U+2068 (FIRST STRONG ISOLATE)"; -+ case PDF: -+ return "U+202C (POP DIRECTIONAL FORMATTING)"; -+ case PDI: -+ return "U+2069 (POP DIRECTIONAL ISOLATE)"; -+ default: -+ abort (); -+ } -+ } -+} -+ -+/* Parse a sequence of 3 bytes starting with P and return its bidi code. 
*/ -+ -+static bidi::kind -+get_bidi_utf8 (const unsigned char *const p) -+{ -+ gcc_checking_assert (p[0] == bidi::utf8_start); -+ -+ if (p[1] == 0x80) -+ switch (p[2]) -+ { -+ case 0xaa: -+ return bidi::LRE; -+ case 0xab: -+ return bidi::RLE; -+ case 0xac: -+ return bidi::PDF; -+ case 0xad: -+ return bidi::LRO; -+ case 0xae: -+ return bidi::RLO; -+ default: -+ break; -+ } -+ else if (p[1] == 0x81) -+ switch (p[2]) -+ { -+ case 0xa6: -+ return bidi::LRI; -+ case 0xa7: -+ return bidi::RLI; -+ case 0xa8: -+ return bidi::FSI; -+ case 0xa9: -+ return bidi::PDI; -+ default: -+ break; -+ } -+ -+ return bidi::NONE; -+} -+ -+/* Parse a UCN where P points just past \u or \U and return its bidi code. */ -+ -+static bidi::kind -+get_bidi_ucn (const unsigned char *p, bool is_U) -+{ -+ /* 6.4.3 Universal Character Names -+ \u hex-quad -+ \U hex-quad hex-quad -+ where \unnnn means \U0000nnnn. */ -+ -+ if (is_U) -+ { -+ if (p[0] != '0' || p[1] != '0' || p[2] != '0' || p[3] != '0') -+ return bidi::NONE; -+ /* Skip 4B so we can treat \u and \U the same below. */ -+ p += 4; -+ } -+ -+ /* All code points we are looking for start with 20xx. */ -+ if (p[0] != '2' || p[1] != '0') -+ return bidi::NONE; -+ else if (p[2] == '2') -+ switch (p[3]) -+ { -+ case 'a': -+ case 'A': -+ return bidi::LRE; -+ case 'b': -+ case 'B': -+ return bidi::RLE; -+ case 'c': -+ case 'C': -+ return bidi::PDF; -+ case 'd': -+ case 'D': -+ return bidi::LRO; -+ case 'e': -+ case 'E': -+ return bidi::RLO; -+ default: -+ break; -+ } -+ else if (p[2] == '6') -+ switch (p[3]) -+ { -+ case '6': -+ return bidi::LRI; -+ case '7': -+ return bidi::RLI; -+ case '8': -+ return bidi::FSI; -+ case '9': -+ return bidi::PDI; -+ default: -+ break; -+ } -+ -+ return bidi::NONE; -+} -+ -+/* We're closing a bidi context, that is, we've encountered a newline, -+ are closing a C-style comment, or are at the end of a string literal, -+ character constant, or identifier. 
Warn if this context was not -+ properly terminated by a PDI or PDF. P points to the last character -+ in this context. */ -+ -+static void -+maybe_warn_bidi_on_close (cpp_reader *pfile, const uchar *p) -+{ -+ if (CPP_OPTION (pfile, cpp_warn_bidirectional) == bidirectional_unpaired -+ && bidi::vec.count () > 0) -+ { -+ const source_location loc -+ = linemap_position_for_column (pfile->line_table, -+ CPP_BUF_COLUMN (pfile->buffer, p)); -+ cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0, -+ "unpaired UTF-8 bidirectional character " -+ "detected"); -+ } -+ /* We're done with this context. */ -+ bidi::on_close (); -+} -+ -+/* We're at the beginning or in the middle of an identifier/comment/string -+ literal/character constant. Warn if we've encountered a bidi character. -+ KIND says which bidi character it was; P points to it in the character -+ stream. UCN_P is true iff this bidi character was written as a UCN. */ -+ -+static void -+maybe_warn_bidi_on_char (cpp_reader *pfile, const uchar *p, bidi::kind kind, -+ bool ucn_p) -+{ -+ if (__builtin_expect (kind == bidi::NONE, 1)) -+ return; -+ -+ const unsigned char warn_bidi = CPP_OPTION (pfile, cpp_warn_bidirectional); -+ -+ if (warn_bidi != bidirectional_none) -+ { -+ const source_location loc -+ = linemap_position_for_column (pfile->line_table, -+ CPP_BUF_COLUMN (pfile->buffer, p)); -+ /* It seems excessive to warn about a PDI/PDF that is closing -+ an opened context because we've already warned about the -+ opening character. Except warn when we have a UCN x UTF-8 -+ mismatch. 
*/ -+ if (kind == bidi::current_ctx ()) -+ { -+ if (warn_bidi == bidirectional_unpaired -+ && bidi::current_ctx_ucn_p () != ucn_p) -+ cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0, -+ "UTF-8 vs UCN mismatch when closing " -+ "a context by \"%s\"", bidi::to_str (kind)); -+ } -+ else if (warn_bidi == bidirectional_any) -+ { -+ if (kind == bidi::PDF || kind == bidi::PDI) -+ cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0, -+ "\"%s\" is closing an unopened context", -+ bidi::to_str (kind)); -+ else -+ cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0, -+ "found problematic Unicode character \"%s\"", -+ bidi::to_str (kind)); -+ } -+ } -+ /* We're done with this context. */ -+ bidi::on_char (kind, ucn_p); -+} -+ - /* Skip a C-style block comment. We find the end of the comment by - seeing if an asterisk is before every '/' we encounter. Returns - nonzero if comment terminated by EOF, zero otherwise. -@@ -1175,7 +1453,8 @@ _cpp_skip_block_comment (cpp_reader *pfile) - cpp_buffer *buffer = pfile->buffer; - const uchar *cur = buffer->cur; - uchar c; -- -+ const bool warn_bidi_p = (CPP_OPTION (pfile, cpp_warn_bidirectional) -+ != bidirectional_none); - cur++; - if (*cur == '/') - cur++; -@@ -1189,7 +1468,11 @@ _cpp_skip_block_comment (cpp_reader *pfile) - if (c == '/') - { - if (cur[-2] == '*') -- break; -+ { -+ if (warn_bidi_p) -+ maybe_warn_bidi_on_close (pfile, cur); -+ break; -+ } - - /* Warn about potential nested comments, but not if the '/' - comes immediately before the true comment delimiter. 
-@@ -1208,6 +1491,8 @@ _cpp_skip_block_comment (cpp_reader *pfile) - { - unsigned int cols; - buffer->cur = cur - 1; -+ if (warn_bidi_p) -+ maybe_warn_bidi_on_close (pfile, cur); - _cpp_process_line_notes (pfile, true); - if (buffer->next_line >= buffer->rlimit) - return true; -@@ -1218,6 +1503,13 @@ _cpp_skip_block_comment (cpp_reader *pfile) - - cur = buffer->cur; - } -+ /* If this is a beginning of a UTF-8 encoding, it might be -+ a bidirectional character. */ -+ else if (__builtin_expect (c == bidi::utf8_start, 0) && warn_bidi_p) -+ { -+ bidi::kind kind = get_bidi_utf8 (cur - 1); -+ maybe_warn_bidi_on_char (pfile, cur, kind, /*ucn_p=*/false); -+ } - } - - buffer->cur = cur; -@@ -1233,9 +1525,32 @@ skip_line_comment (cpp_reader *pfile) - { - cpp_buffer *buffer = pfile->buffer; - source_location orig_line = pfile->line_table->highest_line; -+ const bool warn_bidi_p = (CPP_OPTION (pfile, cpp_warn_bidirectional) -+ != bidirectional_none); - -- while (*buffer->cur != '\n') -- buffer->cur++; -+ if (!warn_bidi_p) -+ while (*buffer->cur != '\n') -+ buffer->cur++; -+ else -+ { -+ while (*buffer->cur != '\n' -+ && *buffer->cur != bidi::utf8_start) -+ buffer->cur++; -+ if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0)) -+ { -+ while (*buffer->cur != '\n') -+ { -+ if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0)) -+ { -+ bidi::kind kind = get_bidi_utf8 (buffer->cur); -+ maybe_warn_bidi_on_char (pfile, buffer->cur, kind, -+ /*ucn_p=*/false); -+ } -+ buffer->cur++; -+ } -+ maybe_warn_bidi_on_close (pfile, buffer->cur); -+ } -+ } - - _cpp_process_line_notes (pfile, true); - return orig_line != pfile->line_table->highest_line; -@@ -1315,11 +1630,14 @@ warn_about_normalization (cpp_reader *pfile, - - /* Returns TRUE if the sequence starting at buffer->cur is invalid in - an identifier. FIRST is TRUE if this starts an identifier. 
*/ -+ - static bool - forms_identifier_p (cpp_reader *pfile, int first, - struct normalize_state *state) - { - cpp_buffer *buffer = pfile->buffer; -+ const bool warn_bidi_p = (CPP_OPTION (pfile, cpp_warn_bidirectional) -+ != bidirectional_none); - - if (*buffer->cur == '$') - { -@@ -1343,6 +1661,12 @@ forms_identifier_p (cpp_reader *pfile, int first, - { - cppchar_t s; - buffer->cur += 2; -+ if (warn_bidi_p) -+ { -+ bidi::kind kind = get_bidi_ucn (buffer->cur, -+ buffer->cur[-1] == 'U'); -+ maybe_warn_bidi_on_char (pfile, buffer->cur, kind, /*ucn_p=*/true); -+ } - if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first, - state, &s, NULL, NULL)) - return true; -@@ -1450,6 +1774,8 @@ lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn, - const uchar *cur; - unsigned int len; - unsigned int hash = HT_HASHSTEP (0, *base); -+ const bool warn_bidi_p = (CPP_OPTION (pfile, cpp_warn_bidirectional) -+ != bidirectional_none); - - cur = pfile->buffer->cur; - if (! starts_ucn) -@@ -1472,6 +1798,8 @@ lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn, - pfile->buffer->cur++; - } - } while (forms_identifier_p (pfile, false, nst)); -+ if (warn_bidi_p) -+ maybe_warn_bidi_on_close (pfile, pfile->buffer->cur); - result = _cpp_interpret_identifier (pfile, base, - pfile->buffer->cur - base); - *spelling = cpp_lookup (pfile, base, pfile->buffer->cur - base); -@@ -1673,6 +2001,8 @@ lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base, - _cpp_buff *first_buff = NULL, *last_buff = NULL; - size_t raw_prefix_start; - _cpp_line_note *note = &pfile->buffer->notes[pfile->buffer->cur_note]; -+ const bool warn_bidi_p = (CPP_OPTION (pfile, cpp_warn_bidirectional) -+ != bidirectional_none); - - type = (*base == 'L' ? CPP_WSTRING : - *base == 'U' ? 
CPP_STRING32 : -@@ -1909,8 +2239,16 @@ lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base, - cur = base = pfile->buffer->cur; - note = &pfile->buffer->notes[pfile->buffer->cur_note]; - } -+ else if (__builtin_expect ((unsigned char) c == bidi::utf8_start, 0) -+ && warn_bidi_p) -+ maybe_warn_bidi_on_char (pfile, cur - 1, -+ get_bidi_utf8 (cur - 1), -+ /*ucn_p=*/false); - } - -+ if (warn_bidi_p) -+ maybe_warn_bidi_on_close (pfile, cur); -+ - if (CPP_OPTION (pfile, user_literals)) - { - /* If a string format macro, say from inttypes.h, is placed touching -@@ -2005,15 +2343,28 @@ lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base) - else - terminator = '>', type = CPP_HEADER_NAME; - -+ const bool warn_bidi_p = (CPP_OPTION (pfile, cpp_warn_bidirectional) -+ != bidirectional_none); - for (;;) - { - cppchar_t c = *cur++; - - /* In #include-style directives, terminators are not escapable. */ - if (c == '\\' && !pfile->state.angled_headers && *cur != '\n') -- cur++; -+ { -+ if ((cur[0] == 'u' || cur[0] == 'U') && warn_bidi_p) -+ { -+ bidi::kind kind = get_bidi_ucn (cur + 1, cur[0] == 'U'); -+ maybe_warn_bidi_on_char (pfile, cur, kind, /*ucn_p=*/true); -+ } -+ cur++; -+ } - else if (c == terminator) -- break; -+ { -+ if (warn_bidi_p) -+ maybe_warn_bidi_on_close (pfile, cur - 1); -+ break; -+ } - else if (c == '\n') - { - cur--; -@@ -2030,6 +2381,11 @@ lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base) - } - else if (c == '\0') - saw_NUL = true; -+ else if (__builtin_expect (c == bidi::utf8_start, 0) && warn_bidi_p) -+ { -+ bidi::kind kind = get_bidi_utf8 (cur - 1); -+ maybe_warn_bidi_on_char (pfile, cur - 1, kind, /*ucn_p=*/false); -+ } - } - - if (saw_NUL && !pfile->state.skipping) diff --git a/SOURCES/gcc8-pch-tweaks.patch b/SOURCES/gcc8-pch-tweaks.patch new file mode 100644 index 0000000..6d25329 --- /dev/null +++ b/SOURCES/gcc8-pch-tweaks.patch @@ -0,0 +1,844 @@ +commit fe7c3ecff1f9c0520090a77fa824d8c5d9dbec12 +Author: 
Jakub Jelinek +Date: Fri Dec 3 11:03:30 2021 +0100 + + pch: Add support for PCH for relocatable executables [PR71934] + + So, if we want to make PCH work for PIEs, I'd say we can: + 1) add a new GTY option, say callback, which would act like + skip for non-PCH and for PCH would make us skip it but + remember for address bias translation + 2) drop the skip for tree_translation_unit_decl::language + 3) change get_unnamed_section to have const char * as + last argument instead of const void *, change + unnamed_section::data also to const char * and update + everything related to that + 4) maybe add a host hook whether it is ok to support binaries + changing addresses (the only thing I'm worried is if + some host that uses function descriptors allocates them + dynamically instead of having them somewhere in the + executable) + 5) maybe add a gengtype warning if it sees in GTY tracked + structure a function pointer without that new callback + option + + Here is 1), 2), 3) implemented. + + Note, on stdc++.h.gch/O2g.gch there are just those 10 relocations without + the second patch, with it a few more, but nothing huge. And for non-PIEs + there isn't really any extra work on the load side except freading two scalar + values and fseek. + + 2021-12-03 Jakub Jelinek + + PR pch/71934 + gcc/ + * ggc.h (gt_pch_note_callback): Declare. + * gengtype.h (enum typekind): Add TYPE_CALLBACK. + (callback_type): Declare. + * gengtype.c (dbgprint_count_type_at): Handle TYPE_CALLBACK. + (callback_type): New variable. + (process_gc_options): Add CALLBACK argument, handle callback + option. + (set_gc_used_type): Adjust process_gc_options caller, if callback, + set type to &callback_type. + (output_mangled_typename): Handle TYPE_CALLBACK. + (walk_type): Likewise. Handle callback option. + (write_types_process_field): Handle TYPE_CALLBACK. + (write_types_local_user_process_field): Likewise. + (write_types_local_process_field): Likewise. + (write_root): Likewise. + (dump_typekind): Likewise. 
+ (dump_type): Likewise. + * gengtype-state.c (type_lineloc): Handle TYPE_CALLBACK. + (state_writer::write_state_callback_type): New method. + (state_writer::write_state_type): Handle TYPE_CALLBACK. + (read_state_callback_type): New function. + (read_state_type): Handle TYPE_CALLBACK. + * ggc-common.c (callback_vec): New variable. + (gt_pch_note_callback): New function. + (gt_pch_save): Stream out gt_pch_save function address and relocation + table. + (gt_pch_restore): Stream in saved gt_pch_save function address and + relocation table and apply relocations if needed. + * doc/gty.texi (callback): Document new GTY option. + * varasm.c (get_unnamed_section): Change callback argument's type and + last argument's type from const void * to const char *. + (output_section_asm_op): Change argument's type from const void * + to const char *, remove unnecessary cast. + * tree-core.h (struct tree_translation_unit_decl): Drop GTY((skip)) + from language member. + * output.h (unnamed_section_callback): Change argument type from + const void * to const char *. + (struct unnamed_section): Use GTY((callback)) instead of GTY((skip)) + for callback member. Change data member type from const void * + to const char *. + (struct noswitch_section): Use GTY((callback)) instead of GTY((skip)) + for callback member. + (get_unnamed_section): Change callback argument's type and + last argument's type from const void * to const char *. + (output_section_asm_op): Change argument's type from const void * + to const char *. + * config/avr/avr.c (avr_output_progmem_section_asm_op): Likewise. + Remove unneeded cast. + * config/darwin.c (output_objc_section_asm_op): Change argument's type + from const void * to const char *. + * config/pa/pa.c (som_output_text_section_asm_op): Likewise. + (som_output_comdat_data_section_asm_op): Likewise. + * config/rs6000/rs6000.c (rs6000_elf_output_toc_section_asm_op): + Likewise. + (rs6000_xcoff_output_readonly_section_asm_op): Likewise. 
Instead + of dereferencing directive hardcode variable names and decide based on + whether directive is NULL or not. + (rs6000_xcoff_output_readwrite_section_asm_op): Change argument's type + from const void * to const char *. + (rs6000_xcoff_output_tls_section_asm_op): Likewise. Instead + of dereferencing directive hardcode variable names and decide based on + whether directive is NULL or not. + (rs6000_xcoff_output_toc_section_asm_op): Change argument's type + from const void * to const char *. + (rs6000_xcoff_asm_init_sections): Adjust get_unnamed_section callers. + gcc/c-family/ + * c-pch.c (struct c_pch_validity): Remove pch_init member. + (pch_init): Don't initialize v.pch_init. + (c_common_valid_pch): Don't warn and punt if .text addresses change. + libcpp/ + * include/line-map.h (class line_maps): Add GTY((callback)) to + reallocator and round_alloc_size members. + +commit 4dc6d19222581c77a174d44d97507d234fb7e39b +Author: Jakub Jelinek +Date: Mon Dec 6 11:18:58 2021 +0100 + + avr: Fix AVR build [PR71934] + + On Mon, Dec 06, 2021 at 11:00:30AM +0100, Martin Liška wrote: + > Jakub, I think the patch broke avr-linux target: + > + > g++ -fno-PIE -c -g -DIN_GCC -DCROSS_DIRECTORY_STRUCTURE -fno-exceptions -fno-rtti -fasynchronous-unwind-tables -W -Wall -Wno-narrowing -Wwrite-strings -Wcast-qual -Wno-erro + > /home/marxin/Programming/gcc/gcc/config/avr/avr.c: In function ‘void avr_output_data_section_asm_op(const void*)’: + > /home/marxin/Programming/gcc/gcc/config/avr/avr.c:10097:26: error: invalid conversion from ‘const void*’ to ‘const char*’ [-fpermissive] + + This patch fixes that. + + 2021-12-06 Jakub Jelinek + + PR pch/71934 + * config/avr/avr.c (avr_output_data_section_asm_op, + avr_output_bss_section_asm_op): Change argument type from const void * + to const char *. 
+ +diff --git a/gcc/c-family/c-pch.c b/gcc/c-family/c-pch.c +index 5da60423354..2cafa1387bb 100644 +--- a/gcc/c-family/c-pch.c ++++ b/gcc/c-family/c-pch.c +@@ -58,7 +58,6 @@ struct c_pch_validity + { + unsigned char debug_info_type; + signed char match[MATCH_SIZE]; +- void (*pch_init) (void); + size_t target_data_length; + }; + +@@ -123,7 +122,6 @@ pch_init (void) + gcc_assert (v.match[i] == *pch_matching[i].flag_var); + } + } +- v.pch_init = &pch_init; + target_validity = targetm.get_pch_validity (&v.target_data_length); + + if (fwrite (partial_pch, IDENT_LENGTH, 1, f) != 1 +@@ -287,20 +285,6 @@ c_common_valid_pch (cpp_reader *pfile, c + } + } + +- /* If the text segment was not loaded at the same address as it was +- when the PCH file was created, function pointers loaded from the +- PCH will not be valid. We could in theory remap all the function +- pointers, but no support for that exists at present. +- Since we have the same executable, it should only be necessary to +- check one function. */ +- if (v.pch_init != &pch_init) +- { +- if (cpp_get_options (pfile)->warn_invalid_pch) +- cpp_error (pfile, CPP_DL_WARNING, +- "%s: had text segment at different address", name); +- return 2; +- } +- + /* Check the target-specific validity data. */ + { + void *this_file_data = xmalloc (v.target_data_length); +diff --git a/gcc/config/avr/avr.c b/gcc/config/avr/avr.c +index 200701a583c..6ba038881d6 100644 +--- a/gcc/config/avr/avr.c ++++ b/gcc/config/avr/avr.c +@@ -10114,10 +10114,9 @@ avr_output_bss_section_asm_op (const void *data) + /* Unnamed section callback for progmem*.data sections. 
*/ + + static void +-avr_output_progmem_section_asm_op (const void *data) ++avr_output_progmem_section_asm_op (const char *data) + { +- fprintf (asm_out_file, "\t.section\t%s,\"a\",@progbits\n", +- (const char*) data); ++ fprintf (asm_out_file, "\t.section\t%s,\"a\",@progbits\n", data); + } + + +diff --git a/gcc/config/darwin.c b/gcc/config/darwin.c +index c5ba7927ce1..8ad5b26c980 100644 +--- a/gcc/config/darwin.c ++++ b/gcc/config/darwin.c +@@ -134,7 +134,7 @@ int emit_aligned_common = false; + DIRECTIVE is as for output_section_asm_op. */ + + static void +-output_objc_section_asm_op (const void *directive) ++output_objc_section_asm_op (const char *directive) + { + static bool been_here = false; + +diff --git a/gcc/config/pa/pa.c b/gcc/config/pa/pa.c +index f22d25a4066..2b10ef34061 100644 +--- a/gcc/config/pa/pa.c ++++ b/gcc/config/pa/pa.c +@@ -10009,7 +10009,7 @@ pa_arg_partial_bytes (cumulative_args_t cum_v, const function_arg_info &arg) + to the default text subspace. */ + + static void +-som_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED) ++som_output_text_section_asm_op (const char *data ATTRIBUTE_UNUSED) + { + gcc_assert (TARGET_SOM); + if (TARGET_GAS) +@@ -10053,7 +10053,7 @@ som_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED) + sections. This function is only used with SOM. */ + + static void +-som_output_comdat_data_section_asm_op (const void *data) ++som_output_comdat_data_section_asm_op (const char *data) + { + in_section = NULL; + output_section_asm_op (data); +diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c +index 945157b1c1a..34089743759 100644 +--- a/gcc/config/rs6000/rs6000.c ++++ b/gcc/config/rs6000/rs6000.c +@@ -20599,7 +20599,7 @@ rs6000_ms_bitfield_layout_p (const_tree record_type) + /* A get_unnamed_section callback, used for switching to toc_section. 
*/ + + static void +-rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED) ++rs6000_elf_output_toc_section_asm_op (const char *data ATTRIBUTE_UNUSED) + { + if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) + && TARGET_MINIMAL_TOC) +@@ -21303,35 +21303,39 @@ rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name) + points to the section string variable. */ + + static void +-rs6000_xcoff_output_readonly_section_asm_op (const void *directive) ++rs6000_xcoff_output_readonly_section_asm_op (const char *directive) + { + fprintf (asm_out_file, "\t.csect %s[RO],%s\n", +- *(const char *const *) directive, ++ directive ++ ? xcoff_private_rodata_section_name ++ : xcoff_read_only_section_name, + XCOFF_CSECT_DEFAULT_ALIGNMENT_STR); + } + + /* Likewise for read-write sections. */ + + static void +-rs6000_xcoff_output_readwrite_section_asm_op (const void *directive) ++rs6000_xcoff_output_readwrite_section_asm_op (const char *) + { + fprintf (asm_out_file, "\t.csect %s[RW],%s\n", +- *(const char *const *) directive, ++ xcoff_private_data_section_name, + XCOFF_CSECT_DEFAULT_ALIGNMENT_STR); + } + + static void +-rs6000_xcoff_output_tls_section_asm_op (const void *directive) ++rs6000_xcoff_output_tls_section_asm_op (const char *directive) + { + fprintf (asm_out_file, "\t.csect %s[TL],%s\n", +- *(const char *const *) directive, ++ directive ++ ? xcoff_private_data_section_name ++ : xcoff_tls_data_section_name, + XCOFF_CSECT_DEFAULT_ALIGNMENT_STR); + } + + /* A get_unnamed_section callback, used for switching to toc_section. 
*/ + + static void +-rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED) ++rs6000_xcoff_output_toc_section_asm_op (const char *data ATTRIBUTE_UNUSED) + { + if (TARGET_MINIMAL_TOC) + { +@@ -21358,26 +21362,26 @@ rs6000_xcoff_asm_init_sections (void) + { + read_only_data_section + = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op, +- &xcoff_read_only_section_name); ++ NULL); + + private_data_section + = get_unnamed_section (SECTION_WRITE, + rs6000_xcoff_output_readwrite_section_asm_op, +- &xcoff_private_data_section_name); ++ NULL); + + read_only_private_data_section + = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op, +- &xcoff_private_rodata_section_name); ++ ""); + + tls_data_section + = get_unnamed_section (SECTION_TLS, + rs6000_xcoff_output_tls_section_asm_op, +- &xcoff_tls_data_section_name); ++ NULL); + + tls_private_data_section + = get_unnamed_section (SECTION_TLS, + rs6000_xcoff_output_tls_section_asm_op, +- &xcoff_private_data_section_name); ++ ""); + + toc_section + = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL); +diff --git a/gcc/doc/gty.texi b/gcc/doc/gty.texi +index b996ff2c44e..ca2c8404894 100644 +--- a/gcc/doc/gty.texi ++++ b/gcc/doc/gty.texi +@@ -205,6 +205,15 @@ If @code{skip} is applied to a field, the type machinery will ignore it. + This is somewhat dangerous; the only safe use is in a union when one + field really isn't ever used. + ++@findex callback ++@item callback ++ ++@code{callback} should be applied to fields with pointer to function type ++and causes the field to be ignored similarly to @code{skip}, except when ++writing PCH and the field is non-NULL it will remember the field's address ++for relocation purposes if the process writing PCH has different load base ++from a process reading PCH. 
++ + @findex for_user + @item for_user + +diff --git a/gcc/gengtype-state.c b/gcc/gengtype-state.c +index ac9d536963f..36a96e84574 100644 +--- a/gcc/gengtype-state.c ++++ b/gcc/gengtype-state.c +@@ -57,6 +57,7 @@ type_lineloc (const_type_p ty) + case TYPE_STRING: + case TYPE_POINTER: + case TYPE_ARRAY: ++ case TYPE_CALLBACK: + return NULL; + default: + gcc_unreachable (); +@@ -171,6 +172,7 @@ private: + void write_state_version (const char *version); + void write_state_scalar_type (type_p current); + void write_state_string_type (type_p current); ++ void write_state_callback_type (type_p current); + void write_state_undefined_type (type_p current); + void write_state_struct_union_type (type_p current, const char *kindstr); + void write_state_struct_type (type_p current); +@@ -898,6 +900,20 @@ state_writer::write_state_string_type (type_p current) + fatal ("Unexpected type in write_state_string_type"); + } + ++/* Write the callback type. There is only one such thing! */ ++void ++state_writer::write_state_callback_type (type_p current) ++{ ++ if (current == &callback_type) ++ { ++ write_any_indent (0); ++ fprintf (state_file, "callback "); ++ write_state_common_type_content (current); ++ } ++ else ++ fatal ("Unexpected type in write_state_callback_type"); ++} ++ + /* Write an undefined type. */ + void + state_writer::write_state_undefined_type (type_p current) +@@ -1143,6 +1159,9 @@ state_writer::write_state_type (type_p current) + case TYPE_STRING: + write_state_string_type (current); + break; ++ case TYPE_CALLBACK: ++ write_state_callback_type (current); ++ break; + } + } + +@@ -1477,6 +1496,14 @@ read_state_string_type (type_p *type) + read_state_common_type_content (*type); + } + ++/* Read the callback_type. */ ++static void ++read_state_callback_type (type_p *type) ++{ ++ *type = &callback_type; ++ read_state_common_type_content (*type); ++} ++ + + /* Read a lang_bitmap representing a set of GCC front-end languages. 
*/ + static void +@@ -1834,6 +1861,11 @@ read_state_type (type_p *current) + next_state_tokens (1); + read_state_string_type (current); + } ++ else if (state_token_is_name (t0, "callback")) ++ { ++ next_state_tokens (1); ++ read_state_callback_type (current); ++ } + else if (state_token_is_name (t0, "undefined")) + { + *current = XCNEW (struct type); +diff --git a/gcc/gengtype.c b/gcc/gengtype.c +index a77cfd92bfa..b9daaa43689 100644 +--- a/gcc/gengtype.c ++++ b/gcc/gengtype.c +@@ -172,6 +172,7 @@ dbgprint_count_type_at (const char *fil, int lin, const char *msg, type_p t) + int nb_struct = 0, nb_union = 0, nb_array = 0, nb_pointer = 0; + int nb_lang_struct = 0; + int nb_user_struct = 0, nb_undefined = 0; ++ int nb_callback = 0; + type_p p = NULL; + for (p = t; p; p = p->next) + { +@@ -202,6 +203,9 @@ dbgprint_count_type_at (const char *fil, int lin, const char *msg, type_p t) + case TYPE_ARRAY: + nb_array++; + break; ++ case TYPE_CALLBACK: ++ nb_callback++; ++ break; + case TYPE_LANG_STRUCT: + nb_lang_struct++; + break; +@@ -217,6 +221,8 @@ dbgprint_count_type_at (const char *fil, int lin, const char *msg, type_p t) + fprintf (stderr, "@@%%@@ %d structs, %d unions\n", nb_struct, nb_union); + if (nb_pointer > 0 || nb_array > 0) + fprintf (stderr, "@@%%@@ %d pointers, %d arrays\n", nb_pointer, nb_array); ++ if (nb_callback > 0) ++ fprintf (stderr, "@@%%@@ %d callbacks\n", nb_callback); + if (nb_lang_struct > 0) + fprintf (stderr, "@@%%@@ %d lang_structs\n", nb_lang_struct); + if (nb_user_struct > 0) +@@ -495,6 +501,10 @@ struct type scalar_char = { + TYPE_SCALAR, 0, 0, 0, GC_USED, {0} + }; + ++struct type callback_type = { ++ TYPE_CALLBACK, 0, 0, 0, GC_USED, {0} ++}; ++ + /* Lists of various things. 
*/ + + pair_p typedefs = NULL; +@@ -1464,7 +1474,7 @@ static void set_gc_used (pair_p); + + static void + process_gc_options (options_p opt, enum gc_used_enum level, int *maybe_undef, +- int *length, int *skip, type_p *nested_ptr) ++ int *length, int *skip, int *callback, type_p *nested_ptr) + { + options_p o; + for (o = opt; o; o = o->next) +@@ -1478,6 +1488,8 @@ process_gc_options (options_p opt, enum gc_used_enum level, int *maybe_undef, + *length = 1; + else if (strcmp (o->name, "skip") == 0) + *skip = 1; ++ else if (strcmp (o->name, "callback") == 0) ++ *callback = 1; + else if (strcmp (o->name, "nested_ptr") == 0 + && o->kind == OPTION_NESTED) + *nested_ptr = ((const struct nested_ptr_data *) o->info.nested)->type; +@@ -1526,7 +1538,7 @@ set_gc_used_type (type_p t, enum gc_used_enum level, + type_p dummy2; + bool allow_undefined_field_types = (t->kind == TYPE_USER_STRUCT); + +- process_gc_options (t->u.s.opt, level, &dummy, &dummy, &dummy, ++ process_gc_options (t->u.s.opt, level, &dummy, &dummy, &dummy, &dummy, + &dummy2); + + if (t->u.s.base_class) +@@ -1542,9 +1554,10 @@ set_gc_used_type (type_p t, enum gc_used_enum level, + int maybe_undef = 0; + int length = 0; + int skip = 0; ++ int callback = 0; + type_p nested_ptr = NULL; + process_gc_options (f->opt, level, &maybe_undef, &length, &skip, +- &nested_ptr); ++ &callback, &nested_ptr); + + if (nested_ptr && f->type->kind == TYPE_POINTER) + set_gc_used_type (nested_ptr, GC_POINTED_TO); +@@ -1554,6 +1567,8 @@ set_gc_used_type (type_p t, enum gc_used_enum level, + set_gc_used_type (f->type->u.p, GC_MAYBE_POINTED_TO); + else if (skip) + ; /* target type is not used through this field */ ++ else if (callback) ++ f->type = &callback_type; + else + set_gc_used_type (f->type, GC_USED, allow_undefined_field_types); + } +@@ -2519,6 +2534,7 @@ output_mangled_typename (outf_p of, const_type_p t) + { + case TYPE_NONE: + case TYPE_UNDEFINED: ++ case TYPE_CALLBACK: + gcc_unreachable (); + break; + case TYPE_POINTER: +@@ 
-2719,6 +2735,8 @@ walk_type (type_p t, struct walk_type_data *d) + ; + else if (strcmp (oo->name, "for_user") == 0) + ; ++ else if (strcmp (oo->name, "callback") == 0) ++ ; + else + error_at_line (d->line, "unknown option `%s'\n", oo->name); + +@@ -2744,6 +2762,7 @@ walk_type (type_p t, struct walk_type_data *d) + { + case TYPE_SCALAR: + case TYPE_STRING: ++ case TYPE_CALLBACK: + d->process_field (t, d); + break; + +@@ -3275,6 +3294,7 @@ write_types_process_field (type_p f, const struct walk_type_data *d) + break; + + case TYPE_SCALAR: ++ case TYPE_CALLBACK: + break; + + case TYPE_ARRAY: +@@ -3820,6 +3840,7 @@ write_types_local_user_process_field (type_p f, const struct walk_type_data *d) + break; + + case TYPE_SCALAR: ++ case TYPE_CALLBACK: + break; + + case TYPE_ARRAY: +@@ -3906,6 +3927,13 @@ write_types_local_process_field (type_p f, const struct walk_type_data *d) + case TYPE_SCALAR: + break; + ++ case TYPE_CALLBACK: ++ oprintf (d->of, "%*sif ((void *)(%s) == this_obj)\n", d->indent, "", ++ d->prev_val[3]); ++ oprintf (d->of, "%*s gt_pch_note_callback (&(%s), this_obj);\n", ++ d->indent, "", d->val); ++ break; ++ + case TYPE_ARRAY: + case TYPE_NONE: + case TYPE_UNDEFINED: +@@ -4434,6 +4462,7 @@ write_root (outf_p f, pair_p v, type_p type, const char *name, int has_length, + case TYPE_UNDEFINED: + case TYPE_UNION: + case TYPE_LANG_STRUCT: ++ case TYPE_CALLBACK: + error_at_line (line, "global `%s' is unimplemented type", name); + } + } +@@ -4728,6 +4757,9 @@ dump_typekind (int indent, enum typekind kind) + case TYPE_ARRAY: + printf ("TYPE_ARRAY"); + break; ++ case TYPE_CALLBACK: ++ printf ("TYPE_CALLBACK"); ++ break; + case TYPE_LANG_STRUCT: + printf ("TYPE_LANG_STRUCT"); + break; +@@ -4894,6 +4926,7 @@ dump_type (int indent, type_p t) + t->u.scalar_is_char ? 
"true" : "false"); + break; + case TYPE_STRING: ++ case TYPE_CALLBACK: + break; + case TYPE_STRUCT: + case TYPE_UNION: +diff --git a/gcc/gengtype.h b/gcc/gengtype.h +index 8a7a54957ea..8fa7064ca85 100644 +--- a/gcc/gengtype.h ++++ b/gcc/gengtype.h +@@ -154,6 +154,9 @@ enum typekind { + TYPE_UNION, /* Type for GTY-ed discriminated unions. */ + TYPE_POINTER, /* Pointer type to GTY-ed type. */ + TYPE_ARRAY, /* Array of GTY-ed types. */ ++ TYPE_CALLBACK, /* A function pointer that needs relocation if ++ the executable has been loaded at a different ++ address. */ + TYPE_LANG_STRUCT, /* GCC front-end language specific structs. + Various languages may have homonymous but + different structs. */ +@@ -331,6 +334,9 @@ extern struct type string_type; + extern struct type scalar_nonchar; + extern struct type scalar_char; + ++/* The one and only TYPE_CALLBACK. */ ++extern struct type callback_type; ++ + /* Test if a type is a union, either a plain one or a language + specific one. */ + #define UNION_P(x) \ +diff --git a/gcc/ggc-common.c b/gcc/ggc-common.c +index b6abed1d9a2..7c998e95473 100644 +--- a/gcc/ggc-common.c ++++ b/gcc/ggc-common.c +@@ -256,6 +256,7 @@ saving_hasher::equal (const ptr_data *p1 + } + + static hash_table *saving_htab; ++static vec callback_vec; + + /* Register an object in the hash table. */ + +@@ -288,6 +289,23 @@ gt_pch_note_object (void *obj, void *not + return 1; + } + ++/* Register address of a callback pointer. */ ++void ++gt_pch_note_callback (void *obj, void *base) ++{ ++ void *ptr; ++ memcpy (&ptr, obj, sizeof (void *)); ++ if (ptr != NULL) ++ { ++ struct ptr_data *data ++ = (struct ptr_data *) ++ saving_htab->find_with_hash (base, POINTER_HASH (base)); ++ gcc_assert (data); ++ callback_vec.safe_push ((char *) data->new_addr ++ + ((char *) obj - (char *) base)); ++ } ++} ++ + /* Register an object in the hash table. 
*/ + + void +@@ -582,10 +600,20 @@ gt_pch_save (FILE *f) + ggc_pch_finish (state.d, state.f); + gt_pch_fixup_stringpool (); + ++ unsigned num_callbacks = callback_vec.length (); ++ void (*pch_save) (FILE *) = &gt_pch_save; ++ if (fwrite (&pch_save, sizeof (pch_save), 1, f) != 1 ++ || fwrite (&num_callbacks, sizeof (num_callbacks), 1, f) != 1 ++ || (num_callbacks ++ && fwrite (callback_vec.address (), sizeof (void *), num_callbacks, ++ f) != num_callbacks)) ++ fatal_error (input_location, "cannot write PCH file: %m"); ++ + XDELETE (state.ptrs); + XDELETE (this_object); + delete saving_htab; + saving_htab = NULL; ++ callback_vec.release (); + } + + /* Read the state of the compiler back in from F. */ +@@ -639,6 +667,30 @@ gt_pch_restore (FILE *f) + ggc_pch_read (f, mmi.preferred_base); + + gt_pch_restore_stringpool (); ++ ++ void (*pch_save) (FILE *); ++ unsigned num_callbacks; ++ if (fread (&pch_save, sizeof (pch_save), 1, f) != 1 ++ || fread (&num_callbacks, sizeof (num_callbacks), 1, f) != 1) ++ fatal_error (input_location, "cannot read PCH file: %m"); ++ if (pch_save != &gt_pch_save) ++ { ++ uintptr_t bias = (uintptr_t) &gt_pch_save - (uintptr_t) pch_save; ++ void **ptrs = XNEWVEC (void *, num_callbacks); ++ unsigned i; ++ ++ if (fread (ptrs, sizeof (void *), num_callbacks, f) != num_callbacks) ++ fatal_error (input_location, "cannot read PCH file: %m"); ++ for (i = 0; i < num_callbacks; ++i) ++ { ++ memcpy (&pch_save, ptrs[i], sizeof (pch_save)); ++ pch_save = (void (*) (FILE *)) ((uintptr_t) pch_save + bias); ++ memcpy (ptrs[i], &pch_save, sizeof (pch_save)); ++ } ++ XDELETE (ptrs); ++ } ++ else if (fseek (f, num_callbacks * sizeof (void *), SEEK_CUR) != 0) ++ fatal_error (input_location, "cannot read PCH file: %m"); + } + + /* Default version of HOST_HOOKS_GT_PCH_GET_ADDRESS when mmap is not present.
+diff --git a/gcc/ggc.h b/gcc/ggc.h +index 5e921d957fd..c005f7e0412 100644 +--- a/gcc/ggc.h ++++ b/gcc/ggc.h +@@ -46,6 +46,10 @@ typedef void (*gt_handle_reorder) (void *, void *, gt_pointer_operator, + /* Used by the gt_pch_n_* routines. Register an object in the hash table. */ + extern int gt_pch_note_object (void *, void *, gt_note_pointers); + ++/* Used by the gt_pch_p_* routines. Register address of a callback ++ pointer. */ ++extern void gt_pch_note_callback (void *, void *); ++ + /* Used by the gt_pch_n_* routines. Register that an object has a reorder + function. */ + extern void gt_pch_note_reorder (void *, void *, gt_handle_reorder); +diff --git a/gcc/output.h b/gcc/output.h +index 8f6f15308f4..4a23795bf7e 100644 +--- a/gcc/output.h ++++ b/gcc/output.h +@@ -456,7 +456,7 @@ struct GTY(()) named_section { + + /* A callback that writes the assembly code for switching to an unnamed + section. The argument provides callback-specific data. */ +-typedef void (*unnamed_section_callback) (const void *); ++typedef void (*unnamed_section_callback) (const char *); + + /* Information about a SECTION_UNNAMED section. */ + struct GTY(()) unnamed_section { +@@ -464,8 +464,8 @@ struct GTY(()) unnamed_section { + + /* The callback used to switch to the section, and the data that + should be passed to the callback. */ +- unnamed_section_callback GTY ((skip)) callback; +- const void *GTY ((skip)) data; ++ unnamed_section_callback GTY ((callback)) callback; ++ const char *data; + + /* The next entry in the chain of unnamed sections. */ + section *next; +@@ -489,7 +489,7 @@ struct GTY(()) noswitch_section { + struct section_common common; + + /* The callback used to assemble decls in this section. */ +- noswitch_section_callback GTY ((skip)) callback; ++ noswitch_section_callback GTY ((callback)) callback; + }; + + /* Information about a section, which may be named or unnamed. 
*/ +@@ -524,8 +524,8 @@ extern GTY(()) section *bss_noswitch_sec + extern GTY(()) section *in_section; + extern GTY(()) bool in_cold_section_p; + +-extern section *get_unnamed_section (unsigned int, void (*) (const void *), +- const void *); ++extern section *get_unnamed_section (unsigned int, void (*) (const char *), ++ const char *); + extern section *get_section (const char *, unsigned int, tree); + extern section *get_named_section (tree, const char *, int); + extern section *get_variable_section (tree, bool); +@@ -546,7 +546,7 @@ extern section *get_cdtor_priority_secti + + extern bool unlikely_text_section_p (section *); + extern void switch_to_section (section *); +-extern void output_section_asm_op (const void *); ++extern void output_section_asm_op (const char *); + + extern void record_tm_clone_pair (tree, tree); + extern void finish_tm_clone_pairs (void); +diff --git a/gcc/tree-core.h b/gcc/tree-core.h +index 8ab119dc9a2..91ae5237d7e 100644 +--- a/gcc/tree-core.h ++++ b/gcc/tree-core.h +@@ -1961,7 +1961,7 @@ struct GTY(()) tree_function_decl { + struct GTY(()) tree_translation_unit_decl { + struct tree_decl_common common; + /* Source language of this translation unit. Used for DWARF output. */ +- const char * GTY((skip(""))) language; ++ const char *language; + /* TODO: Non-optimization used to build this translation unit. */ + /* TODO: Root of a partial DWARF tree for global types and decls. */ + }; +diff --git a/gcc/varasm.c b/gcc/varasm.c +index 9315e2c6936..aff93ca5de9 100644 +--- a/gcc/varasm.c ++++ b/gcc/varasm.c +@@ -250,8 +250,8 @@ object_block_hasher::hash (object_block *old) + /* Return a new unnamed section with the given fields. 
*/ + + section * +-get_unnamed_section (unsigned int flags, void (*callback) (const void *), +- const void *data) ++get_unnamed_section (unsigned int flags, void (*callback) (const char *), ++ const char *data) + { + section *sect; + +@@ -7778,9 +7778,9 @@ file_end_indicate_split_stack (void) + a get_unnamed_section callback. */ + + void +-output_section_asm_op (const void *directive) ++output_section_asm_op (const char *directive) + { +- fprintf (asm_out_file, "%s\n", (const char *) directive); ++ fprintf (asm_out_file, "%s\n", directive); + } + + /* Emit assembly code to switch to section NEW_SECTION. Do nothing if +diff --git a/libcpp/include/line-map.h b/libcpp/include/line-map.h +index 8b5e2f82982..bc40e333579 100644 +--- a/libcpp/include/line-map.h ++++ b/libcpp/include/line-map.h +@@ -758,11 +758,11 @@ struct GTY(()) line_maps { + + /* If non-null, the allocator to use when resizing 'maps'. If null, + xrealloc is used. */ +- line_map_realloc reallocator; ++ line_map_realloc GTY((callback)) reallocator; + + /* The allocators' function used to know the actual size it + allocated, for a certain allocation size requested. */ +- line_map_round_alloc_size_func round_alloc_size; ++ line_map_round_alloc_size_func GTY((callback)) round_alloc_size; + + struct location_adhoc_data_map location_adhoc_data_map; + +diff --git a/gcc/config/avr/avr.c b/gcc/config/avr/avr.c +index 6ba038881d6..1c2f7d564e7 100644 +--- a/gcc/config/avr/avr.c ++++ b/gcc/config/avr/avr.c +@@ -10089,7 +10089,7 @@ avr_asm_asm_output_aligned_bss (FILE *file, tree decl, const char *name, + to track need of __do_copy_data. */ + + static void +-avr_output_data_section_asm_op (const void *data) ++avr_output_data_section_asm_op (const char *data) + { + avr_need_copy_data_p = true; + +@@ -10102,7 +10102,7 @@ avr_output_data_section_asm_op (const void *data) + to track need of __do_clear_bss. 
*/ + + static void +-avr_output_bss_section_asm_op (const void *data) ++avr_output_bss_section_asm_op (const char *data) + { + avr_need_clear_bss_p = true; + diff --git a/SOURCES/gcc8-pr96796.patch b/SOURCES/gcc8-pr96796.patch new file mode 100644 index 0000000..46d1d2c --- /dev/null +++ b/SOURCES/gcc8-pr96796.patch @@ -0,0 +1,254 @@ +commit 6001db79c477b03eacc7e7049560921fb54b7845 +Author: Richard Sandiford +Date: Mon Sep 7 20:15:36 2020 +0100 + + lra: Avoid cycling on certain subreg reloads [PR96796] + + This PR is about LRA cycling for a reload of the form: + + ---------------------------------------------------------------------------- + Changing pseudo 196 in operand 1 of insn 103 on equiv [r105:DI*0x8+r140:DI] + Creating newreg=287, assigning class ALL_REGS to slow/invalid mem r287 + Creating newreg=288, assigning class ALL_REGS to slow/invalid mem r288 + 103: r203:SI=r288:SI<<0x1+r196:DI#0 + REG_DEAD r196:DI + Inserting slow/invalid mem reload before: + 316: r287:DI=[r105:DI*0x8+r140:DI] + 317: r288:SI=r287:DI#0 + ---------------------------------------------------------------------------- + + The problem is with r287. We rightly give it a broad starting class of + POINTER_AND_FP_REGS (reduced from ALL_REGS by preferred_reload_class). + However, we never make forward progress towards narrowing it down to + a specific choice of class (POINTER_REGS or FP_REGS). + + I think in practice we rely on two things to narrow a reload pseudo's + class down to a specific choice: + + (1) a restricted class is specified when the pseudo is created + + This happens for input address reloads, where the class is taken + from the target's chosen base register class. It also happens + for simple REG reloads, where the class is taken from the chosen + alternative's constraints. + + (2) uses of the reload pseudo as a direct input operand + + In this case get_reload_reg tries to reuse the existing register + and narrow its class, instead of creating a new reload pseudo. 
+ + However, neither occurs here. As described above, r287 rightly + starts out with a wide choice of class, ultimately derived from + ALL_REGS, so we don't get (1). And as the comments in the PR + explain, r287 is never used as an input reload, only the subreg is, + so we don't get (2): + + ---------------------------------------------------------------------------- + Choosing alt 13 in insn 317: (0) r (1) w {*movsi_aarch64} + Creating newreg=291, assigning class FP_REGS to r291 + 317: r288:SI=r291:SI + Inserting insn reload before: + 320: r291:SI=r287:DI#0 + ---------------------------------------------------------------------------- + + IMO, in this case we should rely on the reload of insn 316 to narrow + down the class of r287. Currently we do: + + ---------------------------------------------------------------------------- + Choosing alt 7 in insn 316: (0) r (1) m {*movdi_aarch64} + Creating newreg=289 from oldreg=287, assigning class GENERAL_REGS to r289 + 316: r289:DI=[r105:DI*0x8+r140:DI] + Inserting insn reload after: + 318: r287:DI=r289:DI + --------------------------------------------------- + + i.e. we create a new pseudo register r289 and give *that* pseudo + GENERAL_REGS instead. This is because get_reload_reg only narrows + down the existing class for OP_IN and OP_INOUT, not OP_OUT. + + But if we have a reload pseudo in a reload instruction and have chosen + a specific class for the reload pseudo, I think we should simply install + it for OP_OUT reloads too, if the class is a subset of the existing class. + We will need to pick such a register whatever happens (for r289 in the + example above). And as explained in the PR, doing this actually avoids + an unnecessary move via the FP registers too. + + The patch is quite aggressive in that it does this for all reload + pseudos in all reload instructions.
I wondered about reusing the + condition for a reload move in in_class_p: + + INSN_UID (curr_insn) >= new_insn_uid_start + && curr_insn_set != NULL + && ((OBJECT_P (SET_SRC (curr_insn_set)) + && ! CONSTANT_P (SET_SRC (curr_insn_set))) + || (GET_CODE (SET_SRC (curr_insn_set)) == SUBREG + && OBJECT_P (SUBREG_REG (SET_SRC (curr_insn_set))) + && ! CONSTANT_P (SUBREG_REG (SET_SRC (curr_insn_set))))))) + + but I can't really justify that on first principles. I think we + should apply the rule consistently until we have a specific reason + for doing otherwise. + + gcc/ + PR rtl-optimization/96796 + * lra-constraints.c (in_class_p): Add a default-false + allow_all_reload_class_changes_p parameter. Do not treat + reload moves specially when the parameter is true. + (get_reload_reg): Try to narrow the class of an existing OP_OUT + reload if we're reloading a reload pseudo in a reload instruction. + + gcc/testsuite/ + PR rtl-optimization/96796 + * gcc.c-torture/compile/pr96796.c: New test. + +diff --git a/gcc/lra-constraints.c b/gcc/lra-constraints.c +index 580da9c3ed6..161b721efb1 100644 +--- a/gcc/lra-constraints.c ++++ b/gcc/lra-constraints.c +@@ -236,12 +236,17 @@ get_reg_class (int regno) + CL. Use elimination first if REG is a hard register. If REG is a + reload pseudo created by this constraints pass, assume that it will + be allocated a hard register from its allocno class, but allow that +- class to be narrowed to CL if it is currently a superset of CL. ++ class to be narrowed to CL if it is currently a superset of CL and ++ if either: ++ ++ - ALLOW_ALL_RELOAD_CLASS_CHANGES_P is true or ++ - the instruction we're processing is not a reload move. + + If NEW_CLASS is nonnull, set *NEW_CLASS to the new allocno class of + REGNO (reg), or NO_REGS if no change in its class was needed. 
*/ + static bool +-in_class_p (rtx reg, enum reg_class cl, enum reg_class *new_class) ++in_class_p (rtx reg, enum reg_class cl, enum reg_class *new_class, ++ bool allow_all_reload_class_changes_p = false) + { + enum reg_class rclass, common_class; + machine_mode reg_mode; +@@ -266,7 +271,8 @@ in_class_p (rtx reg, enum reg_class cl, enum reg_class *new_class) + typically moves that have many alternatives, and restricting + reload pseudos for one alternative may lead to situations + where other reload pseudos are no longer allocatable. */ +- || (INSN_UID (curr_insn) >= new_insn_uid_start ++ || (!allow_all_reload_class_changes_p ++ && INSN_UID (curr_insn) >= new_insn_uid_start + && curr_insn_set != NULL + && ((OBJECT_P (SET_SRC (curr_insn_set)) + && ! CONSTANT_P (SET_SRC (curr_insn_set))) +@@ -551,13 +557,12 @@ init_curr_insn_input_reloads (void) + curr_insn_input_reloads_num = 0; + } + +-/* Create a new pseudo using MODE, RCLASS, ORIGINAL or reuse already +- created input reload pseudo (only if TYPE is not OP_OUT). Don't +- reuse pseudo if IN_SUBREG_P is true and the reused pseudo should be +- wrapped up in SUBREG. The result pseudo is returned through +- RESULT_REG. Return TRUE if we created a new pseudo, FALSE if we +- reused the already created input reload pseudo. Use TITLE to +- describe new registers for debug purposes. */ ++/* Create a new pseudo using MODE, RCLASS, ORIGINAL or reuse an existing ++ reload pseudo. Don't reuse an existing reload pseudo if IN_SUBREG_P ++ is true and the reused pseudo should be wrapped up in a SUBREG. ++ The result pseudo is returned through RESULT_REG. Return TRUE if we ++ created a new pseudo, FALSE if we reused an existing reload pseudo. ++ Use TITLE to describe new registers for debug purposes. 
*/ + static bool + get_reload_reg (enum op_type type, machine_mode mode, rtx original, + enum reg_class rclass, bool in_subreg_p, +@@ -616,6 +621,35 @@ get_reload_reg (enum op_type type, machine_mode mode, rtx original, + + if (type == OP_OUT) + { ++ /* Output reload registers tend to start out with a conservative ++ choice of register class. Usually this is ALL_REGS, although ++ a target might narrow it (for performance reasons) through ++ targetm.preferred_reload_class. It's therefore quite common ++ for a reload instruction to require a more restrictive class ++ than the class that was originally assigned to the reload register. ++ ++ In these situations, it's more efficient to refine the choice ++ of register class rather than create a second reload register. ++ This also helps to avoid cycling for registers that are only ++ used by reload instructions. */ ++ if (REG_P (original) ++ && (int) REGNO (original) >= new_regno_start ++ && INSN_UID (curr_insn) >= new_insn_uid_start ++ && in_class_p (original, rclass, &new_class, true)) ++ { ++ unsigned int regno = REGNO (original); ++ if (lra_dump_file != NULL) ++ { ++ fprintf (lra_dump_file, " Reuse r%d for output ", regno); ++ dump_value_slim (lra_dump_file, original, 1); ++ } ++ if (new_class != lra_get_allocno_class (regno)) ++ lra_change_class (regno, new_class, ", change to", false); ++ if (lra_dump_file != NULL) ++ fprintf (lra_dump_file, "\n"); ++ *result_reg = original; ++ return false; ++ } + *result_reg + = lra_create_new_reg_with_unique_value (mode, original, rclass, title); + return true; +diff --git a/gcc/testsuite/gcc.c-torture/compile/pr96796.c b/gcc/testsuite/gcc.c-torture/compile/pr96796.c +new file mode 100644 +index 00000000000..8808e62fe77 +--- /dev/null ++++ b/gcc/testsuite/gcc.c-torture/compile/pr96796.c +@@ -0,0 +1,55 @@ ++/* { dg-additional-options "-fcommon" } */ ++ ++struct S0 { ++ signed f0 : 8; ++ unsigned f1; ++ unsigned f4; ++}; ++struct S1 { ++ long f3; ++ char f4; ++} g_3_4; ++ ++int 
g_5, func_1_l_32, func_50___trans_tmp_31; ++static struct S0 g_144, g_834, g_1255, g_1261; ++ ++int g_273[120] = {}; ++int *g_555; ++char **g_979; ++static int g_1092_0; ++static int g_1193; ++int safe_mul_func_int16_t_s_s(int si1, int si2) { return si1 * si2; } ++static struct S0 *func_50(); ++int func_1() { func_50(g_3_4, g_5, func_1_l_32, 8, 3); } ++void safe_div_func_int64_t_s_s(int *); ++void safe_mod_func_uint32_t_u_u(struct S0); ++struct S0 *func_50(int p_51, struct S0 p_52, struct S1 p_53, int p_54, ++ int p_55) { ++ int __trans_tmp_30; ++ char __trans_tmp_22; ++ short __trans_tmp_19; ++ long l_985_1; ++ long l_1191[8]; ++ safe_div_func_int64_t_s_s(g_273); ++ __builtin_printf((char*)g_1261.f4); ++ safe_mod_func_uint32_t_u_u(g_834); ++ g_144.f0 += 1; ++ for (;;) { ++ struct S1 l_1350 = {&l_1350}; ++ for (; p_53.f3; p_53.f3 -= 1) ++ for (; g_1193 <= 2; g_1193 += 1) { ++ __trans_tmp_19 = safe_mul_func_int16_t_s_s(l_1191[l_985_1 + p_53.f3], ++ p_55 % (**g_979 = 10)); ++ __trans_tmp_22 = g_1255.f1 * p_53.f4; ++ __trans_tmp_30 = __trans_tmp_19 + __trans_tmp_22; ++ if (__trans_tmp_30) ++ g_1261.f0 = p_51; ++ else { ++ g_1255.f0 = p_53.f3; ++ int *l_1422 = g_834.f0 = g_144.f4 != (*l_1422)++ > 0 < 0 ^ 51; ++ g_555 = ~0; ++ g_1092_0 |= func_50___trans_tmp_31; ++ } ++ } ++ } ++} diff --git a/SOURCES/gcc8-rh2028609.patch b/SOURCES/gcc8-rh2028609.patch new file mode 100644 index 0000000..379153b --- /dev/null +++ b/SOURCES/gcc8-rh2028609.patch @@ -0,0 +1,101 @@ +The cprop_hardreg pass is built around the assumption that accessing a +register in a narrower mode is the same as accessing the lowpart of +the register. This unfortunately is not true for vector registers on +IBM Z. This caused a miscompile of LLVM with GCC 8.5. The problem +could not be reproduced with upstream GCC unfortunately but we have to +assume that it is latent there. The right fix would require +substantial changes to the cprop pass and is certainly something we +would want for our platform. 
But since this would not be acceptable +for older GCCs I'll go with what Vladimir proposed in the RedHat BZ +and introduce a hopefully temporary and undocumented target hook to +disable that specific transformation in regcprop.c. + +--- a/gcc/config/s390/s390.c ++++ b/gcc/config/s390/s390.c +@@ -10488,6 +10488,18 @@ s390_hard_regno_mode_ok (unsigned int regno, machine_mode mode) + return false; + } + ++/* Implement TARGET_NARROW_MODE_REFERS_LOW_PART_P. */ ++ ++static bool ++s390_narrow_mode_refers_low_part_p (unsigned int regno) ++{ ++ if (reg_classes_intersect_p (VEC_REGS, REGNO_REG_CLASS (regno))) ++ return false; ++ ++ return true; ++} ++ ++ + /* Implement TARGET_MODES_TIEABLE_P. */ + + static bool +@@ -16956,6 +16968,9 @@ s390_case_values_threshold (void) + #undef TARGET_CASE_VALUES_THRESHOLD + #define TARGET_CASE_VALUES_THRESHOLD s390_case_values_threshold + ++#undef TARGET_NARROW_MODE_REFERS_LOW_PART_P ++#define TARGET_NARROW_MODE_REFERS_LOW_PART_P s390_narrow_mode_refers_low_part_p ++ + struct gcc_target targetm = TARGET_INITIALIZER; + + #include "gt-s390.h" +--- a/gcc/regcprop.c ++++ b/gcc/regcprop.c +@@ -426,7 +426,8 @@ maybe_mode_change (machine_mode orig_mode, machine_mode copy_mode, + + if (orig_mode == new_mode) + return gen_raw_REG (new_mode, regno); +- else if (mode_change_ok (orig_mode, new_mode, regno)) ++ else if (mode_change_ok (orig_mode, new_mode, regno) ++ && targetm.narrow_mode_refers_low_part_p (copy_regno)) + { + int copy_nregs = hard_regno_nregs (copy_regno, copy_mode); + int use_nregs = hard_regno_nregs (copy_regno, new_mode); +--- a/gcc/target.def ++++ b/gcc/target.def +@@ -5446,6 +5446,16 @@ value that the middle-end intended.", + bool, (machine_mode from, machine_mode to, reg_class_t rclass), + hook_bool_mode_mode_reg_class_t_true) + ++/* This hook is used to work around a problem in regcprop. 
Hardcoded ++assumptions currently prevent it from working correctly for targets ++where the low part of a multi-word register doesn't align to accessing ++the register with a narrower mode. */ ++DEFHOOK_UNDOC ++(narrow_mode_refers_low_part_p, ++"", ++bool, (unsigned int regno), ++hook_bool_uint_true) ++ + /* Change pseudo allocno class calculated by IRA. */ + DEFHOOK + (ira_change_pseudo_allocno_class, +--- a/gcc/hooks.h ++++ b/gcc/hooks.h +@@ -86,6 +86,7 @@ extern void hook_void_tree (tree); + extern void hook_void_tree_treeptr (tree, tree *); + extern void hook_void_int_int (int, int); + extern void hook_void_gcc_optionsp (struct gcc_options *); ++extern bool hook_bool_uint_true (unsigned int); + extern bool hook_bool_uint_uintp_false (unsigned int, unsigned int *); + + extern int hook_int_uint_mode_1 (unsigned int, machine_mode); +--- a/gcc/hooks.c ++++ b/gcc/hooks.c +@@ -498,6 +498,14 @@ hook_void_gcc_optionsp (struct gcc_optio + { + } + ++/* Generic hook that takes an unsigned int and returns true. */ ++ ++bool ++hook_bool_uint_true (unsigned int) ++{ ++ return true; ++} ++ + /* Generic hook that takes an unsigned int, an unsigned int pointer and + returns false. */ + diff --git a/SPECS/gcc.spec b/SPECS/gcc.spec index ea4f09c..e029f89 100644 --- a/SPECS/gcc.spec +++ b/SPECS/gcc.spec @@ -4,7 +4,7 @@ %global gcc_major 8 # Note, gcc_release must be integer, if you want to add suffixes to # %%{release}, append them after %%{gcc_release} on Release: line. 
-%global gcc_release 4 +%global gcc_release 10 %global nvptx_tools_gitrev c28050f60193b3b95a18866a96f03334e874e78f %global nvptx_newlib_gitrev aadc8eb0ec43b7cd0dd2dfb484bae63c8b05ef24 %global _unpackaged_files_terminate_build 0 @@ -279,7 +279,11 @@ Patch18: gcc8-remove-old-demangle.patch Patch19: gcc8-rh1960701.patch Patch20: gcc8-pr100797.patch Patch21: gcc8-rh1981822.patch -Patch22: gcc8-add-Wbidirectional.patch +Patch22: gcc8-Wbidi-chars.patch +Patch23: gcc8-pr96796.patch +Patch24: gcc8-pch-tweaks.patch +Patch25: gcc8-aarch64-mtune-neoverse-512tvb.patch +Patch26: gcc8-rh2028609.patch Patch30: gcc8-rh1668903-1.patch Patch31: gcc8-rh1668903-2.patch @@ -861,6 +865,10 @@ to NVidia PTX capable devices if available. %patch20 -p0 -b .pr100797~ %patch21 -p0 -b .rh1981822~ %patch22 -p1 -b .bidi~ +%patch23 -p1 -b .pr96796~ +%patch24 -p1 -b .pch-tweaks~ +%patch25 -p1 -b .neoverse~ +%patch26 -p1 -b .rh2028609~ %patch30 -p0 -b .rh1668903-1~ %patch31 -p0 -b .rh1668903-2~ @@ -1359,7 +1367,7 @@ mkdir -p %{buildroot}/%{_lib} mv -f %{buildroot}%{_prefix}/%{_lib}/libgcc_s.so.1 %{buildroot}/%{_lib}/libgcc_s-%{gcc_major}-%{DATE}.so.1 chmod 755 %{buildroot}/%{_lib}/libgcc_s-%{gcc_major}-%{DATE}.so.1 ln -sf libgcc_s-%{gcc_major}-%{DATE}.so.1 %{buildroot}/%{_lib}/libgcc_s.so.1 -%ifarch %{ix86} x86_64 ppc ppc64 ppc64p7 ppc64le %{arm} +%ifarch %{ix86} x86_64 ppc ppc64 ppc64p7 ppc64le %{arm} aarch64 rm -f $FULLPATH/libgcc_s.so echo '/* GNU ld script Use the shared library, but some functions are only in @@ -3177,8 +3185,26 @@ fi %endif %changelog -* Fri Oct 29 2021 Marek Polacek 8.5.0-4 -- add -Wbidirectional patch (#2008391) +* Thu Jan 27 2022 Marek Polacek 8.5.0-10 +- fix typo in the cprop_hardreg patch (#2028609) + +* Mon Jan 24 2022 Marek Polacek 8.5.0-9 +- apply cprop_hardreg fix for narrow mode != lowpart targets (#2028609) + +* Mon Jan 24 2022 Marek Polacek 8.5.0-8 +- aarch64: Add -mtune=neoverse-512tvb (#1845932) + +* Fri Dec 10 2021 Marek Polacek 8.5.0-7 +- backport PCH tweaks 
(#2030878) + +* Fri Dec 3 2021 Marek Polacek 8.5.0-6 +- avoid cycling on certain subreg reloads (PR rtl-optimization/96796, #2028798) + +* Tue Nov 30 2021 Marek Polacek 8.5.0-5 +- when linking against libgcc_s, link libgcc.a too (#2022588) + +* Thu Nov 18 2021 Marek Polacek 8.5.0-4 +- add -Wbidi-chars patch (#2008392) * Tue Jul 13 2021 Marek Polacek 8.5.0-3 - fix mangling of lambdas in default args (PR c++/91241, #1981822)