dc03fd
commit 51c500269bf53749b107807d84271385fad35628
dc03fd
Author: Marek Polacek <polacek@redhat.com>
dc03fd
Date:   Wed Oct 6 14:33:59 2021 -0400
dc03fd
dc03fd
    libcpp: Implement -Wbidi-chars for CVE-2021-42574 [PR103026]
dc03fd
    
dc03fd
    From a link below:
dc03fd
    "An issue was discovered in the Bidirectional Algorithm in the Unicode
dc03fd
    Specification through 14.0. It permits the visual reordering of
dc03fd
    characters via control sequences, which can be used to craft source code
dc03fd
    that renders different logic than the logical ordering of tokens
dc03fd
    ingested by compilers and interpreters. Adversaries can leverage this to
dc03fd
    encode source code for compilers accepting Unicode such that targeted
dc03fd
    vulnerabilities are introduced invisibly to human reviewers."
dc03fd
    
dc03fd
    More info:
dc03fd
    https://nvd.nist.gov/vuln/detail/CVE-2021-42574
dc03fd
    https://trojansource.codes/
dc03fd
    
dc03fd
    This is not a compiler bug.  However, to mitigate the problem, this patch
dc03fd
    implements -Wbidi-chars=[none|unpaired|any] to warn about possibly
dc03fd
    misleading Unicode bidirectional control characters the preprocessor may
dc03fd
    encounter.
dc03fd
    
dc03fd
    The default is =unpaired, which warns about improperly terminated
dc03fd
    bidirectional control characters; e.g. a LRE without its corresponding PDF.
dc03fd
    The level =any warns about any use of bidirectional control characters.
dc03fd
    
dc03fd
    This patch handles both UCNs and UTF-8 characters.  UCNs designating
dc03fd
    bidi characters in identifiers have been accepted since r204886.  Then r217144
dc03fd
    enabled -fextended-identifiers by default.  Extended characters in C/C++
dc03fd
    identifiers have been accepted since r275979.  However, this patch still
dc03fd
    warns about mixing UTF-8 and UCN bidi characters; there seems to be no
dc03fd
    good reason to allow mixing them.
dc03fd
    
dc03fd
    We warn in different contexts: comments (both C and C++-style), string
dc03fd
    literals, character constants, and identifiers.  As expected, UCNs are ignored
dc03fd
    in comments and raw string literals.  The bidirectional control characters
dc03fd
    can nest so this patch handles that as well.
dc03fd
    
dc03fd
    I have neither included nor tested this with Fortran (which also has
dc03fd
    string literals and line comments).
dc03fd
    
dc03fd
    Dave M. posted patches improving diagnostics involving Unicode characters.
dc03fd
    This patch does not make use of this new infrastructure yet.
dc03fd
    
dc03fd
            PR preprocessor/103026
dc03fd
    
dc03fd
    gcc/c-family/ChangeLog:
dc03fd
    
dc03fd
            * c.opt (Wbidi-chars, Wbidi-chars=): New options.
dc03fd
    
dc03fd
    gcc/ChangeLog:
dc03fd
    
dc03fd
            * doc/invoke.texi: Document -Wbidi-chars.
dc03fd
    
dc03fd
    libcpp/ChangeLog:
dc03fd
    
dc03fd
            * include/cpplib.h (enum cpp_bidirectional_level): New.
dc03fd
            (struct cpp_options): Add cpp_warn_bidirectional.
dc03fd
            (enum cpp_warning_reason): Add CPP_W_BIDIRECTIONAL.
dc03fd
            * internal.h (struct cpp_reader): Add warn_bidi_p member
dc03fd
            function.
dc03fd
            * init.c (cpp_create_reader): Set cpp_warn_bidirectional.
dc03fd
            * lex.c (bidi): New namespace.
dc03fd
            (get_bidi_utf8): New function.
dc03fd
            (get_bidi_ucn): Likewise.
dc03fd
            (maybe_warn_bidi_on_close): Likewise.
dc03fd
            (maybe_warn_bidi_on_char): Likewise.
dc03fd
            (_cpp_skip_block_comment): Implement warning about bidirectional
dc03fd
            control characters.
dc03fd
            (skip_line_comment): Likewise.
dc03fd
            (forms_identifier_p): Likewise.
dc03fd
            (lex_identifier): Likewise.
dc03fd
            (lex_string): Likewise.
dc03fd
            (lex_raw_string): Likewise.
dc03fd
    
dc03fd
    gcc/testsuite/ChangeLog:
dc03fd
    
dc03fd
            * c-c++-common/Wbidi-chars-1.c: New test.
dc03fd
            * c-c++-common/Wbidi-chars-2.c: New test.
dc03fd
            * c-c++-common/Wbidi-chars-3.c: New test.
dc03fd
            * c-c++-common/Wbidi-chars-4.c: New test.
dc03fd
            * c-c++-common/Wbidi-chars-5.c: New test.
dc03fd
            * c-c++-common/Wbidi-chars-6.c: New test.
dc03fd
            * c-c++-common/Wbidi-chars-7.c: New test.
dc03fd
            * c-c++-common/Wbidi-chars-8.c: New test.
dc03fd
            * c-c++-common/Wbidi-chars-9.c: New test.
dc03fd
            * c-c++-common/Wbidi-chars-10.c: New test.
dc03fd
            * c-c++-common/Wbidi-chars-11.c: New test.
dc03fd
            * c-c++-common/Wbidi-chars-12.c: New test.
dc03fd
            * c-c++-common/Wbidi-chars-13.c: New test.
dc03fd
            * c-c++-common/Wbidi-chars-14.c: New test.
dc03fd
            * c-c++-common/Wbidi-chars-15.c: New test.
dc03fd
            * c-c++-common/Wbidi-chars-16.c: New test.
dc03fd
            * c-c++-common/Wbidi-chars-17.c: New test.
dc03fd
221e4e
diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
dc03fd
index f591b39be5a..cf922812198 100644
221e4e
--- a/gcc/c-family/c.opt
221e4e
+++ b/gcc/c-family/c.opt
221e4e
@@ -334,6 +334,30 @@ Wbad-function-cast
221e4e
 C ObjC Var(warn_bad_function_cast) Warning
221e4e
 Warn about casting functions to incompatible types.
221e4e
 
dc03fd
+Wbidi-chars
dc03fd
+C ObjC C++ ObjC++ Warning Alias(Wbidi-chars=,any,none)
221e4e
+;
221e4e
+
dc03fd
+Wbidi-chars=
221e4e
+C ObjC C++ ObjC++ RejectNegative Joined Warning CPP(cpp_warn_bidirectional) CppReason(CPP_W_BIDIRECTIONAL) Var(warn_bidirectional) Init(bidirectional_unpaired) Enum(cpp_bidirectional_level)
dc03fd
+-Wbidi-chars=[none|unpaired|any] Warn about UTF-8 bidirectional control characters.
221e4e
+
221e4e
+; Required for these enum values.
221e4e
+SourceInclude
221e4e
+cpplib.h
221e4e
+
221e4e
+Enum
dc03fd
+Name(cpp_bidirectional_level) Type(int) UnknownError(argument %qs to %<-Wbidi-chars%> not recognized)
221e4e
+
221e4e
+EnumValue
221e4e
+Enum(cpp_bidirectional_level) String(none) Value(bidirectional_none)
221e4e
+
221e4e
+EnumValue
221e4e
+Enum(cpp_bidirectional_level) String(unpaired) Value(bidirectional_unpaired)
221e4e
+
221e4e
+EnumValue
221e4e
+Enum(cpp_bidirectional_level) String(any) Value(bidirectional_any)
221e4e
+
221e4e
 Wbool-compare
221e4e
 C ObjC C++ ObjC++ Var(warn_bool_compare) Warning LangEnabledBy(C ObjC C++ ObjC++,Wall)
221e4e
 Warn about boolean expression compared with an integer value different from true/false.
221e4e
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
dc03fd
index 78ca7738df2..cc85c53aede 100644
221e4e
--- a/gcc/doc/invoke.texi
221e4e
+++ b/gcc/doc/invoke.texi
dc03fd
@@ -264,7 +264,8 @@ Objective-C and Objective-C++ Dialects}.
221e4e
 -Walloc-zero  -Walloc-size-larger-than=@var{n}
221e4e
 -Walloca  -Walloca-larger-than=@var{n} @gol
221e4e
 -Wno-aggressive-loop-optimizations  -Warray-bounds  -Warray-bounds=@var{n} @gol
221e4e
--Wno-attributes  -Wbool-compare  -Wbool-operation @gol
dc03fd
+-Wno-attributes  -Wbidi-chars=@r{[}none@r{|}unpaired@r{|}any@r{]} @gol
221e4e
+-Wbool-compare  -Wbool-operation @gol
221e4e
 -Wno-builtin-declaration-mismatch @gol
221e4e
 -Wno-builtin-macro-redefined  -Wc90-c99-compat  -Wc99-c11-compat @gol
221e4e
 -Wc++-compat  -Wc++11-compat  -Wc++14-compat  @gol
dc03fd
@@ -5606,6 +5607,23 @@ Warn about declarations using the @code{alias} and similar attributes whose
221e4e
 target is incompatible with the type of the alias.  @xref{Function Attributes,
221e4e
 ,Declaring Attributes of Functions}.
221e4e
 
dc03fd
+@item -Wbidi-chars=@r{[}none@r{|}unpaired@r{|}any@r{]}
dc03fd
+@opindex Wbidi-chars=
dc03fd
+@opindex Wbidi-chars
dc03fd
+@opindex Wno-bidi-chars
dc03fd
+Warn about possibly misleading UTF-8 bidirectional control characters in
dc03fd
+comments, string literals, character constants, and identifiers.  Such
dc03fd
+characters can change left-to-right writing direction into right-to-left
dc03fd
+(and vice versa), which can cause confusion between the logical order and
dc03fd
+visual order.  This may be dangerous; for instance, it may seem that a piece
dc03fd
+of code is not commented out, whereas it in fact is.
221e4e
+
221e4e
+There are three levels of warning supported by GCC@.  The default is
dc03fd
+@option{-Wbidi-chars=unpaired}, which warns about improperly terminated
dc03fd
+bidi contexts.  @option{-Wbidi-chars=none} turns the warning off.
dc03fd
+@option{-Wbidi-chars=any} warns about any use of bidirectional control
dc03fd
+characters.
221e4e
+
221e4e
 @item -Wbool-compare
221e4e
 @opindex Wno-bool-compare
221e4e
 @opindex Wbool-compare
dc03fd
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-1.c b/gcc/testsuite/c-c++-common/Wbidi-chars-1.c
221e4e
new file mode 100644
dc03fd
index 00000000000..34f5ac19271
221e4e
--- /dev/null
dc03fd
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-1.c
dc03fd
@@ -0,0 +1,12 @@
dc03fd
+/* PR preprocessor/103026 */
221e4e
+/* { dg-do compile } */
221e4e
+
221e4e
+int main() {
221e4e
+    int isAdmin = 0;
221e4e
+    /*‮ } ⁦if (isAdmin)⁩ ⁦ begin admins only */
221e4e
+/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */
221e4e
+        __builtin_printf("You are an admin.\n");
221e4e
+    /* end admins only ‮ { ⁦*/
221e4e
+/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */
221e4e
+    return 0;
221e4e
+}
dc03fd
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-10.c b/gcc/testsuite/c-c++-common/Wbidi-chars-10.c
221e4e
new file mode 100644
dc03fd
index 00000000000..3f851b69e65
221e4e
--- /dev/null
dc03fd
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-10.c
221e4e
@@ -0,0 +1,27 @@
dc03fd
+/* PR preprocessor/103026 */
221e4e
+/* { dg-do compile } */
dc03fd
+/* { dg-options "-Wbidi-chars=unpaired" } */
221e4e
+/* More nesting testing.  */
221e4e
+
221e4e
+/* RLE‫ LRI⁦ PDF‬ PDI⁩*/
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+int LRE_\u202a_PDF_\u202c;
221e4e
+int LRE_\u202a_PDF_\u202c_LRE_\u202a_PDF_\u202c;
221e4e
+int LRE_\u202a_LRI_\u2066_PDF_\u202c_PDI_\u2069;
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+int RLE_\u202b_RLI_\u2067_PDF_\u202c_PDI_\u2069;
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+int RLE_\u202b_RLI_\u2067_PDI_\u2069_PDF_\u202c;
221e4e
+int FSI_\u2068_LRO_\u202d_PDI_\u2069_PDF_\u202c;
221e4e
+int FSI_\u2068;
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+int FSI_\u2068_PDI_\u2069;
221e4e
+int FSI_\u2068_FSI_\u2068_PDI_\u2069;
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069;
221e4e
+int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069;
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDF_\u202c;
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_FSI_\u2068_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069;
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
dc03fd
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-11.c b/gcc/testsuite/c-c++-common/Wbidi-chars-11.c
221e4e
new file mode 100644
dc03fd
index 00000000000..44d044d82de
221e4e
--- /dev/null
dc03fd
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-11.c
dc03fd
@@ -0,0 +1,9 @@
dc03fd
+/* PR preprocessor/103026 */
221e4e
+/* { dg-do compile } */
dc03fd
+/* { dg-options "-Wbidi-chars=unpaired" } */
221e4e
+/* Test that we warn when mixing UCN and UTF-8.  */
221e4e
+
221e4e
+const char *s1 = "LRE_‪_PDF_\u202c";
221e4e
+/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */
221e4e
+const char *s2 = "LRE_\u202a_PDF_‬";
221e4e
+/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */
dc03fd
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-12.c b/gcc/testsuite/c-c++-common/Wbidi-chars-12.c
221e4e
new file mode 100644
dc03fd
index 00000000000..b07eec1da91
221e4e
--- /dev/null
dc03fd
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-12.c
dc03fd
@@ -0,0 +1,19 @@
dc03fd
+/* PR preprocessor/103026 */
221e4e
+/* { dg-do compile { target { c || c++11 } } } */
dc03fd
+/* { dg-options "-Wbidi-chars=any" } */
221e4e
+/* Test raw strings.  */
221e4e
+
221e4e
+const char *s1 = R"(a b c LRE‪ 1 2 3 PDF‬ x y z)";
221e4e
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
221e4e
+const char *s2 = R"(a b c RLE‫ 1 2 3 PDF‬ x y z)";
221e4e
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
221e4e
+const char *s3 = R"(a b c LRO‭ 1 2 3 PDF‬ x y z)";
221e4e
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
221e4e
+const char *s4 = R"(a b c RLO‮ 1 2 3 PDF‬ x y z)";
221e4e
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
221e4e
+const char *s7 = R"(a b c FSI⁨ 1 2 3 PDI⁩ x y) z";
221e4e
+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
221e4e
+const char *s8 = R"(a b c PDI⁩ x y )z";
221e4e
+/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */
221e4e
+const char *s9 = R"(a b c PDF‬ x y z)";
221e4e
+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
dc03fd
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-13.c b/gcc/testsuite/c-c++-common/Wbidi-chars-13.c
221e4e
new file mode 100644
dc03fd
index 00000000000..b2dd9fde752
221e4e
--- /dev/null
dc03fd
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-13.c
dc03fd
@@ -0,0 +1,17 @@
dc03fd
+/* PR preprocessor/103026 */
221e4e
+/* { dg-do compile { target { c || c++11 } } } */
dc03fd
+/* { dg-options "-Wbidi-chars=unpaired" } */
221e4e
+/* Test raw strings.  */
221e4e
+
221e4e
+const char *s1 = R"(a b c LRE‪ 1 2 3)";
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+const char *s2 = R"(a b c RLE‫ 1 2 3)";
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+const char *s3 = R"(a b c LRO‭ 1 2 3)";
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+const char *s4 = R"(a b c FSI⁨ 1 2 3)";
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+const char *s5 = R"(a b c LRI⁦ 1 2 3)";
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+const char *s6 = R"(a b c RLI⁧ 1 2 3)";
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
dc03fd
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-14.c b/gcc/testsuite/c-c++-common/Wbidi-chars-14.c
dc03fd
new file mode 100644
dc03fd
index 00000000000..ba5f75d9553
dc03fd
--- /dev/null
dc03fd
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-14.c
dc03fd
@@ -0,0 +1,38 @@
dc03fd
+/* PR preprocessor/103026 */
dc03fd
+/* { dg-do compile } */
dc03fd
+/* { dg-options "-Wbidi-chars=unpaired" } */
dc03fd
+/* Test PDI handling, which also pops any subsequent LREs, RLEs, LROs,
dc03fd
+   or RLOs.  */
dc03fd
+
dc03fd
+/* LRI_⁦_LRI_⁦_RLE_‫_RLE_‫_RLE_‫_PDI_⁩*/
dc03fd
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
dc03fd
+// LRI_⁦_RLE_‫_RLE_‫_RLE_‫_PDI_⁩
dc03fd
+// LRI_⁦_RLO_‮_RLE_‫_RLE_‫_PDI_⁩
dc03fd
+// LRI_⁦_RLO_‮_RLE_‫_PDI_⁩
dc03fd
+// FSI_⁨_RLO_‮_PDI_⁩
dc03fd
+// FSI_⁨_FSI_⁨_RLO_‮_PDI_⁩
dc03fd
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
dc03fd
+
dc03fd
+int LRI_\u2066_LRI_\u2066_LRE_\u202a_LRE_\u202a_LRE_\u202a_PDI_\u2069;
dc03fd
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
dc03fd
+int LRI_\u2066_LRI_\u2066_LRE_\u202a_LRE_\u202a_LRE_\u202a_PDI_\u2069_PDI_\u2069;
dc03fd
+int LRI_\u2066_LRI_\u2066_LRI_\u2066_LRE_\u202a_LRE_\u202a_LRE_\u202a_PDI_\u2069_PDI_\u2069;
dc03fd
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
dc03fd
+int PDI_\u2069;
dc03fd
+int LRI_\u2066_PDI_\u2069;
dc03fd
+int RLI_\u2067_PDI_\u2069;
dc03fd
+int LRE_\u202a_LRI_\u2066_PDI_\u2069;
dc03fd
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
dc03fd
+int LRI_\u2066_LRE_\u202a_PDF_\u202c_PDI_\u2069;
dc03fd
+int LRI_\u2066_LRE_\u202a_LRE_\u202a_PDF_\u202c_PDI_\u2069;
dc03fd
+int RLI_\u2067_LRI_\u2066_LRE_\u202a_LRE_\u202a_PDF_\u202c_PDI_\u2069;
dc03fd
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
dc03fd
+int FSI_\u2068_LRI_\u2066_LRE_\u202a_LRE_\u202a_PDF_\u202c_PDI_\u2069;
dc03fd
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
dc03fd
+int RLO_\u202e_PDI_\u2069;
dc03fd
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
dc03fd
+int RLI_\u2067_PDI_\u2069_RLI_\u2067;
dc03fd
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
dc03fd
+int FSI_\u2068_PDF_\u202c_PDI_\u2069;
dc03fd
+int FSI_\u2068_FSI_\u2068_PDF_\u202c_PDI_\u2069;
dc03fd
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
dc03fd
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-15.c b/gcc/testsuite/c-c++-common/Wbidi-chars-15.c
221e4e
new file mode 100644
dc03fd
index 00000000000..a0ce8ff5e2c
221e4e
--- /dev/null
dc03fd
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-15.c
dc03fd
@@ -0,0 +1,59 @@
dc03fd
+/* PR preprocessor/103026 */
dc03fd
+/* { dg-do compile } */
dc03fd
+/* { dg-options "-Wbidi-chars=unpaired" } */
dc03fd
+/* Test unpaired bidi control chars in multiline comments.  */
dc03fd
+
dc03fd
+/*
dc03fd
+ * LRE‪ end
dc03fd
+ */
dc03fd
+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
dc03fd
+/*
dc03fd
+ * RLE‫ end
dc03fd
+ */
dc03fd
+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
dc03fd
+/*
dc03fd
+ * LRO‭ end
dc03fd
+ */
dc03fd
+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
dc03fd
+/*
dc03fd
+ * RLO‮ end
dc03fd
+ */
dc03fd
+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
dc03fd
+/*
dc03fd
+ * LRI⁦ end
dc03fd
+ */
dc03fd
+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
dc03fd
+/*
dc03fd
+ * RLI⁧ end
dc03fd
+ */
dc03fd
+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
dc03fd
+/*
dc03fd
+ * FSI⁨ end
dc03fd
+ */
dc03fd
+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
dc03fd
+/* LRE‪
dc03fd
+   PDF‬ */
dc03fd
+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
dc03fd
+/* FSI⁨
dc03fd
+   PDI⁩ */
dc03fd
+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
dc03fd
+
dc03fd
+/* LRE<‪>
dc03fd
+ *
dc03fd
+ */
dc03fd
+/* { dg-warning "unpaired" "" { target *-*-* } .-3 } */
dc03fd
+
dc03fd
+/*
dc03fd
+ * LRE<‪>
dc03fd
+ */
dc03fd
+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
dc03fd
+
dc03fd
+/*
dc03fd
+ *
dc03fd
+ * LRE<‪> */
dc03fd
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
dc03fd
+
dc03fd
+/* RLI<⁧> */ /* PDI<⁩> */
dc03fd
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
dc03fd
+/* LRE<‪> */ /* PDF<‬> */
dc03fd
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
dc03fd
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-16.c b/gcc/testsuite/c-c++-common/Wbidi-chars-16.c
dc03fd
new file mode 100644
dc03fd
index 00000000000..baa0159861c
dc03fd
--- /dev/null
dc03fd
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-16.c
dc03fd
@@ -0,0 +1,26 @@
dc03fd
+/* PR preprocessor/103026 */
dc03fd
+/* { dg-do compile } */
dc03fd
+/* { dg-options "-Wbidi-chars=any" } */
dc03fd
+/* Test LTR/RTL chars.  */
dc03fd
+
dc03fd
+/* LTR<‎> */
dc03fd
+/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */
dc03fd
+// LTR<‎>
dc03fd
+/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */
dc03fd
+/* RTL<‏> */
dc03fd
+/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */
dc03fd
+// RTL<‏>
dc03fd
+/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */
dc03fd
+
dc03fd
+const char *s1 = "LTR<‎>";
dc03fd
+/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */
dc03fd
+const char *s2 = "LTR\u200e";
dc03fd
+/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */
dc03fd
+const char *s3 = "LTR\u200E";
dc03fd
+/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */
dc03fd
+const char *s4 = "RTL<‏>";
dc03fd
+/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */
dc03fd
+const char *s5 = "RTL\u200f";
dc03fd
+/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */
dc03fd
+const char *s6 = "RTL\u200F";
dc03fd
+/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */
dc03fd
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-17.c b/gcc/testsuite/c-c++-common/Wbidi-chars-17.c
dc03fd
new file mode 100644
dc03fd
index 00000000000..07cb4321f96
dc03fd
--- /dev/null
dc03fd
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-17.c
dc03fd
@@ -0,0 +1,30 @@
dc03fd
+/* PR preprocessor/103026 */
dc03fd
+/* { dg-do compile } */
dc03fd
+/* { dg-options "-Wbidi-chars=unpaired" } */
dc03fd
+/* Test LTR/RTL chars.  */
dc03fd
+
dc03fd
+/* LTR<‎> */
dc03fd
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
dc03fd
+// LTR<‎>
dc03fd
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
dc03fd
+/* RTL<‏> */
dc03fd
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
dc03fd
+// RTL<‏>
dc03fd
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
dc03fd
+int ltr_\u200e;
dc03fd
+/* { dg-error "universal character " "" { target *-*-* } .-1 } */
dc03fd
+int rtl_\u200f;
dc03fd
+/* { dg-error "universal character " "" { target *-*-* } .-1 } */
dc03fd
+
dc03fd
+const char *s1 = "LTR<‎>";
dc03fd
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
dc03fd
+const char *s2 = "LTR\u200e";
dc03fd
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
dc03fd
+const char *s3 = "LTR\u200E";
dc03fd
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
dc03fd
+const char *s4 = "RTL<‏>";
dc03fd
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
dc03fd
+const char *s5 = "RTL\u200f";
dc03fd
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
dc03fd
+const char *s6 = "RTL\u200F";
dc03fd
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
dc03fd
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-2.c b/gcc/testsuite/c-c++-common/Wbidi-chars-2.c
dc03fd
new file mode 100644
dc03fd
index 00000000000..2340374f276
dc03fd
--- /dev/null
dc03fd
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-2.c
dc03fd
@@ -0,0 +1,9 @@
dc03fd
+/* PR preprocessor/103026 */
221e4e
+/* { dg-do compile } */
221e4e
+
221e4e
+int main() {
221e4e
+    /* Say hello; newline⁧/*/ return 0 ;
221e4e
+/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */
221e4e
+    __builtin_printf("Hello world.\n");
221e4e
+    return 0;
221e4e
+}
dc03fd
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-3.c b/gcc/testsuite/c-c++-common/Wbidi-chars-3.c
221e4e
new file mode 100644
dc03fd
index 00000000000..9dc7edb6e64
221e4e
--- /dev/null
dc03fd
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-3.c
dc03fd
@@ -0,0 +1,11 @@
dc03fd
+/* PR preprocessor/103026 */
221e4e
+/* { dg-do compile } */
221e4e
+
221e4e
+int main() {
221e4e
+    const char* access_level = "user";
221e4e
+    if (__builtin_strcmp(access_level, "user‮ ⁦// Check if admin⁩ ⁦")) {
221e4e
+/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */
221e4e
+        __builtin_printf("You are an admin.\n");
221e4e
+    }
221e4e
+    return 0;
221e4e
+}
dc03fd
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-4.c b/gcc/testsuite/c-c++-common/Wbidi-chars-4.c
221e4e
new file mode 100644
dc03fd
index 00000000000..49f856b9bfe
221e4e
--- /dev/null
dc03fd
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-4.c
dc03fd
@@ -0,0 +1,172 @@
dc03fd
+/* PR preprocessor/103026 */
221e4e
+/* { dg-do compile } */
dc03fd
+/* { dg-options "-Wbidi-chars=any -Wno-multichar -Wno-overflow" } */
221e4e
+/* Test all bidi chars in various contexts (identifiers, comments,
221e4e
+   string literals, character constants), both UCN and UTF-8.  The bidi
221e4e
+   chars here are properly terminated, except for the character constants.  */
221e4e
+
221e4e
+/* a b c LRE‪ 1 2 3 PDF‬ x y z */
221e4e
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
221e4e
+/* a b c RLE‫ 1 2 3 PDF‬ x y z */
221e4e
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
221e4e
+/* a b c LRO‭ 1 2 3 PDF‬ x y z */
221e4e
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
221e4e
+/* a b c RLO‮ 1 2 3 PDF‬ x y z */
221e4e
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
221e4e
+/* a b c LRI⁦ 1 2 3 PDI⁩ x y z */
221e4e
+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */
221e4e
+/* a b c RLI⁧ 1 2 3 PDI⁩ x y */
221e4e
+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */
221e4e
+/* a b c FSI⁨ 1 2 3 PDI⁩ x y z */
221e4e
+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
221e4e
+
221e4e
+/* Same but C++ comments instead.  */
221e4e
+// a b c LRE‪ 1 2 3 PDF‬ x y z
221e4e
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
221e4e
+// a b c RLE‫ 1 2 3 PDF‬ x y z
221e4e
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
221e4e
+// a b c LRO‭ 1 2 3 PDF‬ x y z
221e4e
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
221e4e
+// a b c RLO‮ 1 2 3 PDF‬ x y z
221e4e
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
221e4e
+// a b c LRI⁦ 1 2 3 PDI⁩ x y z
221e4e
+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */
221e4e
+// a b c RLI⁧ 1 2 3 PDI⁩ x y
221e4e
+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */
221e4e
+// a b c FSI⁨ 1 2 3 PDI⁩ x y z
221e4e
+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
221e4e
+
221e4e
+/* Here we're closing an unopened context, warn when =any.  */
221e4e
+/* a b c PDI⁩ x y z */
221e4e
+/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */
221e4e
+/* a b c PDF‬ x y z */
221e4e
+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
221e4e
+// a b c PDI⁩ x y z
221e4e
+/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */
221e4e
+// a b c PDF‬ x y z
221e4e
+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
221e4e
+
dc03fd
+/* Multiline comments.  */
dc03fd
+/* a b c PDI⁩ x y z
dc03fd
+   */
dc03fd
+/* { dg-warning "U\\+2069" "" { target *-*-* } .-2 } */
dc03fd
+/* a b c PDF‬ x y z
dc03fd
+   */
dc03fd
+/* { dg-warning "U\\+202C" "" { target *-*-* } .-2 } */
dc03fd
+/* first
dc03fd
+   a b c PDI⁩ x y z
dc03fd
+   */
dc03fd
+/* { dg-warning "U\\+2069" "" { target *-*-* } .-2 } */
dc03fd
+/* first
dc03fd
+   a b c PDF‬ x y z
dc03fd
+   */
dc03fd
+/* { dg-warning "U\\+202C" "" { target *-*-* } .-2 } */
dc03fd
+/* first
dc03fd
+   a b c PDI⁩ x y z */
dc03fd
+/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */
dc03fd
+/* first
dc03fd
+   a b c PDF‬ x y z */
dc03fd
+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
dc03fd
+
221e4e
+void
221e4e
+g1 ()
221e4e
+{
221e4e
+  const char *s1 = "a b c LRE‪ 1 2 3 PDF‬ x y z";
221e4e
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
221e4e
+  const char *s2 = "a b c RLE‫ 1 2 3 PDF‬ x y z";
221e4e
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
221e4e
+  const char *s3 = "a b c LRO‭ 1 2 3 PDF‬ x y z";
221e4e
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
221e4e
+  const char *s4 = "a b c RLO‮ 1 2 3 PDF‬ x y z";
221e4e
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
221e4e
+  const char *s5 = "a b c LRI⁦ 1 2 3 PDI⁩ x y z";
221e4e
+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */
221e4e
+  const char *s6 = "a b c RLI⁧ 1 2 3 PDI⁩ x y z";
221e4e
+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */
221e4e
+  const char *s7 = "a b c FSI⁨ 1 2 3 PDI⁩ x y z";
221e4e
+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
221e4e
+  const char *s8 = "a b c PDI⁩ x y z";
221e4e
+/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */
221e4e
+  const char *s9 = "a b c PDF‬ x y z";
221e4e
+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
221e4e
+
221e4e
+  const char *s10 = "a b c LRE\u202a 1 2 3 PDF\u202c x y z";
221e4e
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
221e4e
+  const char *s11 = "a b c LRE\u202A 1 2 3 PDF\u202c x y z";
221e4e
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
221e4e
+  const char *s12 = "a b c RLE\u202b 1 2 3 PDF\u202c x y z";
221e4e
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
221e4e
+  const char *s13 = "a b c RLE\u202B 1 2 3 PDF\u202c x y z";
221e4e
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
221e4e
+  const char *s14 = "a b c LRO\u202d 1 2 3 PDF\u202c x y z";
221e4e
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
221e4e
+  const char *s15 = "a b c LRO\u202D 1 2 3 PDF\u202c x y z";
221e4e
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
221e4e
+  const char *s16 = "a b c RLO\u202e 1 2 3 PDF\u202c x y z";
221e4e
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
221e4e
+  const char *s17 = "a b c RLO\u202E 1 2 3 PDF\u202c x y z";
221e4e
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
221e4e
+  const char *s18 = "a b c LRI\u2066 1 2 3 PDI\u2069 x y z";
221e4e
+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */
221e4e
+  const char *s19 = "a b c RLI\u2067 1 2 3 PDI\u2069 x y z";
221e4e
+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */
221e4e
+  const char *s20 = "a b c FSI\u2068 1 2 3 PDI\u2069 x y z";
221e4e
+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
221e4e
+}
221e4e
+
221e4e
+void
221e4e
+g2 ()
221e4e
+{
221e4e
+  const char c1 = '\u202a';
221e4e
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
221e4e
+  const char c2 = '\u202A';
221e4e
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
221e4e
+  const char c3 = '\u202b';
221e4e
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
221e4e
+  const char c4 = '\u202B';
221e4e
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
221e4e
+  const char c5 = '\u202d';
221e4e
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
221e4e
+  const char c6 = '\u202D';
221e4e
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
221e4e
+  const char c7 = '\u202e';
221e4e
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
221e4e
+  const char c8 = '\u202E';
221e4e
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
221e4e
+  const char c9 = '\u2066';
221e4e
+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */
221e4e
+  const char c10 = '\u2067';
221e4e
+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */
221e4e
+  const char c11 = '\u2068';
221e4e
+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
221e4e
+}
221e4e
+
221e4e
+int A\u202cY;
221e4e
+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
221e4e
+int A\u202CY2;
221e4e
+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
221e4e
+
221e4e
+int d\u202ae\u202cf;
221e4e
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
221e4e
+int d\u202Ae\u202cf2;
221e4e
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
221e4e
+int d\u202be\u202cf;
221e4e
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
221e4e
+int d\u202Be\u202cf2;
221e4e
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
221e4e
+int d\u202de\u202cf;
221e4e
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
221e4e
+int d\u202De\u202cf2;
221e4e
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
221e4e
+int d\u202ee\u202cf;
221e4e
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
221e4e
+int d\u202Ee\u202cf2;
221e4e
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
221e4e
+int d\u2066e\u2069f;
221e4e
+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */
221e4e
+int d\u2067e\u2069f;
221e4e
+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */
221e4e
+int d\u2068e\u2069f;
221e4e
+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
221e4e
+int X\u2069;
221e4e
+/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */
dc03fd
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-5.c b/gcc/testsuite/c-c++-common/Wbidi-chars-5.c
221e4e
new file mode 100644
dc03fd
index 00000000000..f5776806c79
221e4e
--- /dev/null
dc03fd
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-5.c
dc03fd
@@ -0,0 +1,172 @@
dc03fd
+/* PR preprocessor/103026 */
221e4e
+/* { dg-do compile } */
dc03fd
+/* { dg-options "-Wbidi-chars=unpaired -Wno-multichar -Wno-overflow" } */
221e4e
+/* Test all bidi chars in various contexts (identifiers, comments,
221e4e
+   string literals, character constants), both UCN and UTF-8.  The bidi
221e4e
+   chars here are properly terminated, except for the character constants.  */
221e4e
+
221e4e
+/* a b c LRE‪ 1 2 3 PDF‬ x y z */
221e4e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
221e4e
+/* a b c RLE‫ 1 2 3 PDF‬ x y z */
221e4e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
221e4e
+/* a b c LRO‭ 1 2 3 PDF‬ x y z */
221e4e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
221e4e
+/* a b c RLO‮ 1 2 3 PDF‬ x y z */
221e4e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
221e4e
+/* a b c LRI⁦ 1 2 3 PDI⁩ x y z */
221e4e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
221e4e
+/* a b c RLI⁧ 1 2 3 PDI⁩ x y */
221e4e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
221e4e
+/* a b c FSI⁨ 1 2 3 PDI⁩ x y z */
221e4e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
221e4e
+
221e4e
+/* Same but C++ comments instead.  */
221e4e
+// a b c LRE‪ 1 2 3 PDF‬ x y z
221e4e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
221e4e
+// a b c RLE‫ 1 2 3 PDF‬ x y z
221e4e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
221e4e
+// a b c LRO‭ 1 2 3 PDF‬ x y z
221e4e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
221e4e
+// a b c RLO‮ 1 2 3 PDF‬ x y z
221e4e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
221e4e
+// a b c LRI⁦ 1 2 3 PDI⁩ x y z
221e4e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
221e4e
+// a b c RLI⁧ 1 2 3 PDI⁩ x y
221e4e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
221e4e
+// a b c FSI⁨ 1 2 3 PDI⁩ x y z
221e4e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
221e4e
+
221e4e
+/* Here we're closing an unopened context; that only warns with =any.  */
221e4e
+/* a b c PDI⁩ x y z */
221e4e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
221e4e
+/* a b c PDF‬ x y z */
221e4e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
221e4e
+// a b c PDI⁩ x y z
221e4e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
221e4e
+// a b c PDF‬ x y z
221e4e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
221e4e
+
dc03fd
+/* Multiline comments.  */
dc03fd
+/* a b c PDI⁩ x y z
dc03fd
+   */
dc03fd
+/* { dg-bogus "unpaired" "" { target *-*-* } .-2 } */
dc03fd
+/* a b c PDF‬ x y z
dc03fd
+   */
dc03fd
+/* { dg-bogus "unpaired" "" { target *-*-* } .-2 } */
dc03fd
+/* first
dc03fd
+   a b c PDI⁩ x y z
dc03fd
+   */
dc03fd
+/* { dg-bogus "unpaired" "" { target *-*-* } .-2 } */
dc03fd
+/* first
dc03fd
+   a b c PDF‬ x y z
dc03fd
+   */
dc03fd
+/* { dg-bogus "unpaired" "" { target *-*-* } .-2 } */
dc03fd
+/* first
dc03fd
+   a b c PDI⁩ x y z */
dc03fd
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
dc03fd
+/* first
dc03fd
+   a b c PDF‬ x y z */
dc03fd
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
dc03fd
+
221e4e
+void
221e4e
+g1 ()
221e4e
+{
221e4e
+  const char *s1 = "a b c LRE‪ 1 2 3 PDF‬ x y z";
221e4e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
221e4e
+  const char *s2 = "a b c RLE‫ 1 2 3 PDF‬ x y z";
221e4e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
221e4e
+  const char *s3 = "a b c LRO‭ 1 2 3 PDF‬ x y z";
221e4e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
221e4e
+  const char *s4 = "a b c RLO‮ 1 2 3 PDF‬ x y z";
221e4e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
221e4e
+  const char *s5 = "a b c LRI⁦ 1 2 3 PDI⁩ x y z";
221e4e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
221e4e
+  const char *s6 = "a b c RLI⁧ 1 2 3 PDI⁩ x y z";
221e4e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
221e4e
+  const char *s7 = "a b c FSI⁨ 1 2 3 PDI⁩ x y z";
221e4e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
221e4e
+  const char *s8 = "a b c PDI⁩ x y z";
221e4e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
221e4e
+  const char *s9 = "a b c PDF‬ x y z";
221e4e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
221e4e
+
221e4e
+  const char *s10 = "a b c LRE\u202a 1 2 3 PDF\u202c x y z";
221e4e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
221e4e
+  const char *s11 = "a b c LRE\u202A 1 2 3 PDF\u202c x y z";
221e4e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
221e4e
+  const char *s12 = "a b c RLE\u202b 1 2 3 PDF\u202c x y z";
221e4e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
221e4e
+  const char *s13 = "a b c RLE\u202B 1 2 3 PDF\u202c x y z";
221e4e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
221e4e
+  const char *s14 = "a b c LRO\u202d 1 2 3 PDF\u202c x y z";
221e4e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
221e4e
+  const char *s15 = "a b c LRO\u202D 1 2 3 PDF\u202c x y z";
221e4e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
221e4e
+  const char *s16 = "a b c RLO\u202e 1 2 3 PDF\u202c x y z";
221e4e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
221e4e
+  const char *s17 = "a b c RLO\u202E 1 2 3 PDF\u202c x y z";
221e4e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
221e4e
+  const char *s18 = "a b c LRI\u2066 1 2 3 PDI\u2069 x y z";
221e4e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
221e4e
+  const char *s19 = "a b c RLI\u2067 1 2 3 PDI\u2069 x y z";
221e4e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
221e4e
+  const char *s20 = "a b c FSI\u2068 1 2 3 PDI\u2069 x y z";
221e4e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
221e4e
+}
221e4e
+
221e4e
+void
221e4e
+g2 ()
221e4e
+{
221e4e
+  const char c1 = '\u202a';
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+  const char c2 = '\u202A';
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+  const char c3 = '\u202b';
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+  const char c4 = '\u202B';
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+  const char c5 = '\u202d';
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+  const char c6 = '\u202D';
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+  const char c7 = '\u202e';
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+  const char c8 = '\u202E';
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+  const char c9 = '\u2066';
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+  const char c10 = '\u2067';
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+  const char c11 = '\u2068';
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+}
221e4e
+
221e4e
+int A\u202cY;
221e4e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
221e4e
+int A\u202CY2;
221e4e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
221e4e
+
221e4e
+int d\u202ae\u202cf;
221e4e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
221e4e
+int d\u202Ae\u202cf2;
221e4e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
221e4e
+int d\u202be\u202cf;
221e4e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
221e4e
+int d\u202Be\u202cf2;
221e4e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
221e4e
+int d\u202de\u202cf;
221e4e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
221e4e
+int d\u202De\u202cf2;
221e4e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
221e4e
+int d\u202ee\u202cf;
221e4e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
221e4e
+int d\u202Ee\u202cf2;
221e4e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
221e4e
+int d\u2066e\u2069f;
221e4e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
221e4e
+int d\u2067e\u2069f;
221e4e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
221e4e
+int d\u2068e\u2069f;
221e4e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
221e4e
+int X\u2069;
221e4e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
dc03fd
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-6.c b/gcc/testsuite/c-c++-common/Wbidi-chars-6.c
221e4e
new file mode 100644
dc03fd
index 00000000000..a65d6faf60e
221e4e
--- /dev/null
dc03fd
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-6.c
dc03fd
@@ -0,0 +1,130 @@
dc03fd
+/* PR preprocessor/103026 */
221e4e
+/* { dg-do compile } */
dc03fd
+/* { dg-options "-Wbidi-chars=unpaired" } */
221e4e
+/* Test nesting of bidi chars in various contexts.  */
221e4e
+
221e4e
+/* Terminated by the wrong char:  */
221e4e
+/* a b c LRE‪ 1 2 3 PDI⁩ x y z */
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+/* a b c RLE‫ 1 2 3 PDI⁩ x y  z*/
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+/* a b c LRO‭ 1 2 3 PDI⁩ x y z */
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+/* a b c RLO‮ 1 2 3 PDI⁩ x y z */
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+/* a b c LRI⁦ 1 2 3 PDF‬ x y z */
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+/* a b c RLI⁧ 1 2 3 PDF‬ x y z */
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+/* a b c FSI⁨ 1 2 3 PDF‬ x y  z*/
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+
221e4e
+/* LRE‪ PDF‬ */
221e4e
+/* LRE‪ LRE‪ PDF‬ PDF‬ */
221e4e
+/* PDF‬ LRE‪ PDF‬ */
221e4e
+/* LRE‪ PDF‬ LRE‪ PDF‬ */
221e4e
+/* LRE‪ LRE‪ PDF‬ */
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+/* PDF‬ LRE‪ */
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+
221e4e
+// a b c LRE‪ 1 2 3 PDI⁩ x y z
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+// a b c RLE‫ 1 2 3 PDI⁩ x y  z*/
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+// a b c LRO‭ 1 2 3 PDI⁩ x y z 
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+// a b c RLO‮ 1 2 3 PDI⁩ x y z 
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+// a b c LRI⁦ 1 2 3 PDF‬ x y z 
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+// a b c RLI⁧ 1 2 3 PDF‬ x y z 
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+// a b c FSI⁨ 1 2 3 PDF‬ x y  z
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+
221e4e
+// LRE‪ PDF‬ 
221e4e
+// LRE‪ LRE‪ PDF‬ PDF‬
221e4e
+// PDF‬ LRE‪ PDF‬
221e4e
+// LRE‪ PDF‬ LRE‪ PDF‬
221e4e
+// LRE‪ LRE‪ PDF‬
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+// PDF‬ LRE‪
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+
221e4e
+void
221e4e
+g1 ()
221e4e
+{
221e4e
+  const char *s1 = "a b c LRE‪ 1 2 3 PDI⁩ x y z";
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+  const char *s2 = "a b c LRE\u202a 1 2 3 PDI\u2069 x y z";
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+  const char *s3 = "a b c RLE‫ 1 2 3 PDI⁩ x y ";
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+  const char *s4 = "a b c RLE\u202b 1 2 3 PDI\u2069 x y z";
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+  const char *s5 = "a b c LRO‭ 1 2 3 PDI⁩ x y z";
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+  const char *s6 = "a b c LRO\u202d 1 2 3 PDI\u2069 x y z";
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+  const char *s7 = "a b c RLO‮ 1 2 3 PDI⁩ x y z";
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+  const char *s8 = "a b c RLO\u202e 1 2 3 PDI\u2069 x y z";
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+  const char *s9 = "a b c LRI⁦ 1 2 3 PDF‬ x y z";
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+  const char *s10 = "a b c LRI\u2066 1 2 3 PDF\u202c x y z";
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+  const char *s11 = "a b c RLI⁧ 1 2 3 PDF‬ x y z\
221e4e
+    ";
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
221e4e
+  const char *s12 = "a b c RLI\u2067 1 2 3 PDF\u202c x y z";
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+  const char *s13 = "a b c FSI⁨ 1 2 3 PDF‬ x y z";
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+  const char *s14 = "a b c FSI\u2068 1 2 3 PDF\u202c x y z";
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+  const char *s15 = "PDF‬ LRE‪";
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+  const char *s16 = "PDF\u202c LRE\u202a";
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+  const char *s17 = "LRE‪ PDF‬";
221e4e
+  const char *s18 = "LRE\u202a PDF\u202c";
221e4e
+  const char *s19 = "LRE‪ LRE‪ PDF‬ PDF‬";
221e4e
+  const char *s20 = "LRE\u202a LRE\u202a PDF\u202c PDF\u202c";
221e4e
+  const char *s21 = "PDF‬ LRE‪ PDF‬";
221e4e
+  const char *s22 = "PDF\u202c LRE\u202a PDF\u202c";
221e4e
+  const char *s23 = "LRE‪ LRE‪ PDF‬";
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+  const char *s24 = "LRE\u202a LRE\u202a PDF\u202c";
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+  const char *s25 = "PDF‬ LRE‪";
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+  const char *s26 = "PDF\u202c LRE\u202a";
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+  const char *s27 = "PDF‬ LRE\u202a";
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+  const char *s28 = "PDF\u202c LRE‪";
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+}
221e4e
+
221e4e
+int A\u202aB\u2069C;
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+int a\u202bB\u2069c;
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+int a\u202db\u2069c2;
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+int a\u202eb\u2069;
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+int a\u2066b\u202c;
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+int a\u2067b\u202c;
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+int a\u2068b\u202c;
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+int A\u202aB\u202c;
221e4e
+int A\u202aA\u202aB\u202cB\u202c;
221e4e
+int a_\u202C_\u202a_\u202c;
221e4e
+int a_\u202a_\u202c_\u202a_\u202c_;
221e4e
+int a_\u202a_\u202c_\u202a_;
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
dc03fd
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-7.c b/gcc/testsuite/c-c++-common/Wbidi-chars-7.c
221e4e
new file mode 100644
dc03fd
index 00000000000..d012d420ec0
221e4e
--- /dev/null
dc03fd
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-7.c
dc03fd
@@ -0,0 +1,9 @@
dc03fd
+/* PR preprocessor/103026 */
221e4e
+/* { dg-do compile } */
dc03fd
+/* { dg-options "-Wbidi-chars=any" } */
221e4e
+/* Test we ignore UCNs in comments.  */
221e4e
+
221e4e
+// a b c \u202a 1 2 3
221e4e
+// a b c \u202A 1 2 3
221e4e
+/* a b c \u202a 1 2 3 */
221e4e
+/* a b c \u202A 1 2 3 */
dc03fd
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-8.c b/gcc/testsuite/c-c++-common/Wbidi-chars-8.c
221e4e
new file mode 100644
dc03fd
index 00000000000..4f54c5092ec
221e4e
--- /dev/null
dc03fd
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-8.c
dc03fd
@@ -0,0 +1,13 @@
dc03fd
+/* PR preprocessor/103026 */
221e4e
+/* { dg-do compile } */
dc03fd
+/* { dg-options "-Wbidi-chars=any" } */
221e4e
+/* Test \u vs \U.  */
221e4e
+
221e4e
+int a_\u202A;
221e4e
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
221e4e
+int a_\u202a_2;
221e4e
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
221e4e
+int a_\U0000202A_3;
221e4e
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
221e4e
+int a_\U0000202a_4;
221e4e
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
dc03fd
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-9.c b/gcc/testsuite/c-c++-common/Wbidi-chars-9.c
221e4e
new file mode 100644
dc03fd
index 00000000000..e2af1b1ca97
221e4e
--- /dev/null
dc03fd
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-9.c
dc03fd
@@ -0,0 +1,29 @@
dc03fd
+/* PR preprocessor/103026 */
221e4e
+/* { dg-do compile } */
dc03fd
+/* { dg-options "-Wbidi-chars=unpaired" } */
221e4e
+/* Test that we properly separate bidi contexts (comment/identifier/character
221e4e
+   constant/string literal).  */
221e4e
+
221e4e
+/* LRE ->‪<- */ int pdf_\u202c_1;
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+/* RLE ->‫<- */ int pdf_\u202c_2;
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+/* LRO ->‭<- */ int pdf_\u202c_3;
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+/* RLO ->‮<- */ int pdf_\u202c_4;
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+/* LRI ->⁦<-*/ int pdi_\u2069_1;
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+/* RLI ->⁧<- */ int pdi_\u2069_12;
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+/* FSI ->⁨<- */ int pdi_\u2069_3;
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+
221e4e
+const char *s1 = "LRE\u202a"; /* PDF ->‬<- */
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+/* LRE ->‪<- */ const char *s2 = "PDF\u202c";
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+const char *s3 = "LRE\u202a"; int pdf_\u202c_5;
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
+int lre_\u202a; const char *s4 = "PDF\u202c";
221e4e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
221e4e
diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h
dc03fd
index 3ad52d5e01e..e0dcb7f0529 100644
221e4e
--- a/libcpp/include/cpplib.h
221e4e
+++ b/libcpp/include/cpplib.h
221e4e
@@ -305,6 +305,17 @@ enum cpp_normalize_level {
221e4e
   normalized_none
221e4e
 };
221e4e
 
dc03fd
+/* The possible bidirectional control characters checking levels, from least
221e4e
+   restrictive to most.  */
221e4e
+enum cpp_bidirectional_level {
221e4e
+  /* No checking.  */
221e4e
+  bidirectional_none,
dc03fd
+  /* Only detect unpaired uses of bidirectional control characters.  */
221e4e
+  bidirectional_unpaired,
dc03fd
+  /* Detect any use of bidirectional control characters.  */
221e4e
+  bidirectional_any
221e4e
+};
221e4e
+
221e4e
 /* This structure is nested inside struct cpp_reader, and
221e4e
    carries all the options visible to the command line.  */
221e4e
 struct cpp_options
221e4e
@@ -506,6 +517,10 @@ struct cpp_options
221e4e
   /* True if warn about differences between C++98 and C++11.  */
221e4e
   bool cpp_warn_cxx11_compat;
221e4e
 
dc03fd
+  /* Nonzero if bidirectional control characters checking is on.  See enum
221e4e
+     cpp_bidirectional_level.  */
221e4e
+  unsigned char cpp_warn_bidirectional;
221e4e
+
221e4e
   /* Dependency generation.  */
221e4e
   struct
221e4e
   {
221e4e
@@ -1063,7 +1078,8 @@ enum {
221e4e
   CPP_W_PEDANTIC,
221e4e
   CPP_W_C90_C99_COMPAT,
221e4e
   CPP_W_CXX11_COMPAT,
221e4e
-  CPP_W_EXPANSION_TO_DEFINED
221e4e
+  CPP_W_EXPANSION_TO_DEFINED,
221e4e
+  CPP_W_BIDIRECTIONAL
221e4e
 };
221e4e
 
221e4e
 /* Output a diagnostic of some kind.  */
221e4e
diff --git a/libcpp/init.c b/libcpp/init.c
221e4e
index ca3fbaa5c05..5c15da82ff8 100644
221e4e
--- a/libcpp/init.c
221e4e
+++ b/libcpp/init.c
221e4e
@@ -208,6 +208,7 @@ cpp_create_reader (enum c_lang lang, cpp_hash_table *table,
221e4e
       = ENABLE_CANONICAL_SYSTEM_HEADERS;
221e4e
   CPP_OPTION (pfile, ext_numeric_literals) = 1;
221e4e
   CPP_OPTION (pfile, warn_date_time) = 0;
221e4e
+  CPP_OPTION (pfile, cpp_warn_bidirectional) = bidirectional_unpaired;
221e4e
 
221e4e
   /* Default CPP arithmetic to something sensible for the host for the
221e4e
      benefit of dumb users like fix-header.  */
dc03fd
diff --git a/libcpp/internal.h b/libcpp/internal.h
dc03fd
index 4f74f995cec..53b4c0f4af7 100644
dc03fd
--- a/libcpp/internal.h
dc03fd
+++ b/libcpp/internal.h
dc03fd
@@ -576,6 +576,13 @@ struct cpp_reader
dc03fd
   /* If non-null, the lexer will use this location for the next token
dc03fd
      instead of getting a location from the linemap.  */
dc03fd
   source_location *forced_token_location_p;
dc03fd
+
dc03fd
+  /* Returns true iff we should warn about UTF-8 bidirectional control
dc03fd
+     characters.  */
dc03fd
+  bool warn_bidi_p () const
dc03fd
+  {
dc03fd
+    return CPP_OPTION (this, cpp_warn_bidirectional) != bidirectional_none;
dc03fd
+  }
dc03fd
 };
dc03fd
 
dc03fd
 /* Character classes.  Based on the more primitive macros in safe-ctype.h.
221e4e
diff --git a/libcpp/lex.c b/libcpp/lex.c
dc03fd
index a408f912c5c..ea7f75e842e 100644
221e4e
--- a/libcpp/lex.c
221e4e
+++ b/libcpp/lex.c
dc03fd
@@ -1164,6 +1164,324 @@ _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
221e4e
     }
221e4e
 }
221e4e
 
221e4e
+namespace bidi {
221e4e
+  enum kind {
dc03fd
+    NONE, LRE, RLE, LRO, RLO, LRI, RLI, FSI, PDF, PDI, LTR, RTL
221e4e
+  };
221e4e
+
221e4e
+  /* All the UTF-8 encodings of bidi characters start with E2.  */
221e4e
+  const uchar utf8_start = 0xe2;
221e4e
+
221e4e
+  /* A vector holding currently open bidi contexts.  We use a char for
221e4e
+     each context, its LSB is 1 if it represents a PDF context, 0 if it
221e4e
+     represents a PDI context.  The next bit is 1 if this context was open
221e4e
+     by a bidi character written as a UCN, and 0 when it was UTF-8.  */
221e4e
+  semi_embedded_vec <unsigned char, 16> vec;
221e4e
+
221e4e
+  /* Close the whole comment/identifier/string literal/character constant
221e4e
+     context.  */
221e4e
+  void on_close ()
221e4e
+  {
221e4e
+    vec.truncate (0);
221e4e
+  }
221e4e
+
221e4e
+  /* Pop the last element in the vector.  */
221e4e
+  void pop ()
221e4e
+  {
221e4e
+    unsigned int len = vec.count ();
221e4e
+    gcc_checking_assert (len > 0);
221e4e
+    vec.truncate (len - 1);
221e4e
+  }
221e4e
+
dc03fd
+  /* Return the context of the Ith element.  */
dc03fd
+  kind ctx_at (unsigned int i)
dc03fd
+  {
dc03fd
+    return (vec[i] & 1) ? PDF : PDI;
dc03fd
+  }
dc03fd
+
221e4e
+  /* Return which context is currently opened.  */
221e4e
+  kind current_ctx ()
221e4e
+  {
221e4e
+    unsigned int len = vec.count ();
221e4e
+    if (len == 0)
221e4e
+      return NONE;
dc03fd
+    return ctx_at (len - 1);
221e4e
+  }
221e4e
+
221e4e
+  /* Return true if the current context comes from a UCN origin, that is,
221e4e
+     the bidi char which started this bidi context was written as a UCN.  */
221e4e
+  bool current_ctx_ucn_p ()
221e4e
+  {
221e4e
+    unsigned int len = vec.count ();
221e4e
+    gcc_checking_assert (len > 0);
221e4e
+    return (vec[len - 1] >> 1) & 1;
221e4e
+  }
221e4e
+
221e4e
+  /* We've read a bidi char, update the current vector as necessary.  */
221e4e
+  void on_char (kind k, bool ucn_p)
221e4e
+  {
221e4e
+    switch (k)
221e4e
+      {
221e4e
+      case LRE:
221e4e
+      case RLE:
221e4e
+      case LRO:
221e4e
+      case RLO:
221e4e
+	vec.push (ucn_p ? 3u : 1u);
221e4e
+	break;
221e4e
+      case LRI:
221e4e
+      case RLI:
221e4e
+      case FSI:
221e4e
+	vec.push (ucn_p ? 2u : 0u);
221e4e
+	break;
dc03fd
+      /* PDF terminates the scope of the last LRE, RLE, LRO, or RLO
dc03fd
+	 whose scope has not yet been terminated.  */
221e4e
+      case PDF:
221e4e
+	if (current_ctx () == PDF)
221e4e
+	  pop ();
221e4e
+	break;
dc03fd
+      /* PDI terminates the scope of the last LRI, RLI, or FSI whose
dc03fd
+	 scope has not yet been terminated, as well as the scopes of
dc03fd
+	 any subsequent LREs, RLEs, LROs, or RLOs whose scopes have not
dc03fd
+	 yet been terminated.  */
221e4e
+      case PDI:
dc03fd
+	for (int i = vec.count () - 1; i >= 0; --i)
dc03fd
+	  if (ctx_at (i) == PDI)
dc03fd
+	    {
dc03fd
+	      vec.truncate (i);
dc03fd
+	      break;
dc03fd
+	    }
dc03fd
+	break;
dc03fd
+      case LTR:
dc03fd
+      case RTL:
dc03fd
+	/* These aren't popped by a PDF/PDI.  */
221e4e
+	break;
221e4e
+      [[likely]] case NONE:
221e4e
+	break;
221e4e
+      default:
221e4e
+	abort ();
221e4e
+      }
221e4e
+  }
221e4e
+
221e4e
+  /* Return a descriptive string for K.  */
221e4e
+  const char *to_str (kind k)
221e4e
+  {
221e4e
+    switch (k)
221e4e
+      {
221e4e
+      case LRE:
221e4e
+	return "U+202A (LEFT-TO-RIGHT EMBEDDING)";
221e4e
+      case RLE:
221e4e
+	return "U+202B (RIGHT-TO-LEFT EMBEDDING)";
221e4e
+      case LRO:
221e4e
+	return "U+202D (LEFT-TO-RIGHT OVERRIDE)";
221e4e
+      case RLO:
221e4e
+	return "U+202E (RIGHT-TO-LEFT OVERRIDE)";
221e4e
+      case LRI:
221e4e
+	return "U+2066 (LEFT-TO-RIGHT ISOLATE)";
221e4e
+      case RLI:
221e4e
+	return "U+2067 (RIGHT-TO-LEFT ISOLATE)";
221e4e
+      case FSI:
221e4e
+	return "U+2068 (FIRST STRONG ISOLATE)";
221e4e
+      case PDF:
221e4e
+	return "U+202C (POP DIRECTIONAL FORMATTING)";
221e4e
+      case PDI:
221e4e
+	return "U+2069 (POP DIRECTIONAL ISOLATE)";
dc03fd
+      case LTR:
dc03fd
+	return "U+200E (LEFT-TO-RIGHT MARK)";
dc03fd
+      case RTL:
dc03fd
+	return "U+200F (RIGHT-TO-LEFT MARK)";
221e4e
+      default:
221e4e
+	abort ();
221e4e
+      }
221e4e
+  }
221e4e
+}
221e4e
+
221e4e
+/* Parse a sequence of 3 bytes starting with P and return its bidi code.  */
221e4e
+
221e4e
+static bidi::kind
221e4e
+get_bidi_utf8 (const unsigned char *const p)
221e4e
+{
221e4e
+  gcc_checking_assert (p[0] == bidi::utf8_start);
221e4e
+
221e4e
+  if (p[1] == 0x80)
221e4e
+    switch (p[2])
221e4e
+      {
221e4e
+      case 0xaa:
221e4e
+	return bidi::LRE;
221e4e
+      case 0xab:
221e4e
+	return bidi::RLE;
221e4e
+      case 0xac:
221e4e
+	return bidi::PDF;
221e4e
+      case 0xad:
221e4e
+	return bidi::LRO;
221e4e
+      case 0xae:
221e4e
+	return bidi::RLO;
dc03fd
+      case 0x8e:
dc03fd
+	return bidi::LTR;
dc03fd
+      case 0x8f:
dc03fd
+	return bidi::RTL;
221e4e
+      default:
221e4e
+	break;
221e4e
+      }
221e4e
+  else if (p[1] == 0x81)
221e4e
+    switch (p[2])
221e4e
+      {
221e4e
+      case 0xa6:
221e4e
+	return bidi::LRI;
221e4e
+      case 0xa7:
221e4e
+	return bidi::RLI;
221e4e
+      case 0xa8:
221e4e
+	return bidi::FSI;
221e4e
+      case 0xa9:
221e4e
+	return bidi::PDI;
221e4e
+      default:
221e4e
+	break;
221e4e
+      }
221e4e
+
221e4e
+  return bidi::NONE;
221e4e
+}
221e4e
+
221e4e
+/* Parse a UCN where P points just past \u or \U and return its bidi code.  */
221e4e
+
221e4e
+static bidi::kind
221e4e
+get_bidi_ucn (const unsigned char *p, bool is_U)
221e4e
+{
221e4e
+  /* 6.4.3 Universal Character Names
221e4e
+      \u hex-quad
221e4e
+      \U hex-quad hex-quad
221e4e
+     where \unnnn means \U0000nnnn.  */
221e4e
+
221e4e
+  if (is_U)
221e4e
+    {
221e4e
+      if (p[0] != '0' || p[1] != '0' || p[2] != '0' || p[3] != '0')
221e4e
+	return bidi::NONE;
221e4e
+      /* Skip the 4 leading zeros so we can treat \u and \U the same below.  */
221e4e
+      p += 4;
221e4e
+    }
221e4e
+
221e4e
+  /* All code points we are looking for start with 20xx.  */
221e4e
+  if (p[0] != '2' || p[1] != '0')
221e4e
+    return bidi::NONE;
221e4e
+  else if (p[2] == '2')
221e4e
+    switch (p[3])
221e4e
+      {
221e4e
+      case 'a':
221e4e
+      case 'A':
221e4e
+	return bidi::LRE;
221e4e
+      case 'b':
221e4e
+      case 'B':
221e4e
+	return bidi::RLE;
221e4e
+      case 'c':
221e4e
+      case 'C':
221e4e
+	return bidi::PDF;
221e4e
+      case 'd':
221e4e
+      case 'D':
221e4e
+	return bidi::LRO;
221e4e
+      case 'e':
221e4e
+      case 'E':
221e4e
+	return bidi::RLO;
221e4e
+      default:
221e4e
+	break;
221e4e
+      }
221e4e
+  else if (p[2] == '6')
221e4e
+    switch (p[3])
221e4e
+      {
221e4e
+      case '6':
221e4e
+	return bidi::LRI;
221e4e
+      case '7':
221e4e
+	return bidi::RLI;
221e4e
+      case '8':
221e4e
+	return bidi::FSI;
221e4e
+      case '9':
221e4e
+	return bidi::PDI;
221e4e
+      default:
221e4e
+	break;
221e4e
+      }
dc03fd
+  else if (p[2] == '0')
dc03fd
+    switch (p[3])
dc03fd
+      {
dc03fd
+      case 'e':
dc03fd
+      case 'E':
dc03fd
+	return bidi::LTR;
dc03fd
+      case 'f':
dc03fd
+      case 'F':
dc03fd
+	return bidi::RTL;
dc03fd
+      default:
dc03fd
+	break;
dc03fd
+      }
221e4e
+
221e4e
+  return bidi::NONE;
221e4e
+}
221e4e
+
221e4e
+/* We're closing a bidi context, that is, we've encountered a newline,
221e4e
+   are closing a C-style comment, or are at the end of a string literal,
221e4e
+   character constant, or identifier.  Warn if this context was not
221e4e
+   properly terminated by a PDI or PDF.  P points to the last character
221e4e
+   in this context.  */
221e4e
+
221e4e
+static void
221e4e
+maybe_warn_bidi_on_close (cpp_reader *pfile, const uchar *p)
221e4e
+{
221e4e
+  if (CPP_OPTION (pfile, cpp_warn_bidirectional) == bidirectional_unpaired
221e4e
+      && bidi::vec.count () > 0)
221e4e
+    {
221e4e
+      const source_location loc
221e4e
+	= linemap_position_for_column (pfile->line_table,
221e4e
+				       CPP_BUF_COLUMN (pfile->buffer, p));
221e4e
+      cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0,
dc03fd
+			     "unpaired UTF-8 bidirectional control character "
221e4e
+			     "detected");
221e4e
+    }
221e4e
+  /* We're done with this context.  */
221e4e
+  bidi::on_close ();
221e4e
+}
221e4e
+
221e4e
+/* We're at the beginning or in the middle of an identifier/comment/string
221e4e
+   literal/character constant.  Warn if we've encountered a bidi character.
221e4e
+   KIND says which bidi character it was; P points to it in the character
221e4e
+   stream.  UCN_P is true iff this bidi character was written as a UCN.  */
221e4e
+
221e4e
+static void
221e4e
+maybe_warn_bidi_on_char (cpp_reader *pfile, const uchar *p, bidi::kind kind,
221e4e
+			 bool ucn_p)
221e4e
+{
221e4e
+  if (__builtin_expect (kind == bidi::NONE, 1))
221e4e
+    return;
221e4e
+
221e4e
+  const unsigned char warn_bidi = CPP_OPTION (pfile, cpp_warn_bidirectional);
221e4e
+
221e4e
+  if (warn_bidi != bidirectional_none)
221e4e
+    {
221e4e
+      const source_location loc
221e4e
+	= linemap_position_for_column (pfile->line_table,
221e4e
+				       CPP_BUF_COLUMN (pfile->buffer, p));
221e4e
+      /* It seems excessive to warn about a PDI/PDF that is closing
221e4e
+	 an opened context because we've already warned about the
221e4e
+	 opening character.  Except warn when we have a UCN x UTF-8
221e4e
+	 mismatch.  */
221e4e
+      if (kind == bidi::current_ctx ())
221e4e
+	{
221e4e
+	  if (warn_bidi == bidirectional_unpaired
221e4e
+	      && bidi::current_ctx_ucn_p () != ucn_p)
221e4e
+	    cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0,
221e4e
+				   "UTF-8 vs UCN mismatch when closing "
221e4e
+				   "a context by \"%s\"", bidi::to_str (kind));
221e4e
+	}
221e4e
+      else if (warn_bidi == bidirectional_any)
221e4e
+	{
221e4e
+	  if (kind == bidi::PDF || kind == bidi::PDI)
221e4e
+	    cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0,
221e4e
+				   "\"%s\" is closing an unopened context",
221e4e
+				   bidi::to_str (kind));
221e4e
+	  else
221e4e
+	    cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0,
221e4e
+				   "found problematic Unicode character \"%s\"",
221e4e
+				   bidi::to_str (kind));
221e4e
+	}
221e4e
+    }
221e4e
+  /* Record this character's effect on the stack of open bidi contexts.  */
221e4e
+  bidi::on_char (kind, ucn_p);
221e4e
+}
221e4e
+
221e4e
 /* Skip a C-style block comment.  We find the end of the comment by
221e4e
    seeing if an asterisk is before every '/' we encounter.  Returns
221e4e
    nonzero if comment terminated by EOF, zero otherwise.
dc03fd
@@ -1175,6 +1493,7 @@ _cpp_skip_block_comment (cpp_reader *pfile)
221e4e
   cpp_buffer *buffer = pfile->buffer;
221e4e
   const uchar *cur = buffer->cur;
221e4e
   uchar c;
dc03fd
+  const bool warn_bidi_p = pfile->warn_bidi_p ();
dc03fd
 
221e4e
   cur++;
221e4e
   if (*cur == '/')
dc03fd
@@ -1189,7 +1508,11 @@ _cpp_skip_block_comment (cpp_reader *pfile)
221e4e
       if (c == '/')
221e4e
 	{
221e4e
 	  if (cur[-2] == '*')
221e4e
-	    break;
221e4e
+	    {
221e4e
+	      if (warn_bidi_p)
221e4e
+		maybe_warn_bidi_on_close (pfile, cur);
221e4e
+	      break;
221e4e
+	    }
221e4e
 
221e4e
 	  /* Warn about potential nested comments, but not if the '/'
221e4e
 	     comes immediately before the true comment delimiter.
dc03fd
@@ -1208,6 +1531,8 @@ _cpp_skip_block_comment (cpp_reader *pfile)
221e4e
 	{
221e4e
 	  unsigned int cols;
221e4e
 	  buffer->cur = cur - 1;
221e4e
+	  if (warn_bidi_p)
221e4e
+	    maybe_warn_bidi_on_close (pfile, cur);
221e4e
 	  _cpp_process_line_notes (pfile, true);
221e4e
 	  if (buffer->next_line >= buffer->rlimit)
221e4e
 	    return true;
dc03fd
@@ -1218,6 +1543,13 @@ _cpp_skip_block_comment (cpp_reader *pfile)
221e4e
 
221e4e
 	  cur = buffer->cur;
221e4e
 	}
221e4e
+      /* If this is the beginning of a UTF-8 encoding, it might be
dc03fd
+	 a bidirectional control character.  */
221e4e
+      else if (__builtin_expect (c == bidi::utf8_start, 0) && warn_bidi_p)
221e4e
+	{
221e4e
+	  bidi::kind kind = get_bidi_utf8 (cur - 1);
221e4e
+	  maybe_warn_bidi_on_char (pfile, cur, kind, /*ucn_p=*/false);
221e4e
+	}
221e4e
     }
221e4e
 
221e4e
   buffer->cur = cur;
dc03fd
@@ -1233,9 +1565,31 @@ skip_line_comment (cpp_reader *pfile)
221e4e
 {
221e4e
   cpp_buffer *buffer = pfile->buffer;
221e4e
   source_location orig_line = pfile->line_table->highest_line;
dc03fd
+  const bool warn_bidi_p = pfile->warn_bidi_p ();
221e4e
 
221e4e
-  while (*buffer->cur != '\n')
221e4e
-    buffer->cur++;
221e4e
+  if (!warn_bidi_p)
221e4e
+    while (*buffer->cur != '\n')
221e4e
+      buffer->cur++;
221e4e
+  else
221e4e
+    {
221e4e
+      while (*buffer->cur != '\n'
221e4e
+	     && *buffer->cur != bidi::utf8_start)
221e4e
+	buffer->cur++;
221e4e
+      if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0))
221e4e
+	{
221e4e
+	  while (*buffer->cur != '\n')
221e4e
+	    {
221e4e
+	      if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0))
221e4e
+		{
221e4e
+		  bidi::kind kind = get_bidi_utf8 (buffer->cur);
221e4e
+		  maybe_warn_bidi_on_char (pfile, buffer->cur, kind,
221e4e
+					   /*ucn_p=*/false);
221e4e
+		}
221e4e
+	      buffer->cur++;
221e4e
+	    }
221e4e
+	  maybe_warn_bidi_on_close (pfile, buffer->cur);
221e4e
+	}
221e4e
+    }
221e4e
 
221e4e
   _cpp_process_line_notes (pfile, true);
221e4e
   return orig_line != pfile->line_table->highest_line;
dc03fd
@@ -1315,11 +1669,13 @@ warn_about_normalization (cpp_reader *pfile,
221e4e
 
221e4e
 /* Returns TRUE if the sequence starting at buffer->cur is invalid in
221e4e
    an identifier.  FIRST is TRUE if this starts an identifier.  */
221e4e
+
221e4e
 static bool
221e4e
 forms_identifier_p (cpp_reader *pfile, int first,
221e4e
 		    struct normalize_state *state)
221e4e
 {
221e4e
   cpp_buffer *buffer = pfile->buffer;
dc03fd
+  const bool warn_bidi_p = pfile->warn_bidi_p ();
221e4e
 
221e4e
   if (*buffer->cur == '$')
221e4e
     {
dc03fd
@@ -1343,6 +1699,12 @@ forms_identifier_p (cpp_reader *pfile, int first,
221e4e
     {
221e4e
       cppchar_t s;
221e4e
       buffer->cur += 2;
221e4e
+      if (warn_bidi_p)
221e4e
+	{
221e4e
+	  bidi::kind kind = get_bidi_ucn (buffer->cur,
221e4e
+					  buffer->cur[-1] == 'U');
221e4e
+	  maybe_warn_bidi_on_char (pfile, buffer->cur, kind, /*ucn_p=*/true);
221e4e
+	}
221e4e
       if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
221e4e
 			  state, &s, NULL, NULL))
221e4e
 	return true;
dc03fd
@@ -1450,6 +1812,7 @@ lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
221e4e
   const uchar *cur;
221e4e
   unsigned int len;
221e4e
   unsigned int hash = HT_HASHSTEP (0, *base);
dc03fd
+  const bool warn_bidi_p = pfile->warn_bidi_p ();
221e4e
 
221e4e
   cur = pfile->buffer->cur;
221e4e
   if (! starts_ucn)
dc03fd
@@ -1472,6 +1835,8 @@ lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
221e4e
 	    pfile->buffer->cur++;
221e4e
 	  }
221e4e
       } while (forms_identifier_p (pfile, false, nst));
221e4e
+      if (warn_bidi_p)
221e4e
+	maybe_warn_bidi_on_close (pfile, pfile->buffer->cur);
221e4e
       result = _cpp_interpret_identifier (pfile, base,
221e4e
 					  pfile->buffer->cur - base);
221e4e
       *spelling = cpp_lookup (pfile, base, pfile->buffer->cur - base);
dc03fd
@@ -1673,6 +2038,7 @@ lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base,
221e4e
   _cpp_buff *first_buff = NULL, *last_buff = NULL;
221e4e
   size_t raw_prefix_start;
221e4e
   _cpp_line_note *note = &pfile->buffer->notes[pfile->buffer->cur_note];
dc03fd
+  const bool warn_bidi_p = pfile->warn_bidi_p ();
221e4e
 
221e4e
   type = (*base == 'L' ? CPP_WSTRING :
221e4e
 	  *base == 'U' ? CPP_STRING32 :
dc03fd
@@ -1909,8 +2275,15 @@ lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base,
221e4e
 	  cur = base = pfile->buffer->cur;
221e4e
 	  note = &pfile->buffer->notes[pfile->buffer->cur_note];
221e4e
 	}
221e4e
+      else if (__builtin_expect ((unsigned char) c == bidi::utf8_start, 0)
221e4e
+	       && warn_bidi_p)
dc03fd
+	maybe_warn_bidi_on_char (pfile, cur - 1, get_bidi_utf8 (cur - 1),
221e4e
+				 /*ucn_p=*/false);
221e4e
     }
221e4e
 
221e4e
+  if (warn_bidi_p)
221e4e
+    maybe_warn_bidi_on_close (pfile, cur);
221e4e
+
221e4e
   if (CPP_OPTION (pfile, user_literals))
221e4e
     {
221e4e
       /* If a string format macro, say from inttypes.h, is placed touching
dc03fd
@@ -2005,15 +2378,27 @@ lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
221e4e
   else
221e4e
     terminator = '>', type = CPP_HEADER_NAME;
221e4e
 
dc03fd
+  const bool warn_bidi_p = pfile->warn_bidi_p ();
221e4e
   for (;;)
221e4e
     {
221e4e
       cppchar_t c = *cur++;
221e4e
 
221e4e
       /* In #include-style directives, terminators are not escapable.  */
221e4e
       if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
221e4e
-	cur++;
221e4e
+	{
221e4e
+	  if ((cur[0] == 'u' || cur[0] == 'U') && warn_bidi_p)
221e4e
+	    {
221e4e
+	      bidi::kind kind = get_bidi_ucn (cur + 1, cur[0] == 'U');
221e4e
+	      maybe_warn_bidi_on_char (pfile, cur, kind, /*ucn_p=*/true);
221e4e
+	    }
221e4e
+	  cur++;
221e4e
+	}
221e4e
       else if (c == terminator)
221e4e
-	break;
221e4e
+	{
221e4e
+	  if (warn_bidi_p)
221e4e
+	    maybe_warn_bidi_on_close (pfile, cur - 1);
221e4e
+	  break;
221e4e
+	}
221e4e
       else if (c == '\n')
221e4e
 	{
221e4e
 	  cur--;
dc03fd
@@ -2030,6 +2415,11 @@ lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
221e4e
 	}
221e4e
       else if (c == '\0')
221e4e
 	saw_NUL = true;
221e4e
+      else if (__builtin_expect (c == bidi::utf8_start, 0) && warn_bidi_p)
221e4e
+	{
221e4e
+	  bidi::kind kind = get_bidi_utf8 (cur - 1);
221e4e
+	  maybe_warn_bidi_on_char (pfile, cur - 1, kind, /*ucn_p=*/false);
221e4e
+	}
221e4e
     }
221e4e
 
221e4e
   if (saw_NUL && !pfile->state.skipping)