Blame SOURCES/gcc8-Wbidi-chars.patch

3f7692
commit 51c500269bf53749b107807d84271385fad35628
3f7692
Author: Marek Polacek <polacek@redhat.com>
3f7692
Date:   Wed Oct 6 14:33:59 2021 -0400
3f7692
3f7692
    libcpp: Implement -Wbidi-chars for CVE-2021-42574 [PR103026]
3f7692
    
3f7692
    From a link below:
3f7692
    "An issue was discovered in the Bidirectional Algorithm in the Unicode
3f7692
    Specification through 14.0. It permits the visual reordering of
3f7692
    characters via control sequences, which can be used to craft source code
3f7692
    that renders different logic than the logical ordering of tokens
3f7692
    ingested by compilers and interpreters. Adversaries can leverage this to
3f7692
    encode source code for compilers accepting Unicode such that targeted
3f7692
    vulnerabilities are introduced invisibly to human reviewers."
3f7692
    
3f7692
    More info:
3f7692
    https://nvd.nist.gov/vuln/detail/CVE-2021-42574
3f7692
    https://trojansource.codes/
3f7692
    
3f7692
    This is not a compiler bug.  However, to mitigate the problem, this patch
3f7692
    implements -Wbidi-chars=[none|unpaired|any] to warn about possibly
3f7692
    misleading Unicode bidirectional control characters the preprocessor may
3f7692
    encounter.
3f7692
    
3f7692
    The default is =unpaired, which warns about improperly terminated
3f7692
    bidirectional control characters; e.g. a LRE without its corresponding PDF.
3f7692
    The level =any warns about any use of bidirectional control characters.
3f7692
    
3f7692
    This patch handles both UCNs and UTF-8 characters.  UCNs designating
3f7692
    bidi characters in identifiers are accepted since r204886.  Then r217144
3f7692
    enabled -fextended-identifiers by default.  Extended characters in C/C++
3f7692
    identifiers have been accepted since r275979.  However, this patch still
3f7692
    warns about mixing UTF-8 and UCN bidi characters; there seems to be no
3f7692
    good reason to allow mixing them.
3f7692
    
3f7692
    We warn in different contexts: comments (both C and C++-style), string
3f7692
    literals, character constants, and identifiers.  As expected, UCNs are ignored
3f7692
    in comments and raw string literals.  The bidirectional control characters
3f7692
    can nest so this patch handles that as well.
3f7692
    
3f7692
    I have neither included nor tested this with Fortran (which also has
3f7692
    string literals and line comments).
3f7692
    
3f7692
    Dave M. posted patches improving diagnostics involving Unicode characters.
3f7692
    This patch does not make use of this new infrastructure yet.
3f7692
    
3f7692
            PR preprocessor/103026
3f7692
    
3f7692
    gcc/c-family/ChangeLog:
3f7692
    
3f7692
            * c.opt (Wbidi-chars, Wbidi-chars=): New option.
3f7692
    
3f7692
    gcc/ChangeLog:
3f7692
    
3f7692
            * doc/invoke.texi: Document -Wbidi-chars.
3f7692
    
3f7692
    libcpp/ChangeLog:
3f7692
    
3f7692
            * include/cpplib.h (enum cpp_bidirectional_level): New.
3f7692
            (struct cpp_options): Add cpp_warn_bidirectional.
3f7692
            (enum cpp_warning_reason): Add CPP_W_BIDIRECTIONAL.
3f7692
            * internal.h (struct cpp_reader): Add warn_bidi_p member
3f7692
            function.
3f7692
            * init.c (cpp_create_reader): Set cpp_warn_bidirectional.
3f7692
            * lex.c (bidi): New namespace.
3f7692
            (get_bidi_utf8): New function.
3f7692
            (get_bidi_ucn): Likewise.
3f7692
            (maybe_warn_bidi_on_close): Likewise.
3f7692
            (maybe_warn_bidi_on_char): Likewise.
3f7692
            (_cpp_skip_block_comment): Implement warning about bidirectional
3f7692
            control characters.
3f7692
            (skip_line_comment): Likewise.
3f7692
            (forms_identifier_p): Likewise.
3f7692
            (lex_identifier): Likewise.
3f7692
            (lex_string): Likewise.
3f7692
            (lex_raw_string): Likewise.
3f7692
    
3f7692
    gcc/testsuite/ChangeLog:
3f7692
    
3f7692
            * c-c++-common/Wbidi-chars-1.c: New test.
3f7692
            * c-c++-common/Wbidi-chars-2.c: New test.
3f7692
            * c-c++-common/Wbidi-chars-3.c: New test.
3f7692
            * c-c++-common/Wbidi-chars-4.c: New test.
3f7692
            * c-c++-common/Wbidi-chars-5.c: New test.
3f7692
            * c-c++-common/Wbidi-chars-6.c: New test.
3f7692
            * c-c++-common/Wbidi-chars-7.c: New test.
3f7692
            * c-c++-common/Wbidi-chars-8.c: New test.
3f7692
            * c-c++-common/Wbidi-chars-9.c: New test.
3f7692
            * c-c++-common/Wbidi-chars-10.c: New test.
3f7692
            * c-c++-common/Wbidi-chars-11.c: New test.
3f7692
            * c-c++-common/Wbidi-chars-12.c: New test.
3f7692
            * c-c++-common/Wbidi-chars-13.c: New test.
3f7692
            * c-c++-common/Wbidi-chars-14.c: New test.
3f7692
            * c-c++-common/Wbidi-chars-15.c: New test.
3f7692
            * c-c++-common/Wbidi-chars-16.c: New test.
3f7692
            * c-c++-common/Wbidi-chars-17.c: New test.
3f7692
3f7692
diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
3f7692
index f591b39be5a..cf922812198 100644
3f7692
--- a/gcc/c-family/c.opt
3f7692
+++ b/gcc/c-family/c.opt
3f7692
@@ -334,6 +334,30 @@ Wbad-function-cast
3f7692
 C ObjC Var(warn_bad_function_cast) Warning
3f7692
 Warn about casting functions to incompatible types.
3f7692
 
3f7692
+Wbidi-chars
3f7692
+C ObjC C++ ObjC++ Warning Alias(Wbidi-chars=,any,none)
3f7692
+;
3f7692
+
3f7692
+Wbidi-chars=
3f7692
+C ObjC C++ ObjC++ RejectNegative Joined Warning CPP(cpp_warn_bidirectional) CppReason(CPP_W_BIDIRECTIONAL) Var(warn_bidirectional) Init(bidirectional_unpaired) Enum(cpp_bidirectional_level)
3f7692
+-Wbidi-chars=[none|unpaired|any] Warn about UTF-8 bidirectional control characters.
3f7692
+
3f7692
+; Required for these enum values.
3f7692
+SourceInclude
3f7692
+cpplib.h
3f7692
+
3f7692
+Enum
3f7692
+Name(cpp_bidirectional_level) Type(int) UnknownError(argument %qs to %<-Wbidi-chars%> not recognized)
3f7692
+
3f7692
+EnumValue
3f7692
+Enum(cpp_bidirectional_level) String(none) Value(bidirectional_none)
3f7692
+
3f7692
+EnumValue
3f7692
+Enum(cpp_bidirectional_level) String(unpaired) Value(bidirectional_unpaired)
3f7692
+
3f7692
+EnumValue
3f7692
+Enum(cpp_bidirectional_level) String(any) Value(bidirectional_any)
3f7692
+
3f7692
 Wbool-compare
3f7692
 C ObjC C++ ObjC++ Var(warn_bool_compare) Warning LangEnabledBy(C ObjC C++ ObjC++,Wall)
3f7692
 Warn about boolean expression compared with an integer value different from true/false.
3f7692
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
3f7692
index 78ca7738df2..cc85c53aede 100644
3f7692
--- a/gcc/doc/invoke.texi
3f7692
+++ b/gcc/doc/invoke.texi
3f7692
@@ -264,7 +264,8 @@ Objective-C and Objective-C++ Dialects}.
3f7692
 -Walloc-zero  -Walloc-size-larger-than=@var{n}
3f7692
 -Walloca  -Walloca-larger-than=@var{n} @gol
3f7692
 -Wno-aggressive-loop-optimizations  -Warray-bounds  -Warray-bounds=@var{n} @gol
3f7692
--Wno-attributes  -Wbool-compare  -Wbool-operation @gol
3f7692
+-Wno-attributes  -Wbidi-chars=@r{[}none@r{|}unpaired@r{|}any@r{]} @gol 
3f7692
+-Wbool-compare  -Wbool-operation @gol
3f7692
 -Wno-builtin-declaration-mismatch @gol
3f7692
 -Wno-builtin-macro-redefined  -Wc90-c99-compat  -Wc99-c11-compat @gol
3f7692
 -Wc++-compat  -Wc++11-compat  -Wc++14-compat  @gol
3f7692
@@ -5606,6 +5607,23 @@ Warn about declarations using the @code{alias} and similar attributes whose
3f7692
 target is incompatible with the type of the alias.  @xref{Function Attributes,
3f7692
 ,Declaring Attributes of Functions}.
3f7692
 
3f7692
+@item -Wbidi-chars=@r{[}none@r{|}unpaired@r{|}any@r{]}
3f7692
+@opindex Wbidi-chars=
3f7692
+@opindex Wbidi-chars
3f7692
+@opindex Wno-bidi-chars
3f7692
+Warn about possibly misleading UTF-8 bidirectional control characters in
3f7692
+comments, string literals, character constants, and identifiers.  Such
3f7692
+characters can change left-to-right writing direction into right-to-left
3f7692
+(and vice versa), which can cause confusion between the logical order and
3f7692
+visual order.  This may be dangerous; for instance, it may seem that a piece
3f7692
+of code is not commented out, whereas it in fact is.
3f7692
+
3f7692
+There are three levels of warning supported by GCC@.  The default is
3f7692
+@option{-Wbidi-chars=unpaired}, which warns about improperly terminated
3f7692
+bidi contexts.  @option{-Wbidi-chars=none} turns the warning off.
3f7692
+@option{-Wbidi-chars=any} warns about any use of bidirectional control
3f7692
+characters.
3f7692
+
3f7692
 @item -Wbool-compare
3f7692
 @opindex Wno-bool-compare
3f7692
 @opindex Wbool-compare
3f7692
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-1.c b/gcc/testsuite/c-c++-common/Wbidi-chars-1.c
3f7692
new file mode 100644
3f7692
index 00000000000..34f5ac19271
3f7692
--- /dev/null
3f7692
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-1.c
3f7692
@@ -0,0 +1,12 @@
3f7692
+/* PR preprocessor/103026 */
3f7692
+/* { dg-do compile } */
3f7692
+
3f7692
+int main() {
3f7692
+    int isAdmin = 0;
3f7692
+    /*‮ } ⁦if (isAdmin)⁩ ⁦ begin admins only */
3f7692
+/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */
3f7692
+        __builtin_printf("You are an admin.\n");
3f7692
+    /* end admins only ‮ { ⁦*/
3f7692
+/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */
3f7692
+    return 0;
3f7692
+}
3f7692
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-10.c b/gcc/testsuite/c-c++-common/Wbidi-chars-10.c
3f7692
new file mode 100644
3f7692
index 00000000000..3f851b69e65
3f7692
--- /dev/null
3f7692
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-10.c
3f7692
@@ -0,0 +1,27 @@
3f7692
+/* PR preprocessor/103026 */
3f7692
+/* { dg-do compile } */
3f7692
+/* { dg-options "-Wbidi-chars=unpaired" } */
3f7692
+/* More nesting testing.  */
3f7692
+
3f7692
+/* RLE‫ LRI⁦ PDF‬ PDI⁩*/
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+int LRE_\u202a_PDF_\u202c;
3f7692
+int LRE_\u202a_PDF_\u202c_LRE_\u202a_PDF_\u202c;
3f7692
+int LRE_\u202a_LRI_\u2066_PDF_\u202c_PDI_\u2069;
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+int RLE_\u202b_RLI_\u2067_PDF_\u202c_PDI_\u2069;
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+int RLE_\u202b_RLI_\u2067_PDI_\u2069_PDF_\u202c;
3f7692
+int FSI_\u2068_LRO_\u202d_PDI_\u2069_PDF_\u202c;
3f7692
+int FSI_\u2068;
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+int FSI_\u2068_PDI_\u2069;
3f7692
+int FSI_\u2068_FSI_\u2068_PDI_\u2069;
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069;
3f7692
+int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069;
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDF_\u202c;
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_FSI_\u2068_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069;
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-11.c b/gcc/testsuite/c-c++-common/Wbidi-chars-11.c
3f7692
new file mode 100644
3f7692
index 00000000000..44d044d82de
3f7692
--- /dev/null
3f7692
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-11.c
3f7692
@@ -0,0 +1,9 @@
3f7692
+/* PR preprocessor/103026 */
3f7692
+/* { dg-do compile } */
3f7692
+/* { dg-options "-Wbidi-chars=unpaired" } */
3f7692
+/* Test that we warn when mixing UCN and UTF-8.  */
3f7692
+
3f7692
+const char *s1 = "LRE_‪_PDF_\u202c";
3f7692
+/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */
3f7692
+const char *s2 = "LRE_\u202a_PDF_‬";
3f7692
+/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */
3f7692
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-12.c b/gcc/testsuite/c-c++-common/Wbidi-chars-12.c
3f7692
new file mode 100644
3f7692
index 00000000000..b07eec1da91
3f7692
--- /dev/null
3f7692
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-12.c
3f7692
@@ -0,0 +1,19 @@
3f7692
+/* PR preprocessor/103026 */
3f7692
+/* { dg-do compile { target { c || c++11 } } } */
3f7692
+/* { dg-options "-Wbidi-chars=any" } */
3f7692
+/* Test raw strings.  */
3f7692
+
3f7692
+const char *s1 = R"(a b c LRE‪ 1 2 3 PDF‬ x y z)";
3f7692
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
3f7692
+const char *s2 = R"(a b c RLE‫ 1 2 3 PDF‬ x y z)";
3f7692
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
3f7692
+const char *s3 = R"(a b c LRO‭ 1 2 3 PDF‬ x y z)";
3f7692
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
3f7692
+const char *s4 = R"(a b c RLO‮ 1 2 3 PDF‬ x y z)";
3f7692
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
3f7692
+const char *s7 = R"(a b c FSI⁨ 1 2 3 PDI⁩ x y) z";
3f7692
+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
3f7692
+const char *s8 = R"(a b c PDI⁩ x y )z";
3f7692
+/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */
3f7692
+const char *s9 = R"(a b c PDF‬ x y z)";
3f7692
+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
3f7692
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-13.c b/gcc/testsuite/c-c++-common/Wbidi-chars-13.c
3f7692
new file mode 100644
3f7692
index 00000000000..b2dd9fde752
3f7692
--- /dev/null
3f7692
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-13.c
3f7692
@@ -0,0 +1,17 @@
3f7692
+/* PR preprocessor/103026 */
3f7692
+/* { dg-do compile { target { c || c++11 } } } */
3f7692
+/* { dg-options "-Wbidi-chars=unpaired" } */
3f7692
+/* Test raw strings.  */
3f7692
+
3f7692
+const char *s1 = R"(a b c LRE‪ 1 2 3)";
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+const char *s2 = R"(a b c RLE‫ 1 2 3)";
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+const char *s3 = R"(a b c LRO‭ 1 2 3)";
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+const char *s4 = R"(a b c FSI⁨ 1 2 3)";
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+const char *s5 = R"(a b c LRI⁦ 1 2 3)";
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+const char *s6 = R"(a b c RLI⁧ 1 2 3)";
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-14.c b/gcc/testsuite/c-c++-common/Wbidi-chars-14.c
3f7692
new file mode 100644
3f7692
index 00000000000..ba5f75d9553
3f7692
--- /dev/null
3f7692
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-14.c
3f7692
@@ -0,0 +1,38 @@
3f7692
+/* PR preprocessor/103026 */
3f7692
+/* { dg-do compile } */
3f7692
+/* { dg-options "-Wbidi-chars=unpaired" } */
3f7692
+/* Test PDI handling, which also pops any subsequent LREs, RLEs, LROs,
3f7692
+   or RLOs.  */
3f7692
+
3f7692
+/* LRI_⁦_LRI_⁦_RLE_‫_RLE_‫_RLE_‫_PDI_⁩*/
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+// LRI_⁦_RLE_‫_RLE_‫_RLE_‫_PDI_⁩
3f7692
+// LRI_⁦_RLO_‮_RLE_‫_RLE_‫_PDI_⁩
3f7692
+// LRI_⁦_RLO_‮_RLE_‫_PDI_⁩
3f7692
+// FSI_⁨_RLO_‮_PDI_⁩
3f7692
+// FSI_⁨_FSI_⁨_RLO_‮_PDI_⁩
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+
3f7692
+int LRI_\u2066_LRI_\u2066_LRE_\u202a_LRE_\u202a_LRE_\u202a_PDI_\u2069;
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+int LRI_\u2066_LRI_\u2066_LRE_\u202a_LRE_\u202a_LRE_\u202a_PDI_\u2069_PDI_\u2069;
3f7692
+int LRI_\u2066_LRI_\u2066_LRI_\u2066_LRE_\u202a_LRE_\u202a_LRE_\u202a_PDI_\u2069_PDI_\u2069;
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+int PDI_\u2069;
3f7692
+int LRI_\u2066_PDI_\u2069;
3f7692
+int RLI_\u2067_PDI_\u2069;
3f7692
+int LRE_\u202a_LRI_\u2066_PDI_\u2069;
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+int LRI_\u2066_LRE_\u202a_PDF_\u202c_PDI_\u2069;
3f7692
+int LRI_\u2066_LRE_\u202a_LRE_\u202a_PDF_\u202c_PDI_\u2069;
3f7692
+int RLI_\u2067_LRI_\u2066_LRE_\u202a_LRE_\u202a_PDF_\u202c_PDI_\u2069;
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+int FSI_\u2068_LRI_\u2066_LRE_\u202a_LRE_\u202a_PDF_\u202c_PDI_\u2069;
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+int RLO_\u202e_PDI_\u2069;
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+int RLI_\u2067_PDI_\u2069_RLI_\u2067;
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+int FSI_\u2068_PDF_\u202c_PDI_\u2069;
3f7692
+int FSI_\u2068_FSI_\u2068_PDF_\u202c_PDI_\u2069;
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-15.c b/gcc/testsuite/c-c++-common/Wbidi-chars-15.c
3f7692
new file mode 100644
3f7692
index 00000000000..a0ce8ff5e2c
3f7692
--- /dev/null
3f7692
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-15.c
3f7692
@@ -0,0 +1,59 @@
3f7692
+/* PR preprocessor/103026 */
3f7692
+/* { dg-do compile } */
3f7692
+/* { dg-options "-Wbidi-chars=unpaired" } */
3f7692
+/* Test unpaired bidi control chars in multiline comments.  */
3f7692
+
3f7692
+/*
3f7692
+ * LRE‪ end
3f7692
+ */
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
3f7692
+/*
3f7692
+ * RLE‫ end
3f7692
+ */
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
3f7692
+/*
3f7692
+ * LRO‭ end
3f7692
+ */
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
3f7692
+/*
3f7692
+ * RLO‮ end
3f7692
+ */
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
3f7692
+/*
3f7692
+ * LRI⁦ end
3f7692
+ */
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
3f7692
+/*
3f7692
+ * RLI⁧ end
3f7692
+ */
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
3f7692
+/*
3f7692
+ * FSI⁨ end
3f7692
+ */
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
3f7692
+/* LRE‪
3f7692
+   PDF‬ */
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
3f7692
+/* FSI⁨
3f7692
+   PDI⁩ */
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
3f7692
+
3f7692
+/* LRE<‪>
3f7692
+ *
3f7692
+ */
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-3 } */
3f7692
+
3f7692
+/*
3f7692
+ * LRE<‪>
3f7692
+ */
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
3f7692
+
3f7692
+/*
3f7692
+ *
3f7692
+ * LRE<‪> */
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+
3f7692
+/* RLI<⁧> */ /* PDI<⁩> */
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+/* LRE<‪> */ /* PDF<‬> */
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-16.c b/gcc/testsuite/c-c++-common/Wbidi-chars-16.c
3f7692
new file mode 100644
3f7692
index 00000000000..baa0159861c
3f7692
--- /dev/null
3f7692
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-16.c
3f7692
@@ -0,0 +1,26 @@
3f7692
+/* PR preprocessor/103026 */
3f7692
+/* { dg-do compile } */
3f7692
+/* { dg-options "-Wbidi-chars=any" } */
3f7692
+/* Test LTR/RTL chars.  */
3f7692
+
3f7692
+/* LTR<‎> */
3f7692
+/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */
3f7692
+// LTR<‎>
3f7692
+/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */
3f7692
+/* RTL<‏> */
3f7692
+/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */
3f7692
+// RTL<‏>
3f7692
+/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */
3f7692
+
3f7692
+const char *s1 = "LTR<‎>";
3f7692
+/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */
3f7692
+const char *s2 = "LTR\u200e";
3f7692
+/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */
3f7692
+const char *s3 = "LTR\u200E";
3f7692
+/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */
3f7692
+const char *s4 = "RTL<‏>";
3f7692
+/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */
3f7692
+const char *s5 = "RTL\u200f";
3f7692
+/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */
3f7692
+const char *s6 = "RTL\u200F";
3f7692
+/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */
3f7692
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-17.c b/gcc/testsuite/c-c++-common/Wbidi-chars-17.c
3f7692
new file mode 100644
3f7692
index 00000000000..07cb4321f96
3f7692
--- /dev/null
3f7692
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-17.c
3f7692
@@ -0,0 +1,30 @@
3f7692
+/* PR preprocessor/103026 */
3f7692
+/* { dg-do compile } */
3f7692
+/* { dg-options "-Wbidi-chars=unpaired" } */
3f7692
+/* Test LTR/RTL chars.  */
3f7692
+
3f7692
+/* LTR<‎> */
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
3f7692
+// LTR<‎>
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
3f7692
+/* RTL<‏> */
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
3f7692
+// RTL<‏>
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
3f7692
+int ltr_\u200e;
3f7692
+/* { dg-error "universal character " "" { target *-*-* } .-1 } */
3f7692
+int rtl_\u200f;
3f7692
+/* { dg-error "universal character " "" { target *-*-* } .-1 } */
3f7692
+
3f7692
+const char *s1 = "LTR<‎>";
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
3f7692
+const char *s2 = "LTR\u200e";
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
3f7692
+const char *s3 = "LTR\u200E";
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
3f7692
+const char *s4 = "RTL<‏>";
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
3f7692
+const char *s5 = "RTL\u200f";
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
3f7692
+const char *s6 = "RTL\u200F";
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
3f7692
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-2.c b/gcc/testsuite/c-c++-common/Wbidi-chars-2.c
3f7692
new file mode 100644
3f7692
index 00000000000..2340374f276
3f7692
--- /dev/null
3f7692
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-2.c
3f7692
@@ -0,0 +1,9 @@
3f7692
+/* PR preprocessor/103026 */
3f7692
+/* { dg-do compile } */
3f7692
+
3f7692
+int main() {
3f7692
+    /* Say hello; newline⁧/*/ return 0 ;
3f7692
+/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */
3f7692
+    __builtin_printf("Hello world.\n");
3f7692
+    return 0;
3f7692
+}
3f7692
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-3.c b/gcc/testsuite/c-c++-common/Wbidi-chars-3.c
3f7692
new file mode 100644
3f7692
index 00000000000..9dc7edb6e64
3f7692
--- /dev/null
3f7692
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-3.c
3f7692
@@ -0,0 +1,11 @@
3f7692
+/* PR preprocessor/103026 */
3f7692
+/* { dg-do compile } */
3f7692
+
3f7692
+int main() {
3f7692
+    const char* access_level = "user";
3f7692
+    if (__builtin_strcmp(access_level, "user‮ ⁦// Check if admin⁩ ⁦")) {
3f7692
+/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */
3f7692
+        __builtin_printf("You are an admin.\n");
3f7692
+    }
3f7692
+    return 0;
3f7692
+}
3f7692
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-4.c b/gcc/testsuite/c-c++-common/Wbidi-chars-4.c
3f7692
new file mode 100644
3f7692
index 00000000000..49f856b9bfe
3f7692
--- /dev/null
3f7692
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-4.c
3f7692
@@ -0,0 +1,172 @@
3f7692
+/* PR preprocessor/103026 */
3f7692
+/* { dg-do compile } */
3f7692
+/* { dg-options "-Wbidi-chars=any -Wno-multichar -Wno-overflow" } */
3f7692
+/* Test all bidi chars in various contexts (identifiers, comments,
3f7692
+   string literals, character constants), both UCN and UTF-8.  The bidi
3f7692
+   chars here are properly terminated, except for the character constants.  */
3f7692
+
3f7692
+/* a b c LRE‪ 1 2 3 PDF‬ x y z */
3f7692
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
3f7692
+/* a b c RLE‫ 1 2 3 PDF‬ x y z */
3f7692
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
3f7692
+/* a b c LRO‭ 1 2 3 PDF‬ x y z */
3f7692
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
3f7692
+/* a b c RLO‮ 1 2 3 PDF‬ x y z */
3f7692
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
3f7692
+/* a b c LRI⁦ 1 2 3 PDI⁩ x y z */
3f7692
+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */
3f7692
+/* a b c RLI⁧ 1 2 3 PDI⁩ x y */
3f7692
+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */
3f7692
+/* a b c FSI⁨ 1 2 3 PDI⁩ x y z */
3f7692
+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
3f7692
+
3f7692
+/* Same but C++ comments instead.  */
3f7692
+// a b c LRE‪ 1 2 3 PDF‬ x y z
3f7692
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
3f7692
+// a b c RLE‫ 1 2 3 PDF‬ x y z
3f7692
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
3f7692
+// a b c LRO‭ 1 2 3 PDF‬ x y z
3f7692
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
3f7692
+// a b c RLO‮ 1 2 3 PDF‬ x y z
3f7692
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
3f7692
+// a b c LRI⁦ 1 2 3 PDI⁩ x y z
3f7692
+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */
3f7692
+// a b c RLI⁧ 1 2 3 PDI⁩ x y
3f7692
+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */
3f7692
+// a b c FSI⁨ 1 2 3 PDI⁩ x y z
3f7692
+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
3f7692
+
3f7692
+/* Here we're closing an unopened context, warn when =any.  */
3f7692
+/* a b c PDI⁩ x y z */
3f7692
+/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */
3f7692
+/* a b c PDF‬ x y z */
3f7692
+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
3f7692
+// a b c PDI⁩ x y z
3f7692
+/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */
3f7692
+// a b c PDF‬ x y z
3f7692
+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
3f7692
+
3f7692
+/* Multiline comments.  */
3f7692
+/* a b c PDI⁩ x y z
3f7692
+   */
3f7692
+/* { dg-warning "U\\+2069" "" { target *-*-* } .-2 } */
3f7692
+/* a b c PDF‬ x y z
3f7692
+   */
3f7692
+/* { dg-warning "U\\+202C" "" { target *-*-* } .-2 } */
3f7692
+/* first
3f7692
+   a b c PDI⁩ x y z
3f7692
+   */
3f7692
+/* { dg-warning "U\\+2069" "" { target *-*-* } .-2 } */
3f7692
+/* first
3f7692
+   a b c PDF‬ x y z
3f7692
+   */
3f7692
+/* { dg-warning "U\\+202C" "" { target *-*-* } .-2 } */
3f7692
+/* first
3f7692
+   a b c PDI⁩ x y z */
3f7692
+/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */
3f7692
+/* first
3f7692
+   a b c PDF‬ x y z */
3f7692
+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
3f7692
+
3f7692
+void
3f7692
+g1 ()
3f7692
+{
3f7692
+  const char *s1 = "a b c LRE‪ 1 2 3 PDF‬ x y z";
3f7692
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
3f7692
+  const char *s2 = "a b c RLE‫ 1 2 3 PDF‬ x y z";
3f7692
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
3f7692
+  const char *s3 = "a b c LRO‭ 1 2 3 PDF‬ x y z";
3f7692
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
3f7692
+  const char *s4 = "a b c RLO‮ 1 2 3 PDF‬ x y z";
3f7692
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
3f7692
+  const char *s5 = "a b c LRI⁦ 1 2 3 PDI⁩ x y z";
3f7692
+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */
3f7692
+  const char *s6 = "a b c RLI⁧ 1 2 3 PDI⁩ x y z";
3f7692
+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */
3f7692
+  const char *s7 = "a b c FSI⁨ 1 2 3 PDI⁩ x y z";
3f7692
+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
3f7692
+  const char *s8 = "a b c PDI⁩ x y z";
3f7692
+/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */
3f7692
+  const char *s9 = "a b c PDF‬ x y z";
3f7692
+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
3f7692
+
3f7692
+  const char *s10 = "a b c LRE\u202a 1 2 3 PDF\u202c x y z";
3f7692
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
3f7692
+  const char *s11 = "a b c LRE\u202A 1 2 3 PDF\u202c x y z";
3f7692
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
3f7692
+  const char *s12 = "a b c RLE\u202b 1 2 3 PDF\u202c x y z";
3f7692
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
3f7692
+  const char *s13 = "a b c RLE\u202B 1 2 3 PDF\u202c x y z";
3f7692
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
3f7692
+  const char *s14 = "a b c LRO\u202d 1 2 3 PDF\u202c x y z";
3f7692
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
3f7692
+  const char *s15 = "a b c LRO\u202D 1 2 3 PDF\u202c x y z";
3f7692
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
3f7692
+  const char *s16 = "a b c RLO\u202e 1 2 3 PDF\u202c x y z";
3f7692
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
3f7692
+  const char *s17 = "a b c RLO\u202E 1 2 3 PDF\u202c x y z";
3f7692
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
3f7692
+  const char *s18 = "a b c LRI\u2066 1 2 3 PDI\u2069 x y z";
3f7692
+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */
3f7692
+  const char *s19 = "a b c RLI\u2067 1 2 3 PDI\u2069 x y z";
3f7692
+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */
3f7692
+  const char *s20 = "a b c FSI\u2068 1 2 3 PDI\u2069 x y z";
3f7692
+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
3f7692
+}
3f7692
+
3f7692
+void
3f7692
+g2 ()
3f7692
+{
3f7692
+  const char c1 = '\u202a';
3f7692
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
3f7692
+  const char c2 = '\u202A';
3f7692
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
3f7692
+  const char c3 = '\u202b';
3f7692
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
3f7692
+  const char c4 = '\u202B';
3f7692
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
3f7692
+  const char c5 = '\u202d';
3f7692
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
3f7692
+  const char c6 = '\u202D';
3f7692
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
3f7692
+  const char c7 = '\u202e';
3f7692
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
3f7692
+  const char c8 = '\u202E';
3f7692
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
3f7692
+  const char c9 = '\u2066';
3f7692
+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */
3f7692
+  const char c10 = '\u2067';
3f7692
+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */
3f7692
+  const char c11 = '\u2068';
3f7692
+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
3f7692
+}
3f7692
+
3f7692
+int A\u202cY;
3f7692
+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
3f7692
+int A\u202CY2;
3f7692
+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
3f7692
+
3f7692
+int d\u202ae\u202cf;
3f7692
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
3f7692
+int d\u202Ae\u202cf2;
3f7692
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
3f7692
+int d\u202be\u202cf;
3f7692
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
3f7692
+int d\u202Be\u202cf2;
3f7692
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
3f7692
+int d\u202de\u202cf;
3f7692
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
3f7692
+int d\u202De\u202cf2;
3f7692
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
3f7692
+int d\u202ee\u202cf;
3f7692
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
3f7692
+int d\u202Ee\u202cf2;
3f7692
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
3f7692
+int d\u2066e\u2069f;
3f7692
+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */
3f7692
+int d\u2067e\u2069f;
3f7692
+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */
3f7692
+int d\u2068e\u2069f;
3f7692
+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
3f7692
+int X\u2069;
3f7692
+/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */
3f7692
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-5.c b/gcc/testsuite/c-c++-common/Wbidi-chars-5.c
3f7692
new file mode 100644
3f7692
index 00000000000..f5776806c79
3f7692
--- /dev/null
3f7692
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-5.c
3f7692
@@ -0,0 +1,172 @@
3f7692
+/* PR preprocessor/103026 */
3f7692
+/* { dg-do compile } */
3f7692
+/* { dg-options "-Wbidi-chars=unpaired -Wno-multichar -Wno-overflow" } */
3f7692
+/* Test all bidi chars in various contexts (identifiers, comments,
3f7692
+   string literals, character constants), both UCN and UTF-8.  The bidi
3f7692
+   chars here are properly terminated, except for the character constants.  */
3f7692
+
3f7692
+/* a b c LRE‪ 1 2 3 PDF‬ x y z */
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
3f7692
+/* a b c RLE‫ 1 2 3 PDF‬ x y z */
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
3f7692
+/* a b c LRO‭ 1 2 3 PDF‬ x y z */
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
3f7692
+/* a b c RLO‮ 1 2 3 PDF‬ x y z */
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
3f7692
+/* a b c LRI⁦ 1 2 3 PDI⁩ x y z */
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
3f7692
+/* a b c RLI⁧ 1 2 3 PDI⁩ x y */
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
3f7692
+/* a b c FSI⁨ 1 2 3 PDI⁩ x y z */
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
3f7692
+
3f7692
+/* Same but C++ comments instead.  */
3f7692
+// a b c LRE‪ 1 2 3 PDF‬ x y z
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
3f7692
+// a b c RLE‫ 1 2 3 PDF‬ x y z
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
3f7692
+// a b c LRO‭ 1 2 3 PDF‬ x y z
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
3f7692
+// a b c RLO‮ 1 2 3 PDF‬ x y z
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
3f7692
+// a b c LRI⁦ 1 2 3 PDI⁩ x y z
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
3f7692
+// a b c RLI⁧ 1 2 3 PDI⁩ x y
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
3f7692
+// a b c FSI⁨ 1 2 3 PDI⁩ x y z
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
3f7692
+
3f7692
+/* Here we're closing an unopened context; only =any warns about that.  */
3f7692
+/* a b c PDI⁩ x y z */
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
3f7692
+/* a b c PDF‬ x y z */
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
3f7692
+// a b c PDI⁩ x y z
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
3f7692
+// a b c PDF‬ x y z
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
3f7692
+
3f7692
+/* Multiline comments.  */
3f7692
+/* a b c PDI⁩ x y z
3f7692
+   */
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-2 } */
3f7692
+/* a b c PDF‬ x y z
3f7692
+   */
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-2 } */
3f7692
+/* first
3f7692
+   a b c PDI⁩ x y z
3f7692
+   */
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-2 } */
3f7692
+/* first
3f7692
+   a b c PDF‬ x y z
3f7692
+   */
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-2 } */
3f7692
+/* first
3f7692
+   a b c PDI⁩ x y z */
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
3f7692
+/* first
3f7692
+   a b c PDF‬ x y z */
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
3f7692
+
3f7692
+void
3f7692
+g1 ()
3f7692
+{
3f7692
+  const char *s1 = "a b c LRE‪ 1 2 3 PDF‬ x y z";
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
3f7692
+  const char *s2 = "a b c RLE‫ 1 2 3 PDF‬ x y z";
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
3f7692
+  const char *s3 = "a b c LRO‭ 1 2 3 PDF‬ x y z";
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
3f7692
+  const char *s4 = "a b c RLO‮ 1 2 3 PDF‬ x y z";
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
3f7692
+  const char *s5 = "a b c LRI⁦ 1 2 3 PDI⁩ x y z";
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
3f7692
+  const char *s6 = "a b c RLI⁧ 1 2 3 PDI⁩ x y z";
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
3f7692
+  const char *s7 = "a b c FSI⁨ 1 2 3 PDI⁩ x y z";
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
3f7692
+  const char *s8 = "a b c PDI⁩ x y z";
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
3f7692
+  const char *s9 = "a b c PDF‬ x y z";
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
3f7692
+
3f7692
+  const char *s10 = "a b c LRE\u202a 1 2 3 PDF\u202c x y z";
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
3f7692
+  const char *s11 = "a b c LRE\u202A 1 2 3 PDF\u202c x y z";
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
3f7692
+  const char *s12 = "a b c RLE\u202b 1 2 3 PDF\u202c x y z";
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
3f7692
+  const char *s13 = "a b c RLE\u202B 1 2 3 PDF\u202c x y z";
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
3f7692
+  const char *s14 = "a b c LRO\u202d 1 2 3 PDF\u202c x y z";
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
3f7692
+  const char *s15 = "a b c LRO\u202D 1 2 3 PDF\u202c x y z";
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
3f7692
+  const char *s16 = "a b c RLO\u202e 1 2 3 PDF\u202c x y z";
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
3f7692
+  const char *s17 = "a b c RLO\u202E 1 2 3 PDF\u202c x y z";
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
3f7692
+  const char *s18 = "a b c LRI\u2066 1 2 3 PDI\u2069 x y z";
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
3f7692
+  const char *s19 = "a b c RLI\u2067 1 2 3 PDI\u2069 x y z";
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
3f7692
+  const char *s20 = "a b c FSI\u2068 1 2 3 PDI\u2069 x y z";
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
3f7692
+}
3f7692
+
3f7692
+void
3f7692
+g2 ()
3f7692
+{
3f7692
+  const char c1 = '\u202a';
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+  const char c2 = '\u202A';
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+  const char c3 = '\u202b';
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+  const char c4 = '\u202B';
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+  const char c5 = '\u202d';
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+  const char c6 = '\u202D';
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+  const char c7 = '\u202e';
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+  const char c8 = '\u202E';
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+  const char c9 = '\u2066';
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+  const char c10 = '\u2067';
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+  const char c11 = '\u2068';
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+}
3f7692
+
3f7692
+int A\u202cY;
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
3f7692
+int A\u202CY2;
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
3f7692
+
3f7692
+int d\u202ae\u202cf;
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
3f7692
+int d\u202Ae\u202cf2;
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
3f7692
+int d\u202be\u202cf;
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
3f7692
+int d\u202Be\u202cf2;
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
3f7692
+int d\u202de\u202cf;
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
3f7692
+int d\u202De\u202cf2;
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
3f7692
+int d\u202ee\u202cf;
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
3f7692
+int d\u202Ee\u202cf2;
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
3f7692
+int d\u2066e\u2069f;
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
3f7692
+int d\u2067e\u2069f;
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
3f7692
+int d\u2068e\u2069f;
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
3f7692
+int X\u2069;
3f7692
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
3f7692
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-6.c b/gcc/testsuite/c-c++-common/Wbidi-chars-6.c
3f7692
new file mode 100644
3f7692
index 00000000000..a65d6faf60e
3f7692
--- /dev/null
3f7692
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-6.c
3f7692
@@ -0,0 +1,130 @@
3f7692
+/* PR preprocessor/103026 */
3f7692
+/* { dg-do compile } */
3f7692
+/* { dg-options "-Wbidi-chars=unpaired" } */
3f7692
+/* Test nesting of bidi chars in various contexts.  */
3f7692
+
3f7692
+/* Terminated by the wrong char:  */
3f7692
+/* a b c LRE‪ 1 2 3 PDI⁩ x y z */
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+/* a b c RLE‫ 1 2 3 PDI⁩ x y  z*/
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+/* a b c LRO‭ 1 2 3 PDI⁩ x y z */
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+/* a b c RLO‮ 1 2 3 PDI⁩ x y z */
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+/* a b c LRI⁦ 1 2 3 PDF‬ x y z */
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+/* a b c RLI⁧ 1 2 3 PDF‬ x y z */
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+/* a b c FSI⁨ 1 2 3 PDF‬ x y  z*/
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+
3f7692
+/* LRE‪ PDF‬ */
3f7692
+/* LRE‪ LRE‪ PDF‬ PDF‬ */
3f7692
+/* PDF‬ LRE‪ PDF‬ */
3f7692
+/* LRE‪ PDF‬ LRE‪ PDF‬ */
3f7692
+/* LRE‪ LRE‪ PDF‬ */
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+/* PDF‬ LRE‪ */
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+
3f7692
+// a b c LRE‪ 1 2 3 PDI⁩ x y z
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+// a b c RLE‫ 1 2 3 PDI⁩ x y  z*/
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+// a b c LRO‭ 1 2 3 PDI⁩ x y z 
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+// a b c RLO‮ 1 2 3 PDI⁩ x y z 
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+// a b c LRI⁦ 1 2 3 PDF‬ x y z 
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+// a b c RLI⁧ 1 2 3 PDF‬ x y z 
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+// a b c FSI⁨ 1 2 3 PDF‬ x y  z
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+
3f7692
+// LRE‪ PDF‬ 
3f7692
+// LRE‪ LRE‪ PDF‬ PDF‬
3f7692
+// PDF‬ LRE‪ PDF‬
3f7692
+// LRE‪ PDF‬ LRE‪ PDF‬
3f7692
+// LRE‪ LRE‪ PDF‬
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+// PDF‬ LRE‪
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+
3f7692
+void
3f7692
+g1 ()
3f7692
+{
3f7692
+  const char *s1 = "a b c LRE‪ 1 2 3 PDI⁩ x y z";
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+  const char *s2 = "a b c LRE\u202a 1 2 3 PDI\u2069 x y z";
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+  const char *s3 = "a b c RLE‫ 1 2 3 PDI⁩ x y ";
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+  const char *s4 = "a b c RLE\u202b 1 2 3 PDI\u2069 x y z";
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+  const char *s5 = "a b c LRO‭ 1 2 3 PDI⁩ x y z";
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+  const char *s6 = "a b c LRO\u202d 1 2 3 PDI\u2069 x y z";
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+  const char *s7 = "a b c RLO‮ 1 2 3 PDI⁩ x y z";
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+  const char *s8 = "a b c RLO\u202e 1 2 3 PDI\u2069 x y z";
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+  const char *s9 = "a b c LRI⁦ 1 2 3 PDF‬ x y z";
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+  const char *s10 = "a b c LRI\u2066 1 2 3 PDF\u202c x y z";
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+  const char *s11 = "a b c RLI⁧ 1 2 3 PDF‬ x y z\
3f7692
+    ";
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
3f7692
+  const char *s12 = "a b c RLI\u2067 1 2 3 PDF\u202c x y z";
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+  const char *s13 = "a b c FSI⁨ 1 2 3 PDF‬ x y z";
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+  const char *s14 = "a b c FSI\u2068 1 2 3 PDF\u202c x y z";
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+  const char *s15 = "PDF‬ LRE‪";
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+  const char *s16 = "PDF\u202c LRE\u202a";
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+  const char *s17 = "LRE‪ PDF‬";
3f7692
+  const char *s18 = "LRE\u202a PDF\u202c";
3f7692
+  const char *s19 = "LRE‪ LRE‪ PDF‬ PDF‬";
3f7692
+  const char *s20 = "LRE\u202a LRE\u202a PDF\u202c PDF\u202c";
3f7692
+  const char *s21 = "PDF‬ LRE‪ PDF‬";
3f7692
+  const char *s22 = "PDF\u202c LRE\u202a PDF\u202c";
3f7692
+  const char *s23 = "LRE‪ LRE‪ PDF‬";
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+  const char *s24 = "LRE\u202a LRE\u202a PDF\u202c";
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+  const char *s25 = "PDF‬ LRE‪";
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+  const char *s26 = "PDF\u202c LRE\u202a";
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+  const char *s27 = "PDF‬ LRE\u202a";
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+  const char *s28 = "PDF\u202c LRE‪";
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+}
3f7692
+
3f7692
+int A\u202aB\u2069C;
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+int a\u202bB\u2069c;
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+int a\u202db\u2069c2;
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+int a\u202eb\u2069;
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+int a\u2066b\u202c;
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+int a\u2067b\u202c;
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+int a\u2068b\u202c;
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+int A\u202aB\u202c;
3f7692
+int A\u202aA\u202aB\u202cB\u202c;
3f7692
+int a_\u202C_\u202a_\u202c;
3f7692
+int a_\u202a_\u202c_\u202a_\u202c_;
3f7692
+int a_\u202a_\u202c_\u202a_;
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-7.c b/gcc/testsuite/c-c++-common/Wbidi-chars-7.c
3f7692
new file mode 100644
3f7692
index 00000000000..d012d420ec0
3f7692
--- /dev/null
3f7692
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-7.c
3f7692
@@ -0,0 +1,9 @@
3f7692
+/* PR preprocessor/103026 */
3f7692
+/* { dg-do compile } */
3f7692
+/* { dg-options "-Wbidi-chars=any" } */
3f7692
+/* Test we ignore UCNs in comments.  */
3f7692
+
3f7692
+// a b c \u202a 1 2 3
3f7692
+// a b c \u202A 1 2 3
3f7692
+/* a b c \u202a 1 2 3 */
3f7692
+/* a b c \u202A 1 2 3 */
3f7692
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-8.c b/gcc/testsuite/c-c++-common/Wbidi-chars-8.c
3f7692
new file mode 100644
3f7692
index 00000000000..4f54c5092ec
3f7692
--- /dev/null
3f7692
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-8.c
3f7692
@@ -0,0 +1,13 @@
3f7692
+/* PR preprocessor/103026 */
3f7692
+/* { dg-do compile } */
3f7692
+/* { dg-options "-Wbidi-chars=any" } */
3f7692
+/* Test \u vs \U.  */
3f7692
+
3f7692
+int a_\u202A;
3f7692
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
3f7692
+int a_\u202a_2;
3f7692
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
3f7692
+int a_\U0000202A_3;
3f7692
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
3f7692
+int a_\U0000202a_4;
3f7692
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
3f7692
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-9.c b/gcc/testsuite/c-c++-common/Wbidi-chars-9.c
3f7692
new file mode 100644
3f7692
index 00000000000..e2af1b1ca97
3f7692
--- /dev/null
3f7692
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-9.c
3f7692
@@ -0,0 +1,29 @@
3f7692
+/* PR preprocessor/103026 */
3f7692
+/* { dg-do compile } */
3f7692
+/* { dg-options "-Wbidi-chars=unpaired" } */
3f7692
+/* Test that we properly separate bidi contexts (comment/identifier/character
3f7692
+   constant/string literal).  */
3f7692
+
3f7692
+/* LRE ->‪<- */ int pdf_\u202c_1;
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+/* RLE ->‫<- */ int pdf_\u202c_2;
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+/* LRO ->‭<- */ int pdf_\u202c_3;
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+/* RLO ->‮<- */ int pdf_\u202c_4;
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+/* LRI ->⁦<-*/ int pdi_\u2069_1;
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+/* RLI ->⁧<- */ int pdi_\u2069_12;
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+/* FSI ->⁨<- */ int pdi_\u2069_3;
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+
3f7692
+const char *s1 = "LRE\u202a"; /* PDF ->‬<- */
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+/* LRE ->‪<- */ const char *s2 = "PDF\u202c";
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+const char *s3 = "LRE\u202a"; int pdf_\u202c_5;
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
+int lre_\u202a; const char *s4 = "PDF\u202c";
3f7692
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
3f7692
diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h
3f7692
index 3ad52d5e01e..e0dcb7f0529 100644
3f7692
--- a/libcpp/include/cpplib.h
3f7692
+++ b/libcpp/include/cpplib.h
3f7692
@@ -305,6 +305,17 @@ enum cpp_normalize_level {
3f7692
   normalized_none
3f7692
 };
3f7692
 
3f7692
+/* The possible bidirectional control characters checking levels, from least
3f7692
+   restrictive to most.  */
3f7692
+enum cpp_bidirectional_level {
3f7692
+  /* No checking.  */
3f7692
+  bidirectional_none,
3f7692
+  /* Only detect unpaired uses of bidirectional control characters.  */
3f7692
+  bidirectional_unpaired,
3f7692
+  /* Detect any use of bidirectional control characters.  */
3f7692
+  bidirectional_any
3f7692
+};
3f7692
+
3f7692
 /* This structure is nested inside struct cpp_reader, and
3f7692
    carries all the options visible to the command line.  */
3f7692
 struct cpp_options
3f7692
@@ -506,6 +517,10 @@ struct cpp_options
3f7692
   /* True if warn about differences between C++98 and C++11.  */
3f7692
   bool cpp_warn_cxx11_compat;
3f7692
 
3f7692
+  /* Nonzero if bidirectional control characters checking is on.  See enum
3f7692
+     cpp_bidirectional_level.  */
3f7692
+  unsigned char cpp_warn_bidirectional;
3f7692
+
3f7692
   /* Dependency generation.  */
3f7692
   struct
3f7692
   {
3f7692
@@ -1063,7 +1078,8 @@ enum {
3f7692
   CPP_W_PEDANTIC,
3f7692
   CPP_W_C90_C99_COMPAT,
3f7692
   CPP_W_CXX11_COMPAT,
3f7692
-  CPP_W_EXPANSION_TO_DEFINED
3f7692
+  CPP_W_EXPANSION_TO_DEFINED,
3f7692
+  CPP_W_BIDIRECTIONAL
3f7692
 };
3f7692
 
3f7692
 /* Output a diagnostic of some kind.  */
3f7692
diff --git a/libcpp/init.c b/libcpp/init.c
3f7692
index ca3fbaa5c05..5c15da82ff8 100644
3f7692
--- a/libcpp/init.c
3f7692
+++ b/libcpp/init.c
3f7692
@@ -208,6 +208,7 @@ cpp_create_reader (enum c_lang lang, cpp_hash_table *table,
3f7692
       = ENABLE_CANONICAL_SYSTEM_HEADERS;
3f7692
   CPP_OPTION (pfile, ext_numeric_literals) = 1;
3f7692
   CPP_OPTION (pfile, warn_date_time) = 0;
3f7692
+  CPP_OPTION (pfile, cpp_warn_bidirectional) = bidirectional_unpaired;
3f7692
 
3f7692
   /* Default CPP arithmetic to something sensible for the host for the
3f7692
      benefit of dumb users like fix-header.  */
3f7692
diff --git a/libcpp/internal.h b/libcpp/internal.h
3f7692
index 4f74f995cec..53b4c0f4af7 100644
3f7692
--- a/libcpp/internal.h
3f7692
+++ b/libcpp/internal.h
3f7692
@@ -576,6 +576,13 @@ struct cpp_reader
3f7692
   /* If non-null, the lexer will use this location for the next token
3f7692
      instead of getting a location from the linemap.  */
3f7692
   source_location *forced_token_location_p;
3f7692
+
3f7692
+  /* Returns true iff bidirectional control character checking is enabled,
3f7692
+     i.e. cpp_warn_bidirectional is not bidirectional_none.  */
3f7692
+  bool warn_bidi_p () const
3f7692
+  {
3f7692
+    return CPP_OPTION (this, cpp_warn_bidirectional) != bidirectional_none;
3f7692
+  }
3f7692
 };
3f7692
 
3f7692
 /* Character classes.  Based on the more primitive macros in safe-ctype.h.
3f7692
diff --git a/libcpp/lex.c b/libcpp/lex.c
3f7692
index a408f912c5c..ea7f75e842e 100644
3f7692
--- a/libcpp/lex.c
3f7692
+++ b/libcpp/lex.c
3f7692
@@ -1164,6 +1164,324 @@ _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
3f7692
     }
3f7692
 }
3f7692
 
3f7692
+namespace bidi {
3f7692
+  enum kind {
3f7692
+    NONE, LRE, RLE, LRO, RLO, LRI, RLI, FSI, PDF, PDI, LTR, RTL
3f7692
+  };
3f7692
+
3f7692
+  /* All the UTF-8 encodings of bidi characters start with E2.  */
3f7692
+  const uchar utf8_start = 0xe2;
3f7692
+
3f7692
+  /* A vector holding currently open bidi contexts.  We use a char for
3f7692
+     each context, its LSB is 1 if it represents a PDF context, 0 if it
3f7692
+     represents a PDI context.  The next bit is 1 if this context was open
3f7692
+     by a bidi character written as a UCN, and 0 when it was UTF-8.  */
3f7692
+  semi_embedded_vec <unsigned char, 16> vec;
3f7692
+
3f7692
+  /* Close the whole comment/identifier/string literal/character constant
3f7692
+     context.  */
3f7692
+  void on_close ()
3f7692
+  {
3f7692
+    vec.truncate (0);
3f7692
+  }
3f7692
+
3f7692
+  /* Pop the last element in the vector.  */
3f7692
+  void pop ()
3f7692
+  {
3f7692
+    unsigned int len = vec.count ();
3f7692
+    gcc_checking_assert (len > 0);
3f7692
+    vec.truncate (len - 1);
3f7692
+  }
3f7692
+
3f7692
+  /* Return the closer (PDF or PDI) that the Ith open context expects.  */
3f7692
+  kind ctx_at (unsigned int i)
3f7692
+  {
3f7692
+    return (vec[i] & 1) ? PDF : PDI;
3f7692
+  }
3f7692
+
3f7692
+  /* Return which context is currently opened.  */
3f7692
+  kind current_ctx ()
3f7692
+  {
3f7692
+    unsigned int len = vec.count ();
3f7692
+    if (len == 0)
3f7692
+      return NONE;
3f7692
+    return ctx_at (len - 1);
3f7692
+  }
3f7692
+
3f7692
+  /* Return true if the current context comes from a UCN origin, that is,
3f7692
+     the bidi char which started this bidi context was written as a UCN.  */
3f7692
+  bool current_ctx_ucn_p ()
3f7692
+  {
3f7692
+    unsigned int len = vec.count ();
3f7692
+    gcc_checking_assert (len > 0);
3f7692
+    return (vec[len - 1] >> 1) & 1;
3f7692
+  }
3f7692
+
3f7692
+  /* We've read a bidi char, update the current vector as necessary.  */
3f7692
+  void on_char (kind k, bool ucn_p)
3f7692
+  {
3f7692
+    switch (k)
3f7692
+      {
3f7692
+      case LRE:
3f7692
+      case RLE:
3f7692
+      case LRO:
3f7692
+      case RLO:
3f7692
+	vec.push (ucn_p ? 3u : 1u);
3f7692
+	break;
3f7692
+      case LRI:
3f7692
+      case RLI:
3f7692
+      case FSI:
3f7692
+	vec.push (ucn_p ? 2u : 0u);
3f7692
+	break;
3f7692
+      /* PDF terminates the scope of the last LRE, RLE, LRO, or RLO
3f7692
+	 whose scope has not yet been terminated.  */
3f7692
+      case PDF:
3f7692
+	if (current_ctx () == PDF)
3f7692
+	  pop ();
3f7692
+	break;
3f7692
+      /* PDI terminates the scope of the last LRI, RLI, or FSI whose
3f7692
+	 scope has not yet been terminated, as well as the scopes of
3f7692
+	 any subsequent LREs, RLEs, LROs, or RLOs whose scopes have not
3f7692
+	 yet been terminated.  */
3f7692
+      case PDI:
3f7692
+	for (int i = vec.count () - 1; i >= 0; --i)
3f7692
+	  if (ctx_at (i) == PDI)
3f7692
+	    {
3f7692
+	      vec.truncate (i);
3f7692
+	      break;
3f7692
+	    }
3f7692
+	break;
3f7692
+      case LTR:
3f7692
+      case RTL:
3f7692
+	/* These aren't popped by a PDF/PDI.  */
3f7692
+	break;
3f7692
+      [[likely]] case NONE:
3f7692
+	break;
3f7692
+      default:
3f7692
+	abort ();
3f7692
+      }
3f7692
+  }
3f7692
+
3f7692
+  /* Return a descriptive string for K.  */
3f7692
+  const char *to_str (kind k)
3f7692
+  {
3f7692
+    switch (k)
3f7692
+      {
3f7692
+      case LRE:
3f7692
+	return "U+202A (LEFT-TO-RIGHT EMBEDDING)";
3f7692
+      case RLE:
3f7692
+	return "U+202B (RIGHT-TO-LEFT EMBEDDING)";
3f7692
+      case LRO:
3f7692
+	return "U+202D (LEFT-TO-RIGHT OVERRIDE)";
3f7692
+      case RLO:
3f7692
+	return "U+202E (RIGHT-TO-LEFT OVERRIDE)";
3f7692
+      case LRI:
3f7692
+	return "U+2066 (LEFT-TO-RIGHT ISOLATE)";
3f7692
+      case RLI:
3f7692
+	return "U+2067 (RIGHT-TO-LEFT ISOLATE)";
3f7692
+      case FSI:
3f7692
+	return "U+2068 (FIRST STRONG ISOLATE)";
3f7692
+      case PDF:
3f7692
+	return "U+202C (POP DIRECTIONAL FORMATTING)";
3f7692
+      case PDI:
3f7692
+	return "U+2069 (POP DIRECTIONAL ISOLATE)";
3f7692
+      case LTR:
3f7692
+	return "U+200E (LEFT-TO-RIGHT MARK)";
3f7692
+      case RTL:
3f7692
+	return "U+200F (RIGHT-TO-LEFT MARK)";
3f7692
+      default:
3f7692
+	abort ();
3f7692
+      }
3f7692
+  }
3f7692
+}
3f7692
+
3f7692
+/* Parse a sequence of 3 bytes starting with P and return its bidi code.  */
3f7692
+
3f7692
+static bidi::kind
3f7692
+get_bidi_utf8 (const unsigned char *const p)
3f7692
+{
3f7692
+  gcc_checking_assert (p[0] == bidi::utf8_start);
3f7692
+
3f7692
+  if (p[1] == 0x80)
3f7692
+    switch (p[2])
3f7692
+      {
3f7692
+      case 0xaa:
3f7692
+	return bidi::LRE;
3f7692
+      case 0xab:
3f7692
+	return bidi::RLE;
3f7692
+      case 0xac:
3f7692
+	return bidi::PDF;
3f7692
+      case 0xad:
3f7692
+	return bidi::LRO;
3f7692
+      case 0xae:
3f7692
+	return bidi::RLO;
3f7692
+      case 0x8e:
3f7692
+	return bidi::LTR;
3f7692
+      case 0x8f:
3f7692
+	return bidi::RTL;
3f7692
+      default:
3f7692
+	break;
3f7692
+      }
3f7692
+  else if (p[1] == 0x81)
3f7692
+    switch (p[2])
3f7692
+      {
3f7692
+      case 0xa6:
3f7692
+	return bidi::LRI;
3f7692
+      case 0xa7:
3f7692
+	return bidi::RLI;
3f7692
+      case 0xa8:
3f7692
+	return bidi::FSI;
3f7692
+      case 0xa9:
3f7692
+	return bidi::PDI;
3f7692
+      default:
3f7692
+	break;
3f7692
+      }
3f7692
+
3f7692
+  return bidi::NONE;
3f7692
+}
3f7692
+
3f7692
+/* Parse a UCN where P points just past \u or \U and return its bidi code.  */
3f7692
+
3f7692
+static bidi::kind
3f7692
+get_bidi_ucn (const unsigned char *p, bool is_U)
3f7692
+{
3f7692
+  /* 6.4.3 Universal Character Names
3f7692
+      \u hex-quad
3f7692
+      \U hex-quad hex-quad
3f7692
+     where \unnnn means \U0000nnnn.  */
3f7692
+
3f7692
+  if (is_U)
3f7692
+    {
3f7692
+      if (p[0] != '0' || p[1] != '0' || p[2] != '0' || p[3] != '0')
3f7692
+	return bidi::NONE;
3f7692
+      /* Skip 4B so we can treat \u and \U the same below.  */
3f7692
+      p += 4;
3f7692
+    }
3f7692
+
3f7692
+  /* All code points we are looking for start with 20xx.  */
3f7692
+  if (p[0] != '2' || p[1] != '0')
3f7692
+    return bidi::NONE;
3f7692
+  else if (p[2] == '2')
3f7692
+    switch (p[3])
3f7692
+      {
3f7692
+      case 'a':
3f7692
+      case 'A':
3f7692
+	return bidi::LRE;
3f7692
+      case 'b':
3f7692
+      case 'B':
3f7692
+	return bidi::RLE;
3f7692
+      case 'c':
3f7692
+      case 'C':
3f7692
+	return bidi::PDF;
3f7692
+      case 'd':
3f7692
+      case 'D':
3f7692
+	return bidi::LRO;
3f7692
+      case 'e':
3f7692
+      case 'E':
3f7692
+	return bidi::RLO;
3f7692
+      default:
3f7692
+	break;
3f7692
+      }
3f7692
+  else if (p[2] == '6')
3f7692
+    switch (p[3])
3f7692
+      {
3f7692
+      case '6':
3f7692
+	return bidi::LRI;
3f7692
+      case '7':
3f7692
+	return bidi::RLI;
3f7692
+      case '8':
3f7692
+	return bidi::FSI;
3f7692
+      case '9':
3f7692
+	return bidi::PDI;
3f7692
+      default:
3f7692
+	break;
3f7692
+      }
3f7692
+  else if (p[2] == '0')
3f7692
+    switch (p[3])
3f7692
+      {
3f7692
+      case 'e':
3f7692
+      case 'E':
3f7692
+	return bidi::LTR;
3f7692
+      case 'f':
3f7692
+      case 'F':
3f7692
+	return bidi::RTL;
3f7692
+      default:
3f7692
+	break;
3f7692
+      }
3f7692
+
3f7692
+  return bidi::NONE;
3f7692
+}
3f7692
+
3f7692
+/* We're closing a bidi context, that is, we've encountered a newline,
3f7692
+   are closing a C-style comment, or are at the end of a string literal,
3f7692
+   character constant, or identifier.  Warn if this context was not
3f7692
+   properly terminated by a PDI or PDF.  P points to the last character
3f7692
+   in this context.  */
3f7692
+
3f7692
+static void
3f7692
+maybe_warn_bidi_on_close (cpp_reader *pfile, const uchar *p)
3f7692
+{
3f7692
+  if (CPP_OPTION (pfile, cpp_warn_bidirectional) == bidirectional_unpaired
3f7692
+      && bidi::vec.count () > 0)
3f7692
+    {
3f7692
+      const source_location loc
3f7692
+	= linemap_position_for_column (pfile->line_table,
3f7692
+				       CPP_BUF_COLUMN (pfile->buffer, p));
3f7692
+      cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0,
3f7692
+			     "unpaired UTF-8 bidirectional control character "
3f7692
+			     "detected");
3f7692
+    }
3f7692
+  /* We're done with this context.  */
3f7692
+  bidi::on_close ();
3f7692
+}
3f7692
+
3f7692
+/* We're at the beginning or in the middle of an identifier/comment/string
3f7692
+   literal/character constant.  Warn if we've encountered a bidi character.
3f7692
+   KIND says which bidi character it was; P points to it in the character
3f7692
+   stream.  UCN_P is true iff this bidi character was written as a UCN.  */
3f7692
+
3f7692
+static void
3f7692
+maybe_warn_bidi_on_char (cpp_reader *pfile, const uchar *p, bidi::kind kind,
3f7692
+			 bool ucn_p)
3f7692
+{
3f7692
+  if (__builtin_expect (kind == bidi::NONE, 1))
3f7692
+    return;
3f7692
+
3f7692
+  const unsigned char warn_bidi = CPP_OPTION (pfile, cpp_warn_bidirectional);
3f7692
+
3f7692
+  if (warn_bidi != bidirectional_none)
3f7692
+    {
3f7692
+      const source_location loc
3f7692
+	= linemap_position_for_column (pfile->line_table,
3f7692
+				       CPP_BUF_COLUMN (pfile->buffer, p));
3f7692
+      /* It seems excessive to warn about a PDI/PDF that is closing
3f7692
+	 an opened context because we've already warned about the
3f7692
+	 opening character.  Except, at the =unpaired level, warn when
3f7692
+	 the opener and the closer mix UCN and UTF-8 spellings.  */
3f7692
+      if (kind == bidi::current_ctx ())
3f7692
+	{
3f7692
+	  if (warn_bidi == bidirectional_unpaired
3f7692
+	      && bidi::current_ctx_ucn_p () != ucn_p)
3f7692
+	    cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0,
3f7692
+				   "UTF-8 vs UCN mismatch when closing "
3f7692
+				   "a context by \"%s\"", bidi::to_str (kind));
3f7692
+	}
3f7692
+      else if (warn_bidi == bidirectional_any)
3f7692
+	{
3f7692
+	  if (kind == bidi::PDF || kind == bidi::PDI)
3f7692
+	    cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0,
3f7692
+				   "\"%s\" is closing an unopened context",
3f7692
+				   bidi::to_str (kind));
3f7692
+	  else
3f7692
+	    cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0,
3f7692
+				   "found problematic Unicode character \"%s\"",
3f7692
+				   bidi::to_str (kind));
3f7692
+	}
3f7692
+    }
3f7692
+  /* Update the stack of open bidi contexts to account for KIND.  */
3f7692
+  bidi::on_char (kind, ucn_p);
3f7692
+}
3f7692
+
3f7692
 /* Skip a C-style block comment.  We find the end of the comment by
3f7692
    seeing if an asterisk is before every '/' we encounter.  Returns
3f7692
    nonzero if comment terminated by EOF, zero otherwise.
3f7692
@@ -1175,6 +1493,7 @@ _cpp_skip_block_comment (cpp_reader *pfile)
3f7692
   cpp_buffer *buffer = pfile->buffer;
3f7692
   const uchar *cur = buffer->cur;
3f7692
   uchar c;
3f7692
+  const bool warn_bidi_p = pfile->warn_bidi_p ();
3f7692
 
3f7692
   cur++;
3f7692
   if (*cur == '/')
3f7692
@@ -1189,7 +1508,11 @@ _cpp_skip_block_comment (cpp_reader *pfile)
3f7692
       if (c == '/')
3f7692
 	{
3f7692
 	  if (cur[-2] == '*')
3f7692
-	    break;
3f7692
+	    {
3f7692
+	      if (warn_bidi_p)
3f7692
+		maybe_warn_bidi_on_close (pfile, cur);
3f7692
+	      break;
3f7692
+	    }
3f7692
 
3f7692
 	  /* Warn about potential nested comments, but not if the '/'
3f7692
 	     comes immediately before the true comment delimiter.
3f7692
@@ -1208,6 +1531,8 @@ _cpp_skip_block_comment (cpp_reader *pfile)
3f7692
 	{
3f7692
 	  unsigned int cols;
3f7692
 	  buffer->cur = cur - 1;
3f7692
+	  if (warn_bidi_p)
3f7692
+	    maybe_warn_bidi_on_close (pfile, cur);
3f7692
 	  _cpp_process_line_notes (pfile, true);
3f7692
 	  if (buffer->next_line >= buffer->rlimit)
3f7692
 	    return true;
3f7692
@@ -1218,6 +1543,13 @@ _cpp_skip_block_comment (cpp_reader *pfile)
3f7692
 
3f7692
 	  cur = buffer->cur;
3f7692
 	}
3f7692
+      /* If this is a beginning of a UTF-8 encoding, it might be
3f7692
+	 a bidirectional control character.  */
3f7692
+      else if (__builtin_expect (c == bidi::utf8_start, 0) && warn_bidi_p)
3f7692
+	{
3f7692
+	  bidi::kind kind = get_bidi_utf8 (cur - 1);
3f7692
+	  maybe_warn_bidi_on_char (pfile, cur, kind, /*ucn_p=*/false);
3f7692
+	}
3f7692
     }
3f7692
 
3f7692
   buffer->cur = cur;
3f7692
@@ -1233,9 +1565,31 @@ skip_line_comment (cpp_reader *pfile)
3f7692
 {
3f7692
   cpp_buffer *buffer = pfile->buffer;
3f7692
   source_location orig_line = pfile->line_table->highest_line;
3f7692
+  const bool warn_bidi_p = pfile->warn_bidi_p ();
3f7692
 
3f7692
-  while (*buffer->cur != '\n')
3f7692
-    buffer->cur++;
3f7692
+  if (!warn_bidi_p)
3f7692
+    while (*buffer->cur != '\n')
3f7692
+      buffer->cur++;
3f7692
+  else
3f7692
+    {
3f7692
+      while (*buffer->cur != '\n'
3f7692
+	     && *buffer->cur != bidi::utf8_start)
3f7692
+	buffer->cur++;
3f7692
+      if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0))
3f7692
+	{
3f7692
+	  while (*buffer->cur != '\n')
3f7692
+	    {
3f7692
+	      if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0))
3f7692
+		{
3f7692
+		  bidi::kind kind = get_bidi_utf8 (buffer->cur);
3f7692
+		  maybe_warn_bidi_on_char (pfile, buffer->cur, kind,
3f7692
+					   /*ucn_p=*/false);
3f7692
+		}
3f7692
+	      buffer->cur++;
3f7692
+	    }
3f7692
+	  maybe_warn_bidi_on_close (pfile, buffer->cur);
3f7692
+	}
3f7692
+    }
3f7692
 
3f7692
   _cpp_process_line_notes (pfile, true);
3f7692
   return orig_line != pfile->line_table->highest_line;
3f7692
@@ -1315,11 +1669,13 @@ warn_about_normalization (cpp_reader *pfile,
3f7692
 
3f7692
 /* Returns TRUE if the sequence starting at buffer->cur is invalid in
3f7692
    an identifier.  FIRST is TRUE if this starts an identifier.  */
3f7692
+
3f7692
 static bool
3f7692
 forms_identifier_p (cpp_reader *pfile, int first,
3f7692
 		    struct normalize_state *state)
3f7692
 {
3f7692
   cpp_buffer *buffer = pfile->buffer;
3f7692
+  const bool warn_bidi_p = pfile->warn_bidi_p ();
3f7692
 
3f7692
   if (*buffer->cur == '$')
3f7692
     {
3f7692
@@ -1343,6 +1699,12 @@ forms_identifier_p (cpp_reader *pfile, int first,
3f7692
     {
3f7692
       cppchar_t s;
3f7692
       buffer->cur += 2;
3f7692
+      if (warn_bidi_p)
3f7692
+	{
3f7692
+	  bidi::kind kind = get_bidi_ucn (buffer->cur,
3f7692
+					  buffer->cur[-1] == 'U');
3f7692
+	  maybe_warn_bidi_on_char (pfile, buffer->cur, kind, /*ucn_p=*/true);
3f7692
+	}
3f7692
       if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
3f7692
 			  state, &s, NULL, NULL))
3f7692
 	return true;
3f7692
@@ -1450,6 +1812,7 @@ lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
3f7692
   const uchar *cur;
3f7692
   unsigned int len;
3f7692
   unsigned int hash = HT_HASHSTEP (0, *base);
3f7692
+  const bool warn_bidi_p = pfile->warn_bidi_p ();
3f7692
 
3f7692
   cur = pfile->buffer->cur;
3f7692
   if (! starts_ucn)
3f7692
@@ -1472,6 +1835,8 @@ lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
3f7692
 	    pfile->buffer->cur++;
3f7692
 	  }
3f7692
       } while (forms_identifier_p (pfile, false, nst));
3f7692
+      if (warn_bidi_p)
3f7692
+	maybe_warn_bidi_on_close (pfile, pfile->buffer->cur);
3f7692
       result = _cpp_interpret_identifier (pfile, base,
3f7692
 					  pfile->buffer->cur - base);
3f7692
       *spelling = cpp_lookup (pfile, base, pfile->buffer->cur - base);
3f7692
@@ -1673,6 +2038,7 @@ lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base,
3f7692
   _cpp_buff *first_buff = NULL, *last_buff = NULL;
3f7692
   size_t raw_prefix_start;
3f7692
   _cpp_line_note *note = &pfile->buffer->notes[pfile->buffer->cur_note];
3f7692
+  const bool warn_bidi_p = pfile->warn_bidi_p ();
3f7692
 
3f7692
   type = (*base == 'L' ? CPP_WSTRING :
3f7692
 	  *base == 'U' ? CPP_STRING32 :
3f7692
@@ -1909,8 +2275,15 @@ lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base,
3f7692
 	  cur = base = pfile->buffer->cur;
3f7692
 	  note = &pfile->buffer->notes[pfile->buffer->cur_note];
3f7692
 	}
3f7692
+      else if (__builtin_expect ((unsigned char) c == bidi::utf8_start, 0)
3f7692
+	       && warn_bidi_p)
3f7692
+	maybe_warn_bidi_on_char (pfile, cur - 1, get_bidi_utf8 (cur - 1),
3f7692
+				 /*ucn_p=*/false);
3f7692
     }
3f7692
 
3f7692
+  if (warn_bidi_p)
3f7692
+    maybe_warn_bidi_on_close (pfile, cur);
3f7692
+
3f7692
   if (CPP_OPTION (pfile, user_literals))
3f7692
     {
3f7692
       /* If a string format macro, say from inttypes.h, is placed touching
3f7692
@@ -2005,15 +2378,27 @@ lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
3f7692
   else
3f7692
     terminator = '>', type = CPP_HEADER_NAME;
3f7692
 
3f7692
+  const bool warn_bidi_p = pfile->warn_bidi_p ();
3f7692
   for (;;)
3f7692
     {
3f7692
       cppchar_t c = *cur++;
3f7692
 
3f7692
       /* In #include-style directives, terminators are not escapable.  */
3f7692
       if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
3f7692
-	cur++;
3f7692
+	{
3f7692
+	  if ((cur[0] == 'u' || cur[0] == 'U') && warn_bidi_p)
3f7692
+	    {
3f7692
+	      bidi::kind kind = get_bidi_ucn (cur + 1, cur[0] == 'U');
3f7692
+	      maybe_warn_bidi_on_char (pfile, cur, kind, /*ucn_p=*/true);
3f7692
+	    }
3f7692
+	  cur++;
3f7692
+	}
3f7692
       else if (c == terminator)
3f7692
-	break;
3f7692
+	{
3f7692
+	  if (warn_bidi_p)
3f7692
+	    maybe_warn_bidi_on_close (pfile, cur - 1);
3f7692
+	  break;
3f7692
+	}
3f7692
       else if (c == '\n')
3f7692
 	{
3f7692
 	  cur--;
3f7692
@@ -2030,6 +2415,11 @@ lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
3f7692
 	}
3f7692
       else if (c == '\0')
3f7692
 	saw_NUL = true;
3f7692
+      else if (__builtin_expect (c == bidi::utf8_start, 0) && warn_bidi_p)
3f7692
+	{
3f7692
+	  bidi::kind kind = get_bidi_utf8 (cur - 1);
3f7692
+	  maybe_warn_bidi_on_char (pfile, cur - 1, kind, /*ucn_p=*/false);
3f7692
+	}
3f7692
     }
3f7692
 
3f7692
   if (saw_NUL && !pfile->state.skipping)