Blame SOURCES/gcc11-add-Wbidirectional.patch

4c2713
From a241a9b727f03afe62a161a2662a0f1192fd523a Mon Sep 17 00:00:00 2001
4c2713
From: Marek Polacek <polacek@redhat.com>
4c2713
Date: Wed, 6 Oct 2021 14:33:59 -0400
4c2713
Subject: [PATCH] cpp: Implement -Wbidirectional=[none|unpaired|any]
4c2713
4c2713
This patch implements -Wbidirectional=[none|unpaired|any] to warn about
4c2713
possibly dangerous bidirectional characters.
4c2713
4c2713
gcc/c-family/ChangeLog:
4c2713
4c2713
	* c.opt (Wbidirectional, Wbidirectional=): New option.
4c2713
4c2713
gcc/ChangeLog:
4c2713
4c2713
	* doc/invoke.texi: Document -Wbidirectional.
4c2713
4c2713
libcpp/ChangeLog:
4c2713
4c2713
	* include/cpplib.h (enum cpp_bidirectional_level): New.
4c2713
	(struct cpp_options): Add cpp_warn_bidirectional.
4c2713
	(enum cpp_warning_reason): Add CPP_W_BIDIRECTIONAL.
4c2713
	* init.c (cpp_create_reader): Set cpp_warn_bidirectional.
4c2713
	* lex.c (bidi): New namespace.
4c2713
	(get_bidi_utf8): New function.
4c2713
	(get_bidi_ucn): Likewise.
4c2713
	(maybe_warn_bidi_on_close): Likewise.
4c2713
	(maybe_warn_bidi_on_char): Likewise.
4c2713
	(_cpp_skip_block_comment): Implement warning about bidirectional
4c2713
	characters.
4c2713
	(skip_line_comment): Likewise.
4c2713
	(forms_identifier_p): Likewise.
4c2713
	(lex_identifier): Likewise.
4c2713
	(lex_string): Likewise.
4c2713
	(lex_raw_string): Likewise.
4c2713
4c2713
gcc/testsuite/ChangeLog:
4c2713
4c2713
	* c-c++-common/Wbidirectional-1.c: New test.
4c2713
	* c-c++-common/Wbidirectional-2.c: New test.
4c2713
	* c-c++-common/Wbidirectional-3.c: New test.
4c2713
	* c-c++-common/Wbidirectional-4.c: New test.
4c2713
	* c-c++-common/Wbidirectional-5.c: New test.
4c2713
	* c-c++-common/Wbidirectional-6.c: New test.
4c2713
	* c-c++-common/Wbidirectional-7.c: New test.
4c2713
	* c-c++-common/Wbidirectional-8.c: New test.
4c2713
	* c-c++-common/Wbidirectional-9.c: New test.
4c2713
	* c-c++-common/Wbidirectional-10.c: New test.
4c2713
	* c-c++-common/Wbidirectional-11.c: New test.
4c2713
	* c-c++-common/Wbidirectional-12.c: New test.
4c2713
	* c-c++-common/Wbidirectional-13.c: New test.
4c2713
---
4c2713
 gcc/c-family/c.opt                            |  24 ++
4c2713
 gcc/doc/invoke.texi                           |  19 +-
4c2713
 gcc/testsuite/c-c++-common/Wbidirectional-1.c |  11 +
4c2713
 .../c-c++-common/Wbidirectional-10.c          |  27 ++
4c2713
 .../c-c++-common/Wbidirectional-11.c          |  12 +
4c2713
 .../c-c++-common/Wbidirectional-12.c          |  18 +
4c2713
 .../c-c++-common/Wbidirectional-13.c          |  16 +
4c2713
 gcc/testsuite/c-c++-common/Wbidirectional-2.c |   8 +
4c2713
 gcc/testsuite/c-c++-common/Wbidirectional-3.c |  10 +
4c2713
 gcc/testsuite/c-c++-common/Wbidirectional-4.c | 165 ++++++++
4c2713
 gcc/testsuite/c-c++-common/Wbidirectional-5.c | 165 ++++++++
4c2713
 gcc/testsuite/c-c++-common/Wbidirectional-6.c | 154 +++++++
4c2713
 gcc/testsuite/c-c++-common/Wbidirectional-7.c |   8 +
4c2713
 gcc/testsuite/c-c++-common/Wbidirectional-8.c |  12 +
4c2713
 gcc/testsuite/c-c++-common/Wbidirectional-9.c |  28 ++
4c2713
 libcpp/include/cpplib.h                       |  18 +-
4c2713
 libcpp/init.c                                 |   1 +
4c2713
 libcpp/lex.c                                  | 391 +++++++++++++++++-
4c2713
 18 files changed, 1072 insertions(+), 15 deletions(-)
4c2713
 create mode 100644 gcc/testsuite/c-c++-common/Wbidirectional-1.c
4c2713
 create mode 100644 gcc/testsuite/c-c++-common/Wbidirectional-10.c
4c2713
 create mode 100644 gcc/testsuite/c-c++-common/Wbidirectional-11.c
4c2713
 create mode 100644 gcc/testsuite/c-c++-common/Wbidirectional-12.c
4c2713
 create mode 100644 gcc/testsuite/c-c++-common/Wbidirectional-13.c
4c2713
 create mode 100644 gcc/testsuite/c-c++-common/Wbidirectional-2.c
4c2713
 create mode 100644 gcc/testsuite/c-c++-common/Wbidirectional-3.c
4c2713
 create mode 100644 gcc/testsuite/c-c++-common/Wbidirectional-4.c
4c2713
 create mode 100644 gcc/testsuite/c-c++-common/Wbidirectional-5.c
4c2713
 create mode 100644 gcc/testsuite/c-c++-common/Wbidirectional-6.c
4c2713
 create mode 100644 gcc/testsuite/c-c++-common/Wbidirectional-7.c
4c2713
 create mode 100644 gcc/testsuite/c-c++-common/Wbidirectional-8.c
4c2713
 create mode 100644 gcc/testsuite/c-c++-common/Wbidirectional-9.c
4c2713
4c2713
diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
4c2713
index 06457ac739e..09391824676 100644
4c2713
--- a/gcc/c-family/c.opt
4c2713
+++ b/gcc/c-family/c.opt
4c2713
@@ -374,6 +374,30 @@ Wbad-function-cast
4c2713
 C ObjC Var(warn_bad_function_cast) Warning
4c2713
 Warn about casting functions to incompatible types.
4c2713
 
4c2713
+Wbidirectional
4c2713
+C ObjC C++ ObjC++ Warning Alias(Wbidirectional=,any,none)
4c2713
+;
4c2713
+
4c2713
+Wbidirectional=
4c2713
+C ObjC C++ ObjC++ RejectNegative Joined Warning CPP(cpp_warn_bidirectional) CppReason(CPP_W_BIDIRECTIONAL) Var(warn_bidirectional) Init(bidirectional_unpaired) Enum(cpp_bidirectional_level)
4c2713
+-Wbidirectional=[none|unpaired|any] Warn about UTF-8 bidirectional characters.
4c2713
+
4c2713
+; Required for these enum values.
4c2713
+SourceInclude
4c2713
+cpplib.h
4c2713
+
4c2713
+Enum
4c2713
+Name(cpp_bidirectional_level) Type(int) UnknownError(argument %qs to %<-Wbidirectional%> not recognized)
4c2713
+
4c2713
+EnumValue
4c2713
+Enum(cpp_bidirectional_level) String(none) Value(bidirectional_none)
4c2713
+
4c2713
+EnumValue
4c2713
+Enum(cpp_bidirectional_level) String(unpaired) Value(bidirectional_unpaired)
4c2713
+
4c2713
+EnumValue
4c2713
+Enum(cpp_bidirectional_level) String(any) Value(bidirectional_any)
4c2713
+
4c2713
 Wbool-compare
4c2713
 C ObjC C++ ObjC++ Var(warn_bool_compare) Warning LangEnabledBy(C ObjC C++ ObjC++,Wall)
4c2713
 Warn about boolean expression compared with an integer value different from true/false.
4c2713
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
4c2713
index b64ec18ae46..e32858ce767 100644
4c2713
--- a/gcc/doc/invoke.texi
4c2713
+++ b/gcc/doc/invoke.texi
4c2713
@@ -325,7 +325,9 @@ Objective-C and Objective-C++ Dialects}.
4c2713
 -Warith-conversion @gol
4c2713
 -Warray-bounds  -Warray-bounds=@var{n} @gol
4c2713
 -Wno-attributes  -Wattribute-alias=@var{n} -Wno-attribute-alias @gol
4c2713
--Wno-attribute-warning  -Wbool-compare  -Wbool-operation @gol
4c2713
+-Wno-attribute-warning  @gol
4c2713
+-Wbidirectional=@r{[}none@r{|}unpaired@r{|}any@r{]} @gol
4c2713
+-Wbool-compare  -Wbool-operation @gol
4c2713
 -Wno-builtin-declaration-mismatch @gol
4c2713
 -Wno-builtin-macro-redefined  -Wc90-c99-compat  -Wc99-c11-compat @gol
4c2713
 -Wc11-c2x-compat @gol
4c2713
@@ -7557,6 +7559,21 @@ Attributes considered include @code{allo
4c2713
 This is the default.  You can disable these warnings with either
4c2713
 @option{-Wno-attribute-alias} or @option{-Wattribute-alias=0}.
4c2713
 
4c2713
+@item -Wbidirectional=@r{[}none@r{|}unpaired@r{|}any@r{]}
4c2713
+@opindex Wbidirectional=
4c2713
+@opindex Wbidirectional
4c2713
+@opindex Wno-bidirectional
4c2713
+Warn about UTF-8 bidirectional characters.  Such characters can change
4c2713
+left-to-right writing direction into right-to-left (and vice versa),
4c2713
+which can cause confusion between the logical order and visual order.
4c2713
+This may be dangerous; for instance, it may seem that a piece of code
4c2713
+is not commented out, whereas it in fact is.
4c2713
+
4c2713
+There are three levels of warning supported by GCC@.  The default is
4c2713
+@option{-Wbidirectional=unpaired}, which warns about improperly terminated
4c2713
+bidi contexts.  @option{-Wbidirectional=none} turns the warning off.
4c2713
+@option{-Wbidirectional=any} warns about any use of bidirectional characters.
4c2713
+
4c2713
 @item -Wbool-compare
4c2713
 @opindex Wno-bool-compare
4c2713
 @opindex Wbool-compare
4c2713
diff --git a/gcc/testsuite/c-c++-common/Wbidirectional-1.c b/gcc/testsuite/c-c++-common/Wbidirectional-1.c
4c2713
new file mode 100644
4c2713
index 00000000000..750de81fdd8
4c2713
--- /dev/null
4c2713
+++ b/gcc/testsuite/c-c++-common/Wbidirectional-1.c
4c2713
@@ -0,0 +1,11 @@
4c2713
+/* { dg-do compile } */
4c2713
+
4c2713
+int main() {
4c2713
+    int isAdmin = 0;
4c2713
+    /*‮ } ⁦if (isAdmin)⁩ ⁦ begin admins only */
4c2713
+/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */
4c2713
+        __builtin_printf("You are an admin.\n");
4c2713
+    /* end admins only ‮ { ⁦*/
4c2713
+/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */
4c2713
+    return 0;
4c2713
+}
4c2713
diff --git a/gcc/testsuite/c-c++-common/Wbidirectional-10.c b/gcc/testsuite/c-c++-common/Wbidirectional-10.c
4c2713
new file mode 100644
4c2713
index 00000000000..cd4abeeefbd
4c2713
--- /dev/null
4c2713
+++ b/gcc/testsuite/c-c++-common/Wbidirectional-10.c
4c2713
@@ -0,0 +1,27 @@
4c2713
+/* { dg-do compile } */
4c2713
+/* { dg-options "-Wbidirectional=unpaired" } */
4c2713
+/* More nesting testing.  */
4c2713
+
4c2713
+/* RLE‫ LRI⁦ PDF‬ PDI⁩*/
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+int LRE_\u202a_PDF_\u202c;
4c2713
+int LRE_\u202a_PDF_\u202c_LRE_\u202a_PDF_\u202c;
4c2713
+int LRE_\u202a_LRI_\u2066_PDF_\u202c_PDI_\u2069;
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+int RLE_\u202b_RLI_\u2067_PDF_\u202c_PDI_\u2069;
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+int RLE_\u202b_RLI_\u2067_PDI_\u2069_PDF_\u202c;
4c2713
+int FSI_\u2068_LRO_\u202d_PDI_\u2069_PDF_\u202c;
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+int FSI_\u2068;
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+int FSI_\u2068_PDI_\u2069;
4c2713
+int FSI_\u2068_FSI_\u2068_PDI_\u2069;
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069;
4c2713
+int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069;
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDF_\u202c;
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_FSI_\u2068_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069;
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
diff --git a/gcc/testsuite/c-c++-common/Wbidirectional-11.c b/gcc/testsuite/c-c++-common/Wbidirectional-11.c
4c2713
new file mode 100644
4c2713
index 00000000000..43d699acc64
4c2713
--- /dev/null
4c2713
+++ b/gcc/testsuite/c-c++-common/Wbidirectional-11.c
4c2713
@@ -0,0 +1,12 @@
4c2713
+/* { dg-do compile } */
4c2713
+/* { dg-options "-Wbidirectional=unpaired" } */
4c2713
+/* Test that we warn when mixing UCN and UTF-8.  */
4c2713
+
4c2713
+int LRE_‪_PDF_\u202c;
4c2713
+/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */
4c2713
+int LRE_\u202a_PDF_‬_;
4c2713
+/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */
4c2713
+const char *s1 = "LRE_‪_PDF_\u202c";
4c2713
+/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */
4c2713
+const char *s2 = "LRE_\u202a_PDF_‬";
4c2713
+/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */
4c2713
diff --git a/gcc/testsuite/c-c++-common/Wbidirectional-12.c b/gcc/testsuite/c-c++-common/Wbidirectional-12.c
4c2713
new file mode 100644
4c2713
index 00000000000..20d1566401a
4c2713
--- /dev/null
4c2713
+++ b/gcc/testsuite/c-c++-common/Wbidirectional-12.c
4c2713
@@ -0,0 +1,18 @@
4c2713
+/* { dg-do compile { target { c || c++11 } } } */
4c2713
+/* { dg-options "-Wbidirectional=any" } */
4c2713
+/* Test raw strings.  */
4c2713
+
4c2713
+const char *s1 = R"(a b c LRE‪ 1 2 3 PDF‬ x y z)";
4c2713
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
4c2713
+const char *s2 = R"(a b c RLE‫ 1 2 3 PDF‬ x y z)";
4c2713
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
4c2713
+const char *s3 = R"(a b c LRO‭ 1 2 3 PDF‬ x y z)";
4c2713
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
4c2713
+const char *s4 = R"(a b c RLO‮ 1 2 3 PDF‬ x y z)";
4c2713
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
4c2713
+const char *s7 = R"(a b c FSI⁨ 1 2 3 PDI⁩ x y) z";
4c2713
+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
4c2713
+const char *s8 = R"(a b c PDI⁩ x y )z";
4c2713
+/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */
4c2713
+const char *s9 = R"(a b c PDF‬ x y z)";
4c2713
+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
4c2713
diff --git a/gcc/testsuite/c-c++-common/Wbidirectional-13.c b/gcc/testsuite/c-c++-common/Wbidirectional-13.c
4c2713
new file mode 100644
4c2713
index 00000000000..08010e3b37b
4c2713
--- /dev/null
4c2713
+++ b/gcc/testsuite/c-c++-common/Wbidirectional-13.c
4c2713
@@ -0,0 +1,16 @@
4c2713
+/* { dg-do compile { target { c || c++11 } } } */
4c2713
+/* { dg-options "-Wbidirectional=unpaired" } */
4c2713
+/* Test raw strings.  */
4c2713
+
4c2713
+const char *s1 = R"(a b c LRE‪ 1 2 3)";
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+const char *s2 = R"(a b c RLE‫ 1 2 3)";
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+const char *s3 = R"(a b c LRO‭ 1 2 3)";
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+const char *s4 = R"(a b c FSI⁨ 1 2 3)";
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+const char *s5 = R"(a b c LRI⁦ 1 2 3)";
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+const char *s6 = R"(a b c RLI⁧ 1 2 3)";
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
diff --git a/gcc/testsuite/c-c++-common/Wbidirectional-2.c b/gcc/testsuite/c-c++-common/Wbidirectional-2.c
4c2713
new file mode 100644
4c2713
index 00000000000..4e04202e058
4c2713
--- /dev/null
4c2713
+++ b/gcc/testsuite/c-c++-common/Wbidirectional-2.c
4c2713
@@ -0,0 +1,8 @@
4c2713
+/* { dg-do compile } */
4c2713
+
4c2713
+int main() {
4c2713
+    /* Say hello; newline⁧/*/ return 0 ;
4c2713
+/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */
4c2713
+    __builtin_printf("Hello world.\n");
4c2713
+    return 0;
4c2713
+}
4c2713
diff --git a/gcc/testsuite/c-c++-common/Wbidirectional-3.c b/gcc/testsuite/c-c++-common/Wbidirectional-3.c
4c2713
new file mode 100644
4c2713
index 00000000000..921300e94e0
4c2713
--- /dev/null
4c2713
+++ b/gcc/testsuite/c-c++-common/Wbidirectional-3.c
4c2713
@@ -0,0 +1,10 @@
4c2713
+/* { dg-do compile } */
4c2713
+
4c2713
+int main() {
4c2713
+    const char* access_level = "user";
4c2713
+    if (__builtin_strcmp(access_level, "user‮ ⁦// Check if admin⁩ ⁦")) {
4c2713
+/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */
4c2713
+        __builtin_printf("You are an admin.\n");
4c2713
+    }
4c2713
+    return 0;
4c2713
+}
4c2713
diff --git a/gcc/testsuite/c-c++-common/Wbidirectional-4.c b/gcc/testsuite/c-c++-common/Wbidirectional-4.c
4c2713
new file mode 100644
4c2713
index 00000000000..e6638aecc6a
4c2713
--- /dev/null
4c2713
+++ b/gcc/testsuite/c-c++-common/Wbidirectional-4.c
4c2713
@@ -0,0 +1,165 @@
4c2713
+/* { dg-do compile } */
4c2713
+/* { dg-options "-Wbidirectional=any -Wno-multichar -Wno-overflow" } */
4c2713
+/* Test all bidi chars in various contexts (identifiers, comments,
4c2713
+   string literals, character constants), both UCN and UTF-8.  The bidi
4c2713
+   chars here are properly terminated, except for the character constants.  */
4c2713
+
4c2713
+/* a b c LRE‪ 1 2 3 PDF‬ x y z */
4c2713
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
4c2713
+/* a b c RLE‫ 1 2 3 PDF‬ x y z */
4c2713
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
4c2713
+/* a b c LRO‭ 1 2 3 PDF‬ x y z */
4c2713
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
4c2713
+/* a b c RLO‮ 1 2 3 PDF‬ x y z */
4c2713
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
4c2713
+/* a b c LRI⁦ 1 2 3 PDI⁩ x y z */
4c2713
+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */
4c2713
+/* a b c RLI⁧ 1 2 3 PDI⁩ x y */
4c2713
+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */
4c2713
+/* a b c FSI⁨ 1 2 3 PDI⁩ x y z */
4c2713
+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
4c2713
+
4c2713
+/* Same but C++ comments instead.  */
4c2713
+// a b c LRE‪ 1 2 3 PDF‬ x y z
4c2713
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
4c2713
+// a b c RLE‫ 1 2 3 PDF‬ x y z
4c2713
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
4c2713
+// a b c LRO‭ 1 2 3 PDF‬ x y z
4c2713
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
4c2713
+// a b c RLO‮ 1 2 3 PDF‬ x y z
4c2713
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
4c2713
+// a b c LRI⁦ 1 2 3 PDI⁩ x y z
4c2713
+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */
4c2713
+// a b c RLI⁧ 1 2 3 PDI⁩ x y
4c2713
+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */
4c2713
+// a b c FSI⁨ 1 2 3 PDI⁩ x y z
4c2713
+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
4c2713
+
4c2713
+/* Here we're closing an unopened context, warn when =any.  */
4c2713
+/* a b c PDI⁩ x y z */
4c2713
+/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */
4c2713
+/* a b c PDF‬ x y z */
4c2713
+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
4c2713
+// a b c PDI⁩ x y z
4c2713
+/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */
4c2713
+// a b c PDF‬ x y z
4c2713
+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
4c2713
+
4c2713
+void
4c2713
+g1 ()
4c2713
+{
4c2713
+  const char *s1 = "a b c LRE‪ 1 2 3 PDF‬ x y z";
4c2713
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
4c2713
+  const char *s2 = "a b c RLE‫ 1 2 3 PDF‬ x y z";
4c2713
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
4c2713
+  const char *s3 = "a b c LRO‭ 1 2 3 PDF‬ x y z";
4c2713
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
4c2713
+  const char *s4 = "a b c RLO‮ 1 2 3 PDF‬ x y z";
4c2713
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
4c2713
+  const char *s5 = "a b c LRI⁦ 1 2 3 PDI⁩ x y z";
4c2713
+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */
4c2713
+  const char *s6 = "a b c RLI⁧ 1 2 3 PDI⁩ x y z";
4c2713
+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */
4c2713
+  const char *s7 = "a b c FSI⁨ 1 2 3 PDI⁩ x y z";
4c2713
+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
4c2713
+  const char *s8 = "a b c PDI⁩ x y z";
4c2713
+/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */
4c2713
+  const char *s9 = "a b c PDF‬ x y z";
4c2713
+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
4c2713
+
4c2713
+  const char *s10 = "a b c LRE\u202a 1 2 3 PDF\u202c x y z";
4c2713
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
4c2713
+  const char *s11 = "a b c LRE\u202A 1 2 3 PDF\u202c x y z";
4c2713
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
4c2713
+  const char *s12 = "a b c RLE\u202b 1 2 3 PDF\u202c x y z";
4c2713
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
4c2713
+  const char *s13 = "a b c RLE\u202B 1 2 3 PDF\u202c x y z";
4c2713
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
4c2713
+  const char *s14 = "a b c LRO\u202d 1 2 3 PDF\u202c x y z";
4c2713
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
4c2713
+  const char *s15 = "a b c LRO\u202D 1 2 3 PDF\u202c x y z";
4c2713
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
4c2713
+  const char *s16 = "a b c RLO\u202e 1 2 3 PDF\u202c x y z";
4c2713
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
4c2713
+  const char *s17 = "a b c RLO\u202E 1 2 3 PDF\u202c x y z";
4c2713
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
4c2713
+  const char *s18 = "a b c LRI\u2066 1 2 3 PDI\u2069 x y z";
4c2713
+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */
4c2713
+  const char *s19 = "a b c RLI\u2067 1 2 3 PDI\u2069 x y z";
4c2713
+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */
4c2713
+  const char *s20 = "a b c FSI\u2068 1 2 3 PDI\u2069 x y z";
4c2713
+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
4c2713
+}
4c2713
+
4c2713
+void
4c2713
+g2 ()
4c2713
+{
4c2713
+  const char c1 = '\u202a';
4c2713
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
4c2713
+  const char c2 = '\u202A';
4c2713
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
4c2713
+  const char c3 = '\u202b';
4c2713
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
4c2713
+  const char c4 = '\u202B';
4c2713
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
4c2713
+  const char c5 = '\u202d';
4c2713
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
4c2713
+  const char c6 = '\u202D';
4c2713
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
4c2713
+  const char c7 = '\u202e';
4c2713
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
4c2713
+  const char c8 = '\u202E';
4c2713
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
4c2713
+  const char c9 = '\u2066';
4c2713
+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */
4c2713
+  const char c10 = '\u2067';
4c2713
+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */
4c2713
+  const char c11 = '\u2068';
4c2713
+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
4c2713
+}
4c2713
+
4c2713
+int a‪b‬c;
4c2713
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
4c2713
+int a‫b‬c;
4c2713
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
4c2713
+int a‭b‬c;
4c2713
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
4c2713
+int a‮b‬c;
4c2713
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
4c2713
+int a⁦b⁩c;
4c2713
+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */
4c2713
+int a⁧b⁩c;
4c2713
+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */
4c2713
+int a⁨b⁩c;
4c2713
+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
4c2713
+int A‬X;
4c2713
+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
4c2713
+int A\u202cY;
4c2713
+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
4c2713
+int A\u202CY2;
4c2713
+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
4c2713
+
4c2713
+int d\u202ae\u202cf;
4c2713
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
4c2713
+int d\u202Ae\u202cf2;
4c2713
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
4c2713
+int d\u202be\u202cf;
4c2713
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
4c2713
+int d\u202Be\u202cf2;
4c2713
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
4c2713
+int d\u202de\u202cf;
4c2713
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
4c2713
+int d\u202De\u202cf2;
4c2713
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
4c2713
+int d\u202ee\u202cf;
4c2713
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
4c2713
+int d\u202Ee\u202cf2;
4c2713
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
4c2713
+int d\u2066e\u2069f;
4c2713
+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */
4c2713
+int d\u2067e\u2069f;
4c2713
+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */
4c2713
+int d\u2068e\u2069f;
4c2713
+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
4c2713
+int X\u2069;
4c2713
+/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */
4c2713
diff --git a/gcc/testsuite/c-c++-common/Wbidirectional-5.c b/gcc/testsuite/c-c++-common/Wbidirectional-5.c
4c2713
new file mode 100644
4c2713
index 00000000000..45d3402c941
4c2713
--- /dev/null
4c2713
+++ b/gcc/testsuite/c-c++-common/Wbidirectional-5.c
4c2713
@@ -0,0 +1,165 @@
4c2713
+/* { dg-do compile } */
4c2713
+/* { dg-options "-Wbidirectional=unpaired -Wno-multichar -Wno-overflow" } */
4c2713
+/* Test all bidi chars in various contexts (identifiers, comments,
4c2713
+   string literals, character constants), both UCN and UTF-8.  The bidi
4c2713
+   chars here are properly terminated, except for the character constants.  */
4c2713
+
4c2713
+/* a b c LRE‪ 1 2 3 PDF‬ x y z */
4c2713
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
4c2713
+/* a b c RLE‫ 1 2 3 PDF‬ x y z */
4c2713
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
4c2713
+/* a b c LRO‭ 1 2 3 PDF‬ x y z */
4c2713
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
4c2713
+/* a b c RLO‮ 1 2 3 PDF‬ x y z */
4c2713
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
4c2713
+/* a b c LRI⁦ 1 2 3 PDI⁩ x y z */
4c2713
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
4c2713
+/* a b c RLI⁧ 1 2 3 PDI⁩ x y */
4c2713
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
4c2713
+/* a b c FSI⁨ 1 2 3 PDI⁩ x y z */
4c2713
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
4c2713
+
4c2713
+/* Same but C++ comments instead.  */
4c2713
+// a b c LRE‪ 1 2 3 PDF‬ x y z
4c2713
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
4c2713
+// a b c RLE‫ 1 2 3 PDF‬ x y z
4c2713
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
4c2713
+// a b c LRO‭ 1 2 3 PDF‬ x y z
4c2713
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
4c2713
+// a b c RLO‮ 1 2 3 PDF‬ x y z
4c2713
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
4c2713
+// a b c LRI⁦ 1 2 3 PDI⁩ x y z
4c2713
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
4c2713
+// a b c RLI⁧ 1 2 3 PDI⁩ x y
4c2713
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
4c2713
+// a b c FSI⁨ 1 2 3 PDI⁩ x y z
4c2713
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
4c2713
+
4c2713
+/* Here we're closing an unopened context; that warns only with =any, so nothing is expected under =unpaired.  */
4c2713
+/* a b c PDI⁩ x y z */
4c2713
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
4c2713
+/* a b c PDF‬ x y z */
4c2713
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
4c2713
+// a b c PDI⁩ x y z
4c2713
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
4c2713
+// a b c PDF‬ x y z
4c2713
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
4c2713
+
4c2713
+void
4c2713
+g1 ()
4c2713
+{
4c2713
+  const char *s1 = "a b c LRE‪ 1 2 3 PDF‬ x y z";
4c2713
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
4c2713
+  const char *s2 = "a b c RLE‫ 1 2 3 PDF‬ x y z";
4c2713
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
4c2713
+  const char *s3 = "a b c LRO‭ 1 2 3 PDF‬ x y z";
4c2713
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
4c2713
+  const char *s4 = "a b c RLO‮ 1 2 3 PDF‬ x y z";
4c2713
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
4c2713
+  const char *s5 = "a b c LRI⁦ 1 2 3 PDI⁩ x y z";
4c2713
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
4c2713
+  const char *s6 = "a b c RLI⁧ 1 2 3 PDI⁩ x y z";
4c2713
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
4c2713
+  const char *s7 = "a b c FSI⁨ 1 2 3 PDI⁩ x y z";
4c2713
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
4c2713
+  const char *s8 = "a b c PDI⁩ x y z";
4c2713
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
4c2713
+  const char *s9 = "a b c PDF‬ x y z";
4c2713
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
4c2713
+
4c2713
+  const char *s10 = "a b c LRE\u202a 1 2 3 PDF\u202c x y z";
4c2713
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
4c2713
+  const char *s11 = "a b c LRE\u202A 1 2 3 PDF\u202c x y z";
4c2713
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
4c2713
+  const char *s12 = "a b c RLE\u202b 1 2 3 PDF\u202c x y z";
4c2713
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
4c2713
+  const char *s13 = "a b c RLE\u202B 1 2 3 PDF\u202c x y z";
4c2713
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
4c2713
+  const char *s14 = "a b c LRO\u202d 1 2 3 PDF\u202c x y z";
4c2713
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
4c2713
+  const char *s15 = "a b c LRO\u202D 1 2 3 PDF\u202c x y z";
4c2713
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
4c2713
+  const char *s16 = "a b c RLO\u202e 1 2 3 PDF\u202c x y z";
4c2713
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
4c2713
+  const char *s17 = "a b c RLO\u202E 1 2 3 PDF\u202c x y z";
4c2713
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
4c2713
+  const char *s18 = "a b c LRI\u2066 1 2 3 PDI\u2069 x y z";
4c2713
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
4c2713
+  const char *s19 = "a b c RLI\u2067 1 2 3 PDI\u2069 x y z";
4c2713
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
4c2713
+  const char *s20 = "a b c FSI\u2068 1 2 3 PDI\u2069 x y z";
4c2713
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
4c2713
+}
4c2713
+
4c2713
+void
4c2713
+g2 ()
4c2713
+{
4c2713
+  const char c1 = '\u202a';
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+  const char c2 = '\u202A';
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+  const char c3 = '\u202b';
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+  const char c4 = '\u202B';
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+  const char c5 = '\u202d';
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+  const char c6 = '\u202D';
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+  const char c7 = '\u202e';
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+  const char c8 = '\u202E';
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+  const char c9 = '\u2066';
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+  const char c10 = '\u2067';
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+  const char c11 = '\u2068';
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+}
4c2713
+
4c2713
+int a‪b‬c;
4c2713
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
4c2713
+int a‫b‬c;
4c2713
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
4c2713
+int a‭b‬c;
4c2713
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
4c2713
+int a‮b‬c;
4c2713
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
4c2713
+int a⁦b⁩c;
4c2713
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
4c2713
+int a⁧b⁩c;
4c2713
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
4c2713
+int a⁨b⁩c;
4c2713
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
4c2713
+int A‬X;
4c2713
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
4c2713
+int A\u202cY;
4c2713
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
4c2713
+int A\u202CY2;
4c2713
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
4c2713
+
4c2713
+int d\u202ae\u202cf;
4c2713
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
4c2713
+int d\u202Ae\u202cf2;
4c2713
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
4c2713
+int d\u202be\u202cf;
4c2713
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
4c2713
+int d\u202Be\u202cf2;
4c2713
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
4c2713
+int d\u202de\u202cf;
4c2713
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
4c2713
+int d\u202De\u202cf2;
4c2713
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
4c2713
+int d\u202ee\u202cf;
4c2713
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
4c2713
+int d\u202Ee\u202cf2;
4c2713
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
4c2713
+int d\u2066e\u2069f;
4c2713
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
4c2713
+int d\u2067e\u2069f;
4c2713
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
4c2713
+int d\u2068e\u2069f;
4c2713
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
4c2713
+int X\u2069;
4c2713
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
4c2713
diff --git a/gcc/testsuite/c-c++-common/Wbidirectional-6.c b/gcc/testsuite/c-c++-common/Wbidirectional-6.c
4c2713
new file mode 100644
4c2713
index 00000000000..1be017f828d
4c2713
--- /dev/null
4c2713
+++ b/gcc/testsuite/c-c++-common/Wbidirectional-6.c
4c2713
@@ -0,0 +1,154 @@
4c2713
+/* { dg-do compile } */
4c2713
+/* { dg-options "-Wbidirectional=unpaired" } */
4c2713
+/* Test nesting of bidi chars in various contexts.  */
4c2713
+
4c2713
+/* Terminated by the wrong char:  */
4c2713
+/* a b c LRE‪ 1 2 3 PDI⁩ x y z */
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+/* a b c RLE‫ 1 2 3 PDI⁩ x y  z*/
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+/* a b c LRO‭ 1 2 3 PDI⁩ x y z */
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+/* a b c RLO‮ 1 2 3 PDI⁩ x y z */
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+/* a b c LRI⁦ 1 2 3 PDF‬ x y z */
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+/* a b c RLI⁧ 1 2 3 PDF‬ x y z */
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+/* a b c FSI⁨ 1 2 3 PDF‬ x y  z*/
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+
4c2713
+/* LRE‪ PDF‬ */
4c2713
+/* LRE‪ LRE‪ PDF‬ PDF‬ */
4c2713
+/* PDF‬ LRE‪ PDF‬ */
4c2713
+/* LRE‪ PDF‬ LRE‪ PDF‬ */
4c2713
+/* LRE‪ LRE‪ PDF‬ */
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+/* PDF‬ LRE‪ */
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+
4c2713
+// a b c LRE‪ 1 2 3 PDI⁩ x y z
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+// a b c RLE‫ 1 2 3 PDI⁩ x y  z*/
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+// a b c LRO‭ 1 2 3 PDI⁩ x y z 
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+// a b c RLO‮ 1 2 3 PDI⁩ x y z 
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+// a b c LRI⁦ 1 2 3 PDF‬ x y z 
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+// a b c RLI⁧ 1 2 3 PDF‬ x y z 
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+// a b c FSI⁨ 1 2 3 PDF‬ x y  z
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+
4c2713
+// LRE‪ PDF‬ 
4c2713
+// LRE‪ LRE‪ PDF‬ PDF‬
4c2713
+// PDF‬ LRE‪ PDF‬
4c2713
+// LRE‪ PDF‬ LRE‪ PDF‬
4c2713
+// LRE‪ LRE‪ PDF‬
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+// PDF‬ LRE‪
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+
4c2713
+void
4c2713
+g1 ()
4c2713
+{
4c2713
+  const char *s1 = "a b c LRE‪ 1 2 3 PDI⁩ x y z";
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+  const char *s2 = "a b c LRE\u202a 1 2 3 PDI\u2069 x y z";
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+  const char *s3 = "a b c RLE‫ 1 2 3 PDI⁩ x y ";
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+  const char *s4 = "a b c RLE\u202b 1 2 3 PDI\u2069 x y z";
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+  const char *s5 = "a b c LRO‭ 1 2 3 PDI⁩ x y z";
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+  const char *s6 = "a b c LRO\u202d 1 2 3 PDI\u2069 x y z";
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+  const char *s7 = "a b c RLO‮ 1 2 3 PDI⁩ x y z";
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+  const char *s8 = "a b c RLO\u202e 1 2 3 PDI\u2069 x y z";
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+  const char *s9 = "a b c LRI⁦ 1 2 3 PDF‬ x y z";
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+  const char *s10 = "a b c LRI\u2066 1 2 3 PDF\u202c x y z";
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+  const char *s11 = "a b c RLI⁧ 1 2 3 PDF‬ x y z\
4c2713
+    ";
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
4c2713
+  const char *s12 = "a b c RLI\u2067 1 2 3 PDF\u202c x y z";
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+  const char *s13 = "a b c FSI⁨ 1 2 3 PDF‬ x y z";
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+  const char *s14 = "a b c FSI\u2068 1 2 3 PDF\u202c x y z";
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+  const char *s15 = "PDF‬ LRE‪";
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+  const char *s16 = "PDF\u202c LRE\u202a";
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+  const char *s17 = "LRE‪ PDF‬";
4c2713
+  const char *s18 = "LRE\u202a PDF\u202c";
4c2713
+  const char *s19 = "LRE‪ LRE‪ PDF‬ PDF‬";
4c2713
+  const char *s20 = "LRE\u202a LRE\u202a PDF\u202c PDF\u202c";
4c2713
+  const char *s21 = "PDF‬ LRE‪ PDF‬";
4c2713
+  const char *s22 = "PDF\u202c LRE\u202a PDF\u202c";
4c2713
+  const char *s23 = "LRE‪ LRE‪ PDF‬";
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+  const char *s24 = "LRE\u202a LRE\u202a PDF\u202c";
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+  const char *s25 = "PDF‬ LRE‪";
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+  const char *s26 = "PDF\u202c LRE\u202a";
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+  const char *s27 = "PDF‬ LRE\u202a";
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+  const char *s28 = "PDF\u202c LRE‪";
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+}
4c2713
+
4c2713
+int aLRE‪bPDI⁩;
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+int A\u202aB\u2069C;
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+int aRLE‫bPDI⁩;
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+int a\u202bB\u2069c;
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+int aLRO‭bPDI⁩;
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+int a\u202db\u2069c2;
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+int aRLO‮bPDI⁩;
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+int a\u202eb\u2069;
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+int aLRI⁦bPDF‬;
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+int a\u2066b\u202c;
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+int aRLI⁧bPDF‬c
4c2713
+;
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
4c2713
+int a\u2067b\u202c;
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+int aFSI⁨bPDF‬;
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+int a\u2068b\u202c;
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+int aFSI⁨bPD\u202C;
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+int aFSI\u2068bPDF‬_;
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+int aLRE‪bPDF‬b; 
4c2713
+int A\u202aB\u202c;
4c2713
+int a_LRE‪_LRE‪_b_PDF‬_PDF‬;
4c2713
+int A\u202aA\u202aB\u202cB\u202c;
4c2713
+int aPDF‬bLREadPDF‬;
4c2713
+int a_\u202C_\u202a_\u202c;
4c2713
+int a_LRE‪_b_PDF‬_c_LRE‪_PDF‬;
4c2713
+int a_\u202a_\u202c_\u202a_\u202c_;
4c2713
+int a_LRE‪_b_PDF‬_c_LRE‪;
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+int a_\u202a_\u202c_\u202a_;
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
diff --git a/gcc/testsuite/c-c++-common/Wbidirectional-7.c b/gcc/testsuite/c-c++-common/Wbidirectional-7.c
4c2713
new file mode 100644
4c2713
index 00000000000..f0f7b3ca14a
4c2713
--- /dev/null
4c2713
+++ b/gcc/testsuite/c-c++-common/Wbidirectional-7.c
4c2713
@@ -0,0 +1,8 @@
4c2713
+/* { dg-do compile } */
4c2713
+/* { dg-options "-Wbidirectional=any" } */
4c2713
+/* Test we ignore UCNs in comments.  */
4c2713
+
4c2713
+// a b c \u202a 1 2 3
4c2713
+// a b c \u202A 1 2 3
4c2713
+/* a b c \u202a 1 2 3 */
4c2713
+/* a b c \u202A 1 2 3 */
4c2713
diff --git a/gcc/testsuite/c-c++-common/Wbidirectional-8.c b/gcc/testsuite/c-c++-common/Wbidirectional-8.c
4c2713
new file mode 100644
4c2713
index 00000000000..c7d02193131
4c2713
--- /dev/null
4c2713
+++ b/gcc/testsuite/c-c++-common/Wbidirectional-8.c
4c2713
@@ -0,0 +1,12 @@
4c2713
+/* { dg-do compile } */
4c2713
+/* { dg-options "-Wbidirectional=any" } */
4c2713
+/* Test \u vs \U.  */
4c2713
+
4c2713
+int a_\u202A;
4c2713
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
4c2713
+int a_\u202a_2;
4c2713
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
4c2713
+int a_\U0000202A_3;
4c2713
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
4c2713
+int a_\U0000202a_4;
4c2713
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
4c2713
diff --git a/gcc/testsuite/c-c++-common/Wbidirectional-9.c b/gcc/testsuite/c-c++-common/Wbidirectional-9.c
4c2713
new file mode 100644
4c2713
index 00000000000..d029209babb
4c2713
--- /dev/null
4c2713
+++ b/gcc/testsuite/c-c++-common/Wbidirectional-9.c
4c2713
@@ -0,0 +1,28 @@
4c2713
+/* { dg-do compile } */
4c2713
+/* { dg-options "-Wbidirectional=unpaired" } */
4c2713
+/* Test that we properly separate bidi contexts (comment/identifier/character
4c2713
+   constant/string literal).  */
4c2713
+
4c2713
+/* LRE ->‪<- */ int pdf_\u202c_1;
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+/* RLE ->‫<- */ int pdf_\u202c_2;
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+/* LRO ->‭<- */ int pdf_\u202c_3;
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+/* RLO ->‮<- */ int pdf_\u202c_4;
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+/* LRI ->⁦<-*/ int pdi_\u2069_1;
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+/* RLI ->⁧<- */ int pdi_\u2069_12;
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+/* FSI ->⁨<- */ int pdi_\u2069_3;
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+
4c2713
+const char *s1 = "LRE\u202a"; /* PDF ->‬<- */
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+/* LRE ->‪<- */ const char *s2 = "PDF\u202c";
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+const char *s3 = "LRE\u202a"; int pdf_\u202c_5;
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
+int lre_\u202a; const char *s4 = "PDF\u202c";
4c2713
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
4c2713
diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h
4c2713
index 6e2fcb6b1f2..e48d13c4ee1 100644
4c2713
--- a/libcpp/include/cpplib.h
4c2713
+++ b/libcpp/include/cpplib.h
4c2713
@@ -318,6 +318,17 @@ enum cpp_main_search
4c2713
   CMS_system,  /* Search the system INCLUDE path.  */
4c2713
 };
4c2713
 
4c2713
+/* The possible bidirectional characters checking levels, from least
4c2713
+   restrictive to most.  */
4c2713
+enum cpp_bidirectional_level {
4c2713
+  /* No checking.  */
4c2713
+  bidirectional_none,
4c2713
+  /* Only detect unpaired uses of bidirectional characters.  */
4c2713
+  bidirectional_unpaired,
4c2713
+  /* Detect any use of bidirectional characters.  */
4c2713
+  bidirectional_any
4c2713
+};
4c2713
+
4c2713
 /* This structure is nested inside struct cpp_reader, and
4c2713
    carries all the options visible to the command line.  */
4c2713
 struct cpp_options
4c2713
@@ -538,6 +549,10 @@ struct cpp_options
4c2713
   /* True if warn about differences between C++98 and C++11.  */
4c2713
   bool cpp_warn_cxx11_compat;
4c2713
 
4c2713
+  /* Nonzero if bidirectional characters checking is on.  See enum
4c2713
+     cpp_bidirectional_level.  */
4c2713
+  unsigned char cpp_warn_bidirectional;
4c2713
+
4c2713
   /* Dependency generation.  */
4c2713
   struct
4c2713
   {
4c2713
@@ -642,7 +657,8 @@ enum cpp_warning_reason {
4c2713
   CPP_W_C90_C99_COMPAT,
4c2713
   CPP_W_C11_C2X_COMPAT,
4c2713
   CPP_W_CXX11_COMPAT,
4c2713
-  CPP_W_EXPANSION_TO_DEFINED
4c2713
+  CPP_W_EXPANSION_TO_DEFINED,
4c2713
+  CPP_W_BIDIRECTIONAL
4c2713
 };
4c2713
 
4c2713
 /* Callback for header lookup for HEADER, which is the name of a
4c2713
diff --git a/libcpp/init.c b/libcpp/init.c
4c2713
index 5a424e23553..f9a8f5f088f 100644
4c2713
--- a/libcpp/init.c
4c2713
+++ b/libcpp/init.c
4c2713
@@ -223,6 +223,7 @@ cpp_create_reader (enum c_lang lang, cpp_hash_table *table,
4c2713
       = ENABLE_CANONICAL_SYSTEM_HEADERS;
4c2713
   CPP_OPTION (pfile, ext_numeric_literals) = 1;
4c2713
   CPP_OPTION (pfile, warn_date_time) = 0;
4c2713
+  CPP_OPTION (pfile, cpp_warn_bidirectional) = bidirectional_unpaired;
4c2713
 
4c2713
   /* Default CPP arithmetic to something sensible for the host for the
4c2713
      benefit of dumb users like fix-header.  */
4c2713
diff --git a/libcpp/lex.c b/libcpp/lex.c
4c2713
index 8e3ef096bbe..d9c39a4105f 100644
4c2713
--- a/libcpp/lex.c
4c2713
+++ b/libcpp/lex.c
4c2713
@@ -1164,6 +1164,284 @@ _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
4c2713
     }
4c2713
 }
4c2713
 
4c2713
+namespace bidi {
4c2713
+  enum class kind {
4c2713
+    NONE, LRE, RLE, LRO, RLO, LRI, RLI, FSI, PDF, PDI
4c2713
+  };
4c2713
+
4c2713
+  /* All the UTF-8 encodings of bidi characters start with E2.  */
4c2713
+  constexpr uchar utf8_start = 0xe2;
4c2713
+
4c2713
+  /* A vector holding currently open bidi contexts.  We use a char for
4c2713
+     each context, its LSB is 1 if it represents a PDF context, 0 if it
4c2713
+     represents a PDI context.  The next bit is 1 if this context was opened
4c2713
+     by a bidi character written as a UCN, and 0 when it was UTF-8.  */
4c2713
+  semi_embedded_vec <unsigned char, 16> vec;
4c2713
+
4c2713
+  /* Close the whole comment/identifier/string literal/character constant
4c2713
+     context.  */
4c2713
+  void on_close ()
4c2713
+  {
4c2713
+    vec.truncate (0);
4c2713
+  }
4c2713
+
4c2713
+  /* Pop the last element in the vector.  */
4c2713
+  void pop ()
4c2713
+  {
4c2713
+    unsigned int len = vec.count ();
4c2713
+    gcc_checking_assert (len > 0);
4c2713
+    vec.truncate (len - 1);
4c2713
+  }
4c2713
+
4c2713
+  /* Return which context is currently opened.  */
4c2713
+  kind current_ctx ()
4c2713
+  {
4c2713
+    unsigned int len = vec.count ();
4c2713
+    if (len == 0)
4c2713
+      return kind::NONE;
4c2713
+    return (vec[len - 1] & 1) ? kind::PDF : kind::PDI;
4c2713
+  }
4c2713
+
4c2713
+  /* Return true if the current context comes from a UCN origin, that is,
4c2713
+     the bidi char which started this bidi context was written as a UCN.  */
4c2713
+  bool current_ctx_ucn_p ()
4c2713
+  {
4c2713
+    unsigned int len = vec.count ();
4c2713
+    gcc_checking_assert (len > 0);
4c2713
+    return (vec[len - 1] >> 1) & 1;
4c2713
+  }
4c2713
+
4c2713
+  /* We've read a bidi char, update the current vector as necessary.  */
4c2713
+  void on_char (kind k, bool ucn_p)
4c2713
+  {
4c2713
+    switch (k)
4c2713
+      {
4c2713
+      case kind::LRE:
4c2713
+      case kind::RLE:
4c2713
+      case kind::LRO:
4c2713
+      case kind::RLO:
4c2713
+	vec.push (ucn_p ? 3u : 1u);
4c2713
+	break;
4c2713
+      case kind::LRI:
4c2713
+      case kind::RLI:
4c2713
+      case kind::FSI:
4c2713
+	vec.push (ucn_p ? 2u : 0u);
4c2713
+	break;
4c2713
+      case kind::PDF:
4c2713
+	if (current_ctx () == kind::PDF)
4c2713
+	  pop ();
4c2713
+	break;
4c2713
+      case kind::PDI:
4c2713
+	if (current_ctx () == kind::PDI)
4c2713
+	  pop ();
4c2713
+	break;
4c2713
+      [[likely]] case kind::NONE:
4c2713
+	break;
4c2713
+      default:
4c2713
+	abort ();
4c2713
+      }
4c2713
+  }
4c2713
+
4c2713
+  /* Return a descriptive string for K.  */
4c2713
+  const char *to_str (kind k)
4c2713
+  {
4c2713
+    switch (k)
4c2713
+      {
4c2713
+      case kind::LRE:
4c2713
+	return "U+202A (LEFT-TO-RIGHT EMBEDDING)";
4c2713
+      case kind::RLE:
4c2713
+	return "U+202B (RIGHT-TO-LEFT EMBEDDING)";
4c2713
+      case kind::LRO:
4c2713
+	return "U+202D (LEFT-TO-RIGHT OVERRIDE)";
4c2713
+      case kind::RLO:
4c2713
+	return "U+202E (RIGHT-TO-LEFT OVERRIDE)";
4c2713
+      case kind::LRI:
4c2713
+	return "U+2066 (LEFT-TO-RIGHT ISOLATE)";
4c2713
+      case kind::RLI:
4c2713
+	return "U+2067 (RIGHT-TO-LEFT ISOLATE)";
4c2713
+      case kind::FSI:
4c2713
+	return "U+2068 (FIRST STRONG ISOLATE)";
4c2713
+      case kind::PDF:
4c2713
+	return "U+202C (POP DIRECTIONAL FORMATTING)";
4c2713
+      case kind::PDI:
4c2713
+	return "U+2069 (POP DIRECTIONAL ISOLATE)";
4c2713
+      default:
4c2713
+	abort ();
4c2713
+      }
4c2713
+  }
4c2713
+}
4c2713
+
4c2713
+/* Parse a sequence of 3 bytes starting with P and return its bidi code.  */
4c2713
+
4c2713
+static bidi::kind
4c2713
+get_bidi_utf8 (const unsigned char *const p)
4c2713
+{
4c2713
+  gcc_checking_assert (p[0] == bidi::utf8_start);
4c2713
+
4c2713
+  if (p[1] == 0x80)
4c2713
+    switch (p[2])
4c2713
+      {
4c2713
+      case 0xaa:
4c2713
+	return bidi::kind::LRE;
4c2713
+      case 0xab:
4c2713
+	return bidi::kind::RLE;
4c2713
+      case 0xac:
4c2713
+	return bidi::kind::PDF;
4c2713
+      case 0xad:
4c2713
+	return bidi::kind::LRO;
4c2713
+      case 0xae:
4c2713
+	return bidi::kind::RLO;
4c2713
+      default:
4c2713
+	break;
4c2713
+      }
4c2713
+  else if (p[1] == 0x81)
4c2713
+    switch (p[2])
4c2713
+      {
4c2713
+      case 0xa6:
4c2713
+	return bidi::kind::LRI;
4c2713
+      case 0xa7:
4c2713
+	return bidi::kind::RLI;
4c2713
+      case 0xa8:
4c2713
+	return bidi::kind::FSI;
4c2713
+      case 0xa9:
4c2713
+	return bidi::kind::PDI;
4c2713
+      default:
4c2713
+	break;
4c2713
+      }
4c2713
+
4c2713
+  return bidi::kind::NONE;
4c2713
+}
4c2713
+
4c2713
+/* Parse a UCN where P points just past \u or \U and return its bidi code.  */
4c2713
+
4c2713
+static bidi::kind
4c2713
+get_bidi_ucn (const unsigned char *p, bool is_U)
4c2713
+{
4c2713
+  /* 6.4.3 Universal Character Names
4c2713
+      \u hex-quad
4c2713
+      \U hex-quad hex-quad
4c2713
+     where \unnnn means \U0000nnnn.  */
4c2713
+
4c2713
+  if (is_U)
4c2713
+    {
4c2713
+      if (p[0] != '0' || p[1] != '0' || p[2] != '0' || p[3] != '0')
4c2713
+	return bidi::kind::NONE;
4c2713
+      /* Skip 4B so we can treat \u and \U the same below.  */
4c2713
+      p += 4;
4c2713
+    }
4c2713
+
4c2713
+  /* All code points we are looking for start with 20xx.  */
4c2713
+  if (p[0] != '2' || p[1] != '0')
4c2713
+    return bidi::kind::NONE;
4c2713
+  else if (p[2] == '2')
4c2713
+    switch (p[3])
4c2713
+      {
4c2713
+      case 'a':
4c2713
+      case 'A':
4c2713
+	return bidi::kind::LRE;
4c2713
+      case 'b':
4c2713
+      case 'B':
4c2713
+	return bidi::kind::RLE;
4c2713
+      case 'c':
4c2713
+      case 'C':
4c2713
+	return bidi::kind::PDF;
4c2713
+      case 'd':
4c2713
+      case 'D':
4c2713
+	return bidi::kind::LRO;
4c2713
+      case 'e':
4c2713
+      case 'E':
4c2713
+	return bidi::kind::RLO;
4c2713
+      default:
4c2713
+	break;
4c2713
+      }
4c2713
+  else if (p[2] == '6')
4c2713
+    switch (p[3])
4c2713
+      {
4c2713
+      case '6':
4c2713
+	return bidi::kind::LRI;
4c2713
+      case '7':
4c2713
+	return bidi::kind::RLI;
4c2713
+      case '8':
4c2713
+	return bidi::kind::FSI;
4c2713
+      case '9':
4c2713
+	return bidi::kind::PDI;
4c2713
+      default:
4c2713
+	break;
4c2713
+      }
4c2713
+
4c2713
+  return bidi::kind::NONE;
4c2713
+}
4c2713
+
4c2713
+/* We're closing a bidi context, that is, we've encountered a newline,
4c2713
+   are closing a C-style comment, or are at the end of a string literal,
4c2713
+   character constant, or identifier.  Warn if this context was not
4c2713
+   properly terminated by a PDI or PDF.  P points to the last character
4c2713
+   in this context.  */
4c2713
+
4c2713
+static void
4c2713
+maybe_warn_bidi_on_close (cpp_reader *pfile, const uchar *p)
4c2713
+{
4c2713
+  if (CPP_OPTION (pfile, cpp_warn_bidirectional) == bidirectional_unpaired
4c2713
+      && bidi::vec.count () > 0)
4c2713
+    {
4c2713
+      const location_t loc
4c2713
+	= linemap_position_for_column (pfile->line_table,
4c2713
+				       CPP_BUF_COLUMN (pfile->buffer, p));
4c2713
+      cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0,
4c2713
+			     "unpaired UTF-8 bidirectional character "
4c2713
+			     "detected");
4c2713
+    }
4c2713
+  /* We're done with this context.  */
4c2713
+  bidi::on_close ();
4c2713
+}
4c2713
+
4c2713
+/* We're at the beginning or in the middle of an identifier/comment/string
4c2713
+   literal/character constant.  Warn if we've encountered a bidi character.
4c2713
+   KIND says which bidi character it was; P points to it in the character
4c2713
+   stream.  UCN_P is true iff this bidi character was written as a UCN.  */
4c2713
+
4c2713
+static void
4c2713
+maybe_warn_bidi_on_char (cpp_reader *pfile, const uchar *p, bidi::kind kind,
4c2713
+			 bool ucn_p)
4c2713
+{
4c2713
+  if (__builtin_expect (kind == bidi::kind::NONE, 1))
4c2713
+    return;
4c2713
+
4c2713
+  const auto warn_bidi = CPP_OPTION (pfile, cpp_warn_bidirectional);
4c2713
+
4c2713
+  if (warn_bidi != bidirectional_none)
4c2713
+    {
4c2713
+      const location_t loc
4c2713
+	= linemap_position_for_column (pfile->line_table,
4c2713
+				       CPP_BUF_COLUMN (pfile->buffer, p));
4c2713
+      /* It seems excessive to warn about a PDI/PDF that is closing
4c2713
+	 an opened context because we've already warned about the
4c2713
+	 opening character.  Except warn when we have a UCN x UTF-8
4c2713
+	 mismatch.  */
4c2713
+      if (kind == bidi::current_ctx ())
4c2713
+	{
4c2713
+	  if (warn_bidi == bidirectional_unpaired
4c2713
+	      && bidi::current_ctx_ucn_p () != ucn_p)
4c2713
+	    cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0,
4c2713
+				   "UTF-8 vs UCN mismatch when closing "
4c2713
+				   "a context by \"%s\"", bidi::to_str (kind));
4c2713
+	}
4c2713
+      else if (warn_bidi == bidirectional_any)
4c2713
+	{
4c2713
+	  if (kind == bidi::kind::PDF || kind == bidi::kind::PDI)
4c2713
+	    cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0,
4c2713
+				   "\"%s\" is closing an unopened context",
4c2713
+				   bidi::to_str (kind));
4c2713
+	  else
4c2713
+	    cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0,
4c2713
+				   "found problematic Unicode character \"%s\"",
4c2713
+				   bidi::to_str (kind));
4c2713
+	}
4c2713
+    }
4c2713
+  /* Update the stack of open bidi contexts for this character.  */
4c2713
+  bidi::on_char (kind, ucn_p);
4c2713
+}
4c2713
+
4c2713
 /* Skip a C-style block comment.  We find the end of the comment by
4c2713
    seeing if an asterisk is before every '/' we encounter.  Returns
4c2713
    nonzero if comment terminated by EOF, zero otherwise.
4c2713
@@ -1175,7 +1453,8 @@ _cpp_skip_block_comment (cpp_reader *pfile)
4c2713
   cpp_buffer *buffer = pfile->buffer;
4c2713
   const uchar *cur = buffer->cur;
4c2713
   uchar c;
4c2713
-
4c2713
+  const bool warn_bidi_p = (CPP_OPTION (pfile, cpp_warn_bidirectional)
4c2713
+			    != bidirectional_none);
4c2713
   cur++;
4c2713
   if (*cur == '/')
4c2713
     cur++;
4c2713
@@ -1189,7 +1468,11 @@ _cpp_skip_block_comment (cpp_reader *pfile)
4c2713
       if (c == '/')
4c2713
 	{
4c2713
 	  if (cur[-2] == '*')
4c2713
-	    break;
4c2713
+	    {
4c2713
+	      if (warn_bidi_p)
4c2713
+		maybe_warn_bidi_on_close (pfile, cur);
4c2713
+	      break;
4c2713
+	    }
4c2713
 
4c2713
 	  /* Warn about potential nested comments, but not if the '/'
4c2713
 	     comes immediately before the true comment delimiter.
4c2713
@@ -1208,6 +1491,8 @@ _cpp_skip_block_comment (cpp_reader *pfile)
4c2713
 	{
4c2713
 	  unsigned int cols;
4c2713
 	  buffer->cur = cur - 1;
4c2713
+	  if (warn_bidi_p)
4c2713
+	    maybe_warn_bidi_on_close (pfile, cur);
4c2713
 	  _cpp_process_line_notes (pfile, true);
4c2713
 	  if (buffer->next_line >= buffer->rlimit)
4c2713
 	    return true;
4c2713
@@ -1218,6 +1503,13 @@ _cpp_skip_block_comment (cpp_reader *pfile)
4c2713
 
4c2713
 	  cur = buffer->cur;
4c2713
 	}
4c2713
+      /* If this is a beginning of a UTF-8 encoding, it might be
4c2713
+	 a bidirectional character.  */
4c2713
+      else if (__builtin_expect (c == bidi::utf8_start, 0) && warn_bidi_p)
4c2713
+	{
4c2713
+	  bidi::kind kind = get_bidi_utf8 (cur - 1);
4c2713
+	  maybe_warn_bidi_on_char (pfile, cur, kind, /*ucn_p=*/false);
4c2713
+	}
4c2713
     }
4c2713
 
4c2713
   buffer->cur = cur;
4c2713
@@ -1233,9 +1525,32 @@ skip_line_comment (cpp_reader *pfile)
4c2713
 {
4c2713
   cpp_buffer *buffer = pfile->buffer;
4c2713
   location_t orig_line = pfile->line_table->highest_line;
4c2713
+  const bool warn_bidi_p = (CPP_OPTION (pfile, cpp_warn_bidirectional)
4c2713
+			    != bidirectional_none);
4c2713
 
4c2713
-  while (*buffer->cur != '\n')
4c2713
-    buffer->cur++;
4c2713
+  if (!warn_bidi_p)
4c2713
+    while (*buffer->cur != '\n')
4c2713
+      buffer->cur++;
4c2713
+  else
4c2713
+    {
4c2713
+      while (*buffer->cur != '\n'
4c2713
+	     && *buffer->cur != bidi::utf8_start)
4c2713
+	buffer->cur++;
4c2713
+      if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0))
4c2713
+	{
4c2713
+	  while (*buffer->cur != '\n')
4c2713
+	    {
4c2713
+	      if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0))
4c2713
+		{
4c2713
+		  bidi::kind kind = get_bidi_utf8 (buffer->cur);
4c2713
+		  maybe_warn_bidi_on_char (pfile, buffer->cur, kind,
4c2713
+					   /*ucn_p=*/false);
4c2713
+		}
4c2713
+	      buffer->cur++;
4c2713
+	    }
4c2713
+	  maybe_warn_bidi_on_close (pfile, buffer->cur);
4c2713
+	}
4c2713
+    }
4c2713
 
4c2713
   _cpp_process_line_notes (pfile, true);
4c2713
   return orig_line != pfile->line_table->highest_line;
4c2713
@@ -1320,11 +1635,14 @@ static const cppchar_t utf8_signifier = 0xC0;
4c2713
 
4c2713
 /* Returns TRUE if the sequence starting at buffer->cur is valid in
4c2713
    an identifier.  FIRST is TRUE if this starts an identifier.  */
4c2713
+
4c2713
 static bool
4c2713
 forms_identifier_p (cpp_reader *pfile, int first,
4c2713
 		    struct normalize_state *state)
4c2713
 {
4c2713
   cpp_buffer *buffer = pfile->buffer;
4c2713
+  const bool warn_bidi_p = (CPP_OPTION (pfile, cpp_warn_bidirectional)
4c2713
+			    != bidirectional_none);
4c2713
 
4c2713
   if (*buffer->cur == '$')
4c2713
     {
4c2713
@@ -1347,6 +1665,13 @@ forms_identifier_p (cpp_reader *pfile, int first,
4c2713
       cppchar_t s;
4c2713
       if (*buffer->cur >= utf8_signifier)
4c2713
 	{
4c2713
+	  if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0)
4c2713
+	      && warn_bidi_p)
4c2713
+	    {
4c2713
+	      bidi::kind kind = get_bidi_utf8 (buffer->cur);
4c2713
+	      maybe_warn_bidi_on_char (pfile, buffer->cur, kind,
4c2713
+				       /*ucn_p=*/false);
4c2713
+	    }
4c2713
 	  if (_cpp_valid_utf8 (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
4c2713
 			       state, &s))
4c2713
 	    return true;
4c2713
@@ -1355,6 +1680,13 @@ forms_identifier_p (cpp_reader *pfile, int first,
4c2713
 	       && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
4c2713
 	{
4c2713
 	  buffer->cur += 2;
4c2713
+	  if (warn_bidi_p)
4c2713
+	    {
4c2713
+	      bidi::kind kind = get_bidi_ucn (buffer->cur,
4c2713
+					      buffer->cur[-1] == 'U');
4c2713
+	      maybe_warn_bidi_on_char (pfile, buffer->cur, kind,
4c2713
+				       /*ucn_p=*/true);
4c2713
+	    }
4c2713
 	  if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
4c2713
 			      state, &s, NULL, NULL))
4c2713
 	    return true;
4c2713
@@ -1463,6 +1795,8 @@ lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
4c2713
   const uchar *cur;
4c2713
   unsigned int len;
4c2713
   unsigned int hash = HT_HASHSTEP (0, *base);
4c2713
+  const bool warn_bidi_p = (CPP_OPTION (pfile, cpp_warn_bidirectional)
4c2713
+			    != bidirectional_none);
4c2713
 
4c2713
   cur = pfile->buffer->cur;
4c2713
   if (! starts_ucn)
4c2713
@@ -1479,13 +1813,17 @@ lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
4c2713
     {
4c2713
       /* Slower version for identifiers containing UCNs
4c2713
 	 or extended chars (including $).  */
4c2713
-      do {
4c2713
-	while (ISIDNUM (*pfile->buffer->cur))
4c2713
-	  {
4c2713
-	    NORMALIZE_STATE_UPDATE_IDNUM (nst, *pfile->buffer->cur);
4c2713
-	    pfile->buffer->cur++;
4c2713
-	  }
4c2713
-      } while (forms_identifier_p (pfile, false, nst));
4c2713
+      do
4c2713
+	{
4c2713
+	  while (ISIDNUM (*pfile->buffer->cur))
4c2713
+	    {
4c2713
+	      NORMALIZE_STATE_UPDATE_IDNUM (nst, *pfile->buffer->cur);
4c2713
+	      pfile->buffer->cur++;
4c2713
+	    }
4c2713
+	}
4c2713
+      while (forms_identifier_p (pfile, false, nst));
4c2713
+      if (warn_bidi_p)
4c2713
+	maybe_warn_bidi_on_close (pfile, pfile->buffer->cur);
4c2713
       result = _cpp_interpret_identifier (pfile, base,
4c2713
 					  pfile->buffer->cur - base);
4c2713
       *spelling = cpp_lookup (pfile, base, pfile->buffer->cur - base);
4c2713
@@ -1732,6 +2070,8 @@ static void
4c2713
 lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
4c2713
 {
4c2713
   const uchar *pos = base;
4c2713
+  const bool warn_bidi_p = (CPP_OPTION (pfile, cpp_warn_bidirectional)
4c2713
+			    != bidirectional_none);
4c2713
 
4c2713
   /* 'tis a pity this information isn't passed down from the lexer's
4c2713
      initial categorization of the token.  */
4c2713
@@ -1968,8 +2308,15 @@ lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
4c2713
 	  pos = base = pfile->buffer->cur;
4c2713
 	  note = &pfile->buffer->notes[pfile->buffer->cur_note];
4c2713
 	}
4c2713
+      else if (__builtin_expect ((unsigned char) c == bidi::utf8_start, 0)
4c2713
+	       && warn_bidi_p)
4c2713
+	maybe_warn_bidi_on_char (pfile, pos - 1, get_bidi_utf8 (pos - 1),
4c2713
+				 /*ucn_p=*/false);
4c2713
     }
4c2713
 
4c2713
+  if (warn_bidi_p)
4c2713
+    maybe_warn_bidi_on_close (pfile, pos);
4c2713
+
4c2713
   if (CPP_OPTION (pfile, user_literals))
4c2713
     {
4c2713
       /* If a string format macro, say from inttypes.h, is placed touching
4c2713
@@ -2064,15 +2411,28 @@ lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
4c2713
   else
4c2713
     terminator = '>', type = CPP_HEADER_NAME;
4c2713
 
4c2713
+  const bool warn_bidi_p = (CPP_OPTION (pfile, cpp_warn_bidirectional)
4c2713
+			    != bidirectional_none);
4c2713
   for (;;)
4c2713
     {
4c2713
       cppchar_t c = *cur++;
4c2713
 
4c2713
       /* In #include-style directives, terminators are not escapable.  */
4c2713
       if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
4c2713
-	cur++;
4c2713
+	{
4c2713
+	  if ((cur[0] == 'u' || cur[0] == 'U') && warn_bidi_p)
4c2713
+	    {
4c2713
+	      bidi::kind kind = get_bidi_ucn (cur + 1, cur[0] == 'U');
4c2713
+	      maybe_warn_bidi_on_char (pfile, cur, kind, /*ucn_p=*/true);
4c2713
+	    }
4c2713
+	  cur++;
4c2713
+	}
4c2713
       else if (c == terminator)
4c2713
-	break;
4c2713
+	{
4c2713
+	  if (warn_bidi_p)
4c2713
+	    maybe_warn_bidi_on_close (pfile, cur - 1);
4c2713
+	  break;
4c2713
+	}
4c2713
       else if (c == '\n')
4c2713
 	{
4c2713
 	  cur--;
4c2713
@@ -2089,6 +2449,11 @@ lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
4c2713
 	}
4c2713
       else if (c == '\0')
4c2713
 	saw_NUL = true;
4c2713
+      else if (__builtin_expect (c == bidi::utf8_start, 0) && warn_bidi_p)
4c2713
+	{
4c2713
+	  bidi::kind kind = get_bidi_utf8 (cur - 1);
4c2713
+	  maybe_warn_bidi_on_char (pfile, cur - 1, kind, /*ucn_p=*/false);
4c2713
+	}
4c2713
     }
4c2713
 
4c2713
   if (saw_NUL && !pfile->state.skipping)
4c2713
4c2713
base-commit: b0b1d8d5d90d7c499e2733e8d01ba8b73217f332
4c2713
-- 
4c2713
2.31.1
4c2713