Tree - rpms/rust - CentOS Git server

rpms / rust

Blame SOURCES/rustc-1.54.0-unicode-control-codepoints.patch

Blob History Raw

		16df65	`From 7eaba1c3599f414294e5451e472456eed533475b Mon Sep 17 00:00:00 2001`
		16df65	`From: =?UTF-8?q?Esteban=20K=C3=BCber?= <esteban@kuber.com.ar>`
		16df65	`Date: Thu, 19 Aug 2021 11:40:00 -0700`
		16df65	`Subject: [PATCH] Lint against RTL unicode codepoints in literals and comments`
		16df65
		16df65	`Address CVE-2021-42574.`
		16df65
		16df65	`Backported-by: Josh Stone <jistone@redhat.com>`
		16df65	`---`
		16df65	`Cargo.lock \| 1 +`
		16df65	`compiler/rustc_errors/src/emitter.rs \| 20 +-`
		16df65	`compiler/rustc_lint/src/context.rs \| 39 +++-`
		16df65	`.../src/hidden_unicode_codepoints.rs \| 161 +++++++++++++++`
		16df65	`compiler/rustc_lint/src/lib.rs \| 3 +`
		16df65	`compiler/rustc_lint_defs/src/builtin.rs \| 28 +++`
		16df65	`compiler/rustc_lint_defs/src/lib.rs \| 1 +`
		16df65	`compiler/rustc_parse/Cargo.toml \| 1 +`
		16df65	`compiler/rustc_parse/src/lexer/mod.rs \| 40 +++-`
		16df65	`.../src/lexer/unescape_error_reporting.rs \| 16 +-`
		16df65	`.../ui/parser/unicode-control-codepoints.rs \| 39 ++++`
		16df65	`.../parser/unicode-control-codepoints.stderr \| 184 ++++++++++++++++++`
		16df65	`12 files changed, 525 insertions(+), 8 deletions(-)`
		16df65	`create mode 100644 compiler/rustc_lint/src/hidden_unicode_codepoints.rs`
		16df65	`create mode 100644 src/test/ui/parser/unicode-control-codepoints.rs`
		16df65	`create mode 100644 src/test/ui/parser/unicode-control-codepoints.stderr`
		16df65
		16df65	`diff --git a/Cargo.lock b/Cargo.lock`
		16df65	`index de110c55a4b2..140026dc6ae1 100644`
		16df65	`--- a/Cargo.lock`
		16df65	`+++ b/Cargo.lock`
		16df65	`@@ -4170,6 +4170,7 @@ dependencies = [`
		16df65	`"smallvec",`
		16df65	`"tracing",`
		16df65	`"unicode-normalization",`
		16df65	`+ "unicode-width",`
		16df65	`]`
		16df65
		16df65	`[[package]]`
		16df65	`diff --git a/compiler/rustc_errors/src/emitter.rs b/compiler/rustc_errors/src/emitter.rs`
		16df65	`index d3f92bf3047b..db7bbe58c80c 100644`
		16df65	`--- a/compiler/rustc_errors/src/emitter.rs`
		16df65	`+++ b/compiler/rustc_errors/src/emitter.rs`
		16df65	`@@ -1977,8 +1977,26 @@ fn num_decimal_digits(num: usize) -> usize {`
		16df65	`MAX_DIGITS`
		16df65	`}`
		16df65
		16df65	`+// We replace some characters so the CLI output is always consistent and underlines aligned.`
		16df65	`+const OUTPUT_REPLACEMENTS: &[(char, &str)] = &[`
		16df65	`+ ('\t', " "), // We do our own tab replacement`
		16df65	`+ ('\u{202A}', ""), // The following unicode text flow control characters are inconsistently`
		16df65	`+ ('\u{202B}', ""), // supported across CLIs and can cause confusion due to the bytes on disk`
		16df65	`+ ('\u{202D}', ""), // not corresponding to the visible source code, so we replace them always.`
		16df65	`+ ('\u{202E}', ""),`
		16df65	`+ ('\u{2066}', ""),`
		16df65	`+ ('\u{2067}', ""),`
		16df65	`+ ('\u{2068}', ""),`
		16df65	`+ ('\u{202C}', ""),`
		16df65	`+ ('\u{2069}', ""),`
		16df65	`+];`
		16df65	`+`
		16df65	`fn replace_tabs(str: &str) -> String {`
		16df65	`- str.replace('\t', " ")`
		16df65	`+ let mut s = str.to_string();`
		16df65	`+ for (c, replacement) in OUTPUT_REPLACEMENTS {`
		16df65	`+ s = s.replace(*c, replacement);`
		16df65	`+ }`
		16df65	`+ s`
		16df65	`}`
		16df65
		16df65	`fn draw_col_separator(buffer: &mut StyledBuffer, line: usize, col: usize) {`
		16df65	`diff --git a/compiler/rustc_lint/src/context.rs b/compiler/rustc_lint/src/context.rs`
		16df65	`index a8df1b0952c1..0a2b55dbbada 100644`
		16df65	`--- a/compiler/rustc_lint/src/context.rs`
		16df65	`+++ b/compiler/rustc_lint/src/context.rs`
		16df65	`@@ -16,6 +16,7 @@`
		16df65
		16df65	`use self::TargetLint::*;`
		16df65
		16df65	`+use crate::hidden_unicode_codepoints::UNICODE_TEXT_FLOW_CHARS;`
		16df65	`use crate::levels::LintLevelsBuilder;`
		16df65	`use crate::passes::{EarlyLintPassObject, LateLintPassObject};`
		16df65	`use rustc_ast as ast;`
		16df65	`@@ -40,7 +41,7 @@`
		16df65	`use rustc_session::Session;`
		16df65	`use rustc_session::SessionLintStore;`
		16df65	`use rustc_span::lev_distance::find_best_match_for_name;`
		16df65	`-use rustc_span::{symbol::Symbol, MultiSpan, Span, DUMMY_SP};`
		16df65	`+use rustc_span::{symbol::Symbol, BytePos, MultiSpan, Span, DUMMY_SP};`
		16df65	`use rustc_target::abi::LayoutOf;`
		16df65	`use tracing::debug;`
		16df65
		16df65	`@@ -601,6 +602,42 @@ fn lookup_with_diagnostics(`
		16df65	`// Now, set up surrounding context.`
		16df65	`let sess = self.sess();`
		16df65	`match diagnostic {`
		16df65	`+ BuiltinLintDiagnostics::UnicodeTextFlow(span, content) => {`
		16df65	`+ let spans: Vec<_> = content`
		16df65	`+ .char_indices()`
		16df65	`+ .filter_map(\|(i, c)\| {`
		16df65	`+ UNICODE_TEXT_FLOW_CHARS.contains(&c).then(\|\| {`
		16df65	`+ let lo = span.lo() + BytePos(2 + i as u32);`
		16df65	`+ (c, span.with_lo(lo).with_hi(lo + BytePos(c.len_utf8() as u32)))`
		16df65	`+ })`
		16df65	`+ })`
		16df65	`+ .collect();`
		16df65	`+ let (an, s) = match spans.len() {`
		16df65	`+ 1 => ("an ", ""),`
		16df65	`+ _ => ("", "s"),`
		16df65	`+ };`
		16df65	`+ db.span_label(span, &format!(`
		16df65	`+ "this comment contains {}invisible unicode text flow control codepoint{}",`
		16df65	`+ an,`
		16df65	`+ s,`
		16df65	`+ ));`
		16df65	`+ for (c, span) in &spans {`
		16df65	`+ db.span_label(*span, format!("{:?}", c));`
		16df65	`+ }`
		16df65	`+ db.note(`
		16df65	`+ "these kind of unicode codepoints change the way text flows on \`
		16df65	`+ applications that support them, but can cause confusion because they \`
		16df65	`+ change the order of characters on the screen",`
		16df65	`+ );`
		16df65	`+ if !spans.is_empty() {`
		16df65	`+ db.multipart_suggestion_with_style(`
		16df65	`+ "if their presence wasn't intentional, you can remove them",`
		16df65	`+ spans.into_iter().map(\|(_, span)\| (span, "".to_string())).collect(),`
		16df65	`+ Applicability::MachineApplicable,`
		16df65	`+ SuggestionStyle::HideCodeAlways,`
		16df65	`+ );`
		16df65	`+ }`
		16df65	`+ },`
		16df65	`BuiltinLintDiagnostics::Normal => (),`
		16df65	`BuiltinLintDiagnostics::BareTraitObject(span, is_global) => {`
		16df65	`let (sugg, app) = match sess.source_map().span_to_snippet(span) {`
		16df65	`diff --git a/compiler/rustc_lint/src/hidden_unicode_codepoints.rs b/compiler/rustc_lint/src/hidden_unicode_codepoints.rs`
		16df65	`new file mode 100644`
		16df65	`index 000000000000..1bcdcb806fc4`
		16df65	`--- /dev/null`
		16df65	`+++ b/compiler/rustc_lint/src/hidden_unicode_codepoints.rs`
		16df65	`@@ -0,0 +1,161 @@`
		16df65	`+use crate::{EarlyContext, EarlyLintPass, LintContext};`
		16df65	`+use rustc_ast as ast;`
		16df65	`+use rustc_errors::{Applicability, SuggestionStyle};`
		16df65	`+use rustc_span::{BytePos, Span, Symbol};`
		16df65	`+`
		16df65	`+declare_lint! {`
		16df65	+ /// The `text_direction_codepoint_in_literal` lint detects Unicode codepoints that change the
		16df65	`+ /// visual representation of text on screen in a way that does not correspond to their`
		16df65	`+ /// in-memory representation.`
		16df65	`+ ///`
		16df65	`+ /// ### Explanation`
		16df65	`+ ///`
		16df65	+ /// The unicode characters `\u{202A}`, `\u{202B}`, `\u{202D}`, `\u{202E}`, `\u{2066}`,
		16df65	+ /// `\u{2067}`, `\u{2068}`, `\u{202C}` and `\u{2069}` make the flow of text on screen change
		16df65	`+ /// its direction on software that supports these codepoints. This makes the text "abc" display`
		16df65	`+ /// as "cba" on screen. By leveraging software that supports these, people can write specially`
		16df65	`+ /// crafted literals that make the surrounding code seem like it's performing one action, when`
		16df65	`+ /// in reality it is performing another. Because of this, we proactively lint against their`
		16df65	`+ /// presence to avoid surprises.`
		16df65	`+ ///`
		16df65	`+ /// ### Example`
		16df65	`+ ///`
		16df65	+ /// ```rust,compile_fail
		16df65	`+ /// #![deny(text_direction_codepoint_in_literal)]`
		16df65	`+ /// fn main() {`
		16df65	`+ /// println!("{:?}", '‮');`
		16df65	`+ /// }`
		16df65	+ /// ```
		16df65	`+ ///`
		16df65	`+ /// {{produces}}`
		16df65	`+ ///`
		16df65	`+ pub TEXT_DIRECTION_CODEPOINT_IN_LITERAL,`
		16df65	`+ Deny,`
		16df65	`+ "detect special Unicode codepoints that affect the visual representation of text on screen, \`
		16df65	`+ changing the direction in which text flows",`
		16df65	`+}`
		16df65	`+`
		16df65	`+declare_lint_pass!(HiddenUnicodeCodepoints => [TEXT_DIRECTION_CODEPOINT_IN_LITERAL]);`
		16df65	`+`
		16df65	`+crate const UNICODE_TEXT_FLOW_CHARS: &[char] = &[`
		16df65	`+ '\u{202A}', '\u{202B}', '\u{202D}', '\u{202E}', '\u{2066}', '\u{2067}', '\u{2068}', '\u{202C}',`
		16df65	`+ '\u{2069}',`
		16df65	`+];`
		16df65	`+`
		16df65	`+impl HiddenUnicodeCodepoints {`
		16df65	`+ fn lint_text_direction_codepoint(`
		16df65	`+ &self,`
		16df65	`+ cx: &EarlyContext<'_>,`
		16df65	`+ text: Symbol,`
		16df65	`+ span: Span,`
		16df65	`+ padding: u32,`
		16df65	`+ point_at_inner_spans: bool,`
		16df65	`+ label: &str,`
		16df65	`+ ) {`
		16df65	+ // Obtain the `Span`s for each of the forbidden chars.
		16df65	`+ let spans: Vec<_> = text`
		16df65	`+ .as_str()`
		16df65	`+ .char_indices()`
		16df65	`+ .filter_map(\|(i, c)\| {`
		16df65	`+ UNICODE_TEXT_FLOW_CHARS.contains(&c).then(\|\| {`
		16df65	`+ let lo = span.lo() + BytePos(i as u32 + padding);`
		16df65	`+ (c, span.with_lo(lo).with_hi(lo + BytePos(c.len_utf8() as u32)))`
		16df65	`+ })`
		16df65	`+ })`
		16df65	`+ .collect();`
		16df65	`+`
		16df65	`+ cx.struct_span_lint(TEXT_DIRECTION_CODEPOINT_IN_LITERAL, span, \|lint\| {`
		16df65	`+ let mut err = lint.build(&format!(`
		16df65	`+ "unicode codepoint changing visible direction of text present in {}",`
		16df65	`+ label`
		16df65	`+ ));`
		16df65	`+ let (an, s) = match spans.len() {`
		16df65	`+ 1 => ("an ", ""),`
		16df65	`+ _ => ("", "s"),`
		16df65	`+ };`
		16df65	`+ err.span_label(`
		16df65	`+ span,`
		16df65	`+ &format!(`
		16df65	`+ "this {} contains {}invisible unicode text flow control codepoint{}",`
		16df65	`+ label, an, s,`
		16df65	`+ ),`
		16df65	`+ );`
		16df65	`+ if point_at_inner_spans {`
		16df65	`+ for (c, span) in &spans {`
		16df65	`+ err.span_label(*span, format!("{:?}", c));`
		16df65	`+ }`
		16df65	`+ }`
		16df65	`+ err.note(`
		16df65	`+ "these kind of unicode codepoints change the way text flows on applications that \`
		16df65	`+ support them, but can cause confusion because they change the order of \`
		16df65	`+ characters on the screen",`
		16df65	`+ );`
		16df65	`+ if point_at_inner_spans && !spans.is_empty() {`
		16df65	`+ err.multipart_suggestion_with_style(`
		16df65	`+ "if their presence wasn't intentional, you can remove them",`
		16df65	`+ spans.iter().map(\|(_, span)\| (*span, "".to_string())).collect(),`
		16df65	`+ Applicability::MachineApplicable,`
		16df65	`+ SuggestionStyle::HideCodeAlways,`
		16df65	`+ );`
		16df65	`+ err.multipart_suggestion(`
		16df65	`+ "if you want to keep them but make them visible in your source code, you can \`
		16df65	`+ escape them",`
		16df65	`+ spans`
		16df65	`+ .into_iter()`
		16df65	`+ .map(\|(c, span)\| {`
		16df65	`+ let c = format!("{:?}", c);`
		16df65	`+ (span, c[1..c.len() - 1].to_string())`
		16df65	`+ })`
		16df65	`+ .collect(),`
		16df65	`+ Applicability::MachineApplicable,`
		16df65	`+ );`
		16df65	`+ } else {`
		16df65	`+ // FIXME: in other suggestions we've reversed the inner spans of doc comments. We`
		16df65	`+ // should do the same here to provide the same good suggestions as we do for`
		16df65	`+ // literals above.`
		16df65	`+ err.note("if their presence wasn't intentional, you can remove them");`
		16df65	`+ err.note(&format!(`
		16df65	`+ "if you want to keep them but make them visible in your source code, you can \`
		16df65	`+ escape them: {}",`
		16df65	`+ spans`
		16df65	`+ .into_iter()`
		16df65	`+ .map(\|(c, _)\| { format!("{:?}", c) })`
		16df65	`+ .collect::<Vec<String>>()`
		16df65	`+ .join(", "),`
		16df65	`+ ));`
		16df65	`+ }`
		16df65	`+ err.emit();`
		16df65	`+ });`
		16df65	`+ }`
		16df65	`+}`
		16df65	`+impl EarlyLintPass for HiddenUnicodeCodepoints {`
		16df65	`+ fn check_attribute(&mut self, cx: &EarlyContext<'_>, attr: &ast::Attribute) {`
		16df65	`+ if let ast::AttrKind::DocComment(_, comment) = attr.kind {`
		16df65	`+ if comment.as_str().contains(UNICODE_TEXT_FLOW_CHARS) {`
		16df65	`+ self.lint_text_direction_codepoint(cx, comment, attr.span, 0, false, "doc comment");`
		16df65	`+ }`
		16df65	`+ }`
		16df65	`+ }`
		16df65	`+`
		16df65	`+ fn check_expr(&mut self, cx: &EarlyContext<'_>, expr: &ast::Expr) {`
		16df65	+ // byte strings are already handled well enough by `EscapeError::NonAsciiCharInByteString`
		16df65	`+ let (text, span, padding) = match &expr.kind {`
		16df65	`+ ast::ExprKind::Lit(ast::Lit { token, kind, span }) => {`
		16df65	`+ let text = token.symbol;`
		16df65	`+ if !text.as_str().contains(UNICODE_TEXT_FLOW_CHARS) {`
		16df65	`+ return;`
		16df65	`+ }`
		16df65	`+ let padding = match kind {`
		16df65	+ // account for `"` or `'`
		16df65	`+ ast::LitKind::Str(_, ast::StrStyle::Cooked) \| ast::LitKind::Char(_) => 1,`
		16df65	+ // account for `r###"`
		16df65	`+ ast::LitKind::Str(_, ast::StrStyle::Raw(val)) => *val as u32 + 2,`
		16df65	`+ _ => return,`
		16df65	`+ };`
		16df65	`+ (text, span, padding)`
		16df65	`+ }`
		16df65	`+ _ => return,`
		16df65	`+ };`
		16df65	`+ self.lint_text_direction_codepoint(cx, text, *span, padding, true, "literal");`
		16df65	`+ }`
		16df65	`+}`
		16df65	`diff --git a/compiler/rustc_lint/src/lib.rs b/compiler/rustc_lint/src/lib.rs`
		16df65	`index 4f59460aa82a..89612eb72b48 100644`
		16df65	`--- a/compiler/rustc_lint/src/lib.rs`
		16df65	`+++ b/compiler/rustc_lint/src/lib.rs`
		16df65	`@@ -48,6 +48,7 @@`
		16df65	`pub mod builtin;`
		16df65	`mod context;`
		16df65	`mod early;`
		16df65	`+pub mod hidden_unicode_codepoints;`
		16df65	`mod internal;`
		16df65	`mod late;`
		16df65	`mod levels;`
		16df65	`@@ -75,6 +76,7 @@`
		16df65
		16df65	`use array_into_iter::ArrayIntoIter;`
		16df65	`use builtin::*;`
		16df65	`+use hidden_unicode_codepoints::*;`
		16df65	`use internal::*;`
		16df65	`use methods::*;`
		16df65	`use non_ascii_idents::*;`
		16df65	`@@ -126,6 +128,7 @@ macro_rules! early_lint_passes {`
		16df65	`DeprecatedAttr: DeprecatedAttr::new(),`
		16df65	`WhileTrue: WhileTrue,`
		16df65	`NonAsciiIdents: NonAsciiIdents,`
		16df65	`+ HiddenUnicodeCodepoints: HiddenUnicodeCodepoints,`
		16df65	`IncompleteFeatures: IncompleteFeatures,`
		16df65	`RedundantSemicolons: RedundantSemicolons,`
		16df65	`UnusedDocComment: UnusedDocComment,`
		16df65	`diff --git a/compiler/rustc_lint_defs/src/builtin.rs b/compiler/rustc_lint_defs/src/builtin.rs`
		16df65	`index 352146d64635..cd4d5c6e5f1c 100644`
		16df65	`--- a/compiler/rustc_lint_defs/src/builtin.rs`
		16df65	`+++ b/compiler/rustc_lint_defs/src/builtin.rs`
		16df65	`@@ -3240,3 +3240,31 @@`
		16df65	`Allow,`
		16df65	`"detects usage of old versions of or-patterns",`
		16df65	`}`
		16df65	`+`
		16df65	`+declare_lint! {`
		16df65	+ /// The `text_direction_codepoint_in_comment` lint detects Unicode codepoints in comments that
		16df65	`+ /// change the visual representation of text on screen in a way that does not correspond to`
		16df65	`+ /// their in-memory representation.`
		16df65	`+ ///`
		16df65	`+ /// ### Example`
		16df65	`+ ///`
		16df65	+ /// ```rust,compile_fail
		16df65	`+ /// #![deny(text_direction_codepoint_in_comment)]`
		16df65	`+ /// fn main() {`
		16df65	`+ /// println!("{:?}"); // '‮');`
		16df65	`+ /// }`
		16df65	+ /// ```
		16df65	`+ ///`
		16df65	`+ /// {{produces}}`
		16df65	`+ ///`
		16df65	`+ /// ### Explanation`
		16df65	`+ ///`
		16df65	`+ /// Unicode allows changing the visual flow of text on screen in order to support scripts that`
		16df65	`+ /// are written right-to-left, but a specially crafted comment can make code that will be`
		16df65	`+ /// compiled appear to be part of a comment, depending on the software used to read the code.`
		16df65	`+ /// To avoid potential problems or confusion, such as in CVE-2021-42574, by default we deny`
		16df65	`+ /// their use.`
		16df65	`+ pub TEXT_DIRECTION_CODEPOINT_IN_COMMENT,`
		16df65	`+ Deny,`
		16df65	`+ "invisible directionality-changing codepoints in comment"`
		16df65	`+}`
		16df65	`diff --git a/compiler/rustc_lint_defs/src/lib.rs b/compiler/rustc_lint_defs/src/lib.rs`
		16df65	`index f1c4e5fb4a36..1ec50fe5fbb5 100644`
		16df65	`--- a/compiler/rustc_lint_defs/src/lib.rs`
		16df65	`+++ b/compiler/rustc_lint_defs/src/lib.rs`
		16df65	`@@ -272,6 +272,7 @@ pub enum BuiltinLintDiagnostics {`
		16df65	`ExternDepSpec(String, ExternDepSpec),`
		16df65	`ProcMacroBackCompat(String),`
		16df65	`OrPatternsBackCompat(Span, String),`
		16df65	`+ UnicodeTextFlow(Span, String),`
		16df65	`}`
		16df65
		16df65	/// Lints that are buffered up early on in the `Session` before the
		16df65	`diff --git a/compiler/rustc_parse/Cargo.toml b/compiler/rustc_parse/Cargo.toml`
		16df65	`index c887729c3557..96a215628dfb 100644`
		16df65	`--- a/compiler/rustc_parse/Cargo.toml`
		16df65	`+++ b/compiler/rustc_parse/Cargo.toml`
		16df65	`@@ -19,4 +19,5 @@ rustc_session = { path = "../rustc_session" }`
		16df65	`rustc_span = { path = "../rustc_span" }`
		16df65	`rustc_ast = { path = "../rustc_ast" }`
		16df65	`unicode-normalization = "0.1.11"`
		16df65	`+unicode-width = "0.1.4"`
		16df65	`smallvec = { version = "1.6.1", features = ["union", "may_dangle"] }`
		16df65	`diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs`
		16df65	`index 1c2f9a9645fe..97a9a5aee42a 100644`
		16df65	`--- a/compiler/rustc_parse/src/lexer/mod.rs`
		16df65	`+++ b/compiler/rustc_parse/src/lexer/mod.rs`
		16df65	`@@ -1,9 +1,11 @@`
		16df65	`-use rustc_ast::ast::AttrStyle;`
		16df65	`+use rustc_ast::ast::{self, AttrStyle};`
		16df65	`use rustc_ast::token::{self, CommentKind, Token, TokenKind};`
		16df65	`use rustc_ast::tokenstream::{Spacing, TokenStream};`
		16df65	`use rustc_errors::{error_code, Applicability, DiagnosticBuilder, FatalError, PResult};`
		16df65	`use rustc_lexer::unescape::{self, Mode};`
		16df65	`use rustc_lexer::{Base, DocStyle, RawStrError};`
		16df65	`+use rustc_session::lint::builtin::TEXT_DIRECTION_CODEPOINT_IN_COMMENT;`
		16df65	`+use rustc_session::lint::BuiltinLintDiagnostics;`
		16df65	`use rustc_session::parse::ParseSess;`
		16df65	`use rustc_span::symbol::{sym, Symbol};`
		16df65	`use rustc_span::{BytePos, Pos, Span};`
		16df65	`@@ -127,6 +129,28 @@ fn struct_fatal_span_char(`
		16df65	`.struct_span_fatal(self.mk_sp(from_pos, to_pos), &format!("{}: {}", m, escaped_char(c)))`
		16df65	`}`
		16df65
		16df65	`+ /// Detect usages of Unicode codepoints changing the direction of the text on screen and loudly`
		16df65	`+ /// complain about it.`
		16df65	`+ fn lint_unicode_text_flow(&self, start: BytePos) {`
		16df65	`+ // The opening delimiter of length 2 is not included in the comment text.`
		16df65	`+ let content_start = start + BytePos(2);`
		16df65	`+ let content = self.str_from(content_start);`
		16df65	`+ let span = self.mk_sp(start, self.pos);`
		16df65	`+ const UNICODE_TEXT_FLOW_CHARS: &[char] = &[`
		16df65	`+ '\u{202A}', '\u{202B}', '\u{202D}', '\u{202E}', '\u{2066}', '\u{2067}', '\u{2068}',`
		16df65	`+ '\u{202C}', '\u{2069}',`
		16df65	`+ ];`
		16df65	`+ if content.contains(UNICODE_TEXT_FLOW_CHARS) {`
		16df65	`+ self.sess.buffer_lint_with_diagnostic(`
		16df65	`+ &TEXT_DIRECTION_CODEPOINT_IN_COMMENT,`
		16df65	`+ span,`
		16df65	`+ ast::CRATE_NODE_ID,`
		16df65	`+ "unicode codepoint changing visible direction of text present in comment",`
		16df65	`+ BuiltinLintDiagnostics::UnicodeTextFlow(span, content.to_string()),`
		16df65	`+ );`
		16df65	`+ }`
		16df65	`+ }`
		16df65	`+`
		16df65	/// Turns simple `rustc_lexer::TokenKind` enum into a rich
		16df65	/// `rustc_ast::TokenKind`. This turns strings into interned
		16df65	`/// symbols and runs additional validation.`
		16df65	`@@ -134,7 +158,12 @@ fn cook_lexer_token(&self, token: rustc_lexer::TokenKind, start: BytePos) -> Opt`
		16df65	`Some(match token {`
		16df65	`rustc_lexer::TokenKind::LineComment { doc_style } => {`
		16df65	`// Skip non-doc comments`
		16df65	`- let doc_style = doc_style?;`
		16df65	`+ let doc_style = if let Some(doc_style) = doc_style {`
		16df65	`+ doc_style`
		16df65	`+ } else {`
		16df65	`+ self.lint_unicode_text_flow(start);`
		16df65	`+ return None;`
		16df65	`+ };`
		16df65
		16df65	`// Opening delimiter of the length 3 is not included into the symbol.`
		16df65	`let content_start = start + BytePos(3);`
		16df65	`@@ -156,7 +185,12 @@ fn cook_lexer_token(&self, token: rustc_lexer::TokenKind, start: BytePos) -> Opt`
		16df65	`}`
		16df65
		16df65	`// Skip non-doc comments`
		16df65	`- let doc_style = doc_style?;`
		16df65	`+ let doc_style = if let Some(doc_style) = doc_style {`
		16df65	`+ doc_style`
		16df65	`+ } else {`
		16df65	`+ self.lint_unicode_text_flow(start);`
		16df65	`+ return None;`
		16df65	`+ };`
		16df65
		16df65	`// Opening delimiter of the length 3 and closing delimiter of the length 2`
		16df65	`// are not included into the symbol.`
		16df65	`diff --git a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs`
		16df65	`index a580f0c55d0e..1e673e03d67f 100644`
		16df65	`--- a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs`
		16df65	`+++ b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs`
		16df65	`@@ -153,9 +153,14 @@ pub(crate) fn emit_unescape_error(`
		16df65	`EscapeError::NonAsciiCharInByte => {`
		16df65	`assert!(mode.is_bytes());`
		16df65	`let (c, span) = last_char();`
		16df65	`+ let postfix = if unicode_width::UnicodeWidthChar::width(c).unwrap_or(1) == 0 {`
		16df65	`+ format!(" but is {:?}", c)`
		16df65	`+ } else {`
		16df65	`+ String::new()`
		16df65	`+ };`
		16df65	`handler`
		16df65	`.struct_span_err(span, "non-ASCII character in byte constant")`
		16df65	`- .span_label(span, "byte constant must be ASCII")`
		16df65	`+ .span_label(span, &format!("byte constant must be ASCII{}", postfix))`
		16df65	`.span_suggestion(`
		16df65	`span,`
		16df65	`"use a \\xHH escape for a non-ASCII byte",`
		16df65	`@@ -166,10 +171,15 @@ pub(crate) fn emit_unescape_error(`
		16df65	`}`
		16df65	`EscapeError::NonAsciiCharInByteString => {`
		16df65	`assert!(mode.is_bytes());`
		16df65	`- let (_c, span) = last_char();`
		16df65	`+ let (c, span) = last_char();`
		16df65	`+ let postfix = if unicode_width::UnicodeWidthChar::width(c).unwrap_or(1) == 0 {`
		16df65	`+ format!(" but is {:?}", c)`
		16df65	`+ } else {`
		16df65	`+ String::new()`
		16df65	`+ };`
		16df65	`handler`
		16df65	`.struct_span_err(span, "raw byte string must be ASCII")`
		16df65	`- .span_label(span, "must be ASCII")`
		16df65	`+ .span_label(span, &format!("must be ASCII{}", postfix))`
		16df65	`.emit();`
		16df65	`}`
		16df65	`EscapeError::OutOfRangeHexEscape => {`
		16df65	`diff --git a/src/test/ui/parser/unicode-control-codepoints.rs b/src/test/ui/parser/unicode-control-codepoints.rs`
		16df65	`new file mode 100644`
		16df65	`index 000000000000..5af0b585a127`
		16df65	`--- /dev/null`
		16df65	`+++ b/src/test/ui/parser/unicode-control-codepoints.rs`
		16df65	`@@ -0,0 +1,39 @@`
		16df65	`+fn main() {`
		16df65	`+ // if access_level != "us‫e‪r" { // Check if admin`
		16df65	`+ //~^ ERROR unicode codepoint changing visible direction of text present in comment`
		16df65	`+ println!("us\u{202B}e\u{202A}r");`
		16df65	`+ println!("{:?}", r#"us\u{202B}e\u{202A}r"#);`
		16df65	`+ println!("{:?}", b"us\u{202B}e\u{202A}r");`
		16df65	`+ //~^ ERROR unicode escape in byte string`
		16df65	`+ //~\| ERROR unicode escape in byte string`
		16df65	`+ println!("{:?}", br##"us\u{202B}e\u{202A}r"##);`
		16df65	`+`
		16df65	`+ println!("{:?}", "/*‮ } ⁦if isAdmin⁩ ⁦ begin admins only ");`
		16df65	`+ //~^ ERROR unicode codepoint changing visible direction of text present in literal`
		16df65	`+`
		16df65	`+ println!("{:?}", r##"/*‮ } ⁦if isAdmin⁩ ⁦ begin admins only "##);`
		16df65	`+ //~^ ERROR unicode codepoint changing visible direction of text present in literal`
		16df65	`+ println!("{:?}", b"/*‮ } ⁦if isAdmin⁩ ⁦ begin admins only ");`
		16df65	`+ //~^ ERROR non-ASCII character in byte constant`
		16df65	`+ //~\| ERROR non-ASCII character in byte constant`
		16df65	`+ //~\| ERROR non-ASCII character in byte constant`
		16df65	`+ //~\| ERROR non-ASCII character in byte constant`
		16df65	`+ println!("{:?}", br##"/*‮ } ⁦if isAdmin⁩ ⁦ begin admins only "##);`
		16df65	`+ //~^ ERROR raw byte string must be ASCII`
		16df65	`+ //~\| ERROR raw byte string must be ASCII`
		16df65	`+ //~\| ERROR raw byte string must be ASCII`
		16df65	`+ //~\| ERROR raw byte string must be ASCII`
		16df65	`+ println!("{:?}", '‮');`
		16df65	`+ //~^ ERROR unicode codepoint changing visible direction of text present in literal`
		16df65	`+}`
		16df65	`+`
		16df65	`+//"/‮ } ⁦if isAdmin⁩ ⁦ begin admins only /"`
		16df65	`+//~^ ERROR unicode codepoint changing visible direction of text present in comment`
		16df65	`+`
		16df65	`+/** '‮'); */fn foo() {}`
		16df65	`+//~^ ERROR unicode codepoint changing visible direction of text present in doc comment`
		16df65	`+`
		16df65	`+/**`
		16df65	`+ *`
		16df65	`+ * '‮'); */fn bar() {}`
		16df65	`+//~^^^ ERROR unicode codepoint changing visible direction of text present in doc comment`
		16df65	`diff --git a/src/test/ui/parser/unicode-control-codepoints.stderr b/src/test/ui/parser/unicode-control-codepoints.stderr`
		16df65	`new file mode 100644`
		16df65	`index 000000000000..650cc74feed0`
		16df65	`--- /dev/null`
		16df65	`+++ b/src/test/ui/parser/unicode-control-codepoints.stderr`
		16df65	`@@ -0,0 +1,184 @@`
		16df65	`+error: unicode escape in byte string`
		16df65	`+ --> $DIR/unicode-control-codepoints.rs:6:26`
		16df65	`+ \|`
		16df65	`+LL \| println!("{:?}", b"us\u{202B}e\u{202A}r");`
		16df65	`+ \| ^^^^^^^^ unicode escape in byte string`
		16df65	`+ \|`
		16df65	`+ = help: unicode escape sequences cannot be used as a byte or in a byte string`
		16df65	`+`
		16df65	`+error: unicode escape in byte string`
		16df65	`+ --> $DIR/unicode-control-codepoints.rs:6:35`
		16df65	`+ \|`
		16df65	`+LL \| println!("{:?}", b"us\u{202B}e\u{202A}r");`
		16df65	`+ \| ^^^^^^^^ unicode escape in byte string`
		16df65	`+ \|`
		16df65	`+ = help: unicode escape sequences cannot be used as a byte or in a byte string`
		16df65	`+`
		16df65	`+error: non-ASCII character in byte constant`
		16df65	`+ --> $DIR/unicode-control-codepoints.rs:16:26`
		16df65	`+ \|`
		16df65	`+LL \| println!("{:?}", b"/* } if isAdmin begin admins only ");`
		16df65	`+ \| ^`
		16df65	`+ \| \|`
		16df65	`+ \| byte constant must be ASCII but is '\u{202e}'`
		16df65	+ \| help: use a \xHH escape for a non-ASCII byte: `\x202E`
		16df65	`+`
		16df65	`+error: non-ASCII character in byte constant`
		16df65	`+ --> $DIR/unicode-control-codepoints.rs:16:30`
		16df65	`+ \|`
		16df65	`+LL \| println!("{:?}", b"/* } if isAdmin begin admins only ");`
		16df65	`+ \| ^`
		16df65	`+ \| \|`
		16df65	`+ \| byte constant must be ASCII but is '\u{2066}'`
		16df65	+ \| help: use a \xHH escape for a non-ASCII byte: `\x2066`
		16df65	`+`
		16df65	`+error: non-ASCII character in byte constant`
		16df65	`+ --> $DIR/unicode-control-codepoints.rs:16:41`
		16df65	`+ \|`
		16df65	`+LL \| println!("{:?}", b"/* } if isAdmin begin admins only ");`
		16df65	`+ \| ^`
		16df65	`+ \| \|`
		16df65	`+ \| byte constant must be ASCII but is '\u{2069}'`
		16df65	+ \| help: use a \xHH escape for a non-ASCII byte: `\x2069`
		16df65	`+`
		16df65	`+error: non-ASCII character in byte constant`
		16df65	`+ --> $DIR/unicode-control-codepoints.rs:16:43`
		16df65	`+ \|`
		16df65	`+LL \| println!("{:?}", b"/* } if isAdmin begin admins only ");`
		16df65	`+ \| ^`
		16df65	`+ \| \|`
		16df65	`+ \| byte constant must be ASCII but is '\u{2066}'`
		16df65	+ \| help: use a \xHH escape for a non-ASCII byte: `\x2066`
		16df65	`+`
		16df65	`+error: raw byte string must be ASCII`
		16df65	`+ --> $DIR/unicode-control-codepoints.rs:21:29`
		16df65	`+ \|`
		16df65	`+LL \| println!("{:?}", br##"/* } if isAdmin begin admins only "##);`
		16df65	`+ \| ^ must be ASCII but is '\u{202e}'`
		16df65	`+`
		16df65	`+error: raw byte string must be ASCII`
		16df65	`+ --> $DIR/unicode-control-codepoints.rs:21:33`
		16df65	`+ \|`
		16df65	`+LL \| println!("{:?}", br##"/* } if isAdmin begin admins only "##);`
		16df65	`+ \| ^ must be ASCII but is '\u{2066}'`
		16df65	`+`
		16df65	`+error: raw byte string must be ASCII`
		16df65	`+ --> $DIR/unicode-control-codepoints.rs:21:44`
		16df65	`+ \|`
		16df65	`+LL \| println!("{:?}", br##"/* } if isAdmin begin admins only "##);`
		16df65	`+ \| ^ must be ASCII but is '\u{2069}'`
		16df65	`+`
		16df65	`+error: raw byte string must be ASCII`
		16df65	`+ --> $DIR/unicode-control-codepoints.rs:21:46`
		16df65	`+ \|`
		16df65	`+LL \| println!("{:?}", br##"/* } if isAdmin begin admins only "##);`
		16df65	`+ \| ^ must be ASCII but is '\u{2066}'`
		16df65	`+`
		16df65	`+error: unicode codepoint changing visible direction of text present in comment`
		16df65	`+ --> $DIR/unicode-control-codepoints.rs:2:5`
		16df65	`+ \|`
		16df65	`+LL \| // if access_level != "user" { // Check if admin`
		16df65	`+ \| ^^^^^^^^^^^^^^^^^^^^^^^^^--^^^^^^^^^^^^^^^^^^^^^`
		16df65	`+ \| \| \|\|`
		16df65	`+ \| \| \|'\u{202a}'`
		16df65	`+ \| \| '\u{202b}'`
		16df65	`+ \| this comment contains invisible unicode text flow control codepoints`
		16df65	`+ \|`
		16df65	+ = note: `#[deny(text_direction_codepoint_in_comment)]` on by default
		16df65	`+ = note: these kind of unicode codepoints change the way text flows on applications that support them, but can cause confusion because they change the order of characters on the screen`
		16df65	`+ = help: if their presence wasn't intentional, you can remove them`
		16df65	`+`
		16df65	`+error: unicode codepoint changing visible direction of text present in comment`
		16df65	`+ --> $DIR/unicode-control-codepoints.rs:30:1`
		16df65	`+ \|`
		16df65	`+LL \| //"/* } if isAdmin begin admins only */"`
		16df65	`+ \| ^^^^^-^^-^^^^^^^^^--^^^^^^^^^^^^^^^^^^^^^`
		16df65	`+ \| \| \| \| \|\|`
		16df65	`+ \| \| \| \| \|'\u{2066}'`
		16df65	`+ \| \| \| \| '\u{2069}'`
		16df65	`+ \| \| \| '\u{2066}'`
		16df65	`+ \| \| '\u{202e}'`
		16df65	`+ \| this comment contains invisible unicode text flow control codepoints`
		16df65	`+ \|`
		16df65	`+ = note: these kind of unicode codepoints change the way text flows on applications that support them, but can cause confusion because they change the order of characters on the screen`
		16df65	`+ = help: if their presence wasn't intentional, you can remove them`
		16df65	`+`
		16df65	`+error: unicode codepoint changing visible direction of text present in literal`
		16df65	`+ --> $DIR/unicode-control-codepoints.rs:11:22`
		16df65	`+ \|`
		16df65	`+LL \| println!("{:?}", "/* } if isAdmin begin admins only ");`
		16df65	`+ \| ^^^-^^-^^^^^^^^^--^^^^^^^^^^^^^^^^^^^`
		16df65	`+ \| \| \| \| \|\|`
		16df65	`+ \| \| \| \| \|'\u{2066}'`
		16df65	`+ \| \| \| \| '\u{2069}'`
		16df65	`+ \| \| \| '\u{2066}'`
		16df65	`+ \| \| '\u{202e}'`
		16df65	`+ \| this literal contains invisible unicode text flow control codepoints`
		16df65	`+ \|`
		16df65	+ = note: `#[deny(text_direction_codepoint_in_literal)]` on by default
		16df65	`+ = note: these kind of unicode codepoints change the way text flows on applications that support them, but can cause confusion because they change the order of characters on the screen`
		16df65	`+ = help: if their presence wasn't intentional, you can remove them`
		16df65	`+help: if you want to keep them but make them visible in your source code, you can escape them`
		16df65	`+ \|`
		16df65	`+LL \| println!("{:?}", "/*\u{202e} } \u{2066}if isAdmin\u{2069} \u{2066} begin admins only ");`
		16df65	`+ \| ^^^^^^^^ ^^^^^^^^ ^^^^^^^^ ^^^^^^^^`
		16df65	`+`
		16df65	`+error: unicode codepoint changing visible direction of text present in literal`
		16df65	`+ --> $DIR/unicode-control-codepoints.rs:14:22`
		16df65	`+ \|`
		16df65	`+LL \| println!("{:?}", r##"/* } if isAdmin begin admins only "##);`
		16df65	`+ \| ^^^^^^-^^-^^^^^^^^^--^^^^^^^^^^^^^^^^^^^^^`
		16df65	`+ \| \| \| \| \|\|`
		16df65	`+ \| \| \| \| \|'\u{2066}'`
		16df65	`+ \| \| \| \| '\u{2069}'`
		16df65	`+ \| \| \| '\u{2066}'`
		16df65	`+ \| \| '\u{202e}'`
		16df65	`+ \| this literal contains invisible unicode text flow control codepoints`
		16df65	`+ \|`
		16df65	`+ = note: these kind of unicode codepoints change the way text flows on applications that support them, but can cause confusion because they change the order of characters on the screen`
		16df65	`+ = help: if their presence wasn't intentional, you can remove them`
		16df65	`+help: if you want to keep them but make them visible in your source code, you can escape them`
		16df65	`+ \|`
		16df65	`+LL \| println!("{:?}", r##"/*\u{202e} } \u{2066}if isAdmin\u{2069} \u{2066} begin admins only "##);`
		16df65	`+ \| ^^^^^^^^ ^^^^^^^^ ^^^^^^^^ ^^^^^^^^`
		16df65	`+`
		16df65	`+error: unicode codepoint changing visible direction of text present in literal`
		16df65	`+ --> $DIR/unicode-control-codepoints.rs:26:22`
		16df65	`+ \|`
		16df65	`+LL \| println!("{:?}", '');`
		16df65	`+ \| ^-`
		16df65	`+ \| \|\|`
		16df65	`+ \| \|'\u{202e}'`
		16df65	`+ \| this literal contains an invisible unicode text flow control codepoint`
		16df65	`+ \|`
		16df65	`+ = note: these kind of unicode codepoints change the way text flows on applications that support them, but can cause confusion because they change the order of characters on the screen`
		16df65	`+ = help: if their presence wasn't intentional, you can remove them`
		16df65	`+help: if you want to keep them but make them visible in your source code, you can escape them`
		16df65	`+ \|`
		16df65	`+LL \| println!("{:?}", '\u{202e}');`
		16df65	`+ \| ^^^^^^^^`
		16df65	`+`
		16df65	`+error: unicode codepoint changing visible direction of text present in doc comment`
		16df65	`+ --> $DIR/unicode-control-codepoints.rs:33:1`
		16df65	`+ \|`
		16df65	`+LL \| /** ''); */fn foo() {}`
		16df65	`+ \| ^^^^^^^^^^^^ this doc comment contains an invisible unicode text flow control codepoint`
		16df65	`+ \|`
		16df65	`+ = note: these kind of unicode codepoints change the way text flows on applications that support them, but can cause confusion because they change the order of characters on the screen`
		16df65	`+ = note: if their presence wasn't intentional, you can remove them`
		16df65	`+ = note: if you want to keep them but make them visible in your source code, you can escape them: '\u{202e}'`
		16df65	`+`
		16df65	`+error: unicode codepoint changing visible direction of text present in doc comment`
		16df65	`+ --> $DIR/unicode-control-codepoints.rs:36:1`
		16df65	`+ \|`
		16df65	`+LL \| / /**`
		16df65	`+LL \| \| *`
		16df65	`+LL \| \| * ''); */fn bar() {}`
		16df65	`+ \| \|___________^ this doc comment contains an invisible unicode text flow control codepoint`
		16df65	`+ \|`
		16df65	`+ = note: these kind of unicode codepoints change the way text flows on applications that support them, but can cause confusion because they change the order of characters on the screen`
		16df65	`+ = note: if their presence wasn't intentional, you can remove them`
		16df65	`+ = note: if you want to keep them but make them visible in your source code, you can escape them: '\u{202e}'`
		16df65	`+`
		16df65	`+error: aborting due to 17 previous errors`
		16df65	`+`
		16df65	`--`
		16df65	`2.31.1`
		16df65

rpms / rust

Source Code

Blame SOURCES/rustc-1.54.0-unicode-control-codepoints.patch