Skip to content

Commit

Permalink
Convert raw strings to non-raw when fixes add escape sequences (astra…
Browse files Browse the repository at this point in the history
  • Loading branch information
ThatsJustCheesy committed Oct 23, 2024
1 parent f335fe4 commit 5696f1e
Show file tree
Hide file tree
Showing 9 changed files with 128 additions and 10 deletions.
Binary file not shown.
6 changes: 3 additions & 3 deletions crates/ruff_linter/src/checkers/tokens.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ use ruff_diagnostics::Diagnostic;
use ruff_python_index::Indexer;
use ruff_python_parser::Tokens;
use ruff_source_file::Locator;
use ruff_text_size::Ranged;

use crate::directives::TodoComment;
use crate::registry::{AsRule, Rule};
Expand Down Expand Up @@ -93,11 +92,12 @@ pub(crate) fn check_tokens(
Rule::InvalidCharacterNul,
Rule::InvalidCharacterZeroWidthSpace,
]) {
let mut last_fstring_start = None;
for token in tokens {
pylint::rules::invalid_string_characters(
&mut diagnostics,
token.kind(),
token.range(),
token,
&mut last_fstring_start,
locator,
);
}
Expand Down
112 changes: 105 additions & 7 deletions crates/ruff_linter/src/rules/pylint/rules/invalid_string_characters.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
use ruff_python_ast::str::Quote;
use ruff_python_ast::StringFlags;
use ruff_python_parser::Token;
use ruff_text_size::Ranged;
use ruff_text_size::{TextLen, TextRange, TextSize};

use ruff_diagnostics::AlwaysFixableViolation;
Expand Down Expand Up @@ -172,19 +176,33 @@ impl AlwaysFixableViolation for InvalidCharacterZeroWidthSpace {
}

/// PLE2510, PLE2512, PLE2513, PLE2514, PLE2515
pub(crate) fn invalid_string_characters(
pub(crate) fn invalid_string_characters<'a>(
diagnostics: &mut Vec<Diagnostic>,
token: TokenKind,
range: TextRange,
token: &'a Token,
last_fstring_start: &mut Option<&'a Token>,
locator: &Locator,
) {
let text = match token {
struct InvalidCharacterDiagnostic {
diagnostic: Diagnostic,
edit: Edit,
}

let kind = token.kind();
let range = token.range();

let text = match kind {
// We can't use the `value` field since it's decoded: e.g., for f-strings, a curly brace that
// escaped another curly brace has been removed, which would give us wrong column information.
TokenKind::String | TokenKind::FStringMiddle => locator.slice(range),
TokenKind::FStringStart => {
*last_fstring_start = Some(token);
return;
}
_ => return,
};

// Accumulate diagnostics here to postpone generating shared fixes until we know we need them.
let mut new_diagnostics: Vec<InvalidCharacterDiagnostic> = Vec::new();
for (column, match_) in text.match_indices(&['\x08', '\x1A', '\x1B', '\0', '\u{200b}']) {
let c = match_.chars().next().unwrap();
let (replacement, rule): (&str, DiagnosticKind) = match c {
Expand All @@ -201,8 +219,88 @@ pub(crate) fn invalid_string_characters(
let location = range.start() + TextSize::try_from(column).unwrap();
let range = TextRange::at(location, c.text_len());

diagnostics.push(Diagnostic::new(rule, range).with_fix(Fix::safe_edit(
Edit::range_replacement(replacement.to_string(), range),
)));
new_diagnostics.push(InvalidCharacterDiagnostic {
diagnostic: Diagnostic::new(rule, range),
// This is integrated with other fixes and attached to the diagnostic below.
edit: Edit::range_replacement(replacement.to_string(), range),
});
}
if new_diagnostics.is_empty() {
// No issues, nothing to fix.
return;
}

// Convert raw strings to non-raw strings when fixes are applied:
// https://github.com/astral-sh/ruff/issues/13294#issuecomment-2341955180
let mut string_conversion_edits = Vec::new();
if token.is_raw_string() {
let string_flags = token.string_flags();
let prefix = string_flags.prefix().as_str();

// 1. Remove the raw string prefix.
for (column, match_) in prefix.match_indices(&['r', 'R']) {
let c = match_.chars().next().unwrap();

let entire_string_range = match kind {
TokenKind::String => range,
_ => last_fstring_start.unwrap().range(),
};
let location = entire_string_range.start() + TextSize::try_from(column).unwrap();
let range = TextRange::at(location, c.text_len());

string_conversion_edits.push(Edit::range_deletion(range));
}

// 2. Escape '\' and quote characters inside the string content.
let (content_start, content_end): (TextSize, TextSize) = match kind {
TokenKind::String => (
prefix.text_len() + string_flags.quote_len(),
TextSize::try_from(text.len()).unwrap() - string_flags.quote_len(),
),
_ => (0.into(), text.len().try_into().unwrap()),
};
let string_content = &text[content_start.to_usize()..content_end.to_usize()];
for (column, match_) in string_content.match_indices(&['\\', '\'', '"']) {
let c = match_.chars().next().unwrap();
let replacement: &str = match c {
'\\' => "\\\\",
'\'' | '"' => {
if string_flags.is_triple_quoted() {
continue;
}
match (c, string_flags.quote_style()) {
('\'', Quote::Single) => "\\'",
('"', Quote::Double) => "\\\"",
_ => {
continue;
}
}
}
_ => {
continue;
}
};

let location = range.start() + content_start + TextSize::try_from(column).unwrap();
let range = TextRange::at(location, c.text_len());

string_conversion_edits.push(Edit::range_replacement(replacement.to_string(), range));
}

// 3. Add back '\' characters for line continuation in non-triple-quoted strings.
if !string_flags.is_triple_quoted() {
for (column, _match) in string_content.match_indices("\\\n") {
let location = range.start() + content_start + TextSize::try_from(column).unwrap();
string_conversion_edits.push(Edit::insertion(
"\\n\\".to_string(),
location + TextSize::from(1),
));
}
}
}

for InvalidCharacterDiagnostic { diagnostic, edit } in new_diagnostics {
diagnostics
.push(diagnostic.with_fix(Fix::safe_edits(edit, string_conversion_edits.clone())));
}
}
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
20 changes: 20 additions & 0 deletions crates/ruff_python_parser/src/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,16 @@ impl Token {
self.flags.is_triple_quoted()
}

/// Reports whether this string-like token carries the raw-string flag.
///
/// # Panics
///
/// Panics if the token is neither a string token nor one of the f-string tokens.
pub const fn is_raw_string(self) -> bool {
    // The raw flag is only consulted for string-like tokens; anything else is a caller bug.
    assert!(self.is_any_string());
    let flags = self.flags;
    flags.is_raw_string()
}

/// Returns the [`Quote`] style for the current string token of any kind.
///
/// # Panics
Expand All @@ -64,6 +74,16 @@ impl Token {
self.flags.quote_style()
}

/// Exposes this string-like token's flags as [`AnyStringFlags`].
///
/// # Panics
///
/// Panics if the token is neither a string token nor one of the f-string tokens.
pub fn string_flags(self) -> AnyStringFlags {
    // String flags are only consulted for string-like tokens; anything else is a caller bug.
    assert!(self.is_any_string());
    let flags = self.flags;
    flags.as_any_string_flags()
}

/// Returns `true` if this is any kind of string token.
const fn is_any_string(self) -> bool {
matches!(
Expand Down

0 comments on commit 5696f1e

Please sign in to comment.