Skip to content

Commit

Permalink
Do not unescape backslashes in datafusion-cli (#14844)
Browse files Browse the repository at this point in the history
* correctly treat backslash in datafusion-cli

* Remove all cli unescaping

* remove unnessasary check

* refine test

---------

Co-authored-by: Andrew Lamb <[email protected]>
  • Loading branch information
Lordworms and alamb authored Feb 28, 2025
1 parent 32dab3f commit cf2b7e6
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 62 deletions.
5 changes: 2 additions & 3 deletions datafusion-cli/src/exec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ use crate::helper::split_from_semicolon;
use crate::print_format::PrintFormat;
use crate::{
command::{Command, OutputFormat},
helper::{unescape_input, CliHelper},
helper::CliHelper,
object_storage::get_object_store,
print_options::{MaxRows, PrintOptions},
};
Expand Down Expand Up @@ -168,7 +168,7 @@ pub async fn exec_from_repl(
}
}
Ok(line) => {
let lines = split_from_semicolon(line);
let lines = split_from_semicolon(&line);
for line in lines {
rl.add_history_entry(line.trim_end())?;
tokio::select! {
Expand Down Expand Up @@ -211,7 +211,6 @@ pub(super) async fn exec_and_print(
sql: String,
) -> Result<()> {
let now = Instant::now();
let sql = unescape_input(&sql)?;
let task_ctx = ctx.task_ctx();
let dialect = &task_ctx.session_config().options().sql_parser.dialect;
let dialect = dialect_from_str(dialect).ok_or_else(|| {
Expand Down
73 changes: 14 additions & 59 deletions datafusion-cli/src/helper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,8 @@ use std::borrow::Cow;

use crate::highlighter::{NoSyntaxHighlighter, SyntaxHighlighter};

use datafusion::common::sql_datafusion_err;
use datafusion::error::DataFusionError;
use datafusion::sql::parser::{DFParser, Statement};
use datafusion::sql::sqlparser::dialect::dialect_from_str;
use datafusion::sql::sqlparser::parser::ParserError;

use rustyline::completion::{Completer, FilenameCompleter, Pair};
use rustyline::error::ReadlineError;
Expand Down Expand Up @@ -63,15 +60,6 @@ impl CliHelper {

fn validate_input(&self, input: &str) -> Result<ValidationResult> {
if let Some(sql) = input.strip_suffix(';') {
let sql = match unescape_input(sql) {
Ok(sql) => sql,
Err(err) => {
return Ok(ValidationResult::Invalid(Some(format!(
" 🤔 Invalid statement: {err}",
))))
}
};

let dialect = match dialect_from_str(&self.dialect) {
Some(dialect) => dialect,
None => {
Expand Down Expand Up @@ -166,40 +154,8 @@ impl Validator for CliHelper {

impl Helper for CliHelper {}

/// Unescape input string from readline.
///
/// The data read from stdio will be escaped, so we need to unescape the input before executing the input
pub fn unescape_input(input: &str) -> datafusion::error::Result<String> {
let mut chars = input.chars();

let mut result = String::with_capacity(input.len());
while let Some(char) = chars.next() {
if char == '\\' {
if let Some(next_char) = chars.next() {
// https://static.rust-lang.org/doc/master/reference.html#literals
result.push(match next_char {
'0' => '\0',
'n' => '\n',
'r' => '\r',
't' => '\t',
'\\' => '\\',
_ => {
return Err(sql_datafusion_err!(ParserError::TokenizerError(
format!("unsupported escape char: '\\{}'", next_char)
)))
}
});
}
} else {
result.push(char);
}
}

Ok(result)
}

/// Splits a string which consists of multiple queries.
pub(crate) fn split_from_semicolon(sql: String) -> Vec<String> {
pub(crate) fn split_from_semicolon(sql: &str) -> Vec<String> {
let mut commands = Vec::new();
let mut current_command = String::new();
let mut in_single_quote = false;
Expand Down Expand Up @@ -310,14 +266,13 @@ mod tests {
)?;
assert!(matches!(result, ValidationResult::Valid(None)));

// should be invalid
let result = readline_direct(
Cursor::new(
r"create external table test stored as csv location 'data.csv' options ('format.delimiter' '\u{07}');"
.as_bytes()),
&validator,
)?;
assert!(matches!(result, ValidationResult::Invalid(Some(_))));
Cursor::new(
r"select '\', '\\', '\\\\\', 'dsdsds\\\\', '\t', '\0', '\n';".as_bytes(),
),
&validator,
)?;
assert!(matches!(result, ValidationResult::Valid(None)));

Ok(())
}
Expand Down Expand Up @@ -346,34 +301,34 @@ mod tests {
fn test_split_from_semicolon() {
let sql = "SELECT 1; SELECT 2;";
let expected = vec!["SELECT 1;", "SELECT 2;"];
assert_eq!(split_from_semicolon(sql.to_string()), expected);
assert_eq!(split_from_semicolon(sql), expected);

let sql = r#"SELECT ";";"#;
let expected = vec![r#"SELECT ";";"#];
assert_eq!(split_from_semicolon(sql.to_string()), expected);
assert_eq!(split_from_semicolon(sql), expected);

let sql = "SELECT ';';";
let expected = vec!["SELECT ';';"];
assert_eq!(split_from_semicolon(sql.to_string()), expected);
assert_eq!(split_from_semicolon(sql), expected);

let sql = r#"SELECT 1; SELECT 'value;value'; SELECT 1 as "text;text";"#;
let expected = vec![
"SELECT 1;",
"SELECT 'value;value';",
r#"SELECT 1 as "text;text";"#,
];
assert_eq!(split_from_semicolon(sql.to_string()), expected);
assert_eq!(split_from_semicolon(sql), expected);

let sql = "";
let expected: Vec<String> = Vec::new();
assert_eq!(split_from_semicolon(sql.to_string()), expected);
assert_eq!(split_from_semicolon(sql), expected);

let sql = "SELECT 1";
let expected = vec!["SELECT 1;"];
assert_eq!(split_from_semicolon(sql.to_string()), expected);
assert_eq!(split_from_semicolon(sql), expected);

let sql = "SELECT 1; ";
let expected = vec!["SELECT 1;"];
assert_eq!(split_from_semicolon(sql.to_string()), expected);
assert_eq!(split_from_semicolon(sql), expected);
}
}
4 changes: 4 additions & 0 deletions datafusion-cli/tests/cli_integration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@ fn init() {
["--command", "select 1; select 2;", "--format", "json", "-q"],
"[{\"Int64(1)\":1}]\n[{\"Int64(2)\":2}]\n"
)]
#[case::exec_backslash(
["--file", "tests/data/backslash.txt", "--format", "json", "-q"],
"[{\"Utf8(\\\"\\\\\\\")\":\"\\\\\",\"Utf8(\\\"\\\\\\\\\\\")\":\"\\\\\\\\\",\"Utf8(\\\"\\\\\\\\\\\\\\\\\\\\\\\")\":\"\\\\\\\\\\\\\\\\\\\\\",\"Utf8(\\\"dsdsds\\\\\\\\\\\\\\\\\\\")\":\"dsdsds\\\\\\\\\\\\\\\\\",\"Utf8(\\\"\\\\t\\\")\":\"\\\\t\",\"Utf8(\\\"\\\\0\\\")\":\"\\\\0\",\"Utf8(\\\"\\\\n\\\")\":\"\\\\n\"}]\n"
)]
#[case::exec_from_files(
["--file", "tests/data/sql.txt", "--format", "json", "-q"],
"[{\"Int64(1)\":1}]\n"
Expand Down
1 change: 1 addition & 0 deletions datafusion-cli/tests/data/backslash.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
select '\', '\\', '\\\\\', 'dsdsds\\\\', '\t', '\0', '\n';

0 comments on commit cf2b7e6

Please sign in to comment.