Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
JanJakes committed Jan 9, 2025
1 parent 90f1f29 commit 68eb288
Show file tree
Hide file tree
Showing 3 changed files with 206 additions and 42 deletions.
124 changes: 82 additions & 42 deletions tests/WP_SQLite_Driver_Tests.php
Original file line number Diff line number Diff line change
Expand Up @@ -3033,13 +3033,13 @@ public function testTranslatesUtf8SELECT() {
$this->assertQuery( 'DELETE FROM _options' );
}

public function testTranslateLikeBinaryAndGlob() {
public function testTranslateLikeBinary() {
// Create a temporary table for testing
$this->assertQuery(
"CREATE TABLE _tmp_table (
"CREATE TABLE _tmp_table (
ID INTEGER PRIMARY KEY AUTO_INCREMENT NOT NULL,
name varchar(20) NOT NULL default ''
);"
name varchar(20)
)"
);

// Insert data into the table
Expand All @@ -3052,70 +3052,110 @@ public function testTranslateLikeBinaryAndGlob() {
$this->assertQuery( "INSERT INTO _tmp_table (name) VALUES ('special%chars');" );
$this->assertQuery( "INSERT INTO _tmp_table (name) VALUES ('special_chars');" );
$this->assertQuery( "INSERT INTO _tmp_table (name) VALUES ('special\\chars');" );
$this->assertQuery( "INSERT INTO _tmp_table (name) VALUES ('aste*risk');" );
$this->assertQuery( "INSERT INTO _tmp_table (name) VALUES ('question?mark');" );

// Test case-sensitive LIKE BINARY
// Test exact string
$result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'first'" );
$this->assertCount( 1, $result );
$this->assertEquals( 'first', $result[0]->name );

// Test case-sensitive LIKE BINARY with wildcard %
// Test exact string with no matches
$result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'third'" );
$this->assertCount( 0, $result );

// Test mixed case
$result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'First'" );
$this->assertCount( 0, $result );

// Test % wildcard
$result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'f%'" );
$this->assertCount( 1, $result );
$this->assertEquals( 'first', $result[0]->name );

// Test case-sensitive LIKE BINARY with wildcard _
// Test % wildcard with no matches
$result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'x%'" );
$this->assertCount( 0, $result );

// Test "%" character (not a wildcard)
$result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'special\\%chars'" );
$this->assertCount( 1, $result );
$this->assertEquals( 'special%chars', $result[0]->name );

// Test _ wildcard
$result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'f_rst'" );
$this->assertCount( 1, $result );
$this->assertEquals( 'first', $result[0]->name );

// Test case-insensitive LIKE
$result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE 'FIRST'" );
$this->assertCount( 2, $result ); // Should match both 'first' and 'FIRST'
// Test _ wildcard with no matches
$result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'x_yz'" );
$this->assertCount( 0, $result );

// Test mixed case with LIKE BINARY
$result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'First'" );
$this->assertCount( 0, $result );
// Test "_" character (not a wildcard)
$result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'special\\_chars'" );
$this->assertCount( 1, $result );
$this->assertEquals( 'special_chars', $result[0]->name );

// Test no matches with LIKE BINARY
$result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'third'" );
$this->assertCount( 0, $result );
// Test escaping of "*"
$result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'aste*risk'" );
$this->assertCount( 1, $result );
$this->assertEquals( 'aste*risk', $result[0]->name );

// Test GLOB equivalent for case-sensitive matching with wildcard
$result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name GLOB 'f*'" );
$this->assertCount( 1, $result );
$this->assertEquals( 'first', $result[0]->name );
// Test escaping of "*" with no matches
$result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'f*'" );
$this->assertCount( 0, $result );

// Test GLOB with single character wildcard
$result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name GLOB 'f?rst'" );
$this->assertCount( 1, $result );
$this->assertEquals( 'first', $result[0]->name );

// Test GLOB with no matches
$result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name GLOB 'S*'" );
$this->assertCount( 0, $result );
// Test escaping of "?"
$result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'question?mark'" );
$this->assertCount( 1, $result );
$this->assertEquals( 'question?mark', $result[0]->name );

// Test GLOB case sensitivity with LIKE and GLOB
$result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name GLOB 'first';" );
$this->assertCount( 1, $result ); // Should only match 'first'
// Test escaping of "?" with no matches
$result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'f?rst'" );
$this->assertCount( 0, $result );

$result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name GLOB 'FIRST';" );
$this->assertCount( 1, $result ); // Should only match 'FIRST'
// Test escaping of character class
$result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY '[f]irst'" );
$this->assertCount( 0, $result );

// Test NULL comparison with LIKE BINARY
$result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'first';" );
$this->assertCount( 1, $result );
$this->assertEquals( 'first', $result[0]->name );

$result = $this->assertQuery( 'SELECT * FROM _tmp_table WHERE name LIKE BINARY NULL;' );
$this->assertCount( 0, $result ); // NULL comparison should return no results
// Test NULL
$result = $this->assertQuery( 'SELECT * FROM _tmp_table WHERE name LIKE BINARY NULL' );
$this->assertCount( 0, $result );

// Test pattern with special characters using LIKE BINARY
$result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY '%special%';" );
$result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY '%special%'" );
$this->assertCount( 4, $result );
$this->assertEquals( '%special%', $result[0]->name );
$this->assertEquals( 'special%chars', $result[1]->name );
$this->assertEquals( 'special_chars', $result[2]->name );
$this->assertEquals( 'specialchars', $result[3]->name );
$this->assertEquals( 'special\chars', $result[3]->name );

// Test escaping - "\t" is a tab character
$result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'firs\\t'" );
$this->assertCount( 0, $result );

// Test escaping - "\\t" is "t" (input resolves to "\t", which LIKE resolves to "t")
$result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'firs\\\\t'" );
$this->assertCount( 1, $result );
$this->assertEquals( 'first', $result[0]->name );

// Test escaping - "\%" is a "%" literal
$result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'special\\%chars'" );
$this->assertCount( 1, $result );
$this->assertEquals( 'special%chars', $result[0]->name );

// Test escaping - "\\%" is also a "%" literal
$result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'special\\\\%chars'" );
$this->assertCount( 1, $result );
$this->assertEquals( 'special%chars', $result[0]->name );

// Test escaping - "\\\%" is "\" and a wildcard
$result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'special\\\\\\%chars'" );
$this->assertCount( 0, $result );

// Test LIKE without BINARY
$result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE 'FIRST'" );
$this->assertCount( 2, $result ); // Should match both 'first' and 'FIRST'
}

public function testOnConflictReplace() {
Expand Down
52 changes: 52 additions & 0 deletions wp-includes/sqlite-ast/class-wp-sqlite-driver.php
Original file line number Diff line number Diff line change
Expand Up @@ -1313,6 +1313,14 @@ private function translate( $ast ) {
throw $this->not_supported_exception(
sprintf( 'data type: %s', $child->value )
);
case 'predicateOperations':
$token = $ast->get_child_token();
if ( WP_MySQL_Lexer::LIKE_SYMBOL === $token->id ) {
return $this->translate_like( $ast );
} else if ( WP_MySQL_Lexer::REGEXP_SYMBOL === $token->id ) {
return $this->translate_regexp_functions( $ast );
}
return $this->translate_sequence( $ast->get_children() );
case 'systemVariable':
// @TODO: Emulate some system variables, or use reasonable defaults.
// See: https://dev.mysql.com/doc/refman/8.4/en/server-system-variable-reference.html
Expand All @@ -1336,6 +1344,43 @@ private function translate( $ast ) {
}
}

private function translate_regexp_functions( WP_Parser_Node $node ): string {
$tokens = $node->get_descendant_tokens();
$is_binary = isset($tokens[1]) && WP_MySQL_Lexer::BINARY_SYMBOL === $tokens[1]->id;

Check failure on line 1349 in wp-includes/sqlite-ast/class-wp-sqlite-driver.php

View workflow job for this annotation

GitHub Actions / Check code style

Expected 1 spaces after opening parenthesis; 0 found

Check failure on line 1349 in wp-includes/sqlite-ast/class-wp-sqlite-driver.php

View workflow job for this annotation

GitHub Actions / Check code style

Expected 1 spaces before closing parenthesis; 0 found

/*
* If the query says REGEXP BINARY, the comparison is byte-by-byte
* and letter casing matters – lowercase and uppercase letters are
* represented using different byte codes.
*
* The REGEXP function can't be easily made to accept two
* parameters, so we'll have to use a hack to get around this.
*
* If the first character of the pattern is a null byte, we'll
* remove it and make the comparison case-sensitive. This should
* be reasonably safe since PHP does not allow null bytes in
* regular expressions anyway.
*/
if ( true === $is_binary ) {
return 'REGEXP CHAR(0) || ' . $this->translate( $node->get_child_node() );
}
return 'REGEXP ' . $this->translate( $node->get_child_node() );
}

private function translate_like(WP_Parser_Node $node ): string {

Check failure on line 1370 in wp-includes/sqlite-ast/class-wp-sqlite-driver.php

View workflow job for this annotation

GitHub Actions / Check code style

Expected 1 spaces after opening parenthesis; 0 found
$tokens = $node->get_descendant_tokens();
$is_binary = isset($tokens[1]) && WP_MySQL_Lexer::BINARY_SYMBOL === $tokens[1]->id;

Check failure on line 1372 in wp-includes/sqlite-ast/class-wp-sqlite-driver.php

View workflow job for this annotation

GitHub Actions / Check code style

Expected 1 spaces after opening parenthesis; 0 found

Check failure on line 1372 in wp-includes/sqlite-ast/class-wp-sqlite-driver.php

View workflow job for this annotation

GitHub Actions / Check code style

Expected 1 spaces before closing parenthesis; 0 found

if ( true === $is_binary ) {
$children = $node->get_children();
return sprintf(
"GLOB _helper_like_to_glob_pattern(%s)",

Check failure on line 1377 in wp-includes/sqlite-ast/class-wp-sqlite-driver.php

View workflow job for this annotation

GitHub Actions / Check code style

String "GLOB _helper_like_to_glob_pattern(%s)" does not require double quotes; use single quotes instead
$this->translate( $children[1] )
);
}
return $this->translate_sequence( $node->get_children() );
}

private function translate_token( WP_MySQL_Token $token ) {
switch ( $token->id ) {
case WP_MySQL_Lexer::EOF:
Expand All @@ -1346,6 +1391,13 @@ private function translate_token( WP_MySQL_Token $token ) {
return '"' . trim( $token->value, '`"' ) . '"';
case WP_MySQL_Lexer::AUTO_INCREMENT_SYMBOL:
return 'AUTOINCREMENT';
case WP_MySQL_Lexer::BINARY_SYMBOL:
/*
* There is no "BINARY expr" equivalent in SQLite. We can look for
* the BINARY keyword in particular cases (with REGEXP, LIKE, etc.)
* and then remove it from the translated output here.
*/
return null;
default:
return $token->value;
}
Expand Down
72 changes: 72 additions & 0 deletions wp-includes/sqlite/class-wp-sqlite-pdo-user-defined-functions.php
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,8 @@ public function __construct( $pdo ) {
'utc_time' => 'utc_time',
'utc_timestamp' => 'utc_timestamp',
'version' => 'version',

'_helper_like_to_glob_pattern' => '_helper_like_to_glob_pattern',
);

/**
Expand Down Expand Up @@ -759,4 +761,74 @@ public function utc_timestamp() {
public function version() {
return '5.5';
}

/**
* A helper to covert LIKE pattern to a GLOB pattern for "LIKE BINARY" support.
*
* To convert LIKE pattern to a GLOB pattern, we need to apply the following
* steps, while exactly maintaining the listed order:
*
* 1. Escape "]" as "[]]" to avoid interpreting "[...]" as a character class.
* 2. Escape "*" as "[*]" (must be after 1 to avoid being escaped).
* 3. Escape "?" as "[?]" (must be after 1 to avoid being escaped).
* 4. Unescape "\\" to "\" (must be before .
* 5. Replace "%" with "*" (when not escaped by "\").
* 6. Replace "_" with "?" (when not escaped by "\").
* 7. Unescape "\%" to "%".
* 8. Unescape "\_" to "_".
*
* @TODO: Unescaping backshlashes for a MySQL LIKE is actually more complex
* due to a bug: https://bugs.mysql.com/bug.php?id=84118
* We can consider implementing compatibility with the bug, but it
* affects all LIKE patterns, not just LIKE BINARY.
*
* @param string $pattern
* @return string
*/
public function _helper_like_to_glob_pattern( $pattern ) {
if ( null === $pattern ) {
return null;
}

$pattern = str_replace( ']', '[]]', $pattern );
$pattern = str_replace( '*', '[*]', $pattern );
$pattern = str_replace( '?', '[?]', $pattern );

$glob_pattern = '';
for ( $i = 0; $i < strlen( $pattern ); $i += 1 ) {
$byte1 = $pattern[ $i ];
if ( '\\' === $byte1 ) {
// Add the escape character.
$glob_pattern .= $byte1;

// Special case: "\\%" and "\\_" are equivalent to "\%" and "\_".
// In such case, we need to skip the extra backslash.
$byte2 = $pattern[ $i + 1 ] ?? null;
$byte3 = $pattern[ $i + 2 ] ?? null;
if ( '\\' === $byte2 && ( '%' === $byte3 || '_' === $byte3 ) ) {
$glob_pattern .= $byte3;
$i += 2;
continue;
}

// We're in an escape sequence. Add the next character as it is.
$glob_pattern .= $byte2;
$i += 1;
} elseif ( '%' === $byte1 ) {
$glob_pattern .= '*';
} elseif ( '_' === $byte1 ) {
$glob_pattern .= '?';
} else {
$glob_pattern .= $byte1;
}
}

// 1. Unescape C-style escape sequences.
$glob_pattern = stripcslashes($glob_pattern);

Check failure on line 827 in wp-includes/sqlite/class-wp-sqlite-pdo-user-defined-functions.php

View workflow job for this annotation

GitHub Actions / Check code style

Expected 1 spaces after opening parenthesis; 0 found

Check failure on line 827 in wp-includes/sqlite/class-wp-sqlite-pdo-user-defined-functions.php

View workflow job for this annotation

GitHub Actions / Check code style

Expected 1 spaces before closing parenthesis; 0 found

// 2. Unescape LIKE escape sequences.
$glob_pattern = preg_replace('/\\\\(.)/', '$1', $glob_pattern);

Check failure on line 830 in wp-includes/sqlite/class-wp-sqlite-pdo-user-defined-functions.php

View workflow job for this annotation

GitHub Actions / Check code style

Expected 1 spaces after opening parenthesis; 0 found

Check failure on line 830 in wp-includes/sqlite/class-wp-sqlite-pdo-user-defined-functions.php

View workflow job for this annotation

GitHub Actions / Check code style

Expected 1 spaces before closing parenthesis; 0 found

return $glob_pattern;
}
}

0 comments on commit 68eb288

Please sign in to comment.