Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enhancement: better input validation #7668

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 23 additions & 2 deletions src/Helpers/Utils.php
Original file line number Diff line number Diff line change
Expand Up @@ -129,9 +129,22 @@ public static function removeBackslashes($data)
return $data;
}

/**
 * Decode a URL-encoded string repeatedly until another decoding pass no longer changes it.
 *
 * This undoes double (or deeper) URL-encoding, so a value such as "%253A" ends up as ":".
 * Note that urldecode() also converts "+" into a space.
 *
 * @unreleased
 *
 * @param string $data The possibly (multi-)URL-encoded string.
 *
 * @return string The string once decoding it again would leave it unchanged.
 */
public static function recursiveUrlDecode(string $data): string
{
    // Iterate instead of self-recursing so the call depth does not grow with
    // the number of encoding layers present in caller-supplied input.
    do {
        $previous = $data;
        $data = urldecode($previous);
    } while ($data !== $previous);

    return $data;
}

/**
* The regular expression attempts to capture the basic structure of all data types that can be serialized by PHP.
*
* @unreleased Decode the string and remove any character not allowed in a serialized string
* @since 3.19.3 Support all types of serialized data instead of only objects and arrays
* @since 3.17.2
*/
Expand All @@ -141,9 +154,17 @@ public static function containsSerializedDataRegex($data): bool
return false;
}

$data = self::recursiveUrlDecode($data);

/**
* Strip every character that is not a letter (a-zA-Z), a digit (0-9), or one of the characters
* { } : ; " ' . [ ] ( ) , so that only characters from this allow-list remain in the string.
*/
$data = preg_replace('/[^a-zA-Z0-9:{};"\'.\[\](),]/', '', $data);

$pattern = '/
(a:\d+:\{.*\}) | # Matches arrays (e.g: a:2:{i:0;s:5:"hello";i:1;i:42;})
(O:\d+:"[^"]+":\{.*\}) | # Matches objects (e.g: O:8:"stdClass":1:{s:4:"name";s:5:"James";})
(a:\d+:\{.*}) | # Matches arrays (e.g: a:2:{i:0;s:5:"hello";i:1;i:42;})
(O:\d+:"[^"]+":\{.*}) | # Matches objects (e.g: O:8:"stdClass":1:{s:4:"name";s:5:"James";})
(s:\d+:"[^"]*";) | # Matches strings (e.g: s:5:"hello";)
(i:\d+;) | # Matches integers (e.g: i:42;)
(b:[01];) | # Matches booleans (e.g: b:1; or b:0;)
Expand Down
47 changes: 45 additions & 2 deletions tests/Unit/Helpers/UtilsTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -82,15 +82,14 @@ public function testMaybeSafeUnserialize($data, bool $expected)
}

/**
* @unreleased Test encoded strings and strings with special characters
* @since 3.19.3 Test all types of serialized data
* @since 3.17.2
*/
public function serializedDataProvider(): array
{
return [
[serialize('bar'), true],
['\\' . serialize('backslash-bypass'), true],
['\\\\' . serialize('double-backslash-bypass'), true],
['foo', false],
[serialize('qux'), true],
['bar', false],
Expand All @@ -103,6 +102,50 @@ public function serializedDataProvider(): array
['Lorem ipsum b:1; dolor sit amet', true], // boolean
['Lorem ipsum d:3.14; dolor sit amet', true], // float
['Lorem ipsum N; dolor sit amet', true], // NULL
// Strings with special characters (e.g: emojis, spaces, control characters) that are not part of a predefined set of safe characters for serialized data structures (used to try to bypass the validations)
[
// emojis bypass sample
'O😼:8:"stdClass":1:{s😼:4:"name";s😼:5:"James";}',
true,
],
[
// spaces bypass sample
'O :8:"stdClass":1:{s :4:"name";s :5:"James";}',
true,
],
// Bypass with simple methods
[
// backslash
'\\' . serialize('backslash-bypass'),
true,
],
[
// double-backslash
'\\\\' . serialize('double-backslash-bypass'),
true,
],
// Bypass with encoding string method - URL-encoded
[
// Single encode for O:8:"stdClass":1:{s:4:"name";s:5:"James";}
'O%3A8%3A%22stdClass%22%3A1%3A%7Bs%3A4%3A%22name%22%3Bs%3A5%3A%22James%22%3B%7D',
true,
],
[
// Double encode for O:8:"stdClass":1:{s:4:"name";s:5:"James";}
'O%253A8%253A%2522stdClass%2522%253A1%253A%257Bs%253A4%253A%2522name%2522%253Bs%253A5%253A%2522James%2522%253B%257D',
true,
],
// Samples using multiple obfuscation techniques together
[
// Single URL-encoded for O😼:8:"stdClass":1:{s😼:4:"name";s😼:5:"James";}
'O%F0%9F%98%BC%3A8%3A%22stdClass%22%3A1%3A%7Bs%F0%9F%98%BC%3A4%3A%22name%22%3Bs%F0%9F%98%BC%3A5%3A%22James%22%3B%7D',
true,
],
[
// Double URL-encoded for O😼:8:"stdClass":1:{s😼:4:"name";s😼:5:"James";}
'O%25F0%259F%2598%25BC%253A8%253A%2522stdClass%2522%253A1%253A%257Bs%25F0%259F%2598%25BC%253A4%253A%2522name%2522%253Bs%25F0%259F%2598%25BC%253A5%253A%2522James%2522%253B%257D',
true,
],
];
}
}
Loading