From af3a695bcfb19b8e70c57c51f3d16d822062f7ac Mon Sep 17 00:00:00 2001 From: Jaroslav Hanslik Date: Thu, 11 Feb 2016 22:10:45 +0100 Subject: [PATCH] Improved Css::convertStyleToInline() --- Jyxo/Css.php | 480 +++++++++++++----- tests/Jyxo/CssTest.php | 2 +- tests/files/css/convertstyle-1-expected.html | 366 ++++++++++++- tests/files/css/convertstyle-1.html | 354 ++++++++++++- tests/files/css/convertstyle-10-expected.html | 24 + tests/files/css/convertstyle-10.html | 24 + tests/files/css/convertstyle-4-expected.html | 80 +-- tests/files/css/convertstyle-4.html | 2 +- tests/files/css/convertstyle-5-expected.html | 42 +- tests/files/css/convertstyle-5.html | 42 +- tests/files/css/convertstyle-6-expected.html | 378 +++++++------- tests/files/css/convertstyle-6.html | 10 +- tests/files/css/convertstyle-7-expected.html | 15 + tests/files/css/convertstyle-7.html | 15 + tests/files/css/convertstyle-8-expected.html | 14 + tests/files/css/convertstyle-8.html | 14 + tests/files/css/convertstyle-9-expected.html | 15 + tests/files/css/convertstyle-9.html | 15 + 18 files changed, 1526 insertions(+), 366 deletions(-) create mode 100644 tests/files/css/convertstyle-10-expected.html create mode 100644 tests/files/css/convertstyle-10.html create mode 100644 tests/files/css/convertstyle-7-expected.html create mode 100644 tests/files/css/convertstyle-7.html create mode 100644 tests/files/css/convertstyle-8-expected.html create mode 100644 tests/files/css/convertstyle-8.html create mode 100644 tests/files/css/convertstyle-9-expected.html create mode 100644 tests/files/css/convertstyle-9.html diff --git a/Jyxo/Css.php b/Jyxo/Css.php index b2f455a..d208a2f 100644 --- a/Jyxo/Css.php +++ b/Jyxo/Css.php @@ -129,7 +129,18 @@ public static function minify(string $css): string * * a.icon.small {...} * * a#remove.icon.small {...} * * a img {...} + * * a > img {...} + * * li + li {...} + * * a#remove.icon.small img {...} * * h1, h2 {...} + * * p a:first-child {...} + * * p a:last-child {...} + * * p a:nth-child(...) {...} + * * p a:nth-last-child(...) {...} + * * p a:first-of-type {...} + * * p a:last-of-type {...} + * * p a:nth-of-type(...) {...} + * * p a:nth-last-of-type(...) {...} * * a:link {...} - converts to a {...} * * @param string $html Processed HTML source @@ -146,131 +157,338 @@ public static function convertStyleToInline(string $html): string } // Parse the HTML source - preg_match_all('~(?:<\\w+[^>]*(?: /)?>)|(?:)|(?:]+>)|(?:[^<]+)~', $html, $matches); + preg_match_all('~(?:<\\w+[^>]*(?:\\s*/)?>)|(?:)|(?:)|(?:)|(?:]+>)|(?:[^<]+)~s', $html, $matches); + + $level = 0; $path = []; - $html = ''; - $inStyle = false; - foreach ($matches[0] as $htmlPart) { - // Skip ~s', $html, $styles)) { return []; @@ -292,8 +513,9 @@ private static function parseStyle(string $html): array $cssList = []; foreach ($styles[1] as $style) { - // Remove CDATA and HTML comments + // Remove CDATA and comments $style = str_replace(['', ''], '', $style); + $style = preg_replace('~/\*.*\*/~sU', '', $style); // Optimize the parsed definitions $style = self::minify($style); @@ -302,15 +524,15 @@ private static function parseStyle(string $html): array continue; } - // Replace quotes with apostrophes - $style = str_replace('"', "'", $style); + // Replace double quotes with single quotes + $style = strtr($style, ['"' => "'", "\\'" => "'"]); // Remove the last empty part $definitions = explode('}', $style, -1); foreach ($definitions as $definition) { // Allows only supported selectors with valid rules - if (!preg_match('~^(?:(?:(?:[\-_\\w#.:]+)\\s?)+,?)+{(?:[-\\w]+:[^;]+[;]?)+$~', $definition)) { + if (!preg_match('~^(?:(?:(?:(?:[#.]?[-\\w]+)+(?::[-\\w\(\)+]+)?)[\\s>+]*)+,?)+{(?:[-\\w]+:[^;]+[;]?)+$~', $definition)) { continue; } @@ -320,12 +542,37 @@ private static function parseStyle(string $html): array $part = str_replace(':link', '', $part); $parsedSelector = []; - foreach (explode(' ', $part) as $selectorPart) { - // If no tag name was given use a fake one - if (('.' === $selectorPart[0]) || ('#' === $selectorPart[0])) { - $selectorPart = ' ' . $selectorPart; + $type = null; + + if (!preg_match_all('~((?:[#.]?[-\\w]+)+(?::[-\\w\(\)+]+)?)|([+>\\s])~', $part, $matches, PREG_SET_ORDER)) { + continue; + } + + foreach ($matches as $match) { + if (isset($match[2])) { + switch ($match[2]) { + case '+': + $type = 'sibling'; + break; + case '>': + $type = 'child'; + break; + default: + $type = 'descendant'; + break; + } + continue; } + $selectorPart = $match[1]; + + if (false !== strpos($selectorPart, ':')) { + list($selectorPart, $pseudoClass) = explode(':', $selectorPart, 2); + // There can be multiple pseudo-classes + $pseudoClass = explode(':', $pseudoClass); + } else { + $pseudoClass = []; + } if (false !== strpos($selectorPart, '.')) { list($selectorPart, $class) = explode('.', $selectorPart, 2); // There can be multiple classes @@ -336,14 +583,19 @@ private static function parseStyle(string $html): array if (false !== strpos($selectorPart, '#')) { list($selectorPart, $id) = explode('#', $selectorPart, 2); } else { - $id = ''; + $id = null; + } + $tag = strtolower(trim($selectorPart)); + if ($tag === '') { + $tag = null; } - $tag = trim($selectorPart); $parsedSelector[] = [ - 'tag' => strtolower($tag), + 'type' => $type, + 'tag' => $tag, 'id' => $id, - 'class' => $class + 'class' => $class, + 'pseudoClass' => $pseudoClass, ]; } diff --git a/tests/Jyxo/CssTest.php b/tests/Jyxo/CssTest.php index e170949..7efe91d 100644 --- a/tests/Jyxo/CssTest.php +++ b/tests/Jyxo/CssTest.php @@ -172,7 +172,7 @@ public function testMinify() */ public function testConvertStyleToInline() { - $testCount = 6; + $testCount = 10; for ($i = 1; $i <= $testCount; $i++) { $this->assertStringEqualsFile( diff --git a/tests/files/css/convertstyle-1-expected.html b/tests/files/css/convertstyle-1-expected.html index 07cac12..23db705 100644 --- a/tests/files/css/convertstyle-1-expected.html +++ b/tests/files/css/convertstyle-1-expected.html @@ -1,9 +1,15 @@ + -

H1

+

H1

- Link - - - + Link + + + +

-

H2

-

Text text text

+

H2

+

+ First + Second + Third +

+

+ First + Second + Third +

+
+ 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 + 11 + 12 + 13 + 14 + 15 + 16 + 17 + 18 + 19 + 20 +
+
+ 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 + 11 + 12 + 13 + 14 + 15 + 16 + 17 + 18 + 19 + 20 +
+
+ 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 + 11 + 12 + 13 + 14 + 15 + 16 + 17 + 18 + 19 + 20 +
+
+ 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 + 11 + 12 + 13 + 14 + 15 + 16 + 17 + 18 + 19 + 20 +
+ +
+ Span 1 + Strong 1 + Span 2 + Strong 2 +
+ +
+ Span 1 + Strong 1 + Span 2 + Strong 2 +
+ +
+ Span 1 + Strong 1 + Span 2 + Strong 2 + Span 3 + Strong 3 +
+ +
+ 1 + 1 + 2 + 2 + 3 + 3 + 4 + 4 + 5 + 5 + 6 + 6 + 7 + 7 + 8 + 8 + 9 + 9 + 10 + 10 + 11 + 11 + 12 + 12 + 13 + 13 + 14 + 14 + 15 + 15 + 16 + 16 + 17 + 17 + 18 + 18 + 19 + 19 + 20 + 20 +
+ +
+ 1 + 1 + 2 + 2 + 3 + 3 + 4 + 4 + 5 + 5 + 6 + 6 + 7 + 7 + 8 + 8 + 9 + 9 + 10 + 10 + 11 + 11 + 12 + 12 + 13 + 13 + 14 + 14 + 15 + 15 + 16 + 16 + 17 + 17 + 18 + 18 + 19 + 19 + 20 + 20 +
+ +
+ 1 + 1 + 2 + 2 + 3 + 3 + 4 + 4 + 5 + 5 + 6 + 6 + 7 + 7 + 8 + 8 + 9 + 9 + 10 + 10 + 11 + 11 + 12 + 12 + 13 + 13 + 14 + 14 + 15 + 15 + 16 + 16 + 17 + 17 + 18 + 18 + 19 + 19 + 20 + 20 +
+ +
+ 1 + 1 + 2 + 2 + 3 + 3 + 4 + 4 + 5 + 5 + 6 + 6 + 7 + 7 + 8 + 8 + 9 + 9 + 10 + 10 + 11 + 11 + 12 + 12 + 13 + 13 + 14 + 14 + 15 + 15 + 16 + 16 + 17 + 17 + 18 + 18 + 19 + 19 + 20 + 20 +
- \ No newline at end of file + diff --git a/tests/files/css/convertstyle-1.html b/tests/files/css/convertstyle-1.html index fb85b50..362235c 100644 --- a/tests/files/css/convertstyle-1.html +++ b/tests/files/css/convertstyle-1.html @@ -1,9 +1,15 @@ +

H1

@@ -28,8 +81,301 @@

H1

+

H2

-

Text text text

+

+ First + Second + Third +

+

+ First + Second + Third +

+
+ 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 + 11 + 12 + 13 + 14 + 15 + 16 + 17 + 18 + 19 + 20 +
+
+ 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 + 11 + 12 + 13 + 14 + 15 + 16 + 17 + 18 + 19 + 20 +
+
+ 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 + 11 + 12 + 13 + 14 + 15 + 16 + 17 + 18 + 19 + 20 +
+
+ 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 + 11 + 12 + 13 + 14 + 15 + 16 + 17 + 18 + 19 + 20 +
+ +
+ Span 1 + Strong 1 + Span 2 + Strong 2 +
+ +
+ Span 1 + Strong 1 + Span 2 + Strong 2 +
+ +
+ Span 1 + Strong 1 + Span 2 + Strong 2 + Span 3 + Strong 3 +
+ +
+ 1 + 1 + 2 + 2 + 3 + 3 + 4 + 4 + 5 + 5 + 6 + 6 + 7 + 7 + 8 + 8 + 9 + 9 + 10 + 10 + 11 + 11 + 12 + 12 + 13 + 13 + 14 + 14 + 15 + 15 + 16 + 16 + 17 + 17 + 18 + 18 + 19 + 19 + 20 + 20 +
+ +
+ 1 + 1 + 2 + 2 + 3 + 3 + 4 + 4 + 5 + 5 + 6 + 6 + 7 + 7 + 8 + 8 + 9 + 9 + 10 + 10 + 11 + 11 + 12 + 12 + 13 + 13 + 14 + 14 + 15 + 15 + 16 + 16 + 17 + 17 + 18 + 18 + 19 + 19 + 20 + 20 +
+ +
+ 1 + 1 + 2 + 2 + 3 + 3 + 4 + 4 + 5 + 5 + 6 + 6 + 7 + 7 + 8 + 8 + 9 + 9 + 10 + 10 + 11 + 11 + 12 + 12 + 13 + 13 + 14 + 14 + 15 + 15 + 16 + 16 + 17 + 17 + 18 + 18 + 19 + 19 + 20 + 20 +
+ +
+ 1 + 1 + 2 + 2 + 3 + 3 + 4 + 4 + 5 + 5 + 6 + 6 + 7 + 7 + 8 + 8 + 9 + 9 + 10 + 10 + 11 + 11 + 12 + 12 + 13 + 13 + 14 + 14 + 15 + 15 + 16 + 16 + 17 + 17 + 18 + 18 + 19 + 19 + 20 + 20 +
- \ No newline at end of file + diff --git a/tests/files/css/convertstyle-10-expected.html b/tests/files/css/convertstyle-10-expected.html new file mode 100644 index 0000000..f1ba7fd --- /dev/null +++ b/tests/files/css/convertstyle-10-expected.html @@ -0,0 +1,24 @@ + + + + + +
1
+
2
+
3 +
31
+ XX +
32
+ XX +
33 +
331
+
332
+
333
+
+
+ + diff --git a/tests/files/css/convertstyle-10.html b/tests/files/css/convertstyle-10.html new file mode 100644 index 0000000..a73d11a --- /dev/null +++ b/tests/files/css/convertstyle-10.html @@ -0,0 +1,24 @@ + + + + + +
1
+
2
+
3 +
31
+ XX +
32
+ XX +
33 +
331
+
332
+
333
+
+
+ + diff --git a/tests/files/css/convertstyle-4-expected.html b/tests/files/css/convertstyle-4-expected.html index cdfd193..2337ac0 100644 --- a/tests/files/css/convertstyle-4-expected.html +++ b/tests/files/css/convertstyle-4-expected.html @@ -2,8 +2,8 @@ - - + +
Get i.t going ea_sy and sw,ift wit+h VP-Rx Oil gel.

© 2008 Microsofts | Unsubscuribe | Morye Newsletters | Pyrivacy

Micrrowsoft Corwporration, One Micwrrosoft Way, Redmond, WA 78345
\ No newline at end of file + + + + + + + +
+ Get i.t going ea_sy and sw,ift wit+h VP-Rx Oil gel.

© 2008 Microsofts | Unsubscuribe | Morye Newsletters | Pyrivacy

Micrrowsoft Corwporration, One Micwrrosoft Way, Redmond, WA 78345 +
+ + diff --git a/tests/files/css/convertstyle-5.html b/tests/files/css/convertstyle-5.html index 40a4d48..2acae78 100644 --- a/tests/files/css/convertstyle-5.html +++ b/tests/files/css/convertstyle-5.html @@ -1 +1,41 @@ -
Get i.t going ea_sy and sw,ift wit+h VP-Rx Oil gel.

© 2008 Microsofts | Unsubscuribe | Morye Newsletters | Pyrivacy

Micrrowsoft Corwporration, One Micwrrosoft Way, Redmond, WA 78345
\ No newline at end of file + + + + + + + +
+ Get i.t going ea_sy and sw,ift wit+h VP-Rx Oil gel.

© 2008 Microsofts | Unsubscuribe | Morye Newsletters | Pyrivacy

Micrrowsoft Corwporration, One Micwrrosoft Way, Redmond, WA 78345 +
+ + diff --git a/tests/files/css/convertstyle-6-expected.html b/tests/files/css/convertstyle-6-expected.html index 8432bee..9c0d277 100644 --- a/tests/files/css/convertstyle-6-expected.html +++ b/tests/files/css/convertstyle-6-expected.html @@ -200,105 +200,105 @@ --> - -
+ +
-
- TN.cz - Nova.cz - Blog.cz - Galerie.cz - Forum.cz - jyxo.cz - vybereme.cz + -

Denní přehled novinek - Úterý 22. prosince

+

Denní přehled novinek - Úterý 22. prosince

-