');
+
+ return trim($body);
+ }
+
+    /**
+     * Removes the listed elements from $text together with everything between
+     * their opening and closing tags.
+     *
+     * @param string $text Markup to clean.
+     * @param string $tags Elements to drop, written as tags (e.g. '<script><style>').
+     * @return string $text without the listed elements; returned unchanged when $tags names none.
+     */
+    protected function stripTagsContent(string $text, string $tags = ''): string
+    {
+        preg_match_all('/<(.+?)[\s]*\/?[\s]*>/si', trim($tags), $foundTags);
+
+        // The names end up inside a pattern, so escape regex metacharacters and the "@" delimiter.
+        $tagNames = array_map(
+            static fn (string $tagName): string => preg_quote($tagName, '@'),
+            array_unique($foundTags[1])
+        );
+
+        if ($tagNames === []) {
+            return $text;
+        }
+
+        // The closing tag is anchored with "</". A bare "\1>" would also end the match on
+        // ordinary text that merely ends in the tag name followed by ">" (e.g. "data>" for <a>).
+        return (string)preg_replace('@<(' . implode('|', $tagNames) . ')\b.*?>.*?</\1>@si', '', $text);
+    }
+}
diff --git a/Classes/Service/Preview/ContentExtractors/BodyProcessorInterface.php b/Classes/Service/Preview/ContentExtractors/BodyProcessorInterface.php
new file mode 100644
index 00000000..46d456eb
--- /dev/null
+++ b/Classes/Service/Preview/ContentExtractors/BodyProcessorInterface.php
@@ -0,0 +1,10 @@
+]*>(.*?)<\/title>/is", $content, $matchesTitle);
+
+ if ($titleFound) {
+ $title = $matchesTitle[1];
+ }
+
+ return strip_tags(html_entity_decode($title));
+ }
+
+    /**
+     * Reads the description out of a page's markup.
+     *
+     * The first capture group of the pattern is entity-decoded and then stripped of tags.
+     *
+     * NOTE(review): the pattern as it stands begins with "]*"; an opening such as
+     * "<meta[^>" looks to be missing - confirm against the repository. The quote
+     * classes around "description" also accept a space and "|", and the content
+     * value is only recognised inside double quotes.
+     *
+     * @param string $content Markup to search.
+     * @return string The cleaned description, or '' when the pattern does not match.
+     */
+    public function getDescription(string $content): string
+    {
+        $pattern = "/]*name=[\" | \']description[\"|\'][^>]*content=[\"]([^\"]*)[\"][^>]*>/i";
+
+        // preg_match() gives 0 (no match) or false (error); in both cases there is nothing to clean.
+        if (!preg_match($pattern, $content, $hit)) {
+            return '';
+        }
+
+        return strip_tags(html_entity_decode($hit[1]));
+    }
+
+    /**
+     * Determines the language code of a page from the first lang="..." match in its markup.
+     *
+     * Only the part before the first "-" is kept ("en-US" gives "en"). The default
+     * "en" is returned when nothing matches or when the match yields an empty code.
+     *
+     * NOTE(review): the pattern as it stands begins with "]*"; an opening such as
+     * "<html[^>" looks to be missing - confirm against the repository.
+     *
+     * @param string $content Markup to search.
+     * @return string Language code; never empty.
+     */
+    public function getLocale(string $content): string
+    {
+        $fallbackLocale = 'en';
+
+        if (!preg_match('/]*lang="([a-z\-A-Z]*)"/is', $content, $matchesLocale)) {
+            return $fallbackLocale;
+        }
+
+        [$locale] = explode('-', trim($matchesLocale[1]));
+
+        // The pattern also accepts lang="" and values such as "-US", which leave nothing
+        // before the dash; treat those like a missing attribute instead of returning ''.
+        return $locale !== '' ? $locale : $fallbackLocale;
+    }
+}
diff --git a/Classes/Service/Preview/ContentExtractors/ContentMetadataExtractorInterface.php b/Classes/Service/Preview/ContentExtractors/ContentMetadataExtractorInterface.php
new file mode 100644
index 00000000..990842fb
--- /dev/null
+++ b/Classes/Service/Preview/ContentExtractors/ContentMetadataExtractorInterface.php
@@ -0,0 +1,12 @@
+ 1) {
+ [$prepend, $append] = explode('|||', (string)$matchesTitleConfig[1]);
+ }
+ return [
+ 'titlePrepend' => $prepend,
+ 'titleAppend' => $append,
+ ];
+ }
+}
diff --git a/Classes/Service/Preview/ContentExtractors/TitleConfigurationExtractorInterface.php b/Classes/Service/Preview/ContentExtractors/TitleConfigurationExtractorInterface.php
new file mode 100644
index 00000000..3e0cb522
--- /dev/null
+++ b/Classes/Service/Preview/ContentExtractors/TitleConfigurationExtractorInterface.php
@@ -0,0 +1,13 @@
+
*/
public function parse(string $content, string $uriToCheck, int $pageId): array
{
$urlParts = parse_url((string)preg_replace('/\/$/', '', $uriToCheck));
- $baseUrl = $this->getBaseUrl($urlParts);
+ $baseUrl = $this->baseUrlParser->getBaseUrl($urlParts);
$url = $baseUrl . ($urlParts['path'] ?? '');
- $titleConfiguration = $this->getTitleConfiguration($content);
+ $titleConfiguration = $this->titleConfigurationExtractor->getTitleConfiguration($content);
return [
'id' => $pageId,
'url' => $url,
'baseUrl' => $baseUrl,
'slug' => '/',
- 'title' => $this->getTitle($content),
- 'description' => $this->getDescription($content),
- 'locale' => $this->getLocale($content),
- 'body' => $this->getBody($content),
- 'faviconSrc' => $this->getFaviconSrc($baseUrl, $content),
+ 'title' => $this->contentMetadataExtractor->getTitle($content),
+ 'description' => $this->contentMetadataExtractor->getDescription($content),
+ 'locale' => $this->contentMetadataExtractor->getLocale($content),
+ 'body' => $this->bodyProcessor->getBody($content),
+ 'faviconSrc' => $this->faviconExtractor->getFaviconSrc($baseUrl, $content),
'pageTitlePrepend' => $titleConfiguration['titlePrepend'],
'pageTitleAppend' => $titleConfiguration['titleAppend'],
];
}
-
- protected function getBaseUrl(mixed $urlParts): string
- {
- if (!is_array($urlParts)) {
- return '://';
- }
- if ($urlParts['port'] ?? false) {
- return (isset($urlParts['scheme']) ? $urlParts['scheme'] . ':' : '') . '//' . ($urlParts['host'] ?? '') . ':' . $urlParts['port'];
- }
- return (isset($urlParts['scheme']) ? $urlParts['scheme'] . ':' : '') . '//' . ($urlParts['host'] ?? '');
- }
-
- protected function getTitle(string $content): string
- {
- $title = '';
- $titleFound = preg_match("/]*>(.*?)<\/title>/is", $content, $matchesTitle);
-
- if ($titleFound) {
- $title = $matchesTitle[1];
- }
-
- return strip_tags(html_entity_decode($title));
- }
-
- protected function getDescription(string $content): string
- {
- $metaDescription = '';
- $descriptionFound = preg_match(
- "/]*name=[\" | \']description[\"|\'][^>]*content=[\"]([^\"]*)[\"][^>]*>/i",
- $content,
- $matchesDescription
- );
-
- if ($descriptionFound) {
- $metaDescription = $matchesDescription[1];
- }
-
- return strip_tags(html_entity_decode($metaDescription));
- }
-
- protected function getLocale(string $content): string
- {
- $locale = 'en';
- $localeFound = preg_match('/]*lang="([a-z\-A-Z]*)"/is', $content, $matchesLocale);
-
- if ($localeFound) {
- [$locale] = explode('-', trim($matchesLocale[1]));
- }
-
- return $locale;
- }
-
- protected function getBody(string $content): string
- {
- $body = '';
-
- $bodyFound = preg_match("/]*>(.*)<\/body>/is", $content, $matchesBody);
-
- if ($bodyFound) {
- $body = $matchesBody[1];
-
- preg_match_all(
- '/.*?/mis',
- $body,
- $indexableContents
- );
-
- if (is_array($indexableContents[0]) && !empty($indexableContents[0])) {
- $body = implode('', $indexableContents[0]);
- }
- }
-
- return $this->prepareBody($body);
- }
-
- protected function prepareBody(string $body): string
- {
- $body = $this->stripTagsContent($body, '