Skip to content

Commit

Permalink
[TASK] Split up ContentParser to multiple classes and interfaces
Browse files Browse the repository at this point in the history
  • Loading branch information
RinyVT committed Jan 6, 2025
1 parent af5a9fc commit d27fb22
Show file tree
Hide file tree
Showing 12 changed files with 250 additions and 132 deletions.
19 changes: 19 additions & 0 deletions Classes/Service/Preview/ContentExtractors/BaseUrlParser.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
<?php

declare(strict_types=1);

namespace YoastSeoForTypo3\YoastSeo\Service\Preview\ContentExtractors;

/**
 * Assembles the "scheme://host[:port]" prefix of a URL from its individual components.
 */
class BaseUrlParser implements BaseUrlParserInterface
{
    /**
     * Builds the base URL from the given URL components.
     *
     * @param mixed $urlParts Array that may carry 'scheme', 'host' and 'port' keys
     *                        (presumably the result of parse_url() - verify against the caller)
     * @return string The assembled base URL, or the placeholder '://' when $urlParts is not an array
     */
    public function getBaseUrl(mixed $urlParts): string
    {
        if (!is_array($urlParts)) {
            return '://';
        }

        // A missing scheme yields a protocol-relative "//host" prefix.
        $schemePrefix = isset($urlParts['scheme']) ? $urlParts['scheme'] . ':' : '';
        $baseUrl = $schemePrefix . '//' . ($urlParts['host'] ?? '');

        // The port is only appended when it is present and truthy.
        if ($urlParts['port'] ?? false) {
            $baseUrl .= ':' . $urlParts['port'];
        }

        return $baseUrl;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<?php

declare(strict_types=1);

namespace YoastSeoForTypo3\YoastSeo\Service\Preview\ContentExtractors;

/**
 * Contract for services that turn URL components into a base URL string.
 */
interface BaseUrlParserInterface
{
/**
 * Builds a base URL from the given URL components.
 *
 * @param mixed $urlParts URL components; the accepted shape is defined by the implementation
 *                        (the bundled BaseUrlParser reads the 'scheme', 'host' and 'port' keys of an array)
 * @return string The base URL
 */
public function getBaseUrl(mixed $urlParts): string;
}
52 changes: 52 additions & 0 deletions Classes/Service/Preview/ContentExtractors/BodyProcessor.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
<?php

declare(strict_types=1);

namespace YoastSeoForTypo3\YoastSeo\Service\Preview\ContentExtractors;

/**
 * Reduces a full HTML document to the cleaned-up body markup.
 */
class BodyProcessor implements BodyProcessorInterface
{
    /**
     * Extracts the content of the <body> element and cleans it up.
     *
     * When the body contains sections wrapped in TYPO3SEARCH_begin / TYPO3SEARCH_end
     * comment markers, only those sections (concatenated) are kept.
     *
     * @param string $content Complete HTML document
     * @return string Cleaned body markup; the cleaned empty string when no <body> element is found
     */
    public function getBody(string $content): string
    {
        if (!preg_match("/<body[^>]*>(.*)<\/body>/is", $content, $bodyMatch)) {
            return $this->prepareBody('');
        }

        $markup = $bodyMatch[1];

        preg_match_all(
            '/<!--\s*?TYPO3SEARCH_begin\s*?-->.*?<!--\s*?TYPO3SEARCH_end\s*?-->/mis',
            $markup,
            $searchSections
        );

        // Marked sections replace the full body only if at least one was found.
        if (is_array($searchSections[0]) && $searchSections[0] !== []) {
            $markup = implode('', $searchSections[0]);
        }

        return $this->prepareBody($markup);
    }

    /**
     * Removes script/noscript elements, collapses whitespace and strips every tag
     * except h1-h5, p, a and img.
     *
     * @param string $body Raw body markup
     * @return string Trimmed, cleaned markup
     */
    protected function prepareBody(string $body): string
    {
        $withoutScripts = $this->stripTagsContent($body, '<script><noscript>');
        $collapsed = preg_replace(['/\s?\n\s?/', '/\s{2,}/'], [' ', ' '], $withoutScripts);

        return trim(strip_tags((string)$collapsed, '<h1><h2><h3><h4><h5><p><a><img>'));
    }

    /**
     * Removes the listed elements including everything between their opening and closing tag.
     *
     * @param string $text Markup to clean
     * @param string $tags Elements to remove, written as tags (e.g. '<script><noscript>')
     * @return string Markup without the listed elements; unchanged when $tags names no element
     */
    protected function stripTagsContent(string $text, string $tags = ''): string
    {
        preg_match_all('/<(.+?)[\s]*\/?[\s]*>/si', trim($tags), $tagMatches);
        $tagNames = array_unique($tagMatches[1]);

        if ($tagNames === []) {
            return $text;
        }

        // One alternation over all tag names; \1 ties the closing tag to the opening one.
        $pattern = '@<(' . implode('|', $tagNames) . ')\b.*?>.*?</\1>@si';

        return (string)preg_replace($pattern, '', $text);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<?php

declare(strict_types=1);

namespace YoastSeoForTypo3\YoastSeo\Service\Preview\ContentExtractors;

/**
 * Contract for services that extract the body text from rendered page content.
 */
interface BodyProcessorInterface
{
/**
 * Returns the processed body for the given content.
 *
 * @param string $content Content to process (the bundled BodyProcessor expects a complete HTML document)
 * @return string The processed body
 */
public function getBody(string $content): string;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
<?php

declare(strict_types=1);

namespace YoastSeoForTypo3\YoastSeo\Service\Preview\ContentExtractors;

/**
 * Reads title, meta description and language from a rendered HTML document.
 */
class ContentMetadataExtractor implements ContentMetadataExtractorInterface
{
    /**
     * Returns the text of the first <title> element.
     *
     * @param string $content Complete HTML document
     * @return string Entity-decoded, tag-free title; empty string when no <title> element is found
     */
    public function getTitle(string $content): string
    {
        $title = '';

        if (preg_match("/<title[^>]*>(.*?)<\/title>/is", $content, $matchesTitle)) {
            $title = $matchesTitle[1];
        }

        return strip_tags(html_entity_decode($title));
    }

    /**
     * Returns the content of the <meta name="description"> element.
     *
     * Both attributes may be quoted with single or double quotes; the closing
     * quote has to be the same character as the opening one. The name attribute
     * must precede the content attribute.
     *
     * @param string $content Complete HTML document
     * @return string Entity-decoded, tag-free description; empty string when no such element is found
     */
    public function getDescription(string $content): string
    {
        $metaDescription = '';

        // \1 and \2 force matching quote pairs; the "s" flag lets the value span several lines.
        $descriptionFound = preg_match(
            '/<meta\b[^>]*?\sname\s*=\s*(["\'])description\1[^>]*?\scontent\s*=\s*(["\'])(.*?)\2[^>]*>/is',
            $content,
            $matchesDescription
        );

        if ($descriptionFound) {
            $metaDescription = $matchesDescription[3];
        }

        return strip_tags(html_entity_decode($metaDescription));
    }

    /**
     * Returns the primary language subtag of the lang attribute on the <html> element.
     *
     * @param string $content Complete HTML document
     * @return string Language code (e.g. 'de' for lang="de-DE"); 'en' when the attribute is missing or empty
     */
    public function getLocale(string $content): string
    {
        $locale = 'en';

        if (preg_match('/<html[^>]*lang=(["\'])([a-z\-A-Z]*)\1/is', $content, $matchesLocale)) {
            [$primarySubtag] = explode('-', trim($matchesLocale[2]));

            // An empty lang attribute must not replace the default with an empty locale.
            if ($primarySubtag !== '') {
                $locale = $primarySubtag;
            }
        }

        return $locale;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
<?php

declare(strict_types=1);

namespace YoastSeoForTypo3\YoastSeo\Service\Preview\ContentExtractors;

/**
 * Contract for services that read metadata from rendered page content.
 */
interface ContentMetadataExtractorInterface
{
/**
 * @param string $content Content to inspect
 * @return string The title found in the content
 */
public function getTitle(string $content): string;
/**
 * @param string $content Content to inspect
 * @return string The description found in the content
 */
public function getDescription(string $content): string;
/**
 * @param string $content Content to inspect
 * @return string The locale found in the content
 */
public function getLocale(string $content): string;
}
22 changes: 22 additions & 0 deletions Classes/Service/Preview/ContentExtractors/FaviconExtractor.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
<?php

declare(strict_types=1);

namespace YoastSeoForTypo3\YoastSeo\Service\Preview\ContentExtractors;

/**
 * Determines the favicon URL of a rendered page and checks that it is reachable.
 */
class FaviconExtractor implements FaviconExtractorInterface
{
    /**
     * Returns the favicon URL for the page.
     *
     * The icon is taken from a <link rel="icon"> or <link rel="shortcut icon">
     * element when present, otherwise "<baseUrl>/favicon.ico" is assumed. The
     * resulting URL is requested once; a 404 response yields an empty string.
     *
     * @param string $baseUrl Base URL without trailing slash, e.g. "https://example.org"
     * @param string $content Complete HTML document
     * @return string Favicon URL, or an empty string when the server answers with 404
     */
    public function getFaviconSrc(string $baseUrl, string $content): string
    {
        $faviconSrc = $baseUrl . '/favicon.ico';

        if (preg_match('/<link rel="(?:shortcut )?icon" href="([^"]*)"/i', $content, $matchesFavIcon)) {
            $faviconSrc = $this->resolveAgainstBaseUrl($baseUrl, $matchesFavIcon[1]);
        }

        // Deliberately best-effort: an unreachable host must not break the preview,
        // so the warning emitted by get_headers() on failure is suppressed.
        $responseHeaders = @get_headers($faviconSrc);
        if (is_array($responseHeaders) && $this->getFinalStatusCode($responseHeaders) === 404) {
            return '';
        }

        return $faviconSrc;
    }

    /**
     * Turns the href of the icon link into a URL that can be requested.
     */
    private function resolveAgainstBaseUrl(string $baseUrl, string $href): string
    {
        if ($href === '') {
            return $baseUrl . '/favicon.ico';
        }

        if (str_contains($href, '://')) {
            return $href;
        }

        // Protocol-relative reference: reuse the scheme of the base URL when it has one.
        if (str_starts_with($href, '//')) {
            $scheme = parse_url($baseUrl, PHP_URL_SCHEME);

            return is_string($scheme) ? $scheme . ':' . $href : $href;
        }

        return str_starts_with($href, '/') ? $baseUrl . $href : $baseUrl . '/' . $href;
    }

    /**
     * Returns the status code of the last HTTP status line in the header list.
     *
     * The list may contain several status lines when redirects were followed;
     * the last one belongs to the final response.
     *
     * @param array<int|string, mixed> $responseHeaders Header lines as returned by get_headers()
     */
    private function getFinalStatusCode(array $responseHeaders): ?int
    {
        $statusCode = null;

        foreach ($responseHeaders as $headerLine) {
            if (is_string($headerLine) && preg_match('#^HTTP/\S+\s+(\d{3})\b#', $headerLine, $statusMatch)) {
                $statusCode = (int)$statusMatch[1];
            }
        }

        return $statusCode;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<?php

declare(strict_types=1);

namespace YoastSeoForTypo3\YoastSeo\Service\Preview\ContentExtractors;

/**
 * Contract for services that determine the favicon of a rendered page.
 */
interface FaviconExtractorInterface
{
/**
 * Returns the favicon source for the given page.
 *
 * @param string $baseUrl Base URL of the page
 * @param string $content Content of the page
 * @return string The favicon source (the bundled FaviconExtractor returns an empty string
 *                when the icon URL answers with a 404)
 */
public function getFaviconSrc(string $baseUrl, string $content): string;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
<?php

declare(strict_types=1);

namespace YoastSeoForTypo3\YoastSeo\Service\Preview\ContentExtractors;

/**
 * Reads the title prefix and suffix from the x-yoast-title-config meta element.
 */
class TitleConfigurationExtractor implements TitleConfigurationExtractorInterface
{
    /**
     * Extracts the text to prepend and append to the page title.
     *
     * The meta element carries both parts in its value attribute, separated by
     * "|||". Parts that are not present are returned as empty strings.
     *
     * @param string $content Complete HTML document
     * @return array{titlePrepend: string, titleAppend: string}
     */
    public function getTitleConfiguration(string $content): array
    {
        $prepend = $append = '';

        $configFound = preg_match(
            '/<meta name=\"x-yoast-title-config\" value=\"([^"]*)\"/i',
            $content,
            $matchesTitleConfig
        );

        if ($configFound === 1) {
            // Pad to two parts so a value without the separator cannot leave $append undefined/null.
            [$prepend, $append] = array_pad(explode('|||', $matchesTitleConfig[1]), 2, '');
        }

        return [
            'titlePrepend' => $prepend,
            'titleAppend' => $append,
        ];
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
<?php

declare(strict_types=1);

namespace YoastSeoForTypo3\YoastSeo\Service\Preview\ContentExtractors;

/**
 * Contract for services that read the title configuration from rendered page content.
 */
interface TitleConfigurationExtractorInterface
{
/**
 * Returns the parts to put before and after the page title.
 *
 * @param string $content Content to inspect
 * @return array{titlePrepend: string, titleAppend: string}
 */
public function getTitleConfiguration(string $content): array;
}
Loading

0 comments on commit d27fb22

Please sign in to comment.