From 41dfe74e303fdd7bb245ba46d4e82215418427df Mon Sep 17 00:00:00 2001 From: Thomas Hohn Date: Wed, 31 Jan 2024 13:17:07 +0100 Subject: [PATCH] [BUGFIX] Handle if some tags are upper-case and strict-comparison The method getTagContent in class HtmlContentExtractor should also work for tags that are not lower case. Resolves: #3940 --- Classes/HtmlContentExtractor.php | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Classes/HtmlContentExtractor.php b/Classes/HtmlContentExtractor.php index 6460bc48ac..49e3e6daee 100644 --- a/Classes/HtmlContentExtractor.php +++ b/Classes/HtmlContentExtractor.php @@ -224,7 +224,8 @@ public function getTagContent(): array foreach ($matches[1] as $key => $tag) { // We don't want to index links auto-generated by the url filter. $pattern = '@(?:http://|https://|ftp://|mailto:|smb://|afp://|file://|gopher://|news://|ssl://|sslv2://|sslv3://|tls://|tcp://|udp://|www\.)[a-zA-Z0-9]+@'; - if ($tag != 'a' || !preg_match($pattern, $matches[2][$key])) { + $tag = strtolower((string)$tag); + if ($tag !== 'a' || !preg_match($pattern, $matches[2][$key])) { $fieldName = $this->tagToFieldMapping[$tag]; $hasContentForFieldName = empty($result[$fieldName]); $separator = ($hasContentForFieldName) ? '' : ' ';