From eac7ca35a4f5d3d910f6f0da112c3a27b46812bc Mon Sep 17 00:00:00 2001 From: Andreas von Studnitz Date: Mon, 28 May 2018 09:27:06 +0200 Subject: [PATCH] 7802 Filter script tags from HTML of CMS pages during indexing The tags themselves have been removed before, but not their content. --- .../IntegerNet/Solr/Model/Bridge/Page.php | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/app/code/community/IntegerNet/Solr/Model/Bridge/Page.php b/src/app/code/community/IntegerNet/Solr/Model/Bridge/Page.php index dd5b0bc..03a7603 100644 --- a/src/app/code/community/IntegerNet/Solr/Model/Bridge/Page.php +++ b/src/app/code/community/IntegerNet/Solr/Model/Bridge/Page.php @@ -59,14 +59,14 @@ public function getTitle() public function getContent() { if (is_null($this->_content)) { - $this->_content = Mage::helper('cms')->getPageTemplateProcessor()->filter($this->_page->getData('content')); + $this->_content = $this->filterHtml(Mage::helper('cms')->getPageTemplateProcessor()->filter($this->_page->getData('content'))); } return $this->_content; } public function getAbstract() { - $content = trim(strip_tags(html_entity_decode(str_replace(array("\r", "\n", "\t"), ' ', $this->getContent())))); + $content = trim($this->filterHtml(html_entity_decode(str_replace(array("\r", "\n", "\t"), ' ', $this->getContent())))); if (strlen($content) > self::ABSTRACT_MAX_LENGTH) { $content = substr($content, 0, self::ABSTRACT_MAX_LENGTH) . '…'; } @@ -130,4 +130,16 @@ public function __call($method, $args) { return call_user_func_array(array($this->_page, $method), $args); } + + /** + * Remove script tags (including its content) and other tags (keeping their content) + * + * @param string $html + * @return string + */ + private function filterHtml($html) + { + $html = preg_replace('#<script(.*?)>(.*?)</script>#is', '', $html); + return strip_tags($html); + } } \ No newline at end of file