Skip to content

Commit

Permalink
7802 Filter script tags from HTML of CMS pages during indexing
Browse files Browse the repository at this point in the history
The tags themselves have been removed before, but not their content.
  • Loading branch information
avstudnitz committed May 28, 2018
1 parent 06a6992 commit eac7ca3
Showing 1 changed file with 14 additions and 2 deletions.
16 changes: 14 additions & 2 deletions src/app/code/community/IntegerNet/Solr/Model/Bridge/Page.php
Original file line number Diff line number Diff line change
Expand Up @@ -59,14 +59,14 @@ public function getTitle()
/**
 * Return the processed content of the CMS page, computing it on first access.
 *
 * The raw "content" value of the page is passed through the CMS page template
 * processor and then through filterHtml(), which removes script blocks and
 * strips the remaining tags. The result is cached in $this->_content.
 *
 * @return string
 */
public function getContent()
{
// Only compute the content once; later calls return the cached value.
if (is_null($this->_content)) {
// NOTE(review): this listing is a rendered diff - the next line appears to be the
// pre-change version and is superseded by the assignment that follows it.
$this->_content = Mage::helper('cms')->getPageTemplateProcessor()->filter($this->_page->getData('content'));
$this->_content = $this->filterHtml(Mage::helper('cms')->getPageTemplateProcessor()->filter($this->_page->getData('content')));
}
return $this->_content;
}

public function getAbstract()
{
$content = trim(strip_tags(html_entity_decode(str_replace(array("\r", "\n", "\t"), ' ', $this->getContent()))));
$content = trim($this->filterHtml(html_entity_decode(str_replace(array("\r", "\n", "\t"), ' ', $this->getContent()))));
if (strlen($content) > self::ABSTRACT_MAX_LENGTH) {
$content = substr($content, 0, self::ABSTRACT_MAX_LENGTH) . '…';
}
Expand Down Expand Up @@ -130,4 +130,16 @@ public function __call($method, $args)
{
return call_user_func_array(array($this->_page, $method), $args);
}

/**
 * Reduce an HTML fragment to plain text.
 *
 * Script elements are removed together with their content; every other tag is
 * stripped while its text content is kept.
 *
 * @param string $html HTML to filter; null is treated as an empty string
 * @return string Text without any tags or script bodies
 */
private function filterHtml($html)
{
    $html = (string) $html;

    // \b keeps tags that merely start with "script" from being matched,
    // [^>]* consumes the attributes of the opening tag, and \s* accepts the
    // optional whitespace HTML allows before the ">" of the closing tag.
    $withoutScripts = preg_replace('#<script\b[^>]*>.*?</script\s*>#is', '', $html);

    if ($withoutScripts === null) {
        // preg_replace() returns null on a PCRE error (e.g. backtrack limit hit).
        // Fall back to the unfiltered input so the page text is not silently
        // replaced by an empty string.
        $withoutScripts = $html;
    }

    return strip_tags($withoutScripts);
}
}

0 comments on commit eac7ca3

Please sign in to comment.