Skip to content

Commit

Permalink
Merge pull request #3 from Leimi/fix-attributes-parsing
Browse files Browse the repository at this point in the history
Fix parser bug when a "/" character appears in an XML node attribute
  • Loading branch information
felixgirault authored Jan 10, 2018
2 parents 828b751 + a38422f commit b3438ad
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 0 deletions.
28 changes: 28 additions & 0 deletions src/Parser.php
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ class Parser {
//
const PARSING_SELF_CLOSING_TAG = 4;

//
const PARSING_ATTRIBUTE_VALUE = 5 ;

//
protected $_observers = [];

Expand All @@ -45,6 +48,9 @@ class Parser {
//
protected $_tagName = '';

//
protected $_attributeQuoteChar = '';

//
protected $_state = self::PARSING_TAG_CONTENTS;

Expand Down Expand Up @@ -96,6 +102,10 @@ public function parse($xml) {
case self::PARSING_SELF_CLOSING_TAG:
$this->_parseSelfClosingTag($char);
break;

case self::PARSING_ATTRIBUTE_VALUE:
$this->_parseAttributeValue($char);
break;
}

if (!$this->_continue) {
Expand Down Expand Up @@ -138,6 +148,12 @@ protected function _parseOpeningTag($char) {
*/
protected function _parseTagAttributes($char) {
switch ($char) {
case '"':
case '\'':
$this->_attributeQuoteChar = $char;
$this->_state = self::PARSING_ATTRIBUTE_VALUE;
break;

case '/':
$this->_state = self::PARSING_SELF_CLOSING_TAG;
break;
Expand All @@ -150,6 +166,18 @@ protected function _parseTagAttributes($char) {
}
}

/**
 * Handles one character while the parser is inside a quoted attribute value.
 *
 * Nothing is parsed here: every character is ignored except the quote
 * character that opened the value, which sends the parser back to the
 * attribute-parsing state.
 *
 * @param string $char Character currently being examined.
 */
protected function _parseAttributeValue($char) {
    // Loose `==` is intentional: it keeps the comparison semantics of a switch/case.
    if ($char == $this->_attributeQuoteChar) {
        $this->_state = self::PARSING_TAG_ATTRIBUTES;
    }
}

/**
*
*/
Expand Down
24 changes: 24 additions & 0 deletions tests/NokogiriTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,30 @@ public function testCutWithAutoClosingTags() {
);
}

public function testAttributeValues() {
    // Each case pairs an input document with the markup expected after cutting.
    $cases = [
        // A "/" inside a double-quoted attribute value.
        [
            '<p><a href="www.attribu.te/with/slash">this is a link</a></p>',
            '<p><a href="www.attribu.te/with/slash">this</a></p>',
        ],
        // Single-quoted and double-quoted attribute values on the same tag.
        [
            '<p><a title=\'simple quotes\' aria-label="double quotes">this is a link</a></p>',
            '<p><a title=\'simple quotes\' aria-label="double quotes">this</a></p>',
        ],
    ];

    foreach ($cases as $case) {
        list($html, $expected) = $case;

        $this->assertEquals(
            $expected,
            $this->Nokogiri->cut($html, 4, true)
        );
    }
}

/**
*
*/
Expand Down

0 comments on commit b3438ad

Please sign in to comment.