diff --git a/Parsedown.php b/Parsedown.php index 3232a1fb4..b5f8005c6 100644 --- a/Parsedown.php +++ b/Parsedown.php @@ -182,29 +182,17 @@ protected function linesElements(array $lines) continue; } - if (strpos($line, "\t") !== false) + while (($beforeTab = strstr($line, "\t", true)) !== false) { - $parts = explode("\t", $line); + $shortage = 4 - mb_strlen($beforeTab, 'utf-8') % 4; - $line = $parts[0]; - - unset($parts[0]); - - foreach ($parts as $part) - { - $shortage = 4 - mb_strlen($line, 'utf-8') % 4; - - $line .= str_repeat(' ', $shortage); - $line .= $part; - } + $line = $beforeTab + . str_repeat(' ', $shortage) + . substr($line, strlen($beforeTab) + 1) + ; } - $indent = 0; - - while (isset($line[$indent]) and $line[$indent] === ' ') - { - $indent ++; - } + $indent = strspn($line, ' '); $text = $indent > 0 ? substr($line, $indent) : $line; @@ -216,7 +204,8 @@ protected function linesElements(array $lines) if (isset($CurrentBlock['continuable'])) { - $Block = $this->{'block'.$CurrentBlock['type'].'Continue'}($Line, $CurrentBlock); + $methodName = 'block' . $CurrentBlock['type'] . 'Continue'; + $Block = $this->$methodName($Line, $CurrentBlock); if (isset($Block)) { @@ -228,7 +217,8 @@ protected function linesElements(array $lines) { if ($this->isBlockCompletable($CurrentBlock['type'])) { - $CurrentBlock = $this->{'block'.$CurrentBlock['type'].'Complete'}($CurrentBlock); + $methodName = 'block' . $CurrentBlock['type'] . 'Complete'; + $CurrentBlock = $this->$methodName($CurrentBlock); } } } @@ -254,7 +244,7 @@ protected function linesElements(array $lines) foreach ($blockTypes as $blockType) { - $Block = $this->{'block'.$blockType}($Line, $CurrentBlock); + $Block = $this->{"block$blockType"}($Line, $CurrentBlock); if (isset($Block)) { @@ -309,7 +299,8 @@ protected function linesElements(array $lines) if (isset($CurrentBlock['continuable']) and $this->isBlockCompletable($CurrentBlock['type'])) { - $CurrentBlock = $this->{'block'.$CurrentBlock['type'].'Complete'}($CurrentBlock); + $methodName = 'block' . $CurrentBlock['type'] . 'Complete'; + $CurrentBlock = $this->$methodName($CurrentBlock); } # ~ @@ -336,12 +327,12 @@ protected function extractElement(array $Component) protected function isBlockContinuable($Type) { - return method_exists($this, 'block'.$Type.'Continue'); + return method_exists($this, 'block' . $Type . 'Continue'); } protected function isBlockCompletable($Type) { - return method_exists($this, 'block'.$Type.'Complete'); + return method_exists($this, 'block' . $Type . 'Complete'); } # @@ -393,15 +384,6 @@ protected function blockCodeContinue($Line, $Block) } } - protected function blockCodeComplete($Block) - { - $text = $Block['element']['element']['text']; - - $Block['element']['element']['text'] = $text; - - return $Block; - } - # # Comment @@ -452,33 +434,42 @@ protected function blockCommentContinue($Line, array $Block) protected function blockFencedCode($Line) { - if (preg_match('/^(['.$Line['text'][0].']{3,})[ ]*([^`]+)?[ ]*$/', $Line['text'], $matches)) + $marker = $Line['text'][0]; + + $openerLength = strspn($Line['text'], $marker); + + if ($openerLength < 3) { - $Element = array( - 'name' => 'code', - 'text' => '', - ); + return; + } - if (isset($matches[2])) - { - $class = 'language-'.$matches[2]; + $infostring = trim(substr($Line['text'], $openerLength), "\t "); - $Element['attributes'] = array( - 'class' => $class, - ); - } + if (strpos($infostring, '`') !== false) + { + return; + } - $Block = array( - 'char' => $Line['text'][0], - 'openerLength' => mb_strlen($matches[1]), - 'element' => array( - 'name' => 'pre', - 'element' => $Element, - ), - ); + $Element = array( + 'name' => 'code', + 'text' => '', + ); - return $Block; + if ($infostring !== '') + { + $Element['attributes'] = array('class' => "language-$infostring"); } + + $Block = array( + 'char' => $marker, + 'openerLength' => $openerLength, + 'element' => array( + 'name' => 'pre', + 'element' => $Element, + ), + ); + + return $Block; } protected function blockFencedCodeContinue($Line, $Block) @@ -495,9 +486,8 @@ protected function blockFencedCodeContinue($Line, $Block) unset($Block['interrupted']); } - if ( - preg_match('/^(['.preg_quote($Block['char']).']{3,})[ ]*$/', $Line['text'], $matches) - and mb_strlen($matches[1]) >= $Block['openerLength'] + if (($len = strspn($Line['text'], $Block['char'])) >= $Block['openerLength'] + and chop(substr($Line['text'], $len), ' ') === '' ) { $Block['element']['element']['text'] = substr($Block['element']['element']['text'], 1); @@ -506,16 +496,7 @@ protected function blockFencedCodeContinue($Line, $Block) return $Block; } - $Block['element']['element']['text'] .= "\n".$Line['body']; - - return $Block; - } - - protected function blockFencedCodeComplete($Block) - { - $text = $Block['element']['element']['text']; - - $Block['element']['element']['text'] = $text; + $Block['element']['element']['text'] .= "\n" . $Line['body']; return $Block; } @@ -525,12 +506,7 @@ protected function blockFencedCodeComplete($Block) protected function blockHeader($Line) { - $level = 1; - - while (isset($Line['text'][$level]) and $Line['text'][$level] === '#') - { - $level ++; - } + $level = strspn($Line['text'], '#'); if ($level > 6) { @@ -565,9 +541,9 @@ protected function blockHeader($Line) protected function blockList($Line, array $CurrentBlock = null) { - list($name, $pattern) = $Line['text'][0] <= '-' ? array('ul', '[*+-]') : array('ol', '[0-9]{1,9}[.\)]'); + list($name, $pattern) = $Line['text'][0] <= '-' ? array('ul', '[*+-]') : array('ol', '[0-9]{1,9}+[.\)]'); - if (preg_match('/^('.$pattern.'([ ]+|$))(.*)/', $Line['text'], $matches)) + if (preg_match('/^('.$pattern.'([ ]++|$))(.*+)/', $Line['text'], $matches)) { $contentIndent = strlen($matches[2]); @@ -582,19 +558,22 @@ protected function blockList($Line, array $CurrentBlock = null) $matches[1] .= ' '; } + $markerWithoutWhitespace = strstr($matches[1], ' ', true); + $Block = array( 'indent' => $Line['indent'], 'pattern' => $pattern, 'data' => array( 'type' => $name, 'marker' => $matches[1], - 'markerType' => ($name === 'ul' ? strstr($matches[1], ' ', true) : substr(strstr($matches[1], ' ', true), -1)), + 'markerType' => ($name === 'ul' ? $markerWithoutWhitespace : substr($markerWithoutWhitespace, -1)), ), 'element' => array( 'name' => $name, 'elements' => array(), ), ); + $Block['data']['markerTypeRegex'] = preg_quote($Block['data']['markerType'], '/'); if ($name === 'ol') { @@ -642,10 +621,10 @@ protected function blockListContinue($Line, array $Block) and ( ( $Block['data']['type'] === 'ol' - and preg_match('/^[0-9]+'.preg_quote($Block['data']['markerType']).'(?:[ ]+(.*)|$)/', $Line['text'], $matches) + and preg_match('/^[0-9]++'.$Block['data']['markerTypeRegex'].'(?:[ ]++(.*)|$)/', $Line['text'], $matches) ) or ( $Block['data']['type'] === 'ul' - and preg_match('/^'.preg_quote($Block['data']['markerType']).'(?:[ ]+(.*)|$)/', $Line['text'], $matches) + and preg_match('/^'.$Block['data']['markerTypeRegex'].'(?:[ ]++(.*)|$)/', $Line['text'], $matches) ) ) ) { @@ -707,7 +686,7 @@ protected function blockListContinue($Line, array $Block) if ( ! isset($Block['interrupted'])) { - $text = preg_replace('/^[ ]{0,'.$requiredIndent.'}/', '', $Line['body']); + $text = preg_replace('/^[ ]{0,'.$requiredIndent.'}+/', '', $Line['body']); $Block['li']['handler']['argument'] []= $text; @@ -736,7 +715,7 @@ protected function blockListComplete(array $Block) protected function blockQuote($Line) { - if (preg_match('/^>[ ]?(.*)/', $Line['text'], $matches)) + if (preg_match('/^>[ ]?+(.*+)/', $Line['text'], $matches)) { $Block = array( 'element' => array( @@ -760,7 +739,7 @@ protected function blockQuoteContinue($Line, array $Block) return; } - if ($Line['text'][0] === '>' and preg_match('/^>[ ]?(.*)/', $Line['text'], $matches)) + if ($Line['text'][0] === '>' and preg_match('/^>[ ]?+(.*+)/', $Line['text'], $matches)) { $Block['element']['handler']['argument'] []= $matches[1]; @@ -780,7 +759,9 @@ protected function blockQuoteContinue($Line, array $Block) protected function blockRule($Line) { - if (preg_match('/^(['.$Line['text'][0].'])([ ]*\1){2,}[ ]*$/', $Line['text'])) + $marker = $Line['text'][0]; + + if (substr_count($Line['text'], $marker) >= 3 and chop($Line['text'], " $marker") === '') { $Block = array( 'element' => array( @@ -802,10 +783,8 @@ protected function blockSetextHeader($Line, array $Block = null) return; } - if ( - chop(chop($Line['text'], ' '), $Line['text'][0]) === '' - and $Line['indent'] < 4 - ) { + if ($Line['indent'] < 4 and chop(chop($Line['text'], ' '), $Line['text'][0]) === '') + { $Block['element']['name'] = $Line['text'][0] === '=' ? 'h1' : 'h2'; return $Block; @@ -822,7 +801,7 @@ protected function blockMarkup($Line) return; } - if (preg_match('/^<[\/]?+(\w*)(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*(\/)?>/', $Line['text'], $matches)) + if (preg_match('/^<[\/]?+(\w*)(?:[ ]*+'.$this->regexHtmlAttribute.')*+[ ]*+(\/)?>/', $Line['text'], $matches)) { $element = strtolower($matches[1]); @@ -850,7 +829,7 @@ protected function blockMarkupContinue($Line, array $Block) return; } - $Block['element']['rawHtml'] .= "\n".$Line['body']; + $Block['element']['rawHtml'] .= "\n" . $Line['body']; return $Block; } @@ -860,8 +839,9 @@ protected function blockMarkupContinue($Line, array $Block) protected function blockReference($Line) { - if (preg_match('/^\[(.+?)\]:[ ]*?(?:[ ]+["\'(](.+)["\')])?[ ]*$/', $Line['text'], $matches)) - { + if (strpos($Line['text'], ']') !== false + and preg_match('/^\[(.+?)\]:[ ]*+?(?:[ ]+["\'(](.+)["\')])?[ ]*+$/', $Line['text'], $matches) + ) { $id = strtolower($matches[1]); $Data = array( @@ -970,7 +950,7 @@ protected function blockTable($Line, array $Block = null) $alignment = $alignments[$index]; $HeaderElement['attributes'] = array( - 'style' => 'text-align: '.$alignment.';', + 'style' => "text-align: $alignment;", ); } @@ -1021,7 +1001,7 @@ protected function blockTableContinue($Line, array $Block) $row = trim($row); $row = trim($row, '|'); - preg_match_all('/(?:(\\\\[|])|[^|`]|`[^`]+`|`)+/', $row, $matches); + preg_match_all('/(?:(\\\\[|])|[^|`]|`[^`]++`|`)++/', $row, $matches); $cells = array_slice($matches[0], 0, count($Block['alignments'])); @@ -1041,7 +1021,7 @@ protected function blockTableContinue($Line, array $Block) if (isset($Block['alignments'][$index])) { $Element['attributes'] = array( - 'style' => 'text-align: '.$Block['alignments'][$index].';', + 'style' => 'text-align: ' . $Block['alignments'][$index] . ';', ); } @@ -1115,7 +1095,7 @@ protected function paragraphContinue($Line, array $Block) # ~ # - public function line($text, $nonNestables=array()) + public function line($text, $nonNestables = array()) { return $this->elements($this->lineElements($text, $nonNestables)); } @@ -1124,13 +1104,18 @@ protected function lineElements($text, $nonNestables = array()) { $Elements = array(); + $nonNestables = (empty($nonNestables) + ? array() + : array_combine($nonNestables, $nonNestables) + ); + # $excerpt is based on the first occurrence of a marker while ($excerpt = strpbrk($text, $this->inlineMarkerList)) { $marker = $excerpt[0]; - $markerPosition = strpos($text, $marker); + $markerPosition = strlen($text) - strlen($excerpt); $Excerpt = array('text' => $excerpt, 'context' => $text); @@ -1138,12 +1123,12 @@ protected function lineElements($text, $nonNestables = array()) { # check to see if the current inline type is nestable in the current context - if ( ! empty($nonNestables) and in_array($inlineType, $nonNestables)) + if (isset($nonNestables[$inlineType])) { continue; } - $Inline = $this->{'inline'.$inlineType}($Excerpt); + $Inline = $this->{"inline$inlineType"}($Excerpt); if ( ! isset($Inline)) { @@ -1166,10 +1151,11 @@ protected function lineElements($text, $nonNestables = array()) # cause the new element to 'inherit' our non nestables - foreach ($nonNestables as $non_nestable) - { - $Inline['element']['nonNestables'][] = $non_nestable; - } + + $Inline['element']['nonNestables'] = isset($Inline['element']['nonNestables']) + ? array_merge($Inline['element']['nonNestables'], $nonNestables) + : $nonNestables + ; # the text that comes before the inline $unmarkedText = substr($text, 0, $Inline['position']); @@ -1200,14 +1186,13 @@ protected function lineElements($text, $nonNestables = array()) $InlineText = $this->inlineText($text); $Elements[] = $InlineText['element']; - $Elements = array_map( - function ($Element) { - $Element['autobreak'] = isset($Element['autobreak']) - ? $Element['autobreak'] : false; - return $Element; - }, - $Elements - ); + foreach ($Elements as &$Element) + { + if ( ! isset($Element['autobreak'])) + { + $Element['autobreak'] = false; + } + } return $Elements; } @@ -1220,33 +1205,17 @@ protected function inlineText($text) { $Inline = array( 'extent' => strlen($text), - 'element' => array( - 'elements' => array(), - ), + 'element' => array(), ); - if ($this->breaksEnabled) - { - $Inline['element']['elements'] = self::pregReplaceElements( - '/[ ]*\n/', - array( - array('name' => 'br'), - array('text' => "\n"), - ), - $text - ); - } - else - { - $Inline['element']['elements'] = self::pregReplaceElements( - '/(?:[ ][ ]+|[ ]*\\\\)\n/', - array( - array('name' => 'br'), - array('text' => "\n"), - ), - $text - ); - } + $safeText = self::escape($text, true); + + $Inline['element']['rawHtml'] = preg_replace( + $this->breaksEnabled ? '/[ ]*+\n/' : '/(?:[ ]*+\\\\|[ ]{2,}+)\n/', + "
\n", + $safeText + ); + $Inline['element']['allowRawHtmlInSafeMode'] = true; return $Inline; } @@ -1255,10 +1224,10 @@ protected function inlineCode($Excerpt) { $marker = $Excerpt['text'][0]; - if (preg_match('/^('.$marker.'+)[ ]*(.+?)[ ]*(? strlen($matches[0]), @@ -1284,7 +1253,7 @@ protected function inlineEmailTag($Excerpt) if ( ! isset($matches[2])) { - $url = 'mailto:' . $url; + $url = "mailto:$url"; } return array( @@ -1414,7 +1383,7 @@ protected function inlineLink($Excerpt) return; } - if (preg_match('/^[(]\s*+((?:[^ ()]++|[(][^ )]+[)])++)(?:[ ]+("[^"]*"|\'[^\']*\'))?\s*[)]/', $remainder, $matches)) + if (preg_match('/^[(]\s*+((?:[^ ()]++|[(][^ )]+[)])++)(?:[ ]+("[^"]*+"|\'[^\']*+\'))?\s*+[)]/', $remainder, $matches)) { $Element['attributes']['href'] = $matches[1]; @@ -1463,7 +1432,7 @@ protected function inlineMarkup($Excerpt) return; } - if ($Excerpt['text'][1] === '/' and preg_match('/^<\/\w[\w-]*[ ]*>/s', $Excerpt['text'], $matches)) + if ($Excerpt['text'][1] === '/' and preg_match('/^<\/\w[\w-]*+[ ]*+>/s', $Excerpt['text'], $matches)) { return array( 'element' => array('rawHtml' => $matches[0]), @@ -1471,7 +1440,7 @@ protected function inlineMarkup($Excerpt) ); } - if ($Excerpt['text'][1] === '!' and preg_match('/^/s', $Excerpt['text'], $matches)) + if ($Excerpt['text'][1] === '!' and preg_match('/^/s', $Excerpt['text'], $matches)) { return array( 'element' => array('rawHtml' => $matches[0]), @@ -1479,7 +1448,7 @@ protected function inlineMarkup($Excerpt) ); } - if ($Excerpt['text'][1] !== ' ' and preg_match('/^<\w[\w-]*(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*\/?>/s', $Excerpt['text'], $matches)) + if ($Excerpt['text'][1] !== ' ' and preg_match('/^<\w[\w-]*+(?:[ ]*+'.$this->regexHtmlAttribute.')*+[ ]*+\/?>/s', $Excerpt['text'], $matches)) { return array( 'element' => array('rawHtml' => $matches[0]), @@ -1490,10 +1459,11 @@ protected function inlineMarkup($Excerpt) protected function inlineSpecialCharacter($Excerpt) { - if (preg_match('/^&(#?+[0-9a-zA-Z]++);/', $Excerpt['text'], $matches)) - { + if ($Excerpt['text'][1] !== ' ' and strpos($Excerpt['text'], ';') !== false + and preg_match('/^&(#?+[0-9a-zA-Z]++);/', $Excerpt['text'], $matches) + ) { return array( - 'element' => array('rawHtml' => '&'.$matches[1].';'), + 'element' => array('rawHtml' => '&' . $matches[1] . ';'), 'extent' => strlen($matches[0]), ); } @@ -1531,8 +1501,9 @@ protected function inlineUrl($Excerpt) return; } - if (preg_match('/\bhttps?:[\/]{2}[^\s<]+\b\/*/ui', $Excerpt['context'], $matches, PREG_OFFSET_CAPTURE)) - { + if (strpos($Excerpt['context'], 'http') !== false + and preg_match('/\bhttps?+:[\/]{2}[^\s<]+\b\/*+/ui', $Excerpt['context'], $matches, PREG_OFFSET_CAPTURE) + ) { $url = $matches[0][0]; $Inline = array( @@ -1553,7 +1524,7 @@ protected function inlineUrl($Excerpt) protected function inlineUrlTag($Excerpt) { - if (strpos($Excerpt['text'], '>') !== false and preg_match('/^<(\w+:\/{2}[^ >]+)>/i', $Excerpt['text'], $matches)) + if (strpos($Excerpt['text'], '>') !== false and preg_match('/^<(\w++:\/{2}[^ >]++)>/i', $Excerpt['text'], $matches)) { $url = $matches[1]; @@ -1611,9 +1582,9 @@ protected function handle(array $Element) { $Element = $this->handle($Element); } - } - unset($Element['handler']); + unset($Element['handler']); + } return $Element; } @@ -1696,7 +1667,7 @@ protected function element(array $Element) if ($hasName) { - $markup .= '<'.$Element['name']; + $markup .= '<' . $Element['name']; if (isset($Element['attributes'])) { @@ -1707,7 +1678,7 @@ protected function element(array $Element) continue; } - $markup .= ' '.$name.'="'.self::escape($value).'"'; + $markup .= " $name=\"".self::escape($value).'"'; } } } @@ -1754,7 +1725,7 @@ protected function element(array $Element) } } - $markup .= $hasName ? '' : ''; + $markup .= $hasName ? '' : ''; } elseif ($hasName) { @@ -1777,8 +1748,8 @@ protected function elements(array $Elements) continue; } - $autoBreakNext = (isset($Element['autobreak']) && $Element['autobreak'] - || ! isset($Element['autobreak']) && isset($Element['name']) + $autoBreakNext = (isset($Element['autobreak']) + ? $Element['autobreak'] : isset($Element['name']) ); // (autobreak === false) covers both sides of an element $autoBreak = !$autoBreak ? $autoBreak : $autoBreakNext; @@ -1959,8 +1930,8 @@ static function instance($name = 'default') ); protected $StrongRegex = array( - '*' => '/^[*]{2}((?:\\\\\*|[^*]|[*][^*]*[*])+?)[*]{2}(?![*])/s', - '_' => '/^__((?:\\\\_|[^_]|_[^_]*_)+?)__(?!_)/us', + '*' => '/^[*]{2}((?:\\\\\*|[^*]|[*][^*]*+[*])+?)[*]{2}(?![*])/s', + '_' => '/^__((?:\\\\_|[^_]|_[^_]*+_)+?)__(?!_)/us', ); protected $EmRegex = array( @@ -1968,7 +1939,7 @@ static function instance($name = 'default') '_' => '/^_((?:\\\\_|[^_]|__[^_]*__)+?)_(?!_)\b/us', ); - protected $regexHtmlAttribute = '[a-zA-Z_:][\w:.-]*(?:\s*=\s*(?:[^"\'=<>`\s]+|"[^"]*"|\'[^\']*\'))?'; + protected $regexHtmlAttribute = '[a-zA-Z_:][\w:.-]*+(?:\s*+=\s*+(?:[^"\'=<>`\s]+|"[^"]*+"|\'[^\']*+\'))?+'; protected $voidElements = array( 'area', 'base', 'br', 'col', 'command', 'embed', 'hr', 'img', 'input', 'link', 'meta', 'param', 'source',