Skip to content
This repository has been archived by the owner on Oct 10, 2019. It is now read-only.

Commit

Permalink
Merge pull request #505 from DonKult/bugfix/dont-abort-on-empty-make-title
Browse files Browse the repository at this point in the history

Miscellaneous fixes from David Kalnischkies
  • Loading branch information
Minoru authored Feb 23, 2017
2 parents abaad74 + a038cd9 commit 1833c2e
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 5 deletions.
8 changes: 4 additions & 4 deletions src/tagsouppullparser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,12 +68,12 @@ tagsouppullparser::event tagsouppullparser::next() {
skip_whitespace();
if (inputstream->eof()) {
current_event = event::END_DOCUMENT;
break;
}
if (c != '<') {
} else if (c != '<') {
handle_text();
break;
} else {
handle_tag();
}
break;
case event::TEXT:
handle_tag();
break;
Expand Down
6 changes: 5 additions & 1 deletion src/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1066,6 +1066,9 @@ std::string utils::make_title(const std::string& const_url) {
//Throw away common webpage suffixes: .html, .php, .aspx, .htm
std::regex rx("\\.html$|\\.htm$|\\.php$|\\.aspx$");
title = std::regex_replace(title,rx,"");
// if there is nothing left, just give up
if (title.empty())
return title;
// 'title with dashes'
std::replace(title.begin(), title.end(), '-', ' ');
std::replace(title.begin(), title.end(), '_', ' ');
Expand All @@ -1074,9 +1077,10 @@ std::string utils::make_title(const std::string& const_url) {
title[0] -= 'a' - 'A';
}
// Un-escape any percent-encoding, e.g. "It%27s%202017%21" -> "It's 2017!"
char* result = xmlURIUnescapeString(title.c_str(), 0, nullptr);
auto const result = xmlURIUnescapeString(title.c_str(), 0, nullptr);
if (result) {
title = result;
xmlFree(result);
}
return title;
}
Expand Down
5 changes: 5 additions & 0 deletions test/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -417,6 +417,11 @@ TEST_CASE("utils::make_title extracts possible title from URL") {
auto input = "https://example.com/It%27s%202017%21";
REQUIRE(utils::make_title(input) == "It's 2017!");
}

SECTION("Deal with an empty last component") {
auto input = "https://example.com/?format=rss";
REQUIRE(utils::make_title(input) == "");
}
}

TEST_CASE("remove_soft_hyphens remove all U+00AD characters from a string",
Expand Down

0 comments on commit 1833c2e

Please sign in to comment.