Skip to content
forked from BOINC/boinc

Commit

Permalink
Fix to xml_unescape function for multiple escape values.
Browse files Browse the repository at this point in the history
Added unit tests for parse.cpp to prove xml_unescape is working properly.
  • Loading branch information
Keith Uplinger committed May 29, 2019
1 parent be63e62 commit 4928cc9
Show file tree
Hide file tree
Showing 2 changed files with 91 additions and 7 deletions.
35 changes: 28 additions & 7 deletions lib/parse.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -403,7 +403,9 @@ void xml_unescape(char* buf) {
char* out = buf;
char* in = buf;
char* p;
bool goodescape;
while (*in) {
goodescape = false;
if (*in != '&') { // avoid strncmp's if possible
*out++ = *in++;
} else if (!strncmp(in, "<", 4)) {
Expand All @@ -412,10 +414,10 @@ void xml_unescape(char* buf) {
} else if (!strncmp(in, ">", 4)) {
*out++ = '>';
in += 4;
} else if (!strncmp(in, """, 4)) {
} else if (!strncmp(in, """, 6)) {
*out++ = '"';
in += 6;
} else if (!strncmp(in, "'", 4)) {
} else if (!strncmp(in, "'", 6)) {
*out++ = '\'';
in += 6;
} else if (!strncmp(in, "&", 5)) {
Expand All @@ -428,14 +430,33 @@ void xml_unescape(char* buf) {
*out++ = '\n';
in += 5;
} else if (!strncmp(in, "&#", 2)) {
//If escape is poorly formed or outside of char size, then print as is.
in += 2;
char c = atoi(in);
*out++ = c;
p = strchr(in, ';');
if (p) {
in = p+1;
if (!p || *in == ';') { //No end semicolon found or it was formatted as &#;
*out++ = '&';
*out++ = '#';
} else {
while (isdigit(*in)) in++;
//Check that escape is formed correctly
for (unsigned int i = 0; i < 4 || i < strlen(in); i++) {
if (!isdigit(*(in + i)) && *(in + i) != ';') {
//Found something other than a single digit.
break;
}
if (*(in + i) == ';') {
goodescape = true;
break;
}
}
int ascii = atoi(in);

if (goodescape && ascii < 256) {
*out++ = ascii;
in = p + 1;
} else {
*out++ = '&';
*out++ = '#';
}
}
} else {
*out++ = *in++;
Expand Down
63 changes: 63 additions & 0 deletions tests/unit-tests/lib/test_parse.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
#include "gtest/gtest.h"
#include "common_defs.h"
#include "url.h"
#include <string>
#include <ios>

using namespace std;

namespace test_parse {

// Fixture shared by the parse tests in this namespace. It holds no state;
// every hook below is deliberately empty and kept only as a placeholder.

class test_parse : public ::testing::Test {
protected:
    // Per-test construction: nothing to prepare.
    test_parse() {}

    // Per-test destruction: nothing to release (must not throw).
    virtual ~test_parse() {}

    // Invoked immediately after the constructor, right before each test.
    virtual void SetUp() {}

    // Invoked immediately after each test, right before the destructor.
    virtual void TearDown() {}

    // Members added here would be visible to every TEST_F using this fixture.
};

// Checks xml_unescape() on std::string input in three scenarios:
// well-formed escapes, truncated named escapes, and malformed numeric escapes.

TEST_F(test_parse, xml_unescape) {
    // Well-formed named, hexadecimal and decimal escapes are each expected to
    // be replaced by the single character they denote.
    {
        string escaped = "&lt;&gt;&quot;&apos;&amp;&#xD;&#xd;&#xA;&#xa;&#75;";
        const string decoded = "<>\"\'&\r\r\n\nK";
        xml_unescape(escaped);
        EXPECT_EQ(escaped, decoded);
    }

    // Truncated named escapes ("&quo", "&apo") must not be mistaken for
    // "&quot;" / "&apos;" by a too-short prefix comparison; the text is
    // expected to come back unchanged.
    {
        string truncated = "&quoYIKES&apoBOO";
        const string unchanged = "&quoYIKES&apoBOO";
        xml_unescape(truncated);
        EXPECT_EQ(truncated, unchanged);
    }

    // Numeric escapes that cannot be converted are expected to be left as is:
    // a non-digit inside the number, digits with no ';' directly after them,
    // an empty "&#;", a value that does not fit in a char, and a trailing
    // escape with no terminator.
    {
        string malformed = "&#9s3;&#694312532&#;eq&#1234;&#75";
        const string unchanged = "&#9s3;&#694312532&#;eq&#1234;&#75";
        xml_unescape(malformed);
        EXPECT_EQ(malformed, unchanged);
    }
}

} // namespace

0 comments on commit 4928cc9

Please sign in to comment.