Skip to content

Commit

Permalink
Use xmlSaveToBuffer for as.character
Browse files Browse the repository at this point in the history
Unifies the as.character interface with xml_write methods
  • Loading branch information
jimhester committed Jan 5, 2017
1 parent d95ba1f commit 1a13387
Show file tree
Hide file tree
Showing 10 changed files with 93 additions and 141 deletions.
2 changes: 1 addition & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@

* `xml_new_document()` now explicitly sets the encoding (default UTF-8) (#142)

* Document `write_xml(format = TRUE)` (#132)
* Document formatting options for `write_xml()` (#132)

* Add missing methods for xml_missing objects. (#134)

Expand Down
28 changes: 10 additions & 18 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -177,40 +177,32 @@ node_new_dtd <- function(doc, name = "", eid = "", sid = "") {
invisible(.Call('xml2_node_new_dtd', PACKAGE = 'xml2', doc, name, eid, sid))
}

doc_format_xml <- function(x, format = TRUE) {
.Call('xml2_doc_format_xml', PACKAGE = 'xml2', x, format)
}

doc_format_html <- function(x, format = TRUE) {
.Call('xml2_doc_format_html', PACKAGE = 'xml2', x, format)
}

xml_save_options <- function() {
.Call('xml2_xml_save_options', PACKAGE = 'xml2')
}

doc_write <- function(x, path, encoding = "UTF-8", options = 1L) {
invisible(.Call('xml2_doc_write', PACKAGE = 'xml2', x, path, encoding, options))
doc_write_file <- function(x, path, encoding = "UTF-8", options = 1L) {
invisible(.Call('xml2_doc_write_file', PACKAGE = 'xml2', x, path, encoding, options))
}

doc_write_connection <- function(x, connection, encoding = "UTF-8", options = 1L) {
invisible(.Call('xml2_doc_write_connection', PACKAGE = 'xml2', x, connection, encoding, options))
}

node_write <- function(x, path, encoding = "UTF-8", options = 1L) {
invisible(.Call('xml2_node_write', PACKAGE = 'xml2', x, path, encoding, options))
doc_write_character <- function(x, encoding = "UTF-8", options = 1L) {
.Call('xml2_doc_write_character', PACKAGE = 'xml2', x, encoding, options)
}

node_write_connection <- function(x, connection, encoding = "UTF-8", options = 1L) {
invisible(.Call('xml2_node_write_connection', PACKAGE = 'xml2', x, connection, encoding, options))
node_write_file <- function(x, path, encoding = "UTF-8", options = 1L) {
invisible(.Call('xml2_node_write_file', PACKAGE = 'xml2', x, path, encoding, options))
}

node_format_xml <- function(doc, node, format = TRUE, indent = 0L) {
.Call('xml2_node_format_xml', PACKAGE = 'xml2', doc, node, format, indent)
node_write_connection <- function(x, connection, encoding = "UTF-8", options = 1L) {
invisible(.Call('xml2_node_write_connection', PACKAGE = 'xml2', x, connection, encoding, options))
}

node_format_html <- function(doc, node, format = TRUE) {
.Call('xml2_node_format_html', PACKAGE = 'xml2', doc, node, format)
node_write_character <- function(x, encoding = "UTF-8", options = 1L) {
.Call('xml2_node_write_character', PACKAGE = 'xml2', x, encoding, options)
}

doc_validate <- function(doc, schema) {
Expand Down
15 changes: 6 additions & 9 deletions R/classes.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,9 @@ xml_node <- function(node = NULL, doc = NULL) {
}

#' @export
as.character.xml_node <- function(x, ..., format = TRUE, indent = 0) {
node_format_xml(x$doc, x$node, format = format, indent = indent)
as.character.xml_node <- function(x, ..., options = "format", encoding = "UTF-8") {
options <- parse_options(options, xml_save_options())
node_write_character(x$node, options = options, encoding = encoding)
}

#' @export
Expand Down Expand Up @@ -53,13 +54,9 @@ print.xml_document <- function(x, width = getOption("width"), max_n = 20, ...) {
}

#' @export
as.character.xml_document <- function(x, ..., format = TRUE, type = c("xml", "html")) {
type <- match.arg(type)
if (type == "xml") {
doc_format_xml(x$doc, format = format)
} else {
doc_format_html(x$doc, format = format)
}
as.character.xml_document <- function(x, ..., options = "format", encoding = "UTF-8") {
options <- parse_options(options, xml_save_options())
doc_write_character(x$doc, options = options, encoding = encoding)
}

# nodeset ----------------------------------------------------------------------
Expand Down
8 changes: 4 additions & 4 deletions R/xml_write.R
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ write_xml.xml_document <- function(x, file, ..., options = "format", encoding =
if (!(is.character(file) && length(file) == 1 && nzchar(file))) {
stop("`file` must be a non-zero character of length 1", call. = FALSE)
}
doc_write(x$doc, file, options = options, encoding = encoding)
doc_write_file(x$doc, file, options = options, encoding = encoding)
}
}

Expand All @@ -72,12 +72,12 @@ write_xml.xml_nodeset <- function(x, file, ..., options = "format", encoding = "
if (!(is.character(file) && length(file) == 1 && nzchar(file))) {
stop("`file` must be a non-zero character of length 1", call. = FALSE)
}
node_write(x[[1]]$node, file, options = options, encoding = encoding)
node_write_file(x[[1]]$node, file, options = options, encoding = encoding)
}
}

#' @export
write_xml.xml_node <- function(x, file, format = TRUE, ..., options = "format", encoding = "UTF-8") {
write_xml.xml_node <- function(x, file, ..., options = "format", encoding = "UTF-8") {
options <- parse_options(options, xml_save_options())

file <- path_to_connection(file, check = "dir")
Expand All @@ -91,7 +91,7 @@ write_xml.xml_node <- function(x, file, format = TRUE, ..., options = "format",
if (!(is.character(file) && length(file) == 1 && nzchar(file))) {
stop("`file` must be a non-zero character of length 1", call. = FALSE)
}
node_write(x$node, file, options = options, encoding = encoding)
node_write_file(x$node, file, options = options, encoding = encoding)
}
}

Expand Down
81 changes: 28 additions & 53 deletions src/RcppExports.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -521,30 +521,6 @@ BEGIN_RCPP
return R_NilValue;
END_RCPP
}
// doc_format_xml
CharacterVector doc_format_xml(XPtrDoc x, bool format);
RcppExport SEXP xml2_doc_format_xml(SEXP xSEXP, SEXP formatSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< XPtrDoc >::type x(xSEXP);
Rcpp::traits::input_parameter< bool >::type format(formatSEXP);
rcpp_result_gen = Rcpp::wrap(doc_format_xml(x, format));
return rcpp_result_gen;
END_RCPP
}
// doc_format_html
CharacterVector doc_format_html(XPtrDoc x, bool format);
RcppExport SEXP xml2_doc_format_html(SEXP xSEXP, SEXP formatSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< XPtrDoc >::type x(xSEXP);
Rcpp::traits::input_parameter< bool >::type format(formatSEXP);
rcpp_result_gen = Rcpp::wrap(doc_format_html(x, format));
return rcpp_result_gen;
END_RCPP
}
// xml_save_options
Rcpp::IntegerVector xml_save_options();
RcppExport SEXP xml2_xml_save_options() {
Expand All @@ -555,16 +531,16 @@ BEGIN_RCPP
return rcpp_result_gen;
END_RCPP
}
// doc_write
void doc_write(XPtrDoc x, std::string path, std::string encoding, int options);
RcppExport SEXP xml2_doc_write(SEXP xSEXP, SEXP pathSEXP, SEXP encodingSEXP, SEXP optionsSEXP) {
// doc_write_file
void doc_write_file(XPtrDoc x, std::string path, std::string encoding, int options);
RcppExport SEXP xml2_doc_write_file(SEXP xSEXP, SEXP pathSEXP, SEXP encodingSEXP, SEXP optionsSEXP) {
BEGIN_RCPP
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< XPtrDoc >::type x(xSEXP);
Rcpp::traits::input_parameter< std::string >::type path(pathSEXP);
Rcpp::traits::input_parameter< std::string >::type encoding(encodingSEXP);
Rcpp::traits::input_parameter< int >::type options(optionsSEXP);
doc_write(x, path, encoding, options);
doc_write_file(x, path, encoding, options);
return R_NilValue;
END_RCPP
}
Expand All @@ -581,16 +557,29 @@ BEGIN_RCPP
return R_NilValue;
END_RCPP
}
// node_write
void node_write(XPtrNode x, std::string path, std::string encoding, int options);
RcppExport SEXP xml2_node_write(SEXP xSEXP, SEXP pathSEXP, SEXP encodingSEXP, SEXP optionsSEXP) {
// doc_write_character
CharacterVector doc_write_character(XPtrDoc x, std::string encoding, int options);
RcppExport SEXP xml2_doc_write_character(SEXP xSEXP, SEXP encodingSEXP, SEXP optionsSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< XPtrDoc >::type x(xSEXP);
Rcpp::traits::input_parameter< std::string >::type encoding(encodingSEXP);
Rcpp::traits::input_parameter< int >::type options(optionsSEXP);
rcpp_result_gen = Rcpp::wrap(doc_write_character(x, encoding, options));
return rcpp_result_gen;
END_RCPP
}
// node_write_file
void node_write_file(XPtrNode x, std::string path, std::string encoding, int options);
RcppExport SEXP xml2_node_write_file(SEXP xSEXP, SEXP pathSEXP, SEXP encodingSEXP, SEXP optionsSEXP) {
BEGIN_RCPP
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< XPtrNode >::type x(xSEXP);
Rcpp::traits::input_parameter< std::string >::type path(pathSEXP);
Rcpp::traits::input_parameter< std::string >::type encoding(encodingSEXP);
Rcpp::traits::input_parameter< int >::type options(optionsSEXP);
node_write(x, path, encoding, options);
node_write_file(x, path, encoding, options);
return R_NilValue;
END_RCPP
}
Expand All @@ -607,30 +596,16 @@ BEGIN_RCPP
return R_NilValue;
END_RCPP
}
// node_format_xml
CharacterVector node_format_xml(XPtrDoc doc, XPtrNode node, bool format, int indent);
RcppExport SEXP xml2_node_format_xml(SEXP docSEXP, SEXP nodeSEXP, SEXP formatSEXP, SEXP indentSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< XPtrDoc >::type doc(docSEXP);
Rcpp::traits::input_parameter< XPtrNode >::type node(nodeSEXP);
Rcpp::traits::input_parameter< bool >::type format(formatSEXP);
Rcpp::traits::input_parameter< int >::type indent(indentSEXP);
rcpp_result_gen = Rcpp::wrap(node_format_xml(doc, node, format, indent));
return rcpp_result_gen;
END_RCPP
}
// node_format_html
CharacterVector node_format_html(XPtrDoc doc, XPtrNode node, bool format);
RcppExport SEXP xml2_node_format_html(SEXP docSEXP, SEXP nodeSEXP, SEXP formatSEXP) {
// node_write_character
CharacterVector node_write_character(XPtrNode x, std::string encoding, int options);
RcppExport SEXP xml2_node_write_character(SEXP xSEXP, SEXP encodingSEXP, SEXP optionsSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< XPtrDoc >::type doc(docSEXP);
Rcpp::traits::input_parameter< XPtrNode >::type node(nodeSEXP);
Rcpp::traits::input_parameter< bool >::type format(formatSEXP);
rcpp_result_gen = Rcpp::wrap(node_format_html(doc, node, format));
Rcpp::traits::input_parameter< XPtrNode >::type x(xSEXP);
Rcpp::traits::input_parameter< std::string >::type encoding(encodingSEXP);
Rcpp::traits::input_parameter< int >::type options(optionsSEXP);
rcpp_result_gen = Rcpp::wrap(node_write_character(x, encoding, options));
return rcpp_result_gen;
END_RCPP
}
Expand Down
66 changes: 27 additions & 39 deletions src/xml2_output.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,24 +32,6 @@ Rconnection get_connection(SEXP con) {
#include "xml2_types.h"
#include "xml2_utils.h"

// [[Rcpp::export]]
CharacterVector doc_format_xml(XPtrDoc x, bool format = true) {
xmlChar *s;
int size;
xmlDocDumpFormatMemory(x.checked_get(), &s, &size, format);

return Xml2String(s).asRString();
}

// [[Rcpp::export]]
CharacterVector doc_format_html(XPtrDoc x, bool format = true) {
xmlChar *s;
int size;
htmlDocDumpMemoryFormat(x.checked_get(), &s, &size, format);

return Xml2String(s).asRString();
}

// [[Rcpp::export]]
Rcpp::IntegerVector xml_save_options() {
Rcpp::IntegerVector out = Rcpp::IntegerVector::create(
Expand Down Expand Up @@ -83,7 +65,7 @@ int xml_write_callback(Rconnection con, const char * buffer, int len) {
}

// [[Rcpp::export]]
void doc_write(XPtrDoc x, std::string path, std::string encoding = "UTF-8", int options = 1) {
void doc_write_file(XPtrDoc x, std::string path, std::string encoding = "UTF-8", int options = 1) {
xmlSaveCtxtPtr savectx = xmlSaveToFilename(
path.c_str(),
encoding.c_str(),
Expand Down Expand Up @@ -113,7 +95,23 @@ void doc_write_connection(XPtrDoc x, SEXP connection, std::string encoding = "UT
}

// [[Rcpp::export]]
void node_write(XPtrNode x, std::string path, std::string encoding = "UTF-8", int options = 1) {
CharacterVector doc_write_character(XPtrDoc x, std::string encoding = "UTF-8", int options = 1) {
boost::shared_ptr<xmlBuffer> buffer(xmlBufferCreate(), xmlFree);

xmlSaveCtxtPtr savectx = xmlSaveToBuffer(
buffer.get(),
encoding.c_str(),
options);

xmlSaveDoc(savectx, x.checked_get());
if (xmlSaveClose(savectx) == -1) {
stop("Error writing to buffer");
}
return Xml2String(buffer->content).asRString();
}

// [[Rcpp::export]]
void node_write_file(XPtrNode x, std::string path, std::string encoding = "UTF-8", int options = 1) {
xmlSaveCtxtPtr savectx = xmlSaveToFilename(
path.c_str(),
encoding.c_str(),
Expand Down Expand Up @@ -143,27 +141,17 @@ void node_write_connection(XPtrNode x, SEXP connection, std::string encoding = "
}

// [[Rcpp::export]]
CharacterVector node_format_xml(XPtrDoc doc, XPtrNode node,
bool format = true,
int indent = 0) {
CharacterVector node_write_character(XPtrNode x, std::string encoding = "UTF-8", int options = 1) {
boost::shared_ptr<xmlBuffer> buffer(xmlBufferCreate(), xmlFree);
xmlNodeDump(buffer.get(), doc.checked_get(), node.checked_get(), indent, format);

return Xml2String(buffer->content).asRString();
}
// [[Rcpp::export]]
CharacterVector node_format_html(XPtrDoc doc, XPtrNode node,
bool format = true) {
boost::shared_ptr<xmlBuffer> buffer(xmlBufferCreate(), xmlBufferFree);
xmlOutputBuffer *outputBuffer = xmlOutputBufferCreateBuffer( buffer.get(), NULL );

htmlNodeDumpFormatOutput(
outputBuffer,
doc.checked_get(),
node.checked_get(),
NULL,
format ? 1 : 0
);
xmlSaveCtxtPtr savectx = xmlSaveToBuffer(
buffer.get(),
encoding.c_str(),
options);

xmlSaveTree(savectx, x.checked_get());
if (xmlSaveClose(savectx) == -1) {
stop("Error writing to buffer");
}
return Xml2String(buffer->content).asRString();
}
4 changes: 2 additions & 2 deletions tests/testthat/output/print-xml_document.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{xml_document}
<html xmlns:og="http://ogp.me/ns#" xmlns:fb="http://www.facebook.com/2008/fbml">
[1] <head>\n <script type="text/javascript"><![CDATA[var ue_t0=window.ue_t0| ...
[2] <body id="styleguide-v2" class="fixed">\n<script><![CDATA[\n if (typeo ...
[1] <head>\n<script type="text/javascript">var ue_t0=window.ue_t0||+new Date( ...
[2] <body id="styleguide-v2" class="fixed">\n<script>\n if (typeof uet == ...
20 changes: 10 additions & 10 deletions tests/testthat/output/print-xml_node.txt
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
{xml_node}
<body id="styleguide-v2" class="fixed">
[1] <script><![CDATA[\n if (typeof uet == 'function') {\n uet("bb"); ...
[2] <script><![CDATA[\n if ('csm' in window) {\n csm.measure('csm_bo ...
[1] <script>\n if (typeof uet == 'function') {\n uet("bb");\n }\n ...
[2] <script>\n if ('csm' in window) {\n csm.measure('csm_body_delive ...
[3] <div id="wrapper">\n <div id="root" class="redesign">\n<scrip ...
[4] <script type="text/javascript" src="http://ia.media-imdb.com/images/G/01 ...
[5] <script type="text/imdblogin-js" id="login"><![CDATA[\njQuery(document). ...
[6] <script type="text/javascript"><![CDATA[\n jQuery(\n ...
[5] <script type="text/imdblogin-js" id="login">\njQuery(document).ready(fun ...
[6] <script type="text/javascript">\n jQuery(\n ...
[7] <iframe id="sis_pixel_sitewide" width="1" height="1" frameborder="0" mar ...
[8] <script><![CDATA[\n setTimeout(function(){\n try{\n ...
[8] <script>\n setTimeout(function(){\n try{\n //sis3.0 ...
[9] <script type="text/javascript" src="http://ia.media-imdb.com/images/G/01 ...
[10] <script type="text/javascript"><![CDATA[\nif(window.COMSCORE){\nCOMSCORE ...
[10] <script type="text/javascript">\nif(window.COMSCORE){\nCOMSCORE.beacon({ ...
[11] <noscript>\n<img src="http://b.scorecardresearch.com/p?c1=2&amp;c2=60349 ...
[12] <script><![CDATA[\n doWithAds(function(){\n (new Image()).src ...
[13] <script><![CDATA[\n(function(){\n var readyTimeout = setInterval(func ...
[14] <div id="servertime" time="235"/>
[15] <script><![CDATA[\n if (typeof uet == 'function') {\n uet("be"); ...
[12] <script>\n doWithAds(function(){\n (new Image()).src = "http:/ ...
[13] <script>\n(function(){\n var readyTimeout = setInterval(function(){\n ...
[14] <div id="servertime" time="235"></div>
[15] <script>\n if (typeof uet == 'function') {\n uet("be");\n }\n ...
8 changes: 4 additions & 4 deletions tests/testthat/output/print-xml_nodeset.txt
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
{xml_nodeset (10)}
[1] <div id="wrapper">\n <div id="root" class="redesign">\n<scrip ...
[2] <div id="root" class="redesign">\n<script><![CDATA[\n if (typeof uet ...
[2] <div id="root" class="redesign">\n<script>\n if (typeof uet == 'funct ...
[3] <div id="nb20" class="navbarSprite">\n<div id="supertab">\t\n\t<!-- begi ...
[4] <div id="supertab">\t\n\t<!-- begin TOP_AD -->\n<div id="top_ad_wrapper" ...
[5] <div id="top_ad_wrapper" class="dfp_slot">\n<script type="text/javascrip ...
[6] <div id="top_ad_reflow_helper"/>
[6] <div id="top_ad_reflow_helper"></div>
[7] <div id="navbar" class="navbarSprite">\n<noscript>\n <link rel="stylesh ...
[8] <div id="nb_search">\n <noscript><div id="more_if_no_javascript"><a h ...
[9] <div id="more_if_no_javascript">\n <a href="/search/">More</a>\n</div>
[10] <div class="magnifyingglass navbarSprite"/>
[9] <div id="more_if_no_javascript"><a href="/search/">More</a></div>
[10] <div class="magnifyingglass navbarSprite"></div>
Loading

0 comments on commit 1a13387

Please sign in to comment.