Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[misc] attempt to reduce the number of variables in cc #1253

Merged
merged 4 commits into from
Jan 29, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 0 additions & 10 deletions R/class-sheet-data.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,16 +34,6 @@ wb_sheet_data <- function() {

# helpers -----------------------------------------------------------------

# Consider making some helpers for the cc stuff.

# Create the skeleton of the `cc` (cell) table with the full, fixed set of
# cell columns.
#
# `n` is forwarded unchanged to create_char_dataframe(); presumably it is the
# number of rows of the returned data frame -- confirm against that helper.
empty_sheet_data_cc <- function(n) {
  # fixed column layout: cell reference parts, cell attributes, then the
  # value / formula / inline-string fields and the internal type marker
  cc_cols <- c(
    "r", "row_r", "c_r", "c_s", "c_t", "c_cm", "c_ph", "c_vm",
    "v", "f", "f_attr", "is", "typ"
  )

  create_char_dataframe(colnames = cc_cols, n = n)
}

empty_row_attr <- function(n) {
create_char_dataframe(
colnames = c("collapsed", "customFormat", "customHeight", "x14ac:dyDescent",
Expand Down
2 changes: 1 addition & 1 deletion R/class-workbook.R
Original file line number Diff line number Diff line change
Expand Up @@ -4042,7 +4042,7 @@ wbWorkbook <- R6::R6Class(

if (as_ref) {
from_sheet_name <- self$get_sheet_names(escape = TRUE)[[from_sheet]]
to_cc[c("c_t", "c_cm", "c_ph", "c_vm", "v", "f", "f_attr", "is")] <- ""
to_cc[names(to_cc) %in% c("c_t", "c_cm", "c_ph", "c_vm", "v", "f", "f_attr", "is")] <- ""
to_cc[c("f")] <- paste0(shQuote(from_sheet_name, type = "sh"), "!", from_dims)
}

Expand Down
4 changes: 2 additions & 2 deletions R/class-worksheet.R
Original file line number Diff line number Diff line change
Expand Up @@ -638,10 +638,10 @@ wbWorksheet <- R6::R6Class(

if (characters)
cc[sel & cc$c_t %in% c("inlineStr", "s", "str"),
c("c_t", "c_ph", "v", "f", "f_attr", "is")] <- ""
names(cc) %in% c("c_t", "c_ph", "v", "f", "f_attr", "is")] <- ""

if (styles)
cc[sel, c("c_s", "c_cm", "c_vm")] <- ""
cc[sel, names(cc) %in% c("c_s", "c_cm", "c_vm")] <- ""

self$sheet_data$cc <- cc

Expand Down
29 changes: 17 additions & 12 deletions R/write.R
Original file line number Diff line number Diff line change
Expand Up @@ -90,15 +90,11 @@ inner_update <- function(
na.strings <- NULL
}

if (removeCellStyle) {
cell_style <- "c_s"
} else {
cell_style <- NULL
replacement <- names(cc)
if (!removeCellStyle) {
replacement <- replacement[-which(replacement == "c_s")]
}

replacement <- c("r", cell_style, "c_t", "c_cm", "c_ph", "c_vm", "v",
"f", "f_attr", "is", "typ")

sel <- match(x$r, cc$r)

# to avoid bricking the worksheet, we make sure that we do not overwrite the
Expand Down Expand Up @@ -148,9 +144,10 @@ inner_update <- function(
initialize_cell <- function(wb, sheet, new_cells) {

sheet_id <- wb$validate_sheet(sheet)
nms <- names(wb$worksheets[[sheet_id]]$sheet_data$cc)

# create artificial cc for the missing cells
x <- empty_sheet_data_cc(n = length(new_cells))
x <- create_char_dataframe(n = length(new_cells), colnames = nms)
x$r <- new_cells
x$row_r <- gsub("[[:upper:]]", "", new_cells)
x$c_r <- gsub("[[:digit:]]", "", new_cells)
Expand Down Expand Up @@ -417,8 +414,15 @@ write_data2 <- function(
rows_attr$r <- rownames(rtyp)

# original cc data frame
cc <- empty_sheet_data_cc(n = nrow(data) * ncol(data))

has_cm <- if (any(dc == openxlsx2_celltype[["cm_formula"]])) "c_cm" else NULL
nms <- c(
"r", "row_r", "c_r", "c_s", "c_t", has_cm,
"v", "f", "f_attr", "is", "typ"
)
cc <- create_char_dataframe(
colnames = nms,
n = nrow(data) * ncol(data)
)

sel <- which(dc == openxlsx2_celltype[["logical"]])
for (i in sel) {
Expand Down Expand Up @@ -524,13 +528,14 @@ write_data2 <- function(

int_si <- max(int_si, -1L) + 1L

cc$f_attr <- sprintf("t=\"%s\"", "shared")
cc[["f_attr"]] <- sprintf("t=\"%s\"", "shared")
cc[1, "f_attr"] <- paste(cc[1, "f_attr"], sprintf("ref=\"%s\"", dims))
cc[["f_attr"]] <- paste(cc[["f_attr"]], sprintf("si=\"%s\"", int_si))
cc[2:nrow(cc), "f"] <- ""
cc$f_attr <- paste(cc$f_attr, sprintf("si=\"%s\"", int_si))
}

if (is.null(wb$worksheets[[sheetno]]$sheet_data$cc)) {
# message("write_cell()")

wb$worksheets[[sheetno]]$dimension <- paste0("<dimension ref=\"", dims, "\"/>")

Expand Down
6 changes: 5 additions & 1 deletion src/helper_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -397,10 +397,14 @@ void wide_to_long(

int32_t in_string_nums = string_nums;

bool has_cm = zz.containsElementNamed("c_cm");

// pointer magic. even though these are extracted, they just point to the
// memory in the data frame
Rcpp::CharacterVector zz_c_cm;

Rcpp::CharacterVector zz_row_r = Rcpp::as<Rcpp::CharacterVector>(zz["row_r"]);
Rcpp::CharacterVector zz_c_cm = Rcpp::as<Rcpp::CharacterVector>(zz["c_cm"]);
if (has_cm) zz_c_cm = Rcpp::as<Rcpp::CharacterVector>(zz["c_cm"]);
Rcpp::CharacterVector zz_c_r = Rcpp::as<Rcpp::CharacterVector>(zz["c_r"]);
Rcpp::CharacterVector zz_v = Rcpp::as<Rcpp::CharacterVector>(zz["v"]);
Rcpp::CharacterVector zz_c_t = Rcpp::as<Rcpp::CharacterVector>(zz["c_t"]);
Expand Down
21 changes: 17 additions & 4 deletions src/load_workbook.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,8 @@ inline Rcpp::DataFrame row_to_df(XPtrXML doc) {
void loadvals(Rcpp::Environment sheet_data, XPtrXML doc) {
auto ws = doc->child("worksheet").child("sheetData");

bool has_cm = false, has_ph = false, has_vm = false;

// character
Rcpp::DataFrame row_attributes;

Expand Down Expand Up @@ -228,9 +230,18 @@ void loadvals(Rcpp::Environment sheet_data, XPtrXML doc) {

if (attr_name == s_str) single_xml_col.c_s = buffer;
if (attr_name == t_str) single_xml_col.c_t = buffer;
if (attr_name == cm_str) single_xml_col.c_cm = buffer;
if (attr_name == ph_str) single_xml_col.c_ph = buffer;
if (attr_name == vm_str) single_xml_col.c_vm = buffer;
if (attr_name == cm_str) {
has_cm = true;
single_xml_col.c_cm = buffer;
}
if (attr_name == ph_str) {
has_ph = true;
single_xml_col.c_ph = buffer;
}
if (attr_name == vm_str) {
has_vm = true;
single_xml_col.c_vm = buffer;
}
}

// some files have no colnames. in this case we need to add c_r and row_r
Expand Down Expand Up @@ -283,6 +294,8 @@ void loadvals(Rcpp::Environment sheet_data, XPtrXML doc) {
++itr_rows;
}

// Rcpp::Rcout << has_cm << ": " << has_ph << ": " << has_vm << std::endl;

sheet_data["row_attr"] = row_attributes;
sheet_data["cc"] = Rcpp::wrap(xml_cols);
sheet_data["cc"] = xml_cols_to_df(xml_cols, has_cm, has_ph, has_vm);
}
181 changes: 181 additions & 0 deletions src/openxlsx2.h
Original file line number Diff line number Diff line change
Expand Up @@ -121,3 +121,184 @@ static inline bool validate_dims(const std::string& input) {

return has_col && has_row;
}

// Convert a vector of parsed cells (`xml_col` structs) into an R data.frame
// with one character column per struct field.
//
// Parameters:
//   x       parsed cells, one element per output row
//   has_cm  if true, a "c_cm" column is included in the result
//   has_ph  if true, a "c_ph" column is included in the result
//   has_vm  if true, a "c_vm" column is included in the result
//
// Returns a data.frame (as SEXP) with the columns, in this order:
//   r, row_r, c_r, c_s, c_t, [c_cm], [c_ph], [c_vm], v, f, f_attr, is
// where the bracketed columns are present only when the matching flag is set.
//
// The data frame is assembled once from a column list instead of through one
// hand-written DataFrame::create() call per flag combination, so a further
// optional column needs a single extra line rather than doubling the branches.
inline SEXP xml_cols_to_df(const std::vector<xml_col>& x, bool has_cm, bool has_ph, bool has_vm) {
  R_xlen_t n = static_cast<R_xlen_t>(x.size());

  // Vector structure identical to xml_col from openxlsx2_types.h
  // NOTE(review): elements that are never assigned in the loop below keep
  // whatever no_init() leaves behind; presumably blank strings for character
  // vectors -- confirm.
  Rcpp::CharacterVector r(Rcpp::no_init(n));       // cell name: A1, A2 ...
  Rcpp::CharacterVector row_r(Rcpp::no_init(n));   // row name: 1, 2, ..., 9999

  Rcpp::CharacterVector c_r(Rcpp::no_init(n));     // col name: A, B, ..., ZZ
  Rcpp::CharacterVector c_s(Rcpp::no_init(n));     // cell style
  Rcpp::CharacterVector c_t(Rcpp::no_init(n));     // cell type

  // optional attribute columns: only allocated when requested
  Rcpp::CharacterVector c_cm, c_ph, c_vm;
  if (has_cm) c_cm = Rcpp::CharacterVector(Rcpp::no_init(n));
  if (has_ph) c_ph = Rcpp::CharacterVector(Rcpp::no_init(n));
  if (has_vm) c_vm = Rcpp::CharacterVector(Rcpp::no_init(n));

  Rcpp::CharacterVector v(Rcpp::no_init(n));       // <v> tag
  Rcpp::CharacterVector f(Rcpp::no_init(n));       // <f> tag
  Rcpp::CharacterVector f_attr(Rcpp::no_init(n));  // <f /> attributes
  Rcpp::CharacterVector is(Rcpp::no_init(n));      // <is> tag

  // struct to vector
  // We have to convert utf8 inputs via Rcpp::String for non unicode R sessions
  // Ideally there would be a function that calls Rcpp::String only if needed
  for (R_xlen_t i = 0; i < n; ++i) {
    const xml_col& cell = x[static_cast<size_t>(i)];

    if (!cell.r.empty())     r[i]     = std::string(cell.r);
    if (!cell.row_r.empty()) row_r[i] = std::string(cell.row_r);
    if (!cell.c_r.empty())   c_r[i]   = std::string(cell.c_r);
    if (!cell.c_s.empty())   c_s[i]   = std::string(cell.c_s);
    if (!cell.c_t.empty())   c_t[i]   = std::string(cell.c_t);
    if (has_cm && !cell.c_cm.empty()) c_cm[i] = std::string(cell.c_cm);
    if (has_ph && !cell.c_ph.empty()) c_ph[i] = Rcpp::String(cell.c_ph);
    if (has_vm && !cell.c_vm.empty()) c_vm[i] = std::string(cell.c_vm);
    if (!cell.v.empty()) {  // can only be utf8 if c_t = "str"
      if (cell.c_t.empty() && cell.f_attr.empty())
        v[i] = std::string(cell.v);
      else
        v[i] = Rcpp::String(cell.v);
    }
    if (!cell.f.empty())      f[i]      = Rcpp::String(cell.f);
    if (!cell.f_attr.empty()) f_attr[i] = std::string(cell.f_attr);
    if (!cell.is.empty())     is[i]     = Rcpp::String(cell.is);
  }

  // nine columns are always present, plus one per requested optional column
  const R_xlen_t ncol = 9 + (has_cm ? 1 : 0) + (has_ph ? 1 : 0) + (has_vm ? 1 : 0);

  Rcpp::List df(ncol);
  Rcpp::CharacterVector col_names(ncol);
  R_xlen_t pos = 0;

  // append a column and its name at the next free position
  auto add_col = [&](const char* name, const Rcpp::CharacterVector& col) {
    df[pos] = col;
    col_names[pos] = name;
    ++pos;
  };

  add_col("r", r);
  add_col("row_r", row_r);
  add_col("c_r", c_r);
  add_col("c_s", c_s);
  add_col("c_t", c_t);
  if (has_cm) add_col("c_cm", c_cm);
  if (has_ph) add_col("c_ph", c_ph);
  if (has_vm) add_col("c_vm", c_vm);
  add_col("v", v);
  add_col("f", f);
  add_col("f_attr", f_attr);
  add_col("is", is);

  // Turn the named list into a data.frame by setting its attributes directly.
  // The columns stay character vectors, matching stringsAsFactors = false.
  df.attr("names") = col_names;
  if (n > 0) {
    // compact "automatic" row names for n rows: c(NA_integer_, -n)
    df.attr("row.names") = Rcpp::IntegerVector::create(NA_INTEGER, -static_cast<int>(n));
  } else {
    df.attr("row.names") = Rcpp::IntegerVector(0);
  }
  df.attr("class") = "data.frame";

  return df;
}
Loading
Loading