diff --git a/DESCRIPTION b/DESCRIPTION
index 3f1e936..03bbcb1 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -24,8 +24,12 @@ Depends:
Imports:
data.table (>= 1.12.8),
flashpcaR (>= 2.1),
+ fs (>= 1.4.1),
+ ggforce (>= 0.3.2),
ggplot2 (>= 3.3.2),
- gt (>= 0.2.1)
+ ggtext (>= 0.1.0),
+ gt (>= 0.2.1),
+ patchwork (>= 1.0.1)
Suggests:
roxygen2 (>= 7.1.0)
Remotes:
diff --git a/NAMESPACE b/NAMESPACE
index beda532..709f9a5 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -6,3 +6,4 @@ export(pca_report)
import(data.table)
import(ggplot2)
import(gt)
+import(patchwork)
diff --git a/NEWS.md b/NEWS.md
index 066ebc5..abc2327 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,9 +1,13 @@
# rain (development version)
-## Breaking CHanges
+## Breaking Changes
* In `R/pca_report.R`,
- + Code refactoring.
+ + Code refactoring using `data.table`.
+* In `R/compute_pca.R`,
+ + Code refactoring using `data.table`.
+ + Tweak ethnicity figure.
+ + Add an ethnicity figure focussing on the studied cohort.
# rain 0.4.1
diff --git a/R/check_bin.R b/R/check_bin.R
index 76483d5..5824ba8 100644
--- a/R/check_bin.R
+++ b/R/check_bin.R
@@ -6,18 +6,17 @@
check_bin <- function(bin_path) {
if (!all(sapply(bin_path, file.exists))) {
message_prefix <- "[rain] "
+ text <- paste0(
+ names(bin_path[!sapply(bin_path, file.exists)]),
+ ' ("', bin_path[!sapply(bin_path, file.exists)], '")'
+ )
stop(
message_prefix,
paste0(
"No binary found for the following tools: ",
- glue::glue_collapse(
- x = paste0(
- names(bin_path[!sapply(bin_path, file.exists)]),
- ' ("', bin_path[!sapply(bin_path, file.exists)], '")'
- ),
- sep = ", ",
- last = " and "
- ),
+ paste(text[-length(text)], collapse = ", "),
+ " and ",
+ text[length(text)],
"!"
)
)
diff --git a/R/compute_pca.R b/R/compute_pca.R
index bfafe39..912503f 100644
--- a/R/compute_pca.R
+++ b/R/compute_pca.R
@@ -5,170 +5,254 @@
#'
#' @return A `data.frame`.
#'
+#' @import data.table
+#' @import ggplot2
+#' @import gt
+#' @import patchwork
+#'
#' @export
-compute_pca <- function(cohort_name, input_plink, output_directory, ref1kg_population) {
+compute_pca <- function(cohort_name, input_plink, output_directory, ref1kg_population, n_comp = 10) {
message_prefix <- "[rain] "
- ######################
- ### Performing PCA ###
- ######################
- message(message_prefix, "Performing PCA ...")
+ message(message_prefix, "Performing PCA ...", appendLF = TRUE)
- fid_iid <- readr::read_delim(
+ fid_iid <- data.table::fread(
file = paste0(input_plink, ".fam"),
- delim = " ",
- col_types = readr::cols_only(
- X1 = readr::col_character(),
- X2 = readr::col_character()
- ),
- col_names = FALSE
+ colClasses = c("V1" = "character", "V2" = "character"),
+ header = FALSE
)
- pca_ndim <- 10
+ pca_res <- flashpcaR::flashpca(input_plink, ndim = n_comp)
+
+ pca_contrib <- sprintf(
+ fmt = "PC%02d (%s %%)",
+ 1:n_comp,
+ format(pca_res$values / sum(pca_res$values) * 100, digits = 2, nsmall = 2, trim = TRUE)
+ )
+ names(pca_contrib) <- sprintf("PC%02d", 1:n_comp)
- pca_res <- flashpcaR::flashpca(input_plink, ndim = pca_ndim)
- pca_dfxy <- data.table::as.data.table(pca_res[["projection"]])
+ pca_dfxy <- data.table::as.data.table(pca_res[["projection"]], keep.rownames = "iid")
data.table::setnames(
x = pca_dfxy,
- old = setdiff(names(pca_dfxy), id_var),
- new = sprintf("PC%02d", as.numeric(gsub("V", "", setdiff(names(pca_dfxy), id_var))))
+ old = setdiff(names(pca_dfxy), "iid"),
+ new = sprintf("PC%02d", as.numeric(gsub("V", "", setdiff(names(pca_dfxy), "iid"))))
)
if (all.equal(fid_iid[[1]], fid_iid[[2]])) {
- pca_dfxy[["sample"]] <- fid_iid[[2]]
+ pca_dfxy[, "iid" := fid_iid[[2]]]
} else {
- pca_dfxy[["sample"]] <- paste(fid_iid[[1]], fid_iid[[2]], sep = "/")
+ pca_dfxy[, "iid" := paste(fid_iid[[1]], fid_iid[[2]], sep = "/")]
}
- pca_dfxy <- pca_dfxy %>%
- dplyr::left_join(
- y = suppressWarnings(
- readr::read_tsv(
- file = ref1kg_population,
- col_types = readr::cols_only(
- sample = readr::col_character(),
- pop = readr::col_character(),
- super_pop = readr::col_character()
- )
- )
- ),
- by = "sample"
- ) %>%
- dplyr::mutate(
- cohort = ifelse(is.na(.data[["pop"]]), cohort_name, "1,000 Genomes"),
- pop = ifelse(is.na(.data[["pop"]]), cohort_name, .data[["pop"]]),
- super_pop = ifelse(is.na(.data[["super_pop"]]), cohort_name, .data[["super_pop"]])
- ) %>%
- dplyr::mutate_at(
- .vars = dplyr::vars(.data[["cohort"]], .data[["pop"]], .data[["super_pop"]]),
- .funs = ~ stats::relevel(factor(.x), ref = cohort_name)
- ) %>%
- dplyr::select("sample", dplyr::everything())
-
- pop_centre <- purrr::map_df(c("super_pop", "pop") , function(ipop) {
- pca_dfxy %>%
- dplyr::filter(.data[["cohort"]] != !!cohort_name) %>%
- dplyr::select(-"cohort") %>%
- dplyr::group_by(.data[[ipop]]) %>%
- dplyr::summarise(
- PC01_centre = mean(.data[["PC01"]]),
- PC02_centre = mean(.data[["PC02"]])
- ) %>%
- dplyr::mutate(pop_type = ipop) %>%
- dplyr::rename("pop_closest" = !!ipop) %>%
- dplyr::mutate_if(.predicate = is.factor, .funs = as.character)
- })
+ ref_pop_table <- data.table::fread(
+ file = ref1kg_population,
+ colClasses = "character",
+ header = FALSE,
+ col.names = c("iid", "pop", "super_pop", "sex")
+ )
- pca_gg_pred_best <- pca_dfxy %>%
- dplyr::select(-c(.data[["pop"]], .data[["super_pop"]])) %>%
- dplyr::filter(.data[["cohort"]] == !!cohort_name) %>%
- dplyr::mutate(pop_centre = list(pop_centre)) %>%
- tidyr::unnest("pop_centre") %>%
- dplyr::group_by(.data[["sample"]], .data[["pop_type"]]) %>%
- dplyr::mutate(
- pop_dist = purrr::pmap_dbl(
- .l = list(
- .x = .data[["PC01"]], .y = .data[["PC02"]],
- .x_centre = .data[["PC01_centre"]], .y_centre = .data[["PC02_centre"]]
- ),
- .f = function(.x, .y, .x_centre, .y_centre) {
- sqrt((.x - .x_centre)^2 + (.y - .y_centre)^2)
- }
+ pca_dfxy <- merge(
+ x = pca_dfxy,
+ y = ref_pop_table[, "cohort" := "1,000 Genomes"],
+ by = "iid",
+ all = TRUE
+ )
+
+ cohort <- NULL
+ pca_dfxy[!cohort %in% "1,000 Genomes", (c("pop", "super_pop", "cohort")) := list(cohort_name, cohort_name, cohort_name)]
+ pca_dfxy[,
+ (c("pop", "super_pop", "cohort")) := lapply(
+ X = .SD,
+ FUN = function(x) factor(x, levels = c(cohort_name, sort(setdiff(x, cohort_name))))
+ ),
+ .SDcols = c("pop", "super_pop", "cohort")
+ ]
+
+ pop_centre <- pca_dfxy[
+ cohort %in% "1,000 Genomes",
+ lapply(.SD, mean),
+ .SDcols = sprintf("PC%02d", 1:2),
+ by = "pop"
+ ]
+
+ PC01 <- PC02 <- dist <- which_closest <- pop <- super_pop <- NULL
+ pca_dfxy[,
+ "pop_closest" := (function(.x, .y) {
+ as.character(
+ pop_centre[,
+ list("dist" = sqrt((.x - PC01)^2 + (.y - PC02)^2)),
+ by = "pop"
+ ][,
+ "which_closest" := dist == min(dist)
+ ][
+ (which_closest)
+ ][["pop"]]
)
- ) %>%
- dplyr::slice(which.min(.data[["pop_dist"]])) %>%
- dplyr::ungroup() %>%
- dplyr::select(-dplyr::ends_with("_centre"), -.data[["pop_dist"]]) %>%
- tidyr::pivot_wider(names_from = "pop_type", values_from = "pop_closest")
-
- p <- purrr::map(.x = c("pop" = "pop", "super_pop" = "super_pop"), .f = function(ipop) {
- ggplot2::ggplot(
- data = pca_dfxy,
- mapping = ggplot2::aes(x = .data[["PC01"]], y = .data[["PC02"]], colour = .data[[ipop]])
- ) +
- ggplot2::theme_light(base_size = 12) +
- ggplot2::geom_hline(yintercept = 0, linetype = 1, size = 0.5, na.rm = TRUE) +
- ggplot2::geom_vline(xintercept = 0, linetype = 1, size = 0.5, na.rm = TRUE) +
- ggforce::geom_mark_ellipse(mapping = ggplot2::aes(fill = .data[[ipop]]), con.cap = 0, alpha = 0.1) +
- ggplot2::geom_point(mapping = ggplot2::aes(shape = .data[[ipop]]), na.rm = TRUE) +
- ggplot2::scale_colour_viridis_d(na.translate = FALSE, drop = FALSE, end = 0.9) +
- ggplot2::scale_fill_viridis_d(na.translate = FALSE, drop = FALSE, end = 0.9) +
- ggplot2::scale_shape_manual(values = c(3, rep(1, length(unique(pca_dfxy[[ipop]]))))) +
- ggplot2::labs(
- shape = c("super_pop" = "Super Population", "pop" = "Population")[ipop],
- colour = c("super_pop" = "Super Population", "pop" = "Population")[ipop],
- fill = c("super_pop" = "Super Population", "pop" = "Population")[ipop]
+ })(PC01, PC02),
+ by = "iid"
+ ]
+
+ pca_dfxy <- merge(
+ x = pca_dfxy,
+ y = unique(ref_pop_table[, list("pop_closest" = pop, "super_pop_closest" = super_pop)]),
+ by = "pop_closest",
+ all.x = TRUE
+ )
+
+ p <- lapply(
+ X = c("pop" = "pop", "super_pop" = "super_pop"),
+ FUN = function(ipop) {
+ ggplot2::ggplot(
+ data = pca_dfxy,
+ mapping = ggplot2::aes(x = .data[["PC01"]], y = .data[["PC02"]], colour = .data[[ipop]])
) +
- ggplot2::facet_grid(cols = ggplot2::vars(.data[["cohort"]])) +
- ggplot2::guides(
- colour = ggplot2::guide_legend(
- ncol = 2,
- label.theme = ggplot2::element_text(size = 8),
- keyheight = ggplot2::unit(9, "point")
+ ggplot2::theme_light(base_size = 11) +
+ ggplot2::geom_hline(yintercept = 0, linetype = 2, size = 0.5, na.rm = TRUE) +
+ ggplot2::geom_vline(xintercept = 0, linetype = 2, size = 0.5, na.rm = TRUE) +
+ ggforce::geom_mark_ellipse(mapping = ggplot2::aes(fill = .data[[ipop]]), con.cap = 0, alpha = 0.1) +
+ ggplot2::geom_point(mapping = ggplot2::aes(shape = .data[[ipop]]), na.rm = TRUE) +
+ ggplot2::scale_colour_viridis_d(na.translate = FALSE, drop = FALSE, end = 0.9) +
+ ggplot2::scale_fill_viridis_d(na.translate = FALSE, drop = FALSE, end = 0.9) +
+ ggplot2::scale_shape_manual(values = c(3, rep(1, length(unique(pca_dfxy[[ipop]]))))) +
+ ggplot2::labs(
+ x = pca_contrib["PC01"],
+ y = pca_contrib["PC02"],
+ shape = c("super_pop" = "Super Population", "pop" = "Population")[ipop],
+ colour = c("super_pop" = "Super Population", "pop" = "Population")[ipop],
+ fill = c("super_pop" = "Super Population", "pop" = "Population")[ipop]
+ ) +
+ ggplot2::facet_grid(cols = ggplot2::vars(.data[["cohort"]])) +
+ ggplot2::guides(
+ colour = ggplot2::guide_legend(
+ ncol = 2,
+ label.theme = ggplot2::element_text(size = 8),
+ keyheight = ggplot2::unit(9, "point")
+ )
)
- )
})
- p_zoom <- purrr::map(p, ~ .x + ggplot2::coord_cartesian(
- xlim = range(dplyr::filter(pca_dfxy, .data[["cohort"]] == !!cohort_name)[["PC01"]]),
- ylim = range(dplyr::filter(pca_dfxy, .data[["cohort"]] == !!cohort_name)[["PC02"]])
- ))
+ p_zoom <- lapply(
+ X = p,
+ FUN = function(.p) {
+ .p +
+ ggplot2::coord_cartesian(
+ xlim = range(pca_dfxy[!cohort %in% "1,000 Genomes"][["PC01"]]),
+ ylim = range(pca_dfxy[!cohort %in% "1,000 Genomes"][["PC02"]])
+ )
+ }
+ )
+
+ p_subtitle <- paste0(
+ "Principal Component Analysis using ",
+ format(nrow(data.table::fread(paste0(input_plink, ".bim"))), big.mark = ",", digits = 0),
+ " SNPs
",
+ "With A) population level and B) super population level"
+ )
p_ethni <- patchwork::wrap_plots(
- patchwork::wrap_plots(p[["super_pop"]], p_zoom[["super_pop"]]) +
- patchwork::plot_layout(tag_level = "new", guides = "collect"),
patchwork::wrap_plots(p[["pop"]], p_zoom[["pop"]]) +
+ patchwork::plot_layout(tag_level = "new", guides = "collect"),
+ patchwork::wrap_plots(p[["super_pop"]], p_zoom[["super_pop"]]) +
patchwork::plot_layout(tag_level = "new", guides = "collect")
) +
patchwork::plot_layout(nrow = 2, ncol = 1) +
patchwork::plot_annotation(
tag_levels = c("A", "1"),
- title = "Estimated Ethnicity",
- subtitle = paste(
- "Based on Principal Component Analysis using",
- scales::comma(nrow(data.table::fwrite(paste0(input_plink, ".bim")))),
- "SNPs."
+ title = "Ethnicty Inference Based On 1,000 Genomes Project Data",
+ subtitle = gsub("
W", " w", p_subtitle),
+ theme = ggplot2::theme(plot.title.position = "plot", plot.subtitle = ggtext::element_markdown())
+ )
+
+ p_cohort_base <- ggplot2::ggplot(
+ data = pca_dfxy[!cohort %in% "1,000 Genomes"],
+ mapping = ggplot2::aes(x = .data[["PC01"]], y = .data[["PC02"]])
+ ) +
+ ggplot2::theme_light(base_size = 11) +
+ ggplot2::geom_hline(yintercept = 0, linetype = 2, size = 0.5, na.rm = TRUE) +
+ ggplot2::geom_vline(xintercept = 0, linetype = 2, size = 0.5, na.rm = TRUE) +
+ ggplot2::scale_colour_viridis_d(na.translate = FALSE, drop = FALSE, begin = 0.10, end = 0.90) +
+ ggplot2::scale_fill_viridis_d(na.translate = FALSE, drop = FALSE, begin = 0.10, end = 0.90) +
+ ggplot2::guides(
+ colour = ggplot2::guide_legend(
+ ncol = ifelse(nrow(unique(pca_dfxy[!cohort %in% "1,000 Genomes", "pop_closest"])) > 5, 2, 1)
)
+ ) +
+ ggplot2::theme(plot.title.position = "plot") +
+ ggplot2::coord_cartesian(
+ xlim = range(pca_dfxy[!cohort %in% "1,000 Genomes"][["PC01"]]),
+ ylim = range(pca_dfxy[!cohort %in% "1,000 Genomes"][["PC02"]])
)
+ p_cohort <- patchwork::wrap_plots(
+ p_cohort_base +
+ ggplot2::geom_point(mapping = ggplot2::aes(colour = .data[["pop_closest"]]), shape = 1, na.rm = TRUE) +
+ ggforce::geom_mark_ellipse(
+ mapping = ggplot2::aes(
+ colour = .data[["pop_closest"]],
+ group = .data[["pop_closest"]]
+ ),
+ fill = "transparent",
+ alpha = 0.10
+ ) +
+ ggplot2::labs(
+ x = pca_contrib["PC01"],
+ y = pca_contrib["PC02"],
+ shape = "Population",
+ colour = "Population",
+ fill = "Population"
+ ),
+ p_cohort_base +
+ ggplot2::geom_point(mapping = ggplot2::aes(colour = .data[["super_pop_closest"]]), shape = 1, na.rm = TRUE) +
+ ggforce::geom_mark_ellipse(
+ mapping = ggplot2::aes(
+ colour = .data[["super_pop_closest"]],
+ group = .data[["super_pop_closest"]]
+ ),
+ fill = "transparent",
+ alpha = 0.10
+ ) +
+ ggplot2::labs(
+ x = pca_contrib["PC01"],
+ y = pca_contrib["PC02"],
+ shape = "Super Population",
+ colour = "Super Population",
+ fill = "Super Population"
+ )
+ ) +
+ patchwork::plot_layout(nrow = 2, ncol = 1) +
+ patchwork::plot_annotation(
+ tag_levels = "A",
+ title = "Ethnicty Inference Based On 1,000 Genomes Project Data",
+ subtitle = p_subtitle,
+ theme = ggplot2::theme(plot.title.position = "plot", plot.subtitle = ggtext::element_markdown())
+ )
- #################
- ### Exporting ###
- #################
message(message_prefix, "Exporting ...")
ggplot2::ggsave(
- filename = file.path(output_directory, paste0(cohort_name, "_ethnicity.pdf")),
+ filename = file.path(output_directory, paste0(cohort_name, "_ethnicity_1kg.pdf")),
plot = p_ethni,
width = 29.7 - 5,
height = 21 - 5,
units = "cm",
dpi = 120
)
+ ggplot2::ggsave(
+ filename = file.path(output_directory, paste0(cohort_name, "_ethnicity.pdf")),
+ plot = p_cohort,
+ width = 16,
+ height = 16,
+ units = "cm",
+ dpi = 120
+ )
invisible(
- readr::write_csv(
- x = pca_gg_pred_best,
- path = file.path(output_directory, paste0(cohort_name, "_ethnicity.csv"))
+ data.table::fwrite(
+ x = pca_dfxy[
+ !cohort %in% "1,000 Genomes",
+ .SD,
+ .SDcols = c("iid", sprintf("PC%02d", 1:n_comp), "cohort", "pop_closest", "super_pop_closest")
+ ],
+ file = file.path(output_directory, paste0(cohort_name, "_ethnicity.csv"))
)
)
}
diff --git a/R/estimate_ethnicity.R b/R/estimate_ethnicity.R
index bbf184a..3928402 100755
--- a/R/estimate_ethnicity.R
+++ b/R/estimate_ethnicity.R
@@ -16,6 +16,7 @@
#' + 'haploid'/'h': Treat half-calls as haploid/homozygous (the PLINK 1 file format does not distinguish between the two). This maximizes similarity between the VCF and BCF2 parsers.
#' + 'missing'/'m': Treat half-calls as missing (default).
#' + 'reference'/'r': Treat the missing part as reference.
+#' @param n_comp A `numeric`. The number of principal components to be computed.
#' @param n_cores An `integer`. The number of CPUs to use to estimate the ethnicity.
#' @param bin_path A `list(character)`. A list giving the binary path of
#' `vcftools`, `bcftools`, `bgzip`, `tabix` and `plink`.
@@ -35,6 +36,7 @@ estimate_ethnicity <- function(
quality_threshold = 0.9,
recode = "all",
vcf_half_call = "missing",
+ n_comp = 10,
n_cores = 6,
bin_path = list(
vcftools = "/usr/bin/vcftools",
@@ -129,7 +131,8 @@ estimate_ethnicity <- function(
cohort_name = cohort_name,
input_plink = file.path(output_directory, "all"),
output_directory = output_directory,
- ref1kg_population = ref1kg_population
+ ref1kg_population = ref1kg_population,
+ n_comp = n_comp
)
}
diff --git a/R/pca_report.R b/R/pca_report.R
index 98173d4..1413fdf 100755
--- a/R/pca_report.R
+++ b/R/pca_report.R
@@ -17,6 +17,7 @@
#' @import data.table
#' @import ggplot2
#' @import gt
+#' @import patchwork
#'
#' @examples
#'
@@ -101,14 +102,17 @@ pca_report <- function(
x = paste0(
.data[["x"]],
"
(",
- scales::percent_format(accuracy = 0.01, suffix = " %")(.data[["y"]]),
- ")"
+ format(.data[["y"]] * 100, digits = 2, nsmall = 2),
+ " %)"
),
y = .data[["y"]]
)
) +
- ggplot2::geom_col(width = 1, colour = "white", fill = scales::viridis_pal(begin = 0.5, end = 0.5)(1), na.rm = TRUE) +
- ggplot2::scale_y_continuous(labels = scales::percent_format(accuracy = 0.1, suffix = " %"), expand = ggplot2::expansion(mult = c(0, 0.05))) +
+ ggplot2::geom_col(width = 1, colour = "white", fill = "#21908CFF", na.rm = TRUE) +
+ ggplot2::scale_y_continuous(
+ labels = function(x) paste(format(x * 100, digits = 2, nsmall = 2), "%"),
+ expand = ggplot2::expansion(mult = c(0, 0.05))
+ ) +
ggplot2::labs(
x = "Principal Components",
y = "Contribution"
@@ -146,7 +150,7 @@ pca_report <- function(
label = gsub(
pattern = "(.*)e([-+]*)0*(.*)",
replacement = "\\1
×
10\\2\\3",
- x = scales::scientific(.data[["Pr(>F)"]], digits = 2)
+ x = format(.data[["Pr(>F)"]], digits = 2, nsmall = 2, scientific = TRUE)
)
),
colour = "white",
@@ -162,8 +166,12 @@ pca_report <- function(
labels = function(x) {
paste0(
x, "
(",
- scales::percent_format(accuracy = 0.01, suffix = " %")((pca_res$values / sum(pca_res$values))[as.numeric(gsub("PC", "", x))]),
- ")"
+ format(
+ x = (pca_res$values / sum(pca_res$values))[as.numeric(gsub("PC", "", x))] * 100,
+ digits = 2,
+ nsmall = 2
+ ),
+ " %)"
)
}
) +
diff --git a/man/compute_pca.Rd b/man/compute_pca.Rd
index d90f627..d551dbd 100644
--- a/man/compute_pca.Rd
+++ b/man/compute_pca.Rd
@@ -4,7 +4,13 @@
\alias{compute_pca}
\title{Compute the genomic components (and some figures) for ethnicity based on VCF files.}
\usage{
-compute_pca(cohort_name, input_plink, output_directory, ref1kg_population)
+compute_pca(
+ cohort_name,
+ input_plink,
+ output_directory,
+ ref1kg_population,
+ n_comp = 10
+)
}
\arguments{
\item{cohort_name}{A \code{character}. A name to describe the studied population compared to 1,000 Genomes.}
@@ -14,6 +20,8 @@ compute_pca(cohort_name, input_plink, output_directory, ref1kg_population)
\item{output_directory}{A \code{character}. The path where the data and figures is written.}
\item{ref1kg_population}{A \code{character}. A file which describe samples and their ethnicity.}
+
+\item{n_comp}{A \code{numeric}. The number of principal components to be computed.}
}
\value{
A \code{data.frame}.
diff --git a/man/estimate_ethnicity.Rd b/man/estimate_ethnicity.Rd
index ec1c81b..b29f980 100644
--- a/man/estimate_ethnicity.Rd
+++ b/man/estimate_ethnicity.Rd
@@ -17,6 +17,7 @@ estimate_ethnicity(
quality_threshold = 0.9,
recode = "all",
vcf_half_call = "missing",
+ n_comp = 10,
n_cores = 6,
bin_path = list(vcftools = "/usr/bin/vcftools", bcftools = "/usr/bin/bcftools", bgzip
= "/usr/bin/bgzip", tabix = "/usr/bin/tabix", plink = "/usr/bin/plink1.9")
@@ -51,6 +52,8 @@ estimate_ethnicity(
+ 'missing'/'m': Treat half-calls as missing (default).
+ 'reference'/'r': Treat the missing part as reference.}
+\item{n_comp}{A \code{numeric}. The number of principal components to be computed.}
+
\item{n_cores}{An \code{integer}. The number of CPUs to use to estimate the ethnicity.}
\item{bin_path}{A \code{list(character)}. A list giving the binary path of