Skip to content

Commit

Permalink
fix don't download models twice
Browse files Browse the repository at this point in the history
  • Loading branch information
pachadotdev committed Jul 28, 2024
1 parent c8acf14 commit daf4183
Showing 1 changed file with 19 additions and 6 deletions.
25 changes: 19 additions & 6 deletions R/tessdata.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,35 +37,48 @@
#' }
tesseract_download <- function(lang, datapath = NULL, best = FALSE, progress = interactive()) {
  # Fetch `<lang>.traineddata` from the tesseract-ocr GitHub organisation and
  # store it in `datapath`. Returns the path of the training-data file.
  # If the file is already present nothing is downloaded.
  #
  # lang:     single language code used to build the file name / URL.
  # datapath: existing directory to store the file in; when empty, the
  #           `datapath` element of tesseract_info() is used.
  # best:     TRUE selects the "tessdata_best" repository, anything else
  #           selects "tessdata_fast".
  # progress: TRUE enables the curl progress callback.

  # A vector of languages would produce a vector URL and break the scalar
  # `if` / curl calls below, so reject it up front with a clear error.
  stopifnot(is.character(lang), length(lang) == 1L)

  if (length(datapath) == 0L) {
    # NOTE(review): warn_on_linux() is defined elsewhere; presumably it warns
    # about writing into the system-wide data directory -- confirm.
    warn_on_linux()
    datapath <- tesseract_info()$datapath
  }

  # mustWork = TRUE: fail early when the target directory does not exist.
  datapath <- normalizePath(datapath, mustWork = TRUE)

  repo <- if (isTRUE(best)) "tessdata_best" else "tessdata_fast"
  release <- "4.1.0"

  url <- sprintf(
    "https://github.com/tesseract-ocr/%s/raw/%s/%s.traineddata",
    repo, release, lang
  )
  destfile <- file.path(datapath, basename(url))

  # Check before touching the network so an existing model is never
  # downloaded a second time.
  if (file.exists(destfile)) {
    message("Training data already exists.")
    return(destfile)
  }

  show_progress <- isTRUE(progress)

  req <- curl::curl_fetch_memory(url, curl::new_handle(
    progressfunction = progress_fun,
    noprogress = !show_progress
  ))

  # Terminate the progress line printed by the callback.
  if (show_progress) {
    cat("\n")
  }

  if (req$status_code != 200) {
    stop("Download failed: HTTP ", req$status_code, call. = FALSE)
  }

  # Existence was ruled out above, so write exactly once.
  writeBin(req$content, destfile)

  destfile
}

Expand Down

0 comments on commit daf4183

Please sign in to comment.