Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Automated processing and page generation of glosario links in lessons #612

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,8 @@ Imports:
callr,
servr,
utils,
tools
tools,
stringr
Suggests:
testthat (>= 3.0.0),
covr,
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
export(build_episode_html)
export(build_episode_md)
export(build_handout)
export(build_glossary)
export(build_lesson)
export(check_lesson)
export(create_episode)
Expand Down
7 changes: 5 additions & 2 deletions R/build_episode.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
#' sidebar. The current episode will be replaced with an index of all the
#' chapters in the episode.
#' @param date the date the episode was last built.
#' @param glosario a dictionary of terms read in from Glosario glossary.yaml
#' on GitHub. Defaults to NULL.
#' @return `TRUE` if the page was successful, `FALSE` otherwise.
#' @export
#' @note this function is for internal use, but exported for those who know what
Expand Down Expand Up @@ -72,12 +74,13 @@
build_episode_html <- function(path_md, path_src = NULL,
page_back = "index.md", page_forward = "index.md",
pkg, quiet = FALSE, page_progress = NULL,
sidebar = NULL, date = NULL) {
sidebar = NULL, date = NULL, glosario = NULL) {
home <- get_source_path() %||% root_path(path_md)
this_lesson(home)
page_globals <- setup_page_globals()
slug <- get_slug(path_md)
body <- render_html(path_md, quiet = quiet)
body <- render_html(path_md, quiet = quiet, glosario = glosario)

if (body == "") {
# if there is nothing in the page then we build nothing.
return(NULL)
Expand Down
183 changes: 183 additions & 0 deletions R/build_glossary.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
# Fetch and parse the Glosario glossary definitions from GitHub.
#
# @param glosario the `glosario` value from the lesson configuration. `NULL`
#   or `FALSE` disables the feature; any other value triggers a download.
# @return a list of glossary entries parsed from glossary.yml, or `NULL` when
#   the feature is disabled, the request fails, or the server does not
#   respond with HTTP 200.
read_glosario_yaml <- function(glosario) {
  # isFALSE() is safe for non-logical config values, unlike `glosario == FALSE`
  if (is.null(glosario) || isFALSE(glosario)) {
    return(NULL)
  }
  # Load the glossary YAML file from the GitHub repository
  glosario_url <- "https://raw.githubusercontent.com/carpentries/glosario/main/glossary.yml"
  # A network failure should disable the glossary feature, not abort the
  # entire lesson build, so trap the error and fall through to NULL.
  glosario_response <- tryCatch(
    httr::GET(glosario_url),
    error = function(e) NULL
  )
  if (is.null(glosario_response) || httr::status_code(glosario_response) != 200) {
    return(NULL)
  }
  # Explicit encoding avoids locale-dependent parsing of the YAML payload
  yaml::yaml.load(httr::content(glosario_response, as = "text", encoding = "UTF-8"))
}

# Turn a Glosario term slug into a superscript-marked Markdown link.
#
# @param term a term slug extracted from a `{{ glosario.term }}` placeholder
# @param slugs the collection of valid slugs from the Glosario glossary
# @return a Markdown link to the glossary entry (in the lesson's configured
#   language) when `term` is a known slug, otherwise the bare term unchanged.
create_glosario_link <- function(term, slugs) {
  if (!(term %in% slugs)) {
    # Unknown terms pass through untouched.
    return(term)
  }
  lang <- this_metadata$get()[["lang"]]
  url <- paste0("https://glosario.carpentries.org/", lang, "/#", term)
  paste0("[^", term, "^](", url, ")")
}

# Replace `{{ glosario.term }}` placeholders in a markdown file with links to
# the Glosario glossary, rewriting the file in place.
#
# @param path_in path to the markdown file to process
# @param glosario parsed glossary from read_glosario_yaml(); NULL disables
#   processing and leaves the file untouched
# @param quiet currently unused; kept for signature compatibility
# @return the (possibly rewritten) input path, invisibly, so callers can
#   continue the normal lesson processing chain.
render_glosario_links <- function(path_in, glosario = NULL, quiet = FALSE) {
  if (!is.null(glosario)) {
    content <- readLines(path_in)

    slugs <- lapply(glosario, function(x) x$slug)
    glos_pattern <- "\\{\\{\\s*glosario\\.(\\w+)\\s*\\}\\}"

    # replace {{ glosario.term }} placeholders with glossary link URLs.
    # NOTE: the replacement function receives a *vector* of all matches in a
    # line, so the term extraction and link creation must be vectorized --
    # extracting a single element here would corrupt lines with multiple
    # placeholders.
    glosarioed_content <- stringr::str_replace_all(
      content,
      pattern = glos_pattern,
      replacement = function(matches) {
        terms <- stringr::str_match(matches, glos_pattern)[, 2]
        vapply(terms, create_glosario_link, character(1), slugs = slugs)
      }
    )

    # overwrite the file with the new content
    writeLines(glosarioed_content, path_in)
  }

  # the processed file path for subsequent lesson processing as usual
  invisible(path_in)
}

#' @rdname build_glossary
#' @export
build_glossary <- function(pkg, pages = NULL, quiet = FALSE) {
  # NOTE: the trailing comma previously present after `quiet = quiet` passed
  # an empty (missing) argument into build_glossary_page(); removed.
  build_glossary_page(
    pkg = pkg,
    pages = pages,
    # title = tr_computed("Glossary"),
    slug = "glossary",
    quiet = quiet
  )
}

#' Build a page for aggregating glosario links found in lesson elements
#'
#' @inheritParams provision_agg_page
#' @param pages output from the function [read_all_html()]: a nested list of
#'   `xml_document` objects representing episodes in the lesson
#' @param title the title for the aggregate page. Defaults to
#'   "Glosario Links".
#' @param slug the slug (file name without extension) for the output page.
#' @param aggregate a selector for the lesson content you want to aggregate.
#'   The default is "*", which will aggregate links from all content.
#'   To grab only links from sections, use "section".
#' @param append a selector for the section of the page where the aggregate data
#'   should be placed. This defaults to "self::node()", which indicates that the
#'   entire page should be appended.
#' @param quiet if `TRUE`, no messages will be emitted. If FALSE, pkgdown will
#'   report creation of the temporary file.
#' @return NULL, invisibly. This is called for its side-effect
#'
#' @details
#'
#' We programmatically search through lesson content to find links that point to
#' glosario terms. We then aggregate these links into a single page.
#'
#' To customise the page, we need a few things:
#'
#' 1. a title
#' 2. a slug
#' @note
#' This function assumes that you have already built all the episodes of your lesson.
#'
#' @keywords internal
#' @rdname build_glossary
#' @examples
#' if (FALSE) {
#' # build_glossary_page() assumes that your lesson has been built and takes in a
#' # pkgdown object, which can be created from the `site/` folder in your
#' # lesson.
#' lsn <- "/path/to/my/lesson"
#' pkg <- pkgdown::as_pkgdown(fs::path(lsn, "site"))
#'
#' htmls <- read_all_html(pkg$dst_path)
#' build_glossary_page(pkg, htmls, quiet = FALSE)
#' }
build_glossary_page <- function(pkg, pages, title = "Glosario Links", slug = NULL, aggregate = "*", append = "self::node()", quiet = FALSE) {
  path <- get_source_path() %||% root_path(pkg$src_path)
  out_path <- pkg$dst_path
  this_lesson(path)

  # provision a fresh aggregate page and locate the section that will hold
  # the list of glossary links
  agg <- provision_agg_page(pkg, title = title, slug = slug, new = TRUE)
  agg_sect <- xml2::xml_find_first(agg$learner, ".//section[@id='glossary']")
  agg_ul <- xml2::xml_add_child(agg_sect, "ul", id = "glosario-list")

  the_episodes <- .resources$get()[["episodes"]]
  the_slugs <- get_slug(the_episodes)

  # vector to hold all unique links from all episodes
  glinks <- character()

  # seq_along() is safe for zero-length episode lists, unlike seq()
  for (episode in seq_along(the_episodes)) {
    ep_learn <- ep_instruct <- the_episodes[episode]
    ename <- the_slugs[episode]

    if (!is.null(pages)) {
      ep_learn <- pages$learner[[ename]]
      ep_instruct <- pages$instructor[[ename]]
    }

    # collect glosario links from both the learner and instructor views
    ep_learn <- get_content(ep_learn, content = aggregate, pkg = pkg)
    ep_learn_glinks <- get_glossary_links(ep_learn)

    ep_instruct <- get_content(ep_instruct, content = aggregate, pkg = pkg, instructor = TRUE)
    ep_instruct_glinks <- get_glossary_links(ep_instruct)

    # append the unique set of links from both views to the global collection
    glinks <- unique(c(glinks, ep_learn_glinks, ep_instruct_glinks))
  }

  glinks <- sort(glinks)

  # Iterate over glinks to create HTML elements
  for (link in glinks) {
    # the term slug is everything after the fragment marker (#)
    term <- stringr::str_extract(link, "#(.*)")
    term <- stringr::str_replace(term, "#", "")
    agg_li <- xml2::xml_add_child(agg_ul, "li")
    xml2::xml_add_child(agg_li, "a", term, href = link)
  }

  glos_out <- fs::path(out_path, as_html(slug))
  report <- "Writing '{.file {glos_out}}'"
  if (!quiet) cli::cli_text(report)
  writeLines(as.character(agg$learner), glos_out)
  # documented contract: NULL, invisibly
  invisible(NULL)
}

# Find all links to the Glosario glossary in an episode, localise them to the
# lesson language, and return the localised hrefs.
#
# @param episode an `xml_document` representing a built episode page
# @return a character vector of the (localised) glosario hrefs found in the
#   episode, invisibly. As a side effect, the href attributes in `episode`
#   are rewritten in place to point at the lesson language.
get_glossary_links <- function(episode) {
  lang <- this_metadata$get()[["lang"]]
  links <- xml2::xml_find_all(episode, ".//a")
  hrefs <- xml2::xml_attr(links, "href")
  # guard against anchors with no href (xml_attr returns NA), which would
  # otherwise produce NA subscripts
  is_glos <- !is.na(hrefs) &
    stringr::str_detect(hrefs, "^https://glosario\\.carpentries\\.org/")
  glos_links <- links[is_glos]

  clean_links <- character(length(glos_links))
  for (i in seq_along(glos_links)) {
    href <- xml2::xml_attr(glos_links[[i]], "href")
    # Swap the default English path segment for the lesson language. Matching
    # and replacing "/en/" with "/<lang>/" keeps both slashes intact (the old
    # "en/" -> lang replacement dropped the trailing slash and broke links
    # when lang was "en").
    localised <- stringr::str_replace(href, "/en/", paste0("/", lang, "/"))
    xml2::xml_attr(glos_links[[i]], "href") <- localised
    # collect the *updated* href so the aggregate page links match the
    # rewritten episode links
    clean_links[i] <- localised
  }
  invisible(clean_links)
}

# Return the first <h1> node of a rendered page (the page title heading).
get_title <- function(doc) {
  heading_xpath <- ".//h1"
  xml2::xml_find_first(doc, heading_xpath)
}

# nocov end
6 changes: 5 additions & 1 deletion R/build_site.R
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,8 @@ build_site <- function(path = ".", quiet = !interactive(), preview = TRUE, overr
page_progress = progress,
date = db$date[i],
pkg = pkg,
quiet = quiet
quiet = quiet,
glosario = this_metadata$get()[["glosario"]]
)
}
# if (rebuild_template) template_check$set()
Expand Down Expand Up @@ -157,6 +158,9 @@ build_site <- function(path = ".", quiet = !interactive(), preview = TRUE, overr

describe_progress("Creating Images page", quiet = quiet)
build_images(pkg, pages = html_pages, quiet = quiet)

describe_progress("Indexing Glosario links", quiet = quiet)
build_glossary(pkg, pages = html_pages, quiet = quiet)
}
describe_progress("Creating Instructor Notes", quiet = quiet)
build_instructor_notes(pkg, pages = html_pages, built = built, quiet = quiet)
Expand Down
9 changes: 8 additions & 1 deletion R/render_html.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
#' lf <- paste0("--lua-filter=", lua)
#' cat(sandpaper:::render_html(tmp, lf))
#' }
render_html <- function(path_in, ..., quiet = FALSE) {
render_html <- function(path_in, ..., quiet = FALSE, glosario = NULL) {
htm <- tempfile(fileext = ".html")
on.exit(unlink(htm), add = TRUE)
links <- getOption("sandpaper.links")
Expand All @@ -57,6 +57,13 @@ render_html <- function(path_in, ..., quiet = FALSE) {
path_in <- tmpin
on.exit(unlink(tmpin), add = TRUE)
}

if (!is.null(glosario)) {
# if we have a glossary, then we need to replace the glossary term
# placeholders with Glosario links via render_glosario_links()
path_in <- render_glosario_links(path_in, glosario = glosario, quiet = quiet)
}

args <- construct_pandoc_args(path_in, output = htm, to = "html", ...)
sho <- !(quiet || identical(Sys.getenv("TESTTHAT"), "true"))
# Ensure we use the _loaded version_ of pandoc in case folks are using
Expand Down
3 changes: 1 addition & 2 deletions R/utils-metadata.R
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,7 @@ initialise_metadata <- function(path = ".") {
this_metadata$set(c("date", "modified"), format(Sys.Date(), "%F"))
this_metadata$set(c("date", "published"), format(Sys.Date(), "%F"))
this_metadata$set("citation", path_citation(path))
this_metadata$set("glosario", read_glosario_yaml(cfg$glosario))

this_metadata$set("analytics", cfg$analytics)
}


Loading