Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Automated processing and page generation of glosario links in lessons #612

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,8 @@ Imports:
callr,
servr,
utils,
tools
tools,
stringr
Suggests:
testthat (>= 3.0.0),
covr,
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
export(build_episode_html)
export(build_episode_md)
export(build_handout)
export(build_glossary)
export(build_lesson)
export(check_lesson)
export(create_episode)
Expand Down
7 changes: 5 additions & 2 deletions R/build_episode.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
#' sidebar. The current episode will be replaced with an index of all the
#' chapters in the episode.
#' @param date the date the episode was last built.
#' @param glosario a dictionary of terms read in from Glosario glossary.yaml
#' on GitHub. Defaults to NULL.
#' @return `TRUE` if the page was successful, `FALSE` otherwise.
#' @export
#' @note this function is for internal use, but exported for those who know what
Expand Down Expand Up @@ -72,12 +74,13 @@
build_episode_html <- function(path_md, path_src = NULL,
page_back = "index.md", page_forward = "index.md",
pkg, quiet = FALSE, page_progress = NULL,
sidebar = NULL, date = NULL) {
sidebar = NULL, date = NULL, glosario = NULL) {
home <- get_source_path() %||% root_path(path_md)
this_lesson(home)
page_globals <- setup_page_globals()
slug <- get_slug(path_md)
body <- render_html(path_md, quiet = quiet)
body <- render_html(path_md, quiet = quiet, glosario = glosario)

if (body == "") {
# if there is nothing in the page then we build nothing.
return(NULL)
Expand Down
183 changes: 183 additions & 0 deletions R/build_glossary.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
# Fetch and parse the Glosario glossary definitions from GitHub.
#
# @param glosario the `glosario` value from the lesson configuration. `NULL`
#   or `FALSE` disables the feature; any other value triggers a download.
# @return a list of glossary entries parsed from glossary.yml, or `NULL` when
#   the feature is disabled, the request fails, or the server does not
#   respond with HTTP 200.
read_glosario_yaml <- function(glosario) {
  # isFALSE() is safe for non-logical config values, unlike `glosario == FALSE`
  if (is.null(glosario) || isFALSE(glosario)) {
    return(NULL)
  }
  # Load the glossary YAML file from the GitHub repository
  glosario_url <- "https://raw.githubusercontent.com/carpentries/glosario/main/glossary.yml"
  # A network failure should disable the glossary feature, not abort the
  # entire lesson build, so trap the error and fall through to NULL.
  glosario_response <- tryCatch(
    httr::GET(glosario_url),
    error = function(e) NULL
  )
  if (is.null(glosario_response) || httr::status_code(glosario_response) != 200) {
    return(NULL)
  }
  # Explicit encoding avoids locale-dependent parsing of the YAML payload
  yaml::yaml.load(httr::content(glosario_response, as = "text", encoding = "UTF-8"))
}

# Turn a Glosario term slug into a superscript-marked Markdown link.
#
# @param term a term slug extracted from a `{{ glosario.term }}` placeholder
# @param slugs the collection of valid slugs from the Glosario glossary
# @return a Markdown link to the glossary entry (in the lesson's configured
#   language) when `term` is a known slug, otherwise the bare term unchanged.
create_glosario_link <- function(term, slugs) {
  if (!(term %in% slugs)) {
    # Unknown terms pass through untouched.
    return(term)
  }
  lang <- this_metadata$get()[["lang"]]
  url <- paste0("https://glosario.carpentries.org/", lang, "/#", term)
  paste0("[^", term, "^](", url, ")")
}

# Replace `{{ glosario.term }}` placeholders in a markdown file with links to
# the Glosario glossary, rewriting the file in place.
#
# @param path_in path to the markdown file to process
# @param glosario parsed glossary from read_glosario_yaml(); NULL disables
#   processing and leaves the file untouched
# @param quiet currently unused; kept for signature compatibility
# @return the (possibly rewritten) input path, invisibly, so callers can
#   continue the normal lesson processing chain.
render_glosario_links <- function(path_in, glosario = NULL, quiet = FALSE) {
  if (!is.null(glosario)) {
    content <- readLines(path_in)

    slugs <- lapply(glosario, function(x) x$slug)
    glos_pattern <- "\\{\\{\\s*glosario\\.(\\w+)\\s*\\}\\}"

    # replace {{ glosario.term }} placeholders with glossary link URLs.
    # NOTE: the replacement function receives a *vector* of all matches in a
    # line, so the term extraction and link creation must be vectorized --
    # extracting a single element here would corrupt lines with multiple
    # placeholders.
    glosarioed_content <- stringr::str_replace_all(
      content,
      pattern = glos_pattern,
      replacement = function(matches) {
        terms <- stringr::str_match(matches, glos_pattern)[, 2]
        vapply(terms, create_glosario_link, character(1), slugs = slugs)
      }
    )

    # overwrite the file with the new content
    writeLines(glosarioed_content, path_in)
  }

  # the processed file path for subsequent lesson processing as usual
  invisible(path_in)
}

#' @rdname build_glossary
#' @export
build_glossary <- function(pkg, pages = NULL, quiet = FALSE) {
  # NOTE: the trailing comma previously present after `quiet = quiet` passed
  # an empty (missing) argument into build_glossary_page(); removed.
  build_glossary_page(
    pkg = pkg,
    pages = pages,
    # title = tr_computed("Glossary"),
    slug = "glossary",
    quiet = quiet
  )
}

#' Build a page for aggregating glosario links found in lesson elements
#'
#' @inheritParams provision_agg_page
#' @param pages output from the function [read_all_html()]: a nested list of
#'   `xml_document` objects representing episodes in the lesson
#' @param title the title for the aggregate page. Defaults to
#'   "Glosario Links".
#' @param slug the slug (file name without extension) for the output page.
#' @param aggregate a selector for the lesson content you want to aggregate.
#'   The default is "*", which will aggregate links from all content.
#'   To grab only links from sections, use "section".
#' @param append a selector for the section of the page where the aggregate data
#'   should be placed. This defaults to "self::node()", which indicates that the
#'   entire page should be appended.
#' @param quiet if `TRUE`, no messages will be emitted. If FALSE, pkgdown will
#'   report creation of the temporary file.
#' @return NULL, invisibly. This is called for its side-effect
#'
#' @details
#'
#' We programmatically search through lesson content to find links that point to
#' glosario terms. We then aggregate these links into a single page.
#'
#' To customise the page, we need a few things:
#'
#' 1. a title
#' 2. a slug
#' @note
#' This function assumes that you have already built all the episodes of your lesson.
#'
#' @keywords internal
#' @rdname build_glossary
#' @examples
#' if (FALSE) {
#' # build_glossary_page() assumes that your lesson has been built and takes in a
#' # pkgdown object, which can be created from the `site/` folder in your
#' # lesson.
#' lsn <- "/path/to/my/lesson"
#' pkg <- pkgdown::as_pkgdown(fs::path(lsn, "site"))
#'
#' htmls <- read_all_html(pkg$dst_path)
#' build_glossary_page(pkg, htmls, quiet = FALSE)
#' }
build_glossary_page <- function(pkg, pages, title = "Glosario Links", slug = NULL, aggregate = "*", append = "self::node()", quiet = FALSE) {
  path <- get_source_path() %||% root_path(pkg$src_path)
  out_path <- pkg$dst_path
  this_lesson(path)

  # provision a fresh aggregate page and locate the section that will hold
  # the list of glossary links
  agg <- provision_agg_page(pkg, title = title, slug = slug, new = TRUE)
  agg_sect <- xml2::xml_find_first(agg$learner, ".//section[@id='glossary']")
  agg_ul <- xml2::xml_add_child(agg_sect, "ul", id = "glosario-list")

  the_episodes <- .resources$get()[["episodes"]]
  the_slugs <- get_slug(the_episodes)

  # vector to hold all unique links from all episodes
  glinks <- character()

  # seq_along() is safe for zero-length episode lists, unlike seq()
  for (episode in seq_along(the_episodes)) {
    ep_learn <- ep_instruct <- the_episodes[episode]
    ename <- the_slugs[episode]

    if (!is.null(pages)) {
      ep_learn <- pages$learner[[ename]]
      ep_instruct <- pages$instructor[[ename]]
    }

    # collect glosario links from both the learner and instructor views
    ep_learn <- get_content(ep_learn, content = aggregate, pkg = pkg)
    ep_learn_glinks <- get_glossary_links(ep_learn)

    ep_instruct <- get_content(ep_instruct, content = aggregate, pkg = pkg, instructor = TRUE)
    ep_instruct_glinks <- get_glossary_links(ep_instruct)

    # append the unique set of links from both views to the global collection
    glinks <- unique(c(glinks, ep_learn_glinks, ep_instruct_glinks))
  }

  glinks <- sort(glinks)

  # Iterate over glinks to create HTML elements
  for (link in glinks) {
    # the term slug is everything after the fragment marker (#)
    term <- stringr::str_extract(link, "#(.*)")
    term <- stringr::str_replace(term, "#", "")
    agg_li <- xml2::xml_add_child(agg_ul, "li")
    xml2::xml_add_child(agg_li, "a", term, href = link)
  }

  glos_out <- fs::path(out_path, as_html(slug))
  report <- "Writing '{.file {glos_out}}'"
  if (!quiet) cli::cli_text(report)
  writeLines(as.character(agg$learner), glos_out)
  # documented contract: NULL, invisibly
  invisible(NULL)
}

# Find all links to the Glosario glossary in an episode, localise them to the
# lesson language, and return the localised hrefs.
#
# @param episode an `xml_document` representing a built episode page
# @return a character vector of the (localised) glosario hrefs found in the
#   episode, invisibly. As a side effect, the href attributes in `episode`
#   are rewritten in place to point at the lesson language.
get_glossary_links <- function(episode) {
  lang <- this_metadata$get()[["lang"]]
  links <- xml2::xml_find_all(episode, ".//a")
  hrefs <- xml2::xml_attr(links, "href")
  # guard against anchors with no href (xml_attr returns NA), which would
  # otherwise produce NA subscripts
  is_glos <- !is.na(hrefs) &
    stringr::str_detect(hrefs, "^https://glosario\\.carpentries\\.org/")
  glos_links <- links[is_glos]

  clean_links <- character(length(glos_links))
  for (i in seq_along(glos_links)) {
    href <- xml2::xml_attr(glos_links[[i]], "href")
    # Swap the default English path segment for the lesson language. Matching
    # and replacing "/en/" with "/<lang>/" keeps both slashes intact (the old
    # "en/" -> lang replacement dropped the trailing slash and broke links
    # when lang was "en").
    localised <- stringr::str_replace(href, "/en/", paste0("/", lang, "/"))
    xml2::xml_attr(glos_links[[i]], "href") <- localised
    # collect the *updated* href so the aggregate page links match the
    # rewritten episode links
    clean_links[i] <- localised
  }
  invisible(clean_links)
}

# Return the first <h1> node of a rendered page (the page title heading).
get_title <- function(doc) {
  heading_xpath <- ".//h1"
  xml2::xml_find_first(doc, heading_xpath)
}

# nocov end
6 changes: 5 additions & 1 deletion R/build_site.R
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,8 @@ build_site <- function(path = ".", quiet = !interactive(), preview = TRUE, overr
page_progress = progress,
date = db$date[i],
pkg = pkg,
quiet = quiet
quiet = quiet,
glosario = this_metadata$get()[["glosario"]]
)
}
# if (rebuild_template) template_check$set()
Expand Down Expand Up @@ -157,6 +158,9 @@ build_site <- function(path = ".", quiet = !interactive(), preview = TRUE, overr

describe_progress("Creating Images page", quiet = quiet)
build_images(pkg, pages = html_pages, quiet = quiet)

describe_progress("Indexing Glosario links", quiet = quiet)
build_glossary(pkg, pages = html_pages, quiet = quiet)
}
describe_progress("Creating Instructor Notes", quiet = quiet)
build_instructor_notes(pkg, pages = html_pages, built = built, quiet = quiet)
Expand Down
9 changes: 8 additions & 1 deletion R/render_html.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
#' lf <- paste0("--lua-filter=", lua)
#' cat(sandpaper:::render_html(tmp, lf))
#' }
render_html <- function(path_in, ..., quiet = FALSE) {
render_html <- function(path_in, ..., quiet = FALSE, glosario = NULL) {
htm <- tempfile(fileext = ".html")
on.exit(unlink(htm), add = TRUE)
links <- getOption("sandpaper.links")
Expand All @@ -57,6 +57,13 @@ render_html <- function(path_in, ..., quiet = FALSE) {
path_in <- tmpin
on.exit(unlink(tmpin), add = TRUE)
}

if (!is.null(glosario)) {
# if we have a glossary, then we need to replace the glossary term
# placeholders with Glosario links via render_glosario_links()
path_in <- render_glosario_links(path_in, glosario = glosario, quiet = quiet)
}

args <- construct_pandoc_args(path_in, output = htm, to = "html", ...)
sho <- !(quiet || identical(Sys.getenv("TESTTHAT"), "true"))
# Ensure we use the _loaded version_ of pandoc in case folks are using
Expand Down
3 changes: 1 addition & 2 deletions R/utils-metadata.R
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,7 @@ initialise_metadata <- function(path = ".") {
this_metadata$set(c("date", "modified"), format(Sys.Date(), "%F"))
this_metadata$set(c("date", "published"), format(Sys.Date(), "%F"))
this_metadata$set("citation", path_citation(path))
this_metadata$set("glosario", read_glosario_yaml(cfg$glosario))

this_metadata$set("analytics", cfg$analytics)
}


Loading