From 3081b4918cf9b92ffb46c52a373728b652dfb50c Mon Sep 17 00:00:00 2001 From: Drew Crampsie Date: Sun, 21 Jan 2024 11:42:38 -0800 Subject: [PATCH] HTML, TAL, XML and SXML modules: New and Reorginized under markup (#1094) --- doc/.vuepress/config.js | 11 +- doc/build.sh | 22 +- doc/ox-gfm.el | 349 ++++++ doc/reference/std/markup/README.md | 7 + doc/reference/std/markup/sxml.md | 331 ++++++ doc/reference/std/markup/sxml/README.md | 331 ++++++ doc/reference/std/markup/sxml/html/README.md | 235 ++++ doc/reference/std/markup/sxml/tal/README.md | 876 ++++++++++++++ doc/reference/std/markup/sxml/xml.md | 63 + doc/reference/std/xml.md | 375 ------ src/std/build-spec.ss | 23 +- src/std/markup/README.org | 12 + src/std/markup/html.ss | 7 + src/std/{xml-test.ss => markup/sxml-test.ss} | 17 +- src/std/markup/sxml.ss | 8 + src/std/markup/sxml/README.org | 417 +++++++ src/std/markup/sxml/html/README.org | 326 ++++++ src/std/markup/sxml/html/html-parser.scm | 649 ++++++++++ src/std/markup/sxml/html/parser.ss | 74 ++ src/std/markup/sxml/html/tal.ss | 5 + src/std/{xml => markup/sxml}/oleg/README.md | 0 src/std/{xml => markup/sxml}/oleg/SSAX.scm | 0 src/std/{xml => markup/sxml}/oleg/SXPath.scm | 0 .../sxml}/oleg/char-encoding.scm | 0 .../{xml => markup/sxml}/oleg/define-opt.scm | 0 .../{xml => markup/sxml}/oleg/input-parse.scm | 0 .../sxml}/oleg/look-for-str.scm | 0 .../sxml}/oleg/myenv-gerbil.scm | 0 .../sxml}/oleg/parser-errors-vanilla.scm | 0 src/std/markup/sxml/print.ss | 264 +++++ src/std/{xml => markup/sxml}/ssax.ss | 0 .../{xml/sxml.ss => markup/sxml/sxml-inf.ss} | 0 src/std/{xml => markup/sxml}/sxpath.ss | 0 src/std/markup/sxml/tal/README.org | 1041 +++++++++++++++++ src/std/markup/sxml/tal/expander.ss | 254 ++++ src/std/markup/sxml/tal/iter.ss | 102 ++ src/std/markup/sxml/tal/parser.ss | 1 + src/std/markup/sxml/tal/syntax.ss | 130 ++ src/std/markup/sxml/tal/toplevel.ss | 38 + src/std/markup/sxml/xml.ss | 18 + src/std/markup/tal.ss | 4 + src/std/markup/xml.ss | 14 + src/std/net/s3/api.ss | 2 +- src/std/net/smtp/README.org | 4 +- src/std/xml.ss | 12 +- 45 files changed, 5618 insertions(+), 404 deletions(-) create mode 100644 doc/ox-gfm.el create mode 100644 doc/reference/std/markup/README.md create mode 100644 doc/reference/std/markup/sxml.md create mode 100644 doc/reference/std/markup/sxml/README.md create mode 100644 doc/reference/std/markup/sxml/html/README.md create mode 100644 doc/reference/std/markup/sxml/tal/README.md create mode 100644 doc/reference/std/markup/sxml/xml.md delete mode 100644 doc/reference/std/xml.md create mode 100644 src/std/markup/README.org create mode 100644 src/std/markup/html.ss rename src/std/{xml-test.ss => markup/sxml-test.ss} (66%) create mode 100644 src/std/markup/sxml.ss create mode 100644 src/std/markup/sxml/README.org create mode 100644 src/std/markup/sxml/html/README.org create mode 100644 src/std/markup/sxml/html/html-parser.scm create mode 100644 src/std/markup/sxml/html/parser.ss create mode 100644 src/std/markup/sxml/html/tal.ss rename src/std/{xml => markup/sxml}/oleg/README.md (100%) rename src/std/{xml => markup/sxml}/oleg/SSAX.scm (100%) rename src/std/{xml => markup/sxml}/oleg/SXPath.scm (100%) rename src/std/{xml => markup/sxml}/oleg/char-encoding.scm (100%) rename src/std/{xml => markup/sxml}/oleg/define-opt.scm (100%) rename src/std/{xml => markup/sxml}/oleg/input-parse.scm (100%) rename src/std/{xml => markup/sxml}/oleg/look-for-str.scm (100%) rename src/std/{xml => markup/sxml}/oleg/myenv-gerbil.scm (100%) rename src/std/{xml => markup/sxml}/oleg/parser-errors-vanilla.scm (100%) create mode 100644 src/std/markup/sxml/print.ss rename src/std/{xml => markup/sxml}/ssax.ss (100%) rename src/std/{xml/sxml.ss => markup/sxml/sxml-inf.ss} (100%) rename src/std/{xml => markup/sxml}/sxpath.ss (100%) create mode 100644 src/std/markup/sxml/tal/README.org create mode 100644 src/std/markup/sxml/tal/expander.ss create mode 100644 src/std/markup/sxml/tal/iter.ss create mode 100644 src/std/markup/sxml/tal/parser.ss create mode 100644 src/std/markup/sxml/tal/syntax.ss create mode 100644 src/std/markup/sxml/tal/toplevel.ss create mode 100644 src/std/markup/sxml/xml.ss create mode 100644 src/std/markup/tal.ss create mode 100644 src/std/markup/xml.ss diff --git a/doc/.vuepress/config.js b/doc/.vuepress/config.js index 9df8c7091..3d173d70f 100644 --- a/doc/.vuepress/config.js +++ b/doc/.vuepress/config.js @@ -77,10 +77,19 @@ module.exports = { 'actor', 'crypto', 'protobuf', - 'xml', 'parser', 'values', 'mime/', + { + title: "Markup Languages", + path: "/reference/std/markup/", + children: [ + "markup/sxml/", + "markup/sxml/xml", + "markup/sxml/html/", + "markup/sxml/tal/" + ] + }, { title: "Networking Libraries", path: "/reference/std/net/", diff --git a/doc/build.sh b/doc/build.sh index df0e8ef64..fad40d938 100755 --- a/doc/build.sh +++ b/doc/build.sh @@ -2,9 +2,27 @@ cd ${0%/*} +weave () { + emacs $1 --batch -l `pwd`/ox-gfm.el \ + --eval '(print (org-gfm-export-to-markdown nil nil nil))' --kill || + echo "Cannot export from org $1 to markdown using emacs" +} +weave_subtree () { + emacs $1 --batch -l `pwd`/ox-gfm.el \ + --eval "(progn (goto-char (+ 1 (org-open-link-from-string \"[[#$2]]\"))) + (print (org-gfm-export-to-markdown nil t nil)))" --kill || + echo "Cannot export from org $1 subtree $2 to markdown using emacs" +} -emacs ../src/std/mime/README.org --batch -f org-gfm-export-to-markdown --kill || - echo "Cannot export from org to markdown using emacs" + + + +weave ../src/std/mime/README.org +weave ../src/std/markup/README.org +weave ../src/std/markup/sxml/README.org +weave ../src/std/markup/sxml/xml.org +weave ../src/std/markup/sxml/html/README.org +weave ../src/std/markup/sxml/tal/README.org npm install diff --git a/doc/ox-gfm.el b/doc/ox-gfm.el new file mode 100644 index 000000000..45aef09c3 --- /dev/null +++ b/doc/ox-gfm.el @@ -0,0 +1,349 @@ +;;; ox-gfm.el --- Github Flavored Markdown Back-End for Org Export Engine -*- lexical-binding: t; -*- + +;; Copyright (C) 2014-2017 Lars Tveito + +;; Author: Lars Tveito +;; Keywords: org, wp, markdown, github + +;; This file is not part of GNU Emacs. + +;; This program is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. + +;; This program is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with this program. If not, see . + +;;; Commentary: + +;; This library implements a Markdown back-end (github flavor) for Org +;; exporter, based on the `md' back-end. + +;;; Code: + +(require 'ox-md) +(require 'ox-publish) + + +;;; User-Configurable Variables + +(defgroup org-export-gfm nil + "Options specific to Markdown export back-end." + :tag "Org Github Flavored Markdown" + :group 'org-export + :version "24.4" + :package-version '(Org . "8.0")) + + +;;; Define Back-End + +(org-export-define-derived-backend 'gfm 'md + :filters-alist '((:filter-parse-tree . org-md-separate-elements)) + :menu-entry + '(?g "Export to Github Flavored Markdown" + ((?G "To temporary buffer" + (lambda (a s v b) (org-gfm-export-as-markdown a s v))) + (?g "To file" (lambda (a s v b) (org-gfm-export-to-markdown a s v))) + (?o "To file and open" + (lambda (a s v b) + (if a (org-gfm-export-to-markdown t s v) + (org-open-file (org-gfm-export-to-markdown nil s v))))))) + :translate-alist '((inner-template . org-gfm-inner-template) + (paragraph . org-gfm-paragraph) + (strike-through . org-gfm-strike-through) + (example-block . org-gfm-example-block) + (src-block . org-gfm-src-block) + (table-cell . org-gfm-table-cell) + (table-row . org-gfm-table-row) + (table . org-gfm-table))) + + +;;; Transcode Functions + +;;;; Paragraph + +(defun org-gfm-paragraph (paragraph contents info) + "Transcode PARAGRAPH element into Github Flavoured Markdown format. +CONTENTS is the paragraph contents. INFO is a plist used as a +communication channel." + (unless (plist-get info :preserve-breaks) + (setq contents (concat (mapconcat 'identity (split-string contents) " ") "\n"))) + (let ((first-object (car (org-element-contents paragraph)))) + ;; If paragraph starts with a #, protect it. + (if (and (stringp first-object) (string-match "\\`#" first-object)) + (replace-regexp-in-string "\\`#" "\\#" contents nil t) + contents))) + +;;;; Src Block + +(defun org-gfm-src-block (src-block _contents info) + "Transcode SRC-BLOCK element into Github Flavored Markdown format. +_CONTENTS is nil. INFO is a plist used as a communication +channel." + (let* ((lang (org-element-property :language src-block)) + (code (org-export-format-code-default src-block info)) + (prefix (concat "```" lang "\n")) + (suffix "```")) + (concat prefix code suffix))) + +;;;; Example Block + +(defalias 'org-gfm-example-block #'org-gfm-src-block) + +;;;; Strike-Through + +(defun org-gfm-strike-through (_strike-through contents _info) + "Transcode _STRIKE-THROUGH from Org to Markdown (GFM). +CONTENTS is the text with strike-through markup. _INFO is a plist +holding contextual information." + (format "~~%s~~" contents)) + +;;;; Table-Common + +(defvar width-cookies nil) +(defvar width-cookies-table nil) + +(defconst gfm-table-left-border "|") +(defconst gfm-table-right-border " |") +(defconst gfm-table-separator " |") + +(defun org-gfm-table-col-width (table column info) + "Return width of TABLE at given COLUMN. +INFO is a plist used as communication channel. Width of a column +is determined either by inquerying `width-cookies' in the column, +or by the maximum cell with in the column." + (let ((cookie (when (hash-table-p width-cookies) + (gethash column width-cookies)))) + (if (and (eq table width-cookies-table) + (not (eq nil cookie))) + cookie + (progn + (unless (and (eq table width-cookies-table) + (hash-table-p width-cookies)) + (setq width-cookies (make-hash-table)) + (setq width-cookies-table table)) + (let ((max-width 0) + (specialp (org-export-table-has-special-column-p table))) + (org-element-map + table + 'table-row + (lambda (row) + (setq max-width + (max (length + (org-export-data + (org-element-contents + (elt (if specialp (car (org-element-contents row)) + (org-element-contents row)) + column)) + info)) + max-width))) + info) + (puthash column max-width width-cookies)))))) + +(defun org-gfm-make-hline-builder (table info char) + "Return a function to build horizontal line in TABLE with given CHAR. +INFO is a plist used as a communication channel." + (lambda (col) + (let ((max-width (max 3 (org-gfm-table-col-width table col info)))) + (when (< max-width 1) + (setq max-width 1)) + (make-string max-width char)))) + +;;;; Table-Cell + +(defun org-gfm-table-cell (table-cell contents info) + "Transcode TABLE-CELL element from Org into GFM. +CONTENTS is content of the cell. INFO is a plist used as a +communication channel." + (let* ((table (org-export-get-parent-table table-cell)) + (column (cdr (org-export-table-cell-address table-cell info))) + (width (org-gfm-table-col-width table column info)) + (left-border (if (org-export-table-cell-starts-colgroup-p table-cell info) "| " " ")) + (right-border " |") + (data (or contents ""))) + (setq contents + (concat data + (make-string (max 0 (- width (string-width data))) + ?\s))) + (concat left-border contents right-border))) + +;;;; Table-Row + +(defun org-gfm-table-row (table-row contents info) + "Transcode TABLE-ROW element from Org into GFM. +CONTENTS is cell contents of TABLE-ROW. INFO is a plist used as a +communication channel." + (let ((table (org-export-get-parent-table table-row))) + (when (and (eq 'rule (org-element-property :type table-row)) + ;; In GFM, rule is valid only at second row. + (eq 1 (cl-position + table-row + (org-element-map table 'table-row 'identity info)))) + (let* ((table (org-export-get-parent-table table-row)) + (build-rule (org-gfm-make-hline-builder table info ?-)) + (cols (cdr (org-export-table-dimensions table info)))) + (setq contents + (concat gfm-table-left-border + (mapconcat (lambda (col) (funcall build-rule col)) + (number-sequence 0 (- cols 1)) + gfm-table-separator) + gfm-table-right-border)))) + contents)) + +;;;; Table + +(defun org-gfm-table (table contents info) + "Transcode TABLE element into Github Flavored Markdown table. +CONTENTS is the contents of the table. INFO is a plist holding +contextual information." + (let* ((rows (org-element-map table 'table-row 'identity info)) + (no-header (or (<= (length rows) 1))) + (cols (cdr (org-export-table-dimensions table info))) + (build-dummy-header + (lambda () + (let ((build-empty-cell (org-gfm-make-hline-builder table info ?\s)) + (build-rule (org-gfm-make-hline-builder table info ?-)) + (columns (number-sequence 0 (- cols 1)))) + (concat gfm-table-left-border + (mapconcat (lambda (col) (funcall build-empty-cell col)) + columns + gfm-table-separator) + gfm-table-right-border "\n" gfm-table-left-border + (mapconcat (lambda (col) (funcall build-rule col)) + columns + gfm-table-separator) + gfm-table-right-border "\n"))))) + (concat (and no-header (funcall build-dummy-header)) + (replace-regexp-in-string "\n\n" "\n" contents)))) + +;;;; Table of contents + +(defun org-gfm-format-toc (headline info) + "Return an appropriate table of contents entry for HEADLINE." + (let* ((title (org-export-data + (org-export-get-alt-title headline info) info)) + (level (1- (org-element-property :level headline))) + (indent (concat (make-string (* level 2) ? ))) + (anchor (or (org-element-property :CUSTOM_ID headline) + (org-export-get-reference headline info)))) + (concat indent "- [" title "]" "(#" anchor ")"))) + +;;;; Footnote section + +(defun org-gfm-footnote-section (info) + "Format the footnote section. +INFO is a plist used as a communication channel." + (let* ((fn-alist (org-export-collect-footnote-definitions info))) + (and fn-alist + (format + "## Footnotes\n\n%s\n" + (mapconcat (pcase-lambda (`(,n ,_type ,def)) + (format + "%s %s\n" + (format (plist-get info :html-footnote-format) + (org-html--anchor + (format "fn.%d" n) + n + (format " class=\"footnum\" href=\"#fnr.%d\"" n) + info)) + (org-trim (org-export-data def info)))) + fn-alist "\n"))))) + +;;;; Template + +(defun org-gfm-inner-template (contents info) + "Return body of document after converting it to Markdown syntax. +CONTENTS is the transcoded contents string. INFO is a plist +holding export options." + (let* ((depth (plist-get info :with-toc)) + (headlines (and depth (org-export-collect-headlines info depth))) + (toc-string (or (mapconcat (lambda (headline) + (org-gfm-format-toc headline info)) + headlines "\n") + "")) + (toc-tail (if headlines "\n\n" ""))) + (org-trim (concat toc-string toc-tail contents "\n" (org-gfm-footnote-section info))))) + + +;;; Interactive function + +;;;###autoload +(defun org-gfm-export-as-markdown (&optional async subtreep visible-only) + "Export current buffer to a Github Flavored Markdown buffer. + +If narrowing is active in the current buffer, only export its +narrowed part. + +If a region is active, export that region. + +A non-nil optional argument ASYNC means the process should happen +asynchronously. The resulting buffer should be accessible +through the `org-export-stack' interface. + +When optional argument SUBTREEP is non-nil, export the sub-tree +at point, extracting information from the headline properties +first. + +When optional argument VISIBLE-ONLY is non-nil, don't export +contents of hidden elements. + +Export is done in a buffer named \"*Org GFM Export*\", which will +be displayed when `org-export-show-temporary-export-buffer' is +non-nil." + (interactive) + (org-export-to-buffer 'gfm "*Org GFM Export*" + async subtreep visible-only nil nil (lambda () (text-mode)))) + +;;;###autoload +(defun org-gfm-convert-region-to-md () + "Convert the region to Github Flavored Markdown. +This can be used in any buffer, this function assume that the +current region has org-mode syntax. For example, you can write +an itemized list in org-mode syntax in a Markdown buffer and use +this command to convert it." + (interactive) + (org-export-replace-region-by 'gfm)) + +;;;###autoload +(defun org-gfm-export-to-markdown (&optional async subtreep visible-only) + "Export current buffer to a Github Flavored Markdown file. + +If narrowing is active in the current buffer, only export its +narrowed part. + +If a region is active, export that region. + +A non-nil optional argument ASYNC means the process should happen +asynchronously. The resulting file should be accessible through +the `org-export-stack' interface. + +When optional argument SUBTREEP is non-nil, export the sub-tree +at point, extracting information from the headline properties +first. + +When optional argument VISIBLE-ONLY is non-nil, don't export +contents of hidden elements. + +Return output file's name." + (interactive) + (let ((outfile (org-export-output-file-name ".md" subtreep))) + (org-export-to-file 'gfm outfile async subtreep visible-only))) + +;;;###autoload +(defun org-gfm-publish-to-gfm (plist filename pub-dir) + "Publish an org file to Markdown. +FILENAME is the filename of the Org file to be published. PLIST +is the property list for the given project. PUB-DIR is the +publishing directory. +Return output file name." + (org-publish-org-to 'gfm filename ".md" plist pub-dir)) + +(provide 'ox-gfm) + +;;; ox-gfm.el ends here diff --git a/doc/reference/std/markup/README.md b/doc/reference/std/markup/README.md new file mode 100644 index 000000000..1a47fa798 --- /dev/null +++ b/doc/reference/std/markup/README.md @@ -0,0 +1,7 @@ +# Markup Languages + +Gerbil supports many different markup languages using [SXML](README.md) for parsing, searching, manipulating and printing. + +- **[XML](sxml/xml.md) :** eXtensible Markup Language +- **[HTML](sxml/html/README.md):** Hyper Text Markup Language +- **[TAL](sxml/tal/README.md) :** Template Attribute Language diff --git a/doc/reference/std/markup/sxml.md b/doc/reference/std/markup/sxml.md new file mode 100644 index 000000000..e4b5ac206 --- /dev/null +++ b/doc/reference/std/markup/sxml.md @@ -0,0 +1,331 @@ +# SXML: S-expression eXtensible Markup Language + +This module adds utilities to work with XML and HTML. It has been inspired by Oleg's SXML package. See more detailed info about SXML can be found [here](http://okmij.org/ftp/Scheme/xml.html). + +::: tip To use the bindings from this module: + +```scheme +(import :std/markup/sxml) +``` + +::: + + +## Concepts + +"SXML is an abstract syntax tree of an XML document. SXML is also a concrete representation of the XML Infoset in the form of S-expressions." + +When developing in Gerbil we generally use sexps. XML and HTML are not quite sexps. + +For parsing and printing have a look at [XML docs](./xml.md) or the [HTML docs](./html.md) depending on your needs. + +There's a lot more detail in the [SMXL Specification](https://okmij.org/ftp/Scheme/SXML.html) so for basics a simple ` +``` + +If the second item is a list that starts with an `@` symbol, `(@ ...}` marks the start of the attributes alist. + +Otherwise it's an element or a block of text. Simple! + +If you notice, the `write-sxml` function indents the html in a whitespace sensitive way to ensure there are no extra characters in the actual output. + + +## Printer + +All of HTML, XML and XHTML are printed from the same function. + + +### write-sxml + +```scheme +(def (write-sxml + sxml + port: (port (current-sxml-output-port)) + xml?: (xml? (current-sxml-output-xml?)) + indent: (indent #f) + quote-char: (quote-char #\")) ...) + +sxml := An sxml element, a list of elements, or text. +port := A keyword for binding the output port +xml? := A keyword for boolean choosing XML or HTML. Defaults to #f +indent := A keyword where #f means no indentation and a number means indent (aka + pretty print) the output hiegenically staring at this level. +quote-char := A keyword that chooses the quote character, either #\" + or #\', for attributess. +``` + +This is a generic abstract markup printer. The `:std/xml` and `:std/html` printers are based off of this one for more specific usage. + +```scheme +> (write-sxml '(*TOP* + (div + (p "I'm paragraph one") + (p "I'm paragraph two")))) +

I'm paragraph one

I'm paragraph two

+``` + +By default the `(current-sxml-output-port)` is set to `(current-output-port)`. It may not be what is expected and is really just for REPL use so set the port or parameterize `(current-sxml-output-port)` for best results. + +The XML/HTML can be indented. It does so inside the tags so as not to pollute or change semantics. + +```scheme +> (write-sxml '(*TOP* + (div + (p "I'm paragraph one") + (p "I'm paragraph two"))) indent: 1) +

I'm paragraph one

I'm paragraph two

+``` + +For HTML, the default, empty tags with no close are allowed. + +```scheme +> (write-sxml'(*TOP* + (area) + (base) + (br) + (col) + (embed) + (hr) + (img) + (input) + (link) + (meta) + (track) + (wbr))) + +

+``` + +When set to `xml?` things are different. + +```scheme +> (write-sxml'(*TOP* + (area) + (base) + (br) + (col) + (embed) + (hr) + (img) + (input) + (link) + (meta) + (track) + (wbr)) xml?: #t) + +

+``` + + +## SXML Queries + + +### sxpath + +```scheme +(sxpath path) -> sxml + + path := list +``` + +Evaluate an abbreviated SXPath + +``` +sxpath:: AbbrPath -> Converter, or +sxpath:: AbbrPath -> Node|Nodeset -> Nodeset +``` + +AbbrPath is a list. It is translated to the full SXPath according to the following rewriting rules: + +```scheme +(sxpath '()) -> (node-join) +(sxpath '(path-component ...)) -> + (node-join (sxpath1 path-component) (sxpath '(...))) +(sxpath1 '//) -> (node-or + (node-self (node-typeof? '*any*)) + (node-closure (node-typeof? '*any*))) +(sxpath1 '(equal? x)) -> (select-kids (node-equal? x)) +(sxpath1 '(eq? x)) -> (select-kids (node-eq? x)) +(sxpath1 ?symbol) -> (select-kids (node-typeof? ?symbol) +(sxpath1 procedure) -> procedure +(sxpath1 '(?symbol ...)) -> (sxpath1 '((?symbol) ...)) +(sxpath1 '(path reducer ...)) -> + (node-reduce (sxpath path) (sxpathr reducer) ...) +(sxpathr number) -> (node-pos number) +(sxpathr path-filter) -> (filter (sxpath path-filter)) +``` + + +### sxml-select + +```scheme +(sxml-select n predf [mapf = values]) -> sxml + + n := sxml nodes + predf := predicate function + mapf := transform function +``` + +Collects all children from node *n* that satisfy a predicate *predf*; optionally transforms result with mapping function *mapf* once a node satisfies a predicate, its children are not traversed. + + +### sxml-attributes + +```scheme +(sxml-attributes n) -> list | #f + + n := sxml node +``` + +Returns the attributes of given node *n* or #f if node does have any attributes. + + +### sxml-e + +```scheme +(sxml-e n) -> symbol | #f + + n := sxml node +``` + +Returns the element type of node *n* or #f if no type is found. + + +### sxml-find + +```scheme +(sxml-find n predf [mapf = values]) -> sxml + + n := sxml nodes + predf := predicate function + mapf := transform function +``` + +Find the first child that satisfies a predicate *predf*, using depth-first search. Predicate *predf* is a lambda which takes an node as parameter and returns an boolean. If optional *mapf* is given the results satisfying *predf* are transformed with it. + + +### sxml-select\* + +```scheme +(sxml-select* n predf [mapf = values]) -> sxml + + n := sxml nodes + predf := predicate function + mapf := transform function +``` + +Select from immediate children of node *n* using predicate function *predf*. Results satisfying *predf* are transformed if given optional mapping function *mapf*. + + +### sxml-attribute-e + +```scheme +(sxml-attribute-e n key) -> any | #f + + n := sxml node + key := string; node key +``` + +Returns the node *n* attribute value for given *key* or #f if value is not found. + + +### sxml-attribute-getq + +```scheme +(sxml-attribute-getq key attrs) -> any + + key := string; node key + attrs := alist? +``` + +attribute list => value + + +### sxml-class? + +```scheme +(sxml-class? klass) -> lambda + + klass := string; node class to match +``` + +returns dom class + + +### sxml-find\* + +```scheme +(sxml-find* n pred [mapf = values]) -> sxml | #f + + n := sxml node + pred := predicate fn + mapf := transform fn +``` + +find in immediate children + + +### sxml-e? + +```scheme +(sxml-e? el) -> lambda + + el := sxml element +``` + +returns element type + + +### sxml-id? + +```scheme +(sxml-id? id) -> lambda + + id := sxml node id value +``` + +returns dom id + + +### sxml-children + +```scheme +(sxml-children n) -> list + + n := sxml node +``` + +returns nodes children as a list + + +### sxml-find/context + +```scheme +(sxml-find/context n predf [mapf values]) -> sxml + + n := sxml node + predf := predicate fn to match + mapf := transform fn to apply to matches +``` + +find with context \ No newline at end of file diff --git a/doc/reference/std/markup/sxml/README.md b/doc/reference/std/markup/sxml/README.md new file mode 100644 index 000000000..82a9f5b6b --- /dev/null +++ b/doc/reference/std/markup/sxml/README.md @@ -0,0 +1,331 @@ +# SXML: S-expression eXtensible Markup Language + +This module adds utilities to work with XML and HTML. It has been inspired by Oleg's SXML package. See more detailed info about SXML can be found [here](http://okmij.org/ftp/Scheme/xml.html). + +::: tip To use the bindings from this module: + +```scheme +(import :std/markup/sxml) +``` + +::: + + +## Concepts + +"SXML is an abstract syntax tree of an XML document. SXML is also a concrete representation of the XML Infoset in the form of S-expressions." + +When developing in Gerbil we generally use sexps. XML and HTML are not quite sexps. + +For parsing and printing have a look at [XML](xml.md) or the [HTML docs](./html.md) depending on your needs. + +There's a lot more detail in the [SMXL Specification](https://okmij.org/ftp/Scheme/SXML.html) so for basics a simple ` +``` + +If the second item is a list that starts with an `@` symbol, `(@ ...}` marks the start of the attributes alist. + +Otherwise it's an element or a block of text. Simple! + +If you notice, the `write-sxml` function indents the html in a whitespace sensitive way to ensure there are no extra characters in the actual output. + + +## Printer + +All of HTML, XML and XHTML are printed from the same function. + + +### write-sxml + +```scheme +(def (write-sxml + sxml + port: (port (current-sxml-output-port)) + xml?: (xml? (current-sxml-output-xml?)) + indent: (indent #f) + quote-char: (quote-char #\")) ...) + +sxml := An sxml element, a list of elements, or text. +port := A keyword for binding the output port +xml? := A keyword for boolean choosing XML or HTML. Defaults to #f +indent := A keyword where #f means no indentation and a number means indent (aka + pretty print) the output hiegenically staring at this level. +quote-char := A keyword that chooses the quote character, either #\" + or #\', for attributess. +``` + +This is a generic abstract markup printer. The `:std/xml` and `:std/html` printers are based off of this one for more specific usage. + +```scheme +> (write-sxml '(*TOP* + (div + (p "I'm paragraph one") + (p "I'm paragraph two")))) +

I'm paragraph one

I'm paragraph two

+``` + +By default the `(current-sxml-output-port)` is set to `(current-output-port)`. It may not be what is expected and is really just for REPL use so set the port or parameterize `(current-sxml-output-port)` for best results. + +The XML/HTML can be indented. It does so inside the tags so as not to pollute or change semantics. + +```scheme +> (write-sxml '(*TOP* + (div + (p "I'm paragraph one") + (p "I'm paragraph two"))) indent: 1) +

I'm paragraph one

I'm paragraph two

+``` + +For HTML, the default, empty tags with no close are allowed. + +```scheme +> (write-sxml'(*TOP* + (area) + (base) + (br) + (col) + (embed) + (hr) + (img) + (input) + (link) + (meta) + (track) + (wbr))) + +

+``` + +When set to `xml?` things are different. + +```scheme +> (write-sxml'(*TOP* + (area) + (base) + (br) + (col) + (embed) + (hr) + (img) + (input) + (link) + (meta) + (track) + (wbr)) xml?: #t) + +

+``` + + +## SXML Queries + + +### sxpath + +```scheme +(sxpath path) -> sxml + + path := list +``` + +Evaluate an abbreviated SXPath + +``` +sxpath:: AbbrPath -> Converter, or +sxpath:: AbbrPath -> Node|Nodeset -> Nodeset +``` + +AbbrPath is a list. It is translated to the full SXPath according to the following rewriting rules: + +```scheme +(sxpath '()) -> (node-join) +(sxpath '(path-component ...)) -> + (node-join (sxpath1 path-component) (sxpath '(...))) +(sxpath1 '//) -> (node-or + (node-self (node-typeof? '*any*)) + (node-closure (node-typeof? '*any*))) +(sxpath1 '(equal? x)) -> (select-kids (node-equal? x)) +(sxpath1 '(eq? x)) -> (select-kids (node-eq? x)) +(sxpath1 ?symbol) -> (select-kids (node-typeof? ?symbol) +(sxpath1 procedure) -> procedure +(sxpath1 '(?symbol ...)) -> (sxpath1 '((?symbol) ...)) +(sxpath1 '(path reducer ...)) -> + (node-reduce (sxpath path) (sxpathr reducer) ...) +(sxpathr number) -> (node-pos number) +(sxpathr path-filter) -> (filter (sxpath path-filter)) +``` + + +### sxml-select + +```scheme +(sxml-select n predf [mapf = values]) -> sxml + + n := sxml nodes + predf := predicate function + mapf := transform function +``` + +Collects all children from node *n* that satisfy a predicate *predf*; optionally transforms result with mapping function *mapf* once a node satisfies a predicate, its children are not traversed. + + +### sxml-attributes + +```scheme +(sxml-attributes n) -> list | #f + + n := sxml node +``` + +Returns the attributes of given node *n* or #f if node does have any attributes. + + +### sxml-e + +```scheme +(sxml-e n) -> symbol | #f + + n := sxml node +``` + +Returns the element type of node *n* or #f if no type is found. + + +### sxml-find + +```scheme +(sxml-find n predf [mapf = values]) -> sxml + + n := sxml nodes + predf := predicate function + mapf := transform function +``` + +Find the first child that satisfies a predicate *predf*, using depth-first search. Predicate *predf* is a lambda which takes an node as parameter and returns an boolean. If optional *mapf* is given the results satisfying *predf* are transformed with it. + + +### sxml-select\* + +```scheme +(sxml-select* n predf [mapf = values]) -> sxml + + n := sxml nodes + predf := predicate function + mapf := transform function +``` + +Select from immediate children of node *n* using predicate function *predf*. Results satisfying *predf* are transformed if given optional mapping function *mapf*. + + +### sxml-attribute-e + +```scheme +(sxml-attribute-e n key) -> any | #f + + n := sxml node + key := string; node key +``` + +Returns the node *n* attribute value for given *key* or #f if value is not found. + + +### sxml-attribute-getq + +```scheme +(sxml-attribute-getq key attrs) -> any + + key := string; node key + attrs := alist? +``` + +attribute list => value + + +### sxml-class? + +```scheme +(sxml-class? klass) -> lambda + + klass := string; node class to match +``` + +returns dom class + + +### sxml-find\* + +```scheme +(sxml-find* n pred [mapf = values]) -> sxml | #f + + n := sxml node + pred := predicate fn + mapf := transform fn +``` + +find in immediate children + + +### sxml-e? + +```scheme +(sxml-e? el) -> lambda + + el := sxml element +``` + +returns element type + + +### sxml-id? + +```scheme +(sxml-id? id) -> lambda + + id := sxml node id value +``` + +returns dom id + + +### sxml-children + +```scheme +(sxml-children n) -> list + + n := sxml node +``` + +returns nodes children as a list + + +### sxml-find/context + +```scheme +(sxml-find/context n predf [mapf values]) -> sxml + + n := sxml node + predf := predicate fn to match + mapf := transform fn to apply to matches +``` + +find with context \ No newline at end of file diff --git a/doc/reference/std/markup/sxml/html/README.md b/doc/reference/std/markup/sxml/html/README.md new file mode 100644 index 000000000..e000ad876 --- /dev/null +++ b/doc/reference/std/markup/sxml/html/README.md @@ -0,0 +1,235 @@ +# HTML: Hyper Text Markup Language Module + +HTML is a widely used Markup Language that, while very similar to [XML](./xml.md), differs enough to have its own specific libraries. + +::: tip To use the bindings from this module: + +```scheme +(import :std/markup/html) +``` + +::: + +If HTML templates for web development are up your alley have a look at our [Template Attribute Language (TAL)](tal/README.md) which uses this parser and printer. + + +## HTML Parser and Printer + + +### Element, aka Tag Types + +"There are six different kinds of elements: void elements, the template element, raw text elements, escapable raw text elements, foreign elements, and normal elements." + +While HTML and XML are friends there are some elements in HTML that cannot be expressed in XML. Knowing what/where they are is important for both parsing and printing. + +1. Void: `current-html-void-tags` and `html-void-tag?` + + > Void elements + > + > area, base, br, col, embed, hr, img, input, link, meta, source, track, wbr + > + > Void elements can't have any contents (since there's no end tag, no content can be put between the start tag and the end tag). + > + > -- + + The void tags are stored in a parameter `current-html-void-tags`. It has more than the spec says but there's more than one spec and version so we try to be complete, + + ```scheme + > (current-html-void-tags) + (area base br col command embed hr img input keygen + link meta param source track wbr) + ``` + + There's an `html-void-tag?` procedure to test. It's case-insensitive as HTML is meant to be. + + ```scheme + > (html-void-tag? 'InPut) + (input keygen link meta param source track wbr) + > (html-void-tag? 'InPuter) + #f + ``` + +2. Raw Text: `current-html-raw-tags` and `html-raw-tag?` + + > Raw text elements script, style -- + + These are not escaped non-html contents. + + ```scheme + > (current-html-raw-tags) + (script style xmp) + > (html-raw-tag? 'ScRipt) + (script style xmp) + > (html-raw-tag? 'html) + #f + ``` + + +### Reading + +`html-parser` is intended as a permissive HTML parser for people who prefer the scalable interface described in Oleg Kiselyov's SSAX parser, as well as providing simple convenience utilities. It correctly handles all invalid HTML, inserting "virtual" starting and closing tags as needed to maintain the proper tree structure. A major goal of this parser is bug-for-bug compatibility with the way common web browsers parse HTML. + +1. html->sxml + + ```scheme + (def (html->sxml + port-or-string + start: (start (pgetq start: default-html->sxml-plist)) + end: (end (pgetq end: default-html->sxml-plist)) + decl: (decl (pgetq decl: default-html->sxml-plist)) + process: (process (pgetq process: default-html->sxml-plist)) + comment: (comment (pgetq comment: default-html->sxml-plist)) + text: (text (pgetq text: default-html->sxml-plist)) + bodyless: (bodyless (current-html-void-tags)) + literals: (literals (current-http-raw-tags))) + ...) + ``` + + Returns the SXML representation of the document from `port-or-string`, using the default or provided parsing options. + +2. default-html->sxml-plist + + This is where the default parsing options come from. + + ```scheme + (def default-html->sxml-plist + [start: (lambda (tag attrs seed virtual?) '()) + end: (lambda (tag attrs parent-seed seed virtual?) + `((,tag ,@(if (pair? attrs) + `((@ ,@attrs) ,@(reverse seed)) + (reverse seed))) + ,@parent-seed)) + decl: (lambda (tag attrs seed) `((*DECL* ,tag ,@attrs) ,@seed)) + process: (lambda (attrs seed) `((*PI* ,@attrs) ,@seed)) + comment: (lambda (text seed) `((*COMMENT* ,text) ,@seed)) + text: (lambda (text seed) (cons text seed))]) + ``` + +3. html-strip + + ```scheme + (html-strip port-or-string) + ``` + + Returns a string representation of the document from PORT with all tags removed. No whitespace reduction or other rendering is done. + + ```scheme + > (html-strip "

This is a title.

\n\n

This is the summary of things

") + "This is a title.\n\nThis is the summary of things" + ``` + +4. make-html-parser + + ```scheme + (make-html-parser start: #f end: #f text: #f + comment: #f decl: #f process: #f + entity: #f entities: *default-entities* + tag-levels: *tag-levels* + unnestables: *unnestables* + bodyless: (current-html-void-tags) + literals: (current-html-raw-tags) + terminators: *terminators*) + ``` + + Returns a procedure of two arguments, an initial seed and an optional input port, which parses the HTML document from the port with the callbacks specified by a keyword. + + The following callbacks are recognized: + + ``` + start: tag attrs seed virtual? + fdown in foldts, called when a start-tag is encountered. + tag := tag name + attrs := tag attributes as a alist + seed := current seed value + virtual? =: #t if this start tag was inserted to fix the HTML tree + ``` + + ``` + end: tag attrs parent-seed seed virtual? + fup in foldts, called when an end-tag is encountered. + tag := tag name + attrs := tag attributes of the corresponding start tag + parent -=SEED: parent seed value (i.e. seed passed to the start tag) + seed := current seed value + virtual? =: #t if this end tag was inserted to fix the HTML tree + ``` + + ``` + text: text seed + fhere in foldts, called when any text is encountered. May be + called multiple times between a start and end tag, so you need + to string-append yourself if desired. + text := entity-decoded text + seed := current seed value + ``` + + ``` + comment: text seed + fhere on comment data + ``` + + ``` + decl: name attrs seed + fhere on declaration data + + process: list seed + fhere on process-instruction data + ``` + + In addition, entity-mappings may be overriden with the `entities:` keyword. + + +## Writing + + +### sxml->html + +```scheme +(sxml->html sxml (port #f)) +``` + +Convert the HTML representation of `sxml` to a string which it outputs to the passed port. + +If the port is `#f`, or not provided, return a string. + + +### html-escape + +```scheme +(html-escape str (port #f) escapes: (esc #f)) +``` + +Returns or writes an HTML escaped string to the port by default replacing the characters `<>&"'` with the appropriate HTML entities. + +If the port is `#f`, or not provided, return a string. + +If other escapes are wanted a list can be passed with the `escapes:` keyword. If false the defaults are in [html-character-escapes](#html-character-escapes). + +```scheme +> html-character-escapes +((#\< . "<") + (#\> . ">") + (#\& . "&") + (#\" . """) + (#\' . "'")) +> (html-escape "< ' >") +"< ' >" +> (html-escape "< ' >" escapes: '((#\< . "Less Than"))) +"Less Than ' >" +``` + + + + +### html-character-escapes + +These are the characters that are escaped when writing HTML. + +```scheme +> html-character-escapes +((#\< . "<") + (#\> . ">") + (#\& . "&") + (#\" . """) + (#\' . "'") +``` diff --git a/doc/reference/std/markup/sxml/tal/README.md b/doc/reference/std/markup/sxml/tal/README.md new file mode 100644 index 000000000..40bff12f1 --- /dev/null +++ b/doc/reference/std/markup/sxml/tal/README.md @@ -0,0 +1,876 @@ +# TAL: The Template Attribute Language + +I, (drewc ), first got introduced to `TAL` around '05 by [@segv](https://github.com/segv) in his [Common Lisp YACLML](https://web.archive.org/web/20160315020505/http://www.3ofcoins.net/2010/01/21/yaclml-in-pictures-part-ii-templating/) library. + +Its usefulness cannot be understated! + +Here is my attempt at the [Zope Template Attribute Language](https://zope.readthedocs.io/en/latest/zopebook/AppendixC.html). + +::: tip To use the bindings from this module: + +```scheme +(import :std/markup/tal) +``` + +::: + +> The Template Attribute Language (TAL) is a templating language used to generate dynamic HTML and XML pages. Its main goal is to simplify the collaboration between programmers and designers. This is achieved by embedding TAL statements inside valid HTML (or XML) tags which can then be worked on using common design tools. +> +> – + +A `TAL Statement` is an attribute in an HTML tag that has a `tal:` prefix. For the most part the attribute value is **Gerbil** code. + +To define a `TAL` procedure we take `HTML` code that may have Template Attributes and transform it into a compiled function + + +## Attributes + +- **define:** creates local variables, valid in the element bearing the attribute (including contained elements) or sets a "global". +- **switch:** Set up a switch statement +- **condition:** decides whether or not to render the tag (and all contained text) +- **repeat:** creates a loop variable and repeats the tag iterating a sequence, e.g. for creating a selection list or a table +- **case:** A case in a `tal:switch` statement +- **content:** replaces the content of the tag +- **replace:** replaces the tag (and therefore is not usable together with content or attributes) +- **attributes:** replaces the given attributes (e. g. by using `tal:attributes="(name name) (id name)"` the name and id attributes of an input field could be set to the value of the variable "name") +- **omit-tag:** allows to omit the start and end tag and only render the content if the given expression is true. +- **on-error:** if an error occurs, this attribute works like the content tag. + +If a tag has more than one TAL attribute they are evaluated in the above (fairly logical) order. + + +## `define-TAL` + +```scheme +(define-TAL (name args ...) /key/ string-or-filename) + + (name args ...) := a definition for a function and parameters + similar to `def` + + /key/ := Optional, default `string:`, can also be file: + + string-or-filename := A literal string if the key is string: + A filename relative to the source if key is file: +``` + +A function that outputs **HTML** makes up a portion of a web application. Using `define-TAL` makes it easy to use a block of **HTML/XHTML** as a function. + +```scheme +(import :std/markup/tal :std/format :std/text/utf8 :std/sugar) + +(define-TAL (htmlist items) + "
  • + List Item
") +``` + +That gives a function that prints the **HTML** to `(current-tal-output-port)`. + +```scheme +> (htmlist ["These" "are" "list" "items"]) +
  • i) These
  • ii) are
  • iii) list
  • iv) items
+``` + +We can get rid of the nested quotes by using the `#<<` syntax and hide the `"`'s with a quick definition. + +```scheme +(def (fmt sym . args) (apply format (symbol->string sym) args)) +(define-TAL (foo item) #< +EOF +) +> (foo 42) +

'item:42'

> +``` + +Even better is the `file:` argument that pulls from a file. + +The following is placed in `foo.html` + +And the gerbil code is simple. + +```html + + <title></head> + <body><p tal:replace="raw: (body)"></p></body> +</html> +``` + +```scheme +(define-TAL (foo.html title body) file: "foo.html") +(define-TAL (bar) "<hr>") +``` + +We can then use it creatively. + +```scheme +> (foo.html "Title <hr>" bar) +<html> + <head><title>Title <hr> +
+ +``` + +As you can see it enables a fairly easy interaction between **HTML** syntax and **Lisp**. + + +## current-tal-output-port + +Output to where? The `current-tal-output-port` parameter of course! + +Most of the time it will be over a socket but for testing purposes we'll tear it down to a string. + +```scheme +(defrule (:> tal ...) + (let ((u8v (call-with-output-u8vector + #u8() (lambda (p) (parameterize ((current-tal-output-port p)) + tal ...))))) + (utf8->string u8v))) +``` + +Now we can see what it outputs and have a testable form as well. + +```scheme +> (:> (foo.html "Inside :>" bar)) +"\n Inside :>\n
\n\n" +``` + +As well as have a more documentation friendly output. + +```html +> (display #) + + Inside :> +
+ +``` + + +## tal:attributes : Replace element attributes + +*Syntax*: + +```bnf +argument ::= attribute_statement [attribute_statement]* +attribute_statement ::= ( attribute_name expression ) +attribute_name ::= Name +``` + +The `tal:attributes` statement is a way of setting the `attribute_name` to the value of `expression`. + +```scheme +(define-TAL (lnk href text) #< text +EOF +) +``` + +Works as expected + +```scheme +> (:> (lnk "https://duckduckgo.com" "Search")) +"Search +``` + +If the `expression` evaluates to `#f` the attribute is omitted. + +```scheme +> (:> (lnk #f "No Anchor!")) +"No Anchor!" +``` + +If the `tal:attributes` statement is on an element with a `tal:repeat` statement, the replacement is made on each repetition of the element, and the replacement expression is evaluated fresh for each repetition. + +```scheme +(define-TAL (sel items) #< + +``` + +If you use `tal:attributes` on an element with an active `tal:replace` command, the `tal:attributes` statement is ignored because of the order of operations. + +It can, of course, set more than one attribute. + +```scheme +(define-TAL (att-textarea (rows 80) (cols 20)) #< +EOF +) +``` + +```scheme +> (:> (att-textarea)) +"" +> (:> (att-textarea 10 42)) +"" +``` + + +## tal:condition : Conditionally insert or remove an element + +*Syntax* + +```bnf +argument ::= expression +``` + +The `tal:condition` statement includes the statement element in the template only if the expression evaluates to a value that's not `#f` and omits it otherwise. + +```scheme +(define-TAL (p-when value) #< P!

+EOF +) +``` + +```scheme +> (:> (p-when "Lorum Ipsum")) +"

Lorum Ipsum

" +> (:> (p-when #f)) +"" +``` + +It can be used for alternate conditions. + +```scheme +(define-TAL (p-if) #< +

Even

+

Odd

+ +EOF +) +``` + +```scheme +> (:> (p-if)) +"
\n

Even

\n\n
\n\n

Odd

\n
\n

Even

\n\n
\n\n

Odd

\n
+``` + +That's a good example of why "in-tag indentation" can be important. + +```html +> (display #) +
+

Even

+ +
+ +

Odd

+
+

Even

+ +
+ +

Odd

+
+``` + +Outside of the `tal:condition` but inside the `tal:repeat` are those newlines. Makes it nice to read but adds things that could mess up the display and really are not needed. + +```scheme +(define-TAL (p-if-in-tag) #< +

Even

Odd

+ +EOF +) +``` + +That gives us something "nicer". + +```scheme +> (:> (p-if-in-tag)) +"
\n

Even

\n
\n

Odd

\n
\n

Even

\n
\n

Odd

\n
" +``` + +Which kinda looks like what I'm trying to portray. + +```html +> (display #) +
+

Even

+
+

Odd

+
+

Even

+
+

Odd

+
+``` + + +## tal:content : Replace the content of an element + +*Syntax* + +```bnf +argument ::= (['text:'] | 'as-is:' | 'raw:') expression +``` + +You can insert `text:` or `as-is:` in place of its children with the `tal:content` statement. The statement argument is exactly like that of `tal:replace`, and is interpreted in the same fashion. + +If the expression evaluates to `#f` , the statement element is left childless. f the expression evaluates to default, then the element’s contents are unchanged. + +```scheme +(define-TAL (div-content cnt) #< Default content here +EOF +) +``` + +```scheme +> (:> (div-content default:)) +"
Default content here
" +> (:> (div-content "New Content")) +"
New Content
" +> (:> (div-content #f)) +"
" +``` + +The default replacement behavior is `text:` which replaces angle-brackets and ampersands with their HTML entity equivalents. + +```scheme +(define-TAL (div-text-content cnt) #< Default content here +EOF +) +``` + +```scheme +> (let (txt "Content in a
") + [(:> (div-content txt)) (:> (div-text-content txt))]) +("
Content in a <div/>
" + "
Content in a <div/>
") +``` + +The `as-is:` keyword passes the replacement text through unchanged allowing HTML/XML markup to be inserted. This can break your page if the text contains unanticipated markup (e.g.. text submitted via a web form), which is the reason that it is not the default. + +```scheme +(define-TAL (div-html-content cnt) #< Default content here
+EOF +) +``` + +The `default:` still works. + +```scheme +> (:> (div-html-content default:)) +"
Default content here
" +``` + +Finally the `raw:` keyword doesn't do anything with the expression beyond run it. + +```scheme +(define-TAL (div-raw-content cnt) #< Default content here +EOF +) +``` + +```scheme +> (:> (div-raw-content default:)) +"
" +``` + +Running something which outputs to `current-tal-output-port` will do the right thing, + +```scheme +(define-TAL (div-proc-content cnt) #< +EOF +) +``` + +```scheme +> (:> (div-proc-content + (lambda () + (div-text-content "esc:
") + (div-html-content "hr:
")))) +"
esc: <hr>
hr:
" +``` + + +## `tal:define` + +The `tal:define` command either wraps a `with*` around the tag (by default or with the `local:` keyword) and/or `set!`'ing things with the `set!:` keyword. + +```scheme +(define-TAL (let-and-set x y) #< +

Number?

+

The Answer?

+

We can set as well Bar =

+ +

Setting is what you expect:

+ +EOF +) +``` + +```scheme +> (:> (let-and-set 1 2)) +"
\n +

Number? 42

\n

The Answer? Yes!

\n

We can set as well Bar = 42

\n\n

Setting is what you expect: 42

\n
" +``` + +```scheme +> (display (html-strip #)) + + Number? 42 + The Answer? Yes! + We can set as well Bar = 42 + + Setting is what you expect: 42 +``` + +```scheme +> (:> (let-and-set 1 3)) +"
\n

Number? 63

\n

The Answer? No

\n

We can set as well Bar = 42

\n\n

Setting is what you expect: 42

\n
" +``` + +```scheme +> (display (html-strip #)) + + Number? 63 + The Answer? No + We can set as well Bar = 42 + + Setting is what you expect: 42 + > +``` + + +## tal:switch : Set up a switch statement + +If everything is testing the same item, and only one can succeed, a bunch of conditionals gets hairy. There's a `tal:switch` and some `tal:case` statements to round it up. + +```scheme +(define-TAL (switch-case item) #< + This is why I did not use cond or if. +

We've got foo!

Because where would this go? +

Else is working

+ +EOF +) + +``` + +```scheme +(define-TAL (switch-case item) #< + This is why I did not use cond or if. +

We've got foo!

Because where would this go? +

Else is working

+ +EOF +) +> (:> (switch-case 'asd)) +"
\n This is why I did not use cond or if.\n Because where would this go? \n

Else is working

\n
" +> (:> (switch-case 'foo)) +"
\n This is why I did not use cond or if.\n

We've got foo!

Because where would this go? \n \n
" +> + +``` + + +## tal:omit-tag : Remove an element leaving its contents + +*Syntax* + +```bnf +argument ::= [ expression ] +``` + +The `tal:omit-tag` statement leaves the contents of an element in place while omitting the surrounding start and end tags. + +If the expression evaluates to `#f` then normal processing of the element continues and the tags are not omitted. If the expression evaluates to a true value, or no expression is provided, the statement element is replaced with its contents. + +```scheme +(define-TAL (notag) #< Just The text! and a link

+EOF +) + +(define-TAL (maybe-tag val) #< Is this a Paragraph? Who knows!

+EOF +) +``` + +```scheme +> (:> (notag)) +" Just The text! and a link " +> (:> (maybe-tag #f)) +" Is this a Paragraph? Who knows!" +> (:> (maybe-tag 42)) +"

Is this a Paragraph? Who knows!

" +``` + + +## current-tal:on-error and tal:on-error. + +Handling errors in a decent way is built into our `TAL` be default. This is, from experience, made to make most of the page work if there is an unwanted and unseen error. + +To update the "outside" error handler outside of the `TAL` form/file there is a `current-tal:on-error`. For "inside" use the `tal:on-error` attribute is very useful. + + +### Default and current-tal:on-error + +By default the form that errors will write the error message prefixed with `ERROR:` in place of what is most likely its contents. + +```scheme +(define-TAL (test-no-on-error thunk) #< +
  • + +EOF +) +``` + +In running it we can see it still runs and does not mess up the page that much. + +```scheme +> (:> (test-no-on-error (cut error "This is the error message: "))) +"
      \n
    • ERROR: This is the error message: <escaped>
    • \n
    " +``` + +We can change it. + +```scheme +> (:> (parameterize ((current-tal:on-error + (lambda (e) '(log-error e) + (tal:write "Nothing wrong here!")))) + (test-no-on-error (cut error "Something Wrong!")))) +"
      \n
    • Nothing wrong here!
    • \n
    " +``` + +But in reality that abstraction's just there so pages still run with bugs in them. Even better for all involved is the `tal:on-error` attribute. + + +### tal:on-error + +*Syntax* + +```bnf +argument ::= (['text:'] | 'as-is:' | 'raw:' | 'ignore:' | 'ignore') expression +``` + +For a more precise handling of errors the `tal:on-error` catcher/handler makes it quite easy. When a `TAL Statement` produces an error if there is a `tal:on-error` on the element or any parent element the error is caught at that point and handled according to the expression. + +The first three keywords are treated the same as `tal:content` and on error the element becomes one of those. + +```scheme +;; No keyword is the same as `text:` +(define-TAL (test-got-error thunk) #< +
  • + +EOF +) + +``` + +The result differs from the default catcher. + +```scheme +> (:> (test-got-error (lambda () "Nice! No error"))) +"
      \n
    • Nice! No error
    • \n
    " +> (:> (test-got-error (cut error "error here"))) +"
      Got an Error!
    " +``` + +Because we catch it on the `
      ` the handler does not give us the `
    • ` wrapper and it breaks the valid HTML! We did that on purpose, of course, and that's the idea behind a much more specific catcher. + +```scheme +(define-TAL (test-got-li-error thunk) #< +
    • +
    +EOF +) +``` + +That allows us to be much more clinical. + +```scheme +> (:> (test-got-li-error (cut error "error here"))) +"
    • Got an Error!
    " +``` + +But these are errors and though informing the is always a good idea perhaps we also want to handle it outside of the tal forms. + +For that reason the `err` identifier is bound to the exception object within the `tal:on-error` statement. + +```scheme +(define-TAL (error-li) "
  • Got an Error!
  • ") +(def err-log []) +(def (log-err err) (set! err-log (cons err err-log))) +(def (handle-ul-error err) (log-err err) (error-li)) +``` + +```scheme +(define-TAL (test-handle-ul-error thunk) #< +
  • + +EOF +) +``` + +```scheme +> (length err-log) +0 +> (:> (test-handle-ul-error (cut error "asd"))) +"
    • Got an Error!
    " +> (length err-log) +1 +``` + +But that may raise the question of: why we need an unordered list that is an error? + +That that there's the `ignore` and `ignore:` arguments. + +```scheme +(define-TAL (test-ignore-error thunk) #< +
  • + +EOF +) +``` + +Now there's no `
      ` tag if it errors! + +```scheme +> (:> (test-ignore-error (lambda () "LI here!"))) +"
        \n
      • LI here!
      • \n
      " +> (:> (test-ignore-error (cut error "No UL here!"))) +"" +``` + +Or, like, if we actually want something that's not an unordered list, we can do that as well. + +```scheme +(define-TAL (error-div err) #< +EOF +) + +(define-TAL (test-ignore-div-error thunk) #< +
    • +
    +EOF +) +``` + +```scheme +> (length err-log) +1 +> (:> (test-ignore-div-error (lambda () "No Error"))) +"
      \n
    • No Error
    • \n
    " +> (length err-log) +1 +> (:> (test-ignore-div-error (cut error "Got div"))) +"
    Error Here!
    " +> (length err-log) +2 +``` + + +## tal:repeat : Repeat an element + +*Syntax* + +```bnf +argument ::= '(' variable-name expression ')' +variable-name ::= Identifier +``` + +The `tal:repeat` statement replicates a sub-tree of your document once for each item in a sequence. The expression should evaluate to anything acceptable for `:std/iter` to repeat. + +```scheme +(define-TAL (test-b-repeat thing) #<
    +EOF +) +``` + +```scheme +> (:> (test-b-repeat '(1 2 3))) +"123" +> (:> (test-b-repeat "asd")) +"asd" +> (:> (test-b-repeat #(v e c))) +"vec" +``` + +If the iterator is empty then the statement element is deleted, otherwise it is repeated for each value sequentially. + +```scheme +> (:> (test-b-repeat '())) +"" +> (:> (test-b-repeat "")) +"" +``` + +The `variable-name` is used to define a local variable and a `repeat/variable-name` for a `Repeat` interface variable. For each repetition, the local variable is set to the current sequence element, and the repeat variable is set to an interface around the iteration object. + + +### The Repeat Interface + +You use the `Repeat` interface to access information about the current repetition (such as the repeat index). The repeat interface has the same name as the local variable prefixed with `repeat/` and has the following methods. + +- **index:** repetition number, starting from zero. + +- **number:** repetition number, starting from one. + +- **even?:** true for even-indexed repetitions (0, 2, 4, …). + +- **odd?:** true for odd-indexed repetitions (1, 3, 5, …). + +- **start?:** true for the starting repetition (index 0). + +- **end?:** true for the ending, or final, repetition. + +- **letter:** repetition number as a lower-case letter: “a” - “z”, “aa” - “az”, “ba” - “bz”, …, “za” - “zz”, “aaa” - “aaz”, and so forth. + +- **Letter:** upper-case version of `letter`. + +- **roman:** repetition number as a lower-case roman numeral: “i”, “ii”, “iii”, “iv”, “v”, etc. + +- **Roman:** upper-case version of `roman`. + +Iterating over a sequence: + +```scheme +(define-TAL (rep seq) #< + +

    +EOF +) +``` + +```scheme +> (:> (rep '(foo bar baz))) +"

    \n foo\n

    \n bar\n

    \n baz\n

    " +``` + +Inserting a sequence of table rows, and using the repeat variable to number the rows: + +```scheme +(def desc car) +(def price cdr) + +(define-TAL (checkout-table cart) #< + + 1 + Widget + $1.50 + + +EOF +) +``` + +```scheme +> (:> (checkout-table '(("Soilent Green" . "$People") ("Napkins" . "$42.00")))) +"\n \n \n \n \n \n \n \n \n \n
    1Soilent Green$People
    2Napkins$42.00
    " +``` + +That's better to see displayed. + +```scheme +> (display #) + + + + + + + + + + +
    1Soilent Green$People
    2Napkins$42.00
    +``` + +Nested repeats: + +```scheme +(define-TAL (nested-repeats rows cols) #< + + + + 1 * 1 = 1 + + + + +EOF +) +``` + +```scheme + > (:> (nested-repeats '(1 2 3) #(4 6 5))) +"\n \n \n \n \n \n \n \n
    \n 1 * 1 = 1\n \n 1 * 2 = 2\n \n 1 * 3 = 3\n
    \n 2 * 1 = 2\n \n 2 * 2 = 4\n \n 2 * 3 = 6\n
    \n 3 * 1 = 3\n \n 3 * 2 = 6\n \n 3 * 3 = 9\n
    " +``` + +That's also nice to see in long form. + +```scheme +> (display #) + + + + + + + + +
    + 1 * 1 = 1 + + 1 * 2 = 2 + + 1 * 3 = 3 +
    + 2 * 1 = 2 + + 2 * 2 = 4 + + 2 * 3 = 6 +
    + 3 * 1 = 3 + + 3 * 2 = 6 + + 3 * 3 = 9 +
    +``` \ No newline at end of file diff --git a/doc/reference/std/markup/sxml/xml.md b/doc/reference/std/markup/sxml/xml.md new file mode 100644 index 000000000..77b8c744d --- /dev/null +++ b/doc/reference/std/markup/sxml/xml.md @@ -0,0 +1,63 @@ +# XML: eXtensible Markup Language + +The module provides XML parsing and printing procedures. It also exports the [SXML](README.md) procedures. + +::: tip To use the bindings from this module: + +```scheme +(import :std/markup/xml) +``` + +::: + + +## Parsing + + +### read-xml + +```scheme +(read-xml source [namespaces: ()]) -> sxml | error + + source := port | string | u8vector + namespaces := alist or hash-table mapping urls to namespace prefixes +``` + +Reads and parses XML from *source* and returns SXML result. *namespaces* is optional alist or a hash table of mapping uri (string) -> namespace (string) same interface as `parse-xml` so that implementations can be swapped. Signals an error on invalid *source* value. + +::: tip Examples + +```scheme +> (import :std/markup/xml) +> (read-xml "foobarbarbaz") +(*TOP* (foo (element (@ (id "1")) "foobar") (element (@ (id "2")) "barbaz"))) +``` + +::: + + +## Printing + + +### write-xml + +```scheme +(write-xml sxml [port = (current-output-port)]) -> void + + sxml := SXML nodes + port := output port +``` + +Writes given *sxml* data as XML into output *port*. Signals an error on invalid *port*. + + +### print-sxml->xml + +```scheme +(print-sxml->xml sxml [port = (current-output-port)]) -> void + + sxml := SXML nodes + port := output port +``` + +Write given *sxml* into *port* after converting it to XML. Indents the result to multiple lines. diff --git a/doc/reference/std/xml.md b/doc/reference/std/xml.md deleted file mode 100644 index 8f4ef6737..000000000 --- a/doc/reference/std/xml.md +++ /dev/null @@ -1,375 +0,0 @@ -# XML - -The module provides XML parsing and generation procedures. It also has optional -dependency on LibXML for XML/HTML parsing. See details below. - -::: tip To use the bindings from this module: -``` scheme -(import :std/xml) -``` -::: - -## Overview - -This module adds utilities to work with XML. Gerbil Scheme uses SXML -to represent the XML data. This module is mostly ported from Oleg's XML package. -See more detailed info about SXML can be found at [http://okmij.org/ftp/Scheme/xml.html]. - -We also provide bindings for [LibXML](http://www.xmlsoft.org/) with the -[gerbil-libxml](https://github.com/mighty-gerbils/gerbil-libxml) external package. -That provides C-based XML parser procedures *parse-xml* and -*parse-html* including their options. - -## Parsing - -### read-xml -``` scheme -(read-xml source [namespaces: ()]) -> sxml | error - - source := port | string | u8vector - namespaces := alist or hash-table mapping urls to namespace prefixes -``` - -Reads and parses XML from *source* and returns SXML result. *namespaces* is -optional alist or a hash table of mapping uri (string) -> namespace (string) -same interface as ```parse-xml``` so that implementations can be swapped. Signals an -error on invalid *source* value. - -::: tip Examples -``` scheme -> (import :std/xml) -> (read-xml "foobarbarbaz") -(*TOP* (foo (element (@ (id "1")) "foobar") (element (@ (id "2")) "barbaz"))) -``` -::: - - -## SXML Queries - -### sxpath -``` scheme -(sxpath path) -> sxml - - path := list -``` - - Evaluate an abbreviated SXPath -``` - sxpath:: AbbrPath -> Converter, or - sxpath:: AbbrPath -> Node|Nodeset -> Nodeset -``` - - AbbrPath is a list. It is translated to the full SXPath according - to the following rewriting rules: -``` scheme - (sxpath '()) -> (node-join) - (sxpath '(path-component ...)) -> - (node-join (sxpath1 path-component) (sxpath '(...))) - (sxpath1 '//) -> (node-or - (node-self (node-typeof? '*any*)) - (node-closure (node-typeof? '*any*))) - (sxpath1 '(equal? x)) -> (select-kids (node-equal? x)) - (sxpath1 '(eq? x)) -> (select-kids (node-eq? x)) - (sxpath1 ?symbol) -> (select-kids (node-typeof? ?symbol) - (sxpath1 procedure) -> procedure - (sxpath1 '(?symbol ...)) -> (sxpath1 '((?symbol) ...)) - (sxpath1 '(path reducer ...)) -> - (node-reduce (sxpath path) (sxpathr reducer) ...) - (sxpathr number) -> (node-pos number) - (sxpathr path-filter) -> (filter (sxpath path-filter)) -``` - -### sxml-select -``` scheme -(sxml-select n predf [mapf = values]) -> sxml - - n := sxml nodes - predf := predicate function - mapf := transform function -``` - -Collects all children from node *n* that satisfy a predicate *predf*; optionally -transforms result with mapping function *mapf* once a node satisfies a -predicate, its children are not traversed. - -### sxml-attributes -``` scheme -(sxml-attributes n) -> list | #f - - n := sxml node -``` - -Returns the attributes of given node *n* or #f if node does have any attributes. - -### sxml-e -``` scheme -(sxml-e n) -> symbol | #f - - n := sxml node -``` - -Returns the element type of node *n* or #f if no type is found. - -### sxml-find -``` scheme -(sxml-find n predf [mapf = values]) -> sxml - - n := sxml nodes - predf := predicate function - mapf := transform function -``` - -Find the first child that satisfies a predicate *predf*, using depth-first search. -Predicate *predf* is a lambda which takes an node as parameter and returns an -boolean. -If optional *mapf* is given the results satisfying *predf* are transformed with -it. - -### sxml-select* -``` scheme -(sxml-select* n predf [mapf = values]) -> sxml - - n := sxml nodes - predf := predicate function - mapf := transform function -``` - -Select from immediate children of node *n* using predicate function -*predf*. Results satisfying *predf* are transformed if given optional mapping -function *mapf*. - -### sxml-attribute-e -``` scheme -(sxml-attribute-e n key) -> any | #f - - n := sxml node - key := string; node key -``` - -Returns the node *n* attribute value for given *key* or #f if value is not found. - -### sxml-attribute-getq -``` scheme -(sxml-attribute-getq key attrs) -> any - - key := string; node key - attrs := alist? -``` - -attribute list => value - -### sxml-class? -``` scheme -(sxml-class? klass) -> lambda - - klass := string; node class to match -``` - -returns dom class - -### sxml-find* -``` scheme -(sxml-find* n pred [mapf = values]) -> sxml | #f - - n := sxml node - pred := predicate fn - mapf := transform fn -``` - -find in immediate children - -### sxml-e? -``` scheme -(sxml-e? el) -> lambda - - el := sxml element -``` - -returns element type - -### sxml-id? -``` scheme -(sxml-id? id) -> lambda - - id := sxml node id value -``` - -returns dom id - -### sxml-children -``` scheme -(sxml-children n) -> list - - n := sxml node -``` - -returns nodes children as a list - -### sxml-find/context -``` scheme -(sxml-find/context n predf [mapf values]) -> sxml - - n := sxml node - predf := predicate fn to match - mapf := transform fn to apply to matches -``` - -find with context - -## Printing - - -### write-xml -``` scheme -(write-xml sxml [port = (current-output-port)]) -> void - - sxml := SXML nodes - port := output port -``` - -Writes given *sxml* data as XML into output *port*. Signals an error on invalid *port*. - -### write-html -``` scheme -(write-html sxml [port = (current-output-port)]) -> void - - sxml := SXML nodes - port := output port -``` - -Writes given *sxml* data as HTML to output *port*. Signals an error on invalid *port*. - -### print-sxml->html -``` scheme -(print-sxml->html sxml [port = (current-output-port)]) -> void - - sxml := SXML nodes - port := output port -``` -Write given *sxml* into *port* after converting it to HTML. Indents the result -to multiple lines. - -### print-sxml->html* -``` scheme -(print-sxml->html* sxml [port = (current-output-port)]) -> void - - sxml := SXML nodes - port := output port -``` -Same as ```print-sxml->html``` but skips formatting the result. - -### print-sxml->html-fast -``` scheme -(print-sxml->html-fast sxml [port = (current-output-port)]) -> void - - sxml := SXML nodes - port := output port -``` - -Same as ```print-sxml->html``` but skips formatting the result. - -### print-sxml->xhtml -``` scheme -(print-sxml->xhtml sxml [port = (current-output-port)]) -> void - - sxml := SXML nodes - port := output port -``` - -Write given *sxml* into *port* after converting it to XHTML. Indents the result -to multiple lines. - -### print-sxml->xhtml* -``` scheme -(print-sxml->xhtml* sxml [port = (current-output-port)]) -> void - - sxml := SXML nodes - port := output port -``` - -Same as ```print-sxml->xhtml``` but skips formatting the result. - -### print-sxml->xhtml-fast -``` scheme -(print-sxml->xhtml-fast sxml [port = (current-output-port)]) -> void - - sxml := SXML nodes - port := output port -``` - -Same as ```print-sxml->xhtml``` but skips formatting the result. - -### print-sxml->xml -``` scheme -(print-sxml->xml sxml [port = (current-output-port)]) -> void - - sxml := SXML nodes - port := output port -``` - -Write given *sxml* into *port* after converting it to XML. Indents the result -to multiple lines. - -### print-sxml->xml* -``` scheme -(print-sxml->xml* sxml [port = (current-output-port)]) -> void - - sxml := SXML nodes - port := output port -``` - -Same as ```print-sxml->xml``` but skips formatting the result. - -### print-sxml->xml-fast -``` scheme -(print-sxml->xml-fast sxml [port = (current-output-port)]) -> void - - sxml := SXML nodes - port := output port -``` - -Same as ```print-sxml->xml``` but skips formatting the result. - -### pretty-print-sxml->xml-file -``` scheme -(pretty-print-sxml->xml-file item outpath [noblanks]) -> void - - sxml := SXML nodes - port := output port -``` -Serializes SXML data from *sxml* into XML and writes the result to a *port*. - -This depends on external ```xmllint``` program being in PATH. - -### pretty-print-sxml->xhtml-file -``` scheme -(pretty-print-sxml->xhtml-file sxml [port = (current-output-port)]) -> void - - sxml := SXML nodes - port := output port -``` -Serializes SXML data from *sxml* into XHTML and writes the result to a *port*. - -This depends on external ```xmllint``` program being in PATH. - - -### sxml->html-string-fragment -``` scheme -(sxml->html-string-fragment item [maybe-level] [quote-char = #\"]) -> string - - item := SXML nodes - maybe-level := #f | fixnum; how much to indent the result or skip indent if #f - quote-char := quote character to use -``` - -Serializes the given SXML nodes in *item* into HTML string and returns it as a -string. If *maybe-level* is given the result is indented. - -### sxml->xhtml-string -``` scheme -(sxml->xhtml-string item [maybe-level] [quote-char = #\"]) -> string - - item := SXML nodes - maybe-level := #f | fixnum; how much to indent the result or skip indent if #f - quote-char := quote character to use -``` diff --git a/src/std/build-spec.ss b/src/std/build-spec.ss index 1ed4ec32f..914b17411 100644 --- a/src/std/build-spec.ss +++ b/src/std/build-spec.ss @@ -316,13 +316,24 @@ "net/httpd/api" "net/httpd" "net/sasl" + ;; :std/markup/sxml + "markup/sxml/print" + (gxc: "markup/sxml/ssax") + "markup/sxml/sxpath" + "markup/sxml/xml" + "markup/sxml/sxml-inf" + (gxc: "markup/sxml/html/parser" (extra-inputs: ("markup/sxml/html/html-parser.scm"))) + "markup/sxml/tal/toplevel" + "markup/sxml/tal/iter" + "markup/sxml/tal/expander" + "markup/sxml/tal/syntax" + "markup/sxml" + ;; :std/markup/tal + "markup/tal" + ;:std/markup/html + "markup/html" ;; :std/xml - (gxc: "xml/ssax") - "xml/sxpath" - "xml/sxml" - (gsc: "xml/sxml-to-xml") - (ssi: "xml/sxml-to-xml") - "xml/print" + "markup/xml" "xml" ;; :std/crypto (static-include: "crypto/libcrypto-rfc5114.c") diff --git a/src/std/markup/README.org b/src/std/markup/README.org new file mode 100644 index 000000000..ec8222ffc --- /dev/null +++ b/src/std/markup/README.org @@ -0,0 +1,12 @@ +#+TITLE: Markup languages +#+EXPORT_FILE_NAME: ../../../doc/reference/std/markup/README.md +#+OPTIONS: toc:nil + +* Markup Languages + +Gerbil supports many different markup languages using [[file:README.org][SXML]] for +parsing, searching, manipulating and printing. + +- [[file:sxml/xml.org][XML]] :: eXtensible Markup Language +- [[file:sxml/html/README.org][HTML]] :: Hyper Text Markup Language +- [[file:sxml/tal/README.org][TAL]] :: Template Attribute Language diff --git a/src/std/markup/html.ss b/src/std/markup/html.ss new file mode 100644 index 000000000..35e36a38d --- /dev/null +++ b/src/std/markup/html.ss @@ -0,0 +1,7 @@ +(import :std/markup/sxml/html/parser :std/markup/sxml/print) +(export (import: :std/markup/sxml/html/parser) + current-html-void-tags + html-void-tag? + current-html-raw-tags + html-raw-tag?) +;;; This library is tangled from sxml/html/README.org diff --git a/src/std/xml-test.ss b/src/std/markup/sxml-test.ss similarity index 66% rename from src/std/xml-test.ss rename to src/std/markup/sxml-test.ss index 4617ac134..be25f4eb0 100644 --- a/src/std/xml-test.ss +++ b/src/std/markup/sxml-test.ss @@ -1,20 +1,21 @@ -(export xml-test) +(export sxml-test) -(import :std/test :std/xml) +(import :std/test :std/markup/sxml) -(def xml-test - (test-suite "test :std/xml" +(def sxml-test + (test-suite "test :std/sxml" (test-case "converting elements from sxml to html" ; normal elements with children include a closing tag - (check-output (print-sxml->html* + (check-output (write-sxml '(*TOP* (div (p "I'm paragraph one") - (p "I'm paragraph two")))) + (p "I'm paragraph two"))) port: (current-output-port) + xml?: #f) "

    I'm paragraph one

    I'm paragraph two

    ") ; void elements must never include a closing tag - (check-output (print-sxml->html* + (check-output (write-sxml '(*TOP* (area) (base) @@ -27,5 +28,5 @@ (link) (meta) (track) - (wbr))) + (wbr)) port: (current-output-port) xml?: #f) "

    ")))) \ No newline at end of file diff --git a/src/std/markup/sxml.ss b/src/std/markup/sxml.ss new file mode 100644 index 000000000..5487579b8 --- /dev/null +++ b/src/std/markup/sxml.ss @@ -0,0 +1,8 @@ +(import :std/build-config + :std/markup/sxml/print + :std/markup/sxml/sxpath + :std/markup/sxml/sxml-inf) +(export (import: + :std/markup/sxml/print + :std/markup/sxml/sxpath + :std/markup/sxml/sxml-inf)) diff --git a/src/std/markup/sxml/README.org b/src/std/markup/sxml/README.org new file mode 100644 index 000000000..1cf687be9 --- /dev/null +++ b/src/std/markup/sxml/README.org @@ -0,0 +1,417 @@ +#+TITLE: SXML: XML and HTML (AKA XML Infoset) data as S-expressions +#+EXPORT_FILE_NAME: ../../../../doc/reference/std/markup/sxml/README.md +#+OPTIONS: toc:nil + +* Contents :noexport: +:PROPERTIES: +:TOC: :include siblings :depth 3 :ignore (this) +:END: +:CONTENTS: +- [[#sxml-s-expression-extensible-markup-language][SXML: S-expression eXtensible Markup Language]] + - [[#concepts][Concepts]] + - [[#printer][Printer]] + - [[#write-sxml][write-sxml]] + - [[#sxml-queries][SXML Queries]] + - [[#sxpath][sxpath]] + - [[#sxml-select][sxml-select]] + - [[#sxml-attributes][sxml-attributes]] + - [[#sxml-e][sxml-e]] + - [[#sxml-find][sxml-find]] + - [[#sxml-select-0][sxml-select*]] + - [[#sxml-attribute-e][sxml-attribute-e]] + - [[#sxml-attribute-getq][sxml-attribute-getq]] + - [[#sxml-class][sxml-class?]] + - [[#sxml-find-0][sxml-find*]] + - [[#sxml-e-0][sxml-e?]] + - [[#sxml-id][sxml-id?]] + - [[#sxml-children][sxml-children]] + - [[#sxml-findcontext][sxml-find/context]] +:END: + +* SXML: S-expression eXtensible Markup Language +:PROPERTIES: +:CUSTOM_ID: sxml-s-expression-extensible-markup-language +:END: + +This module adds utilities to work with XML and HTML. It has been +inspired by Oleg's SXML package. See more detailed info about SXML can +be found [[http://okmij.org/ftp/Scheme/xml.html][here]]. + +::: tip To use the bindings from this module: + +#+begin_src scheme +(import :std/markup/sxml) +#+end_src + +::: + +** Concepts +:PROPERTIES: +:CUSTOM_ID: concepts +:END: + +"SXML is an abstract syntax tree of an XML document. SXML is also a +concrete representation of the XML Infoset in the form of +S-expressions." + +When developing in Gerbil we generally use sexps. XML and HTML are not +quite sexps. + +For parsing and printing have a look at [[file:xml.org][XML]] or the [[./html.md][HTML docs]] +depending on your needs. + +There's a lot more detail in the [[https://okmij.org/ftp/Scheme/SXML.html][SMXL Specification]] so for basics a +simple = +#+end_src + +If the second item is a list that starts with an =@= symbol, =(@ ...}= +marks the start of the attributes alist. + +Otherwise it's an element or a block of text. Simple! + +If you notice, the =write-sxml= function indents the html in a +whitespace sensitive way to ensure there are no extra characters in the +actual output. + +** Printer +:PROPERTIES: +:CUSTOM_ID: printer +:END: + +All of HTML, XML and XHTML are printed from the same function. + +*** write-sxml +:PROPERTIES: +:CUSTOM_ID: write-sxml +:END: + +#+begin_src scheme + (def (write-sxml + sxml + port: (port (current-sxml-output-port)) + xml?: (xml? (current-sxml-output-xml?)) + indent: (indent #f) + quote-char: (quote-char #\")) ...) + + sxml := An sxml element, a list of elements, or text. + port := A keyword for binding the output port + xml? := A keyword for boolean choosing XML or HTML. Defaults to #f + indent := A keyword where #f means no indentation and a number means indent (aka + pretty print) the output hiegenically staring at this level. + quote-char := A keyword that chooses the quote character, either #\" + or #\', for attributess. +#+end_src + +This is a generic abstract markup printer. The =:std/xml= and +=:std/html= printers are based off of this one for more specific +usage. + + +#+begin_src scheme + > (write-sxml '(*TOP* + (div + (p "I'm paragraph one") + (p "I'm paragraph two")))) +

    I'm paragraph one

    I'm paragraph two

    +#+end_src + +By default the =(current-sxml-output-port)= is set to +=(current-output-port)=. It may not be what is expected and is really +just for REPL use so set the port or parameterize +=(current-sxml-output-port)= for best results. + +The XML/HTML can be indented. It does so inside the tags so as not to +pollute or change semantics. + +#+begin_src scheme + > (write-sxml '(*TOP* + (div + (p "I'm paragraph one") + (p "I'm paragraph two"))) indent: 1) +

    I'm paragraph one

    I'm paragraph two

    +#+end_src + +For HTML, the default, empty tags with no close are allowed. + +#+begin_src scheme + > (write-sxml'(*TOP* + (area) + (base) + (br) + (col) + (embed) + (hr) + (img) + (input) + (link) + (meta) + (track) + (wbr))) + +

    +#+end_src + +When set to =xml?= things are different. + +#+begin_src scheme + > (write-sxml'(*TOP* + (area) + (base) + (br) + (col) + (embed) + (hr) + (img) + (input) + (link) + (meta) + (track) + (wbr)) xml?: #t) + +

    +#+end_src + + + + +** SXML Queries +:PROPERTIES: +:CUSTOM_ID: sxml-queries +:END: + +*** sxpath +:PROPERTIES: +:CUSTOM_ID: sxpath +:END: +#+begin_src scheme +(sxpath path) -> sxml + + path := list +#+end_src + +Evaluate an abbreviated SXPath + +#+begin_example + sxpath:: AbbrPath -> Converter, or + sxpath:: AbbrPath -> Node|Nodeset -> Nodeset +#+end_example + +AbbrPath is a list. It is translated to the full SXPath according to the +following rewriting rules: + +#+begin_src scheme + (sxpath '()) -> (node-join) + (sxpath '(path-component ...)) -> + (node-join (sxpath1 path-component) (sxpath '(...))) + (sxpath1 '//) -> (node-or + (node-self (node-typeof? '*any*)) + (node-closure (node-typeof? '*any*))) + (sxpath1 '(equal? x)) -> (select-kids (node-equal? x)) + (sxpath1 '(eq? x)) -> (select-kids (node-eq? x)) + (sxpath1 ?symbol) -> (select-kids (node-typeof? ?symbol) + (sxpath1 procedure) -> procedure + (sxpath1 '(?symbol ...)) -> (sxpath1 '((?symbol) ...)) + (sxpath1 '(path reducer ...)) -> + (node-reduce (sxpath path) (sxpathr reducer) ...) + (sxpathr number) -> (node-pos number) + (sxpathr path-filter) -> (filter (sxpath path-filter)) +#+end_src + +*** sxml-select +:PROPERTIES: +:CUSTOM_ID: sxml-select +:END: +#+begin_src scheme +(sxml-select n predf [mapf = values]) -> sxml + + n := sxml nodes + predf := predicate function + mapf := transform function +#+end_src + +Collects all children from node /n/ that satisfy a predicate /predf/; +optionally transforms result with mapping function /mapf/ once a node +satisfies a predicate, its children are not traversed. + +*** sxml-attributes +:PROPERTIES: +:CUSTOM_ID: sxml-attributes +:END: +#+begin_src scheme +(sxml-attributes n) -> list | #f + + n := sxml node +#+end_src + +Returns the attributes of given node /n/ or #f if node does have any +attributes. + +*** sxml-e +:PROPERTIES: +:CUSTOM_ID: sxml-e +:END: +#+begin_src scheme +(sxml-e n) -> symbol | #f + + n := sxml node +#+end_src + +Returns the element type of node /n/ or #f if no type is found. + +*** sxml-find +:PROPERTIES: +:CUSTOM_ID: sxml-find +:END: +#+begin_src scheme +(sxml-find n predf [mapf = values]) -> sxml + + n := sxml nodes + predf := predicate function + mapf := transform function +#+end_src + +Find the first child that satisfies a predicate /predf/, using +depth-first search. Predicate /predf/ is a lambda which takes an node as +parameter and returns an boolean. If optional /mapf/ is given the +results satisfying /predf/ are transformed with it. + +*** sxml-select* +:PROPERTIES: +:CUSTOM_ID: sxml-select-0 +:END: +#+begin_src scheme +(sxml-select* n predf [mapf = values]) -> sxml + + n := sxml nodes + predf := predicate function + mapf := transform function +#+end_src + +Select from immediate children of node /n/ using predicate function +/predf/. Results satisfying /predf/ are transformed if given optional +mapping function /mapf/. + +*** sxml-attribute-e +:PROPERTIES: +:CUSTOM_ID: sxml-attribute-e +:END: +#+begin_src scheme +(sxml-attribute-e n key) -> any | #f + + n := sxml node + key := string; node key +#+end_src + +Returns the node /n/ attribute value for given /key/ or #f if value is +not found. + +*** sxml-attribute-getq +:PROPERTIES: +:CUSTOM_ID: sxml-attribute-getq +:END: +#+begin_src scheme +(sxml-attribute-getq key attrs) -> any + + key := string; node key + attrs := alist? +#+end_src + +attribute list => value + +*** sxml-class? +:PROPERTIES: +:CUSTOM_ID: sxml-class +:END: +#+begin_src scheme +(sxml-class? klass) -> lambda + + klass := string; node class to match +#+end_src + +returns dom class + +*** sxml-find* +:PROPERTIES: +:CUSTOM_ID: sxml-find-0 +:END: +#+begin_src scheme +(sxml-find* n pred [mapf = values]) -> sxml | #f + + n := sxml node + pred := predicate fn + mapf := transform fn +#+end_src + +find in immediate children + +*** sxml-e? +:PROPERTIES: +:CUSTOM_ID: sxml-e-0 +:END: +#+begin_src scheme +(sxml-e? el) -> lambda + + el := sxml element +#+end_src + +returns element type + +*** sxml-id? +:PROPERTIES: +:CUSTOM_ID: sxml-id +:END: +#+begin_src scheme +(sxml-id? id) -> lambda + + id := sxml node id value +#+end_src + +returns dom id + +*** sxml-children +:PROPERTIES: +:CUSTOM_ID: sxml-children +:END: +#+begin_src scheme +(sxml-children n) -> list + + n := sxml node +#+end_src + +returns nodes children as a list + +*** sxml-find/context +:PROPERTIES: +:CUSTOM_ID: sxml-findcontext +:END: +#+begin_src scheme +(sxml-find/context n predf [mapf values]) -> sxml + + n := sxml node + predf := predicate fn to match + mapf := transform fn to apply to matches +#+end_src + +find with context + diff --git a/src/std/markup/sxml/html/README.org b/src/std/markup/sxml/html/README.org new file mode 100644 index 000000000..d008fcd78 --- /dev/null +++ b/src/std/markup/sxml/html/README.org @@ -0,0 +1,326 @@ +#+TITLE: Hyper Text Markup Language +#+EXPORT_FILE_NAME: ../../../../../doc/reference/std/markup/sxml/html/README.org +#+OPTIONS: toc:nil + +* Contents :noexport: +:PROPERTIES: +:TOC: :include siblings :depth 3 :ignore (this) +:END: +:CONTENTS: +- [[#html-hyper-text-markup-language-module][HTML: Hyper Text Markup Language Module]] + - [[#html-parser-and-printer][HTML Parser and Printer]] + - [[#element-aka-tag-types][Element, aka Tag Types]] + - [[#reading][Reading]] + - [[#writing][Writing]] + - [[#sxml-html][sxml->html]] + - [[#html-escape][html-escape]] + - [[#html-character-escapes][html-character-escapes]] +:END: + +* HTML: Hyper Text Markup Language Module +:PROPERTIES: +:CUSTOM_ID: html-hyper-text-markup-language-module +:END: + +HTML is a widely used Markup Language that, while very similar to [[./xml.md][XML]], +differs enough to have its own specific libraries. + + +::: tip To use the bindings from this module: + +#+begin_src scheme + (import :std/markup/html) +#+end_src + +::: + +If HTML templates for web development are up your alley have a look at +our [[file:tal/README.org][Template Attribute Language (TAL)]] which uses this parser and +printer. + + + +** HTML Parser and Printer +:PROPERTIES: +:CUSTOM_ID: html-parser-and-printer +:END: + + +*** Element, aka Tag Types +:PROPERTIES: +:CUSTOM_ID: element-aka-tag-types +:END: + +"There are six different kinds of elements: void elements, the template +element, raw text elements, escapable raw text elements, foreign +elements, and normal elements." + + +While HTML and XML are friends there are some elements in HTML that +cannot be expressed in XML. Knowing what/where they are is important +for both parsing and printing. + + +**** Void: =current-html-void-tags= and =html-void-tag?= + +#+begin_quote +Void elements + +area, base, br, col, embed, hr, img, input, link, meta, source, track, +wbr + +Void elements can't have any contents (since there's no end tag, no +content can be put between the start tag and the end tag). + +--https://html.spec.whatwg.org/multipage/syntax.html#void-elements +#+end_quote + +The void tags are stored in a parameter =current-html-void-tags=. It +has more than the spec says but there's more than one spec and version +so we try to be complete, + +#+begin_src scheme + > (current-html-void-tags) + (area base br col command embed hr img input keygen + link meta param source track wbr) +#+end_src + +There's an =html-void-tag?= procedure to test. It's case-insensitive +as HTML is meant to be. + +#+begin_src scheme + > (html-void-tag? 'InPut) +(input keygen link meta param source track wbr) +> (html-void-tag? 'InPuter) +#f +#+end_src + +**** Raw Text: =current-html-raw-tags= and =html-raw-tag?= + +#+begin_quote +Raw text elements + script, style +--https://html.spec.whatwg.org/multipage/syntax.html#raw-text-elements +#+end_quote + +These are not escaped non-html contents. + +#+begin_src scheme +> (current-html-raw-tags) +(script style xmp) +> (html-raw-tag? 'ScRipt) +(script style xmp) +> (html-raw-tag? 'html) +#f +#+end_src + + + +*** Reading +:PROPERTIES: +:CUSTOM_ID: reading +:END: + +=html-parser= is intended as a permissive HTML parser for people who +prefer the scalable interface described in Oleg Kiselyov's SSAX +parser, as well as providing simple convenience utilities. It +correctly handles all invalid HTML, inserting "virtual" starting and +closing tags as needed to maintain the proper tree structure. A major +goal of this parser is bug-for-bug compatibility with the way common +web browsers parse HTML. + +**** html->sxml +:PROPERTIES: +:CUSTOM_ID: html-sxml +:END: + +#+begin_src scheme + (def (html->sxml + port-or-string + start: (start (pgetq start: default-html->sxml-plist)) + end: (end (pgetq end: default-html->sxml-plist)) + decl: (decl (pgetq decl: default-html->sxml-plist)) + process: (process (pgetq process: default-html->sxml-plist)) + comment: (comment (pgetq comment: default-html->sxml-plist)) + text: (text (pgetq text: default-html->sxml-plist)) + bodyless: (bodyless (current-html-void-tags)) + literals: (literals (current-http-raw-tags))) + ...) +#+end_src + +Returns the SXML representation of the document from =port-or-string=, using the +default or provided parsing options. + +**** default-html->sxml-plist +:PROPERTIES: +:CUSTOM_ID: default-html-sxml-plist +:END: + +This is where the default parsing options come from. + +#+begin_src scheme :noweb-ref default-keys + (def default-html->sxml-plist + [start: (lambda (tag attrs seed virtual?) '()) + end: (lambda (tag attrs parent-seed seed virtual?) + `((,tag ,@(if (pair? attrs) + `((@ ,@attrs) ,@(reverse seed)) + (reverse seed))) + ,@parent-seed)) + decl: (lambda (tag attrs seed) `((*DECL* ,tag ,@attrs) ,@seed)) + process: (lambda (attrs seed) `((*PI* ,@attrs) ,@seed)) + comment: (lambda (text seed) `((*COMMENT* ,text) ,@seed)) + text: (lambda (text seed) (cons text seed))]) +#+end_src + +**** html-strip +:PROPERTIES: +:CUSTOM_ID: html-strip +:END: + +#+begin_src scheme + (html-strip port-or-string) +#+end_src + +Returns a string representation of the document from PORT with all tags +removed. No whitespace reduction or other rendering is done. + +#+begin_src scheme +> (html-strip "

    This is a title.

    \n\n

    This is the summary of things

    ") +"This is a title.\n\nThis is the summary of things" +#+end_src + +**** make-html-parser +:PROPERTIES: +:CUSTOM_ID: make-html-parser +:END: + +#+begin_src scheme + (make-html-parser start: #f end: #f text: #f + comment: #f decl: #f process: #f + entity: #f entities: *default-entities* + tag-levels: *tag-levels* + unnestables: *unnestables* + bodyless: (current-html-void-tags) + literals: (current-html-raw-tags) + terminators: *terminators*) +#+end_src + +Returns a procedure of two arguments, an initial seed and an optional +input port, which parses the HTML document from the port with the +callbacks specified by a keyword. + +The following callbacks are recognized: + +#+begin_example + start: tag attrs seed virtual? + fdown in foldts, called when a start-tag is encountered. + tag := tag name + attrs := tag attributes as a alist + seed := current seed value + virtual? =: #t if this start tag was inserted to fix the HTML tree +#+end_example + +#+begin_example + end: tag attrs parent-seed seed virtual? + fup in foldts, called when an end-tag is encountered. + tag := tag name + attrs := tag attributes of the corresponding start tag + parent -=SEED: parent seed value (i.e. seed passed to the start tag) + seed := current seed value + virtual? =: #t if this end tag was inserted to fix the HTML tree +#+end_example + +#+begin_example + text: text seed + fhere in foldts, called when any text is encountered. May be + called multiple times between a start and end tag, so you need + to string-append yourself if desired. + text := entity-decoded text + seed := current seed value +#+end_example + +#+begin_example + comment: text seed + fhere on comment data +#+end_example + +#+begin_example + decl: name attrs seed + fhere on declaration data + + process: list seed + fhere on process-instruction data +#+end_example + +In addition, entity-mappings may be overriden with the =entities:= +keyword. + + +** Writing +:PROPERTIES: +:CUSTOM_ID: writing +:END: + +*** sxml->html +:PROPERTIES: +:CUSTOM_ID: sxml-html +:END: + +#+begin_src scheme + (sxml->html sxml (port #f)) +#+end_src + +Convert the HTML representation of =sxml= to a string which it outputs +to the passed port. + +If the port is =#f=, or not provided, return a string. + +*** html-escape +:PROPERTIES: +:CUSTOM_ID: html-escape +:END: + + +#+begin_src scheme + (html-escape str (port #f) escapes: (esc #f)) +#+end_src + +Returns or writes an HTML escaped string to the port by default +replacing the characters =<>&"'= with the appropriate HTML entities. + +If the port is =#f=, or not provided, return a string. + +If other escapes are wanted a list can be passed with the =escapes:= +keyword. If false the defaults are in [[#html-character-escapes][html-character-escapes]]. + +#+begin_src scheme +> html-character-escapes +((#\< . "<") + (#\> . ">") + (#\& . "&") + (#\" . """) + (#\' . "'")) +> (html-escape "< ' >") +"< ' >" +> (html-escape "< ' >" escapes: '((#\< . "Less Than"))) +"Less Than ' >" +#+end_src + +*** html-character-escapes +:PROPERTIES: +:CUSTOM_ID: html-character-escapes +:END: + +These are the characters that are escaped when writing HTML. + +#+begin_src scheme +> html-character-escapes +((#\< . "<") + (#\> . ">") + (#\& . "&") + (#\" . """) + (#\' . "'") +#+end_src + + + diff --git a/src/std/markup/sxml/html/html-parser.scm b/src/std/markup/sxml/html/html-parser.scm new file mode 100644 index 000000000..84f899e89 --- /dev/null +++ b/src/std/markup/sxml/html/html-parser.scm @@ -0,0 +1,649 @@ +;; html-parser.scm -- SSAX-like tree-folding html parser +;; Copyright (c) 2003-2014 Alex Shinn. All rights reserved. +;; Copyright (c) 2023 Drew Crampsie + +;; CHANGELOG +;; 2023-12-30: + +;; Use `current-html-void-tags` for bodyless:. + +;; 2023-12-08: +;; +;; Changed (##sys#char->utf8-string (integer->char name))) +;; to (integer->utf8-string name) +;; +;; Add % in front of sxml->html, html->sxml, make-html-parser, +;; html-escape +;; +;; Make a function that returns sxml valid for printing +;; +;; Removed some comments. Have a look at the README.org + + +;; BSD-style license: http://synthcode.com/license.txt + +;; A permissive HTML parser supporting scalable streaming with a +;; folding interface. This copies the interface of Oleg Kiselyov's +;; SSAX parser, as well as providing simple convenience utilities. It +;; correctly handles all invalid HTML, inserting "virtual" starting +;; and closing tags as needed to maintain the proper tree structure +;; needed for the foldts down/up logic. A major goal of this parser +;; is bug-for-bug compatibility with the way common web browsers parse +;; HTML. + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; text parsing utils + +(define (read-while pred . o) + (let ((in (if (pair? o) (car o) (current-input-port)))) + (call-with-output-string + (lambda (out) + (let lp () + (let ((c (peek-char in))) + (cond + ((and (not (eof-object? c)) (pred c)) + (write-char (read-char in) out) + (lp))))))))) + +(define (read-until pred . o) + (let ((in (if (pair? o) (car o) (current-input-port)))) + (call-with-output-string + (lambda (out) + (let lp () + (let ((c (peek-char in))) + (cond + ((not (or (eof-object? c) (pred c))) + (write-char (read-char in) out) + (lp))))))))) + +;; Generates a KMP reader that works on ports, returning the text read +;; up until the search string (or the entire port if the search string +;; isn't found). This is O(n) in the length of the string returned, +;; as opposed to the find-string-from-port? in SSAX which uses +;; backtracking for an O(nm) algorithm. This is hard-coded to +;; case-insensitively match, since that's what we need for HTML. A +;; more general utility would abstract the character matching +;; predicate and possibly provide a limit on the length of the string +;; read. +(define (make-string-reader/ci str) + (let* ((len (string-length str)) + (vec (make-vector len 0))) + (cond ((> len 0) + (vector-set! vec 0 -1) + (cond ((> len 1) (vector-set! vec 1 0))))) + (let lp ((i 2) (j 0)) + (cond + ((< i len) + (let ((c (string-ref str i))) + (cond + ((char-ci=? (string-ref str (- i 1)) (string-ref str j)) + (vector-set! vec i (+ j 1)) + (lp (+ i 1) (+ j 1))) + ((> j 0) + (lp i (vector-ref vec j))) + (else + (vector-set! vec i 0) + (lp (+ i 1) j))))))) + (lambda o + (let ((in (if (pair? o) (car o) (current-input-port)))) + (call-with-output-string + (lambda (out) + (let lp ((i 0)) + (cond + ((< i len) + (let ((c (peek-char in))) + (cond + ((eof-object? c) + (display (substring str 0 i) out)) + ((char-ci=? c (string-ref str i)) + (read-char in) + (lp (+ i 1))) + (else + (let* ((i2 (vector-ref vec i)) + (i3 (if (= -1 i2) 0 i2))) + (if (> i i3) (display (substring str 0 (- i i3)) out) #f) + (if (= -1 i2) (write-char (read-char in) out) #f) + (lp i3)))))))))))))) + +(define skip-whitespace (lambda x (apply read-while char-whitespace? x))) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; html-specific readers + +(define (char-alphanumeric? c) + (or (char-alphabetic? c) (char-numeric? c))) + +(define (char-hex-numeric? c) + (or (char-numeric? c) + (memv (char-downcase c) '(#\a #\b #\c #\d #\e #\f)))) + +(define read-identifier (lambda x (apply read-while char-alphanumeric? x))) + +(define read-integer (lambda x (apply read-while char-numeric? x))) + +(define read-hex-integer (lambda x (apply read-while char-hex-numeric? x))) + +(define (read-entity in) + (read-char in) + (cond + ((eqv? (peek-char in) #\#) + (read-char in) + (cond + ((char-numeric? (peek-char in)) + (let* ((str (read-integer in)) + (num (string->number str))) + (cond ((eqv? (peek-char in) #\;) + (read-char in))) + (cons 'entity num))) + ((memv (peek-char in) '(#\x #\X)) + (read-char in) + (let* ((str (read-hex-integer in)) + (num (string->number str 16))) + (cond ((eqv? (peek-char in) #\;) + (read-char in))) + (cons 'entity num))) + (else + (cons 'text "&#")))) + ((char-alphabetic? (peek-char in)) + (let ((name (read-identifier in))) + (cond ((eqv? (peek-char in) #\;) + (read-char in))) + (cons 'entity name))) + (else + (cons 'text "&")))) + +(define (read-quoted in entities) + (let ((terminator (read-char in))) + (let lp ((res '())) + (cond + ((eof-object? (peek-char in)) + (reverse res)) + ((eqv? terminator (peek-char in)) + (read-char in) ; discard terminator + (reverse res)) + ((eqv? #\& (peek-char in)) + (let ((x (read-entity in))) + (lp (cons (or (and (eq? 'entity (car x)) + (get-entity entities (cdr x))) + (string-append "&" (cdr x))) + res)))) + (else + (lp (cons (read-until (lambda (c) (or (eqv? #\& c) (eqv? terminator c))) in) + res))))))) + +(define (read-pi in) + (let ((tag (read-identifier in))) + (skip-whitespace in) + (list + (if (equal? tag "") #f (string->symbol (string-downcase tag))) + (list->string + (reverse + (let loop ((res '())) + (let ((c (peek-char in))) + (cond + ((eof-object? c) + (read-char in) + res) + ((eqv? c #\?) + (read-char in) + (let loop2 ((res res)) + (cond + ((eof-object? (peek-char in)) + (cons #\? res)) + ((eqv? #\> (peek-char in)) + (read-char in) + res) + ((eqv? #\? (peek-char in)) + (read-char in) + (loop2 (cons c res))) + (else + (loop (cons c res)))))) + (else + (read-char in) + (loop (cons c res))))))))))) + +(define read-comment (make-string-reader/ci "-->")) + +(define (tag-char? c) + (and (char? c) + (or (char-alphanumeric? c) (memv c '(#\- #\+ #\* #\_ #\:))))) + +(define (read-attrs in entities) + (let loop ((attrs '())) + (skip-whitespace in) + (let ((c (peek-char in))) + (cond + ((or (eof-object? c) (eqv? c #\>)) + (read-char in) + (list #f (reverse attrs))) + ((eqv? c #\/) + (read-char in) + (skip-whitespace in) + (cond + ((eqv? #\> (peek-char in)) + (read-char in) + (list #t (reverse attrs))) + (else + (loop attrs)))) + ((eqv? c #\") + (read-char in) + (loop attrs)) + ((not (tag-char? c)) + (list #f (reverse attrs))) + (else + (let ((name (read-while tag-char? in))) + (if (string=? name "") + (loop attrs) + (let ((name (string->symbol (string-downcase name)))) + (cond + ((eqv? (peek-char in) #\=) + (read-char in) + (let ((value (if (memv (peek-char in) '(#\" #\')) + (apply string-append + (read-quoted in entities)) + (read-until + (lambda (c) + (or (char-whitespace? c) + (memv c '(#\' #\" #\< #\>)))) + in)))) + (loop (cons (list name value) attrs)))) + (else + (loop (cons (list name) attrs)))))))))))) + +(define (read-start in entities) + (let ((tag (string->symbol (string-downcase (read-while tag-char? in))))) + (cons tag (read-attrs in entities)))) + +(define (read-end in) + (let ((tag (read-while tag-char? in))) + (cond + ((equal? tag "") + (read-until (lambda (c) (eqv? c #\>)) in) + (read-char in) + #f) + (else + ;; discard closing attrs + (read-attrs in '()) + (string->symbol (string-downcase tag)))))) + +(define (read-decl in entities) + (let loop ((res '())) + (skip-whitespace in) + (let ((c (peek-char in))) + (cond + ((eof-object? c) + (reverse res)) + ((eqv? c #\") + (loop (cons (read-quoted in entities) res))) + ((eqv? c #\>) + (read-char in) + (reverse res)) + ((eqv? c #\<) + (read-char in) + (if (eqv? (peek-char in) #\!) (read-char in) #f) + (loop (cons (read-decl in entities) res))) + ((tag-char? c) + (loop (cons (string->symbol (read-while tag-char? in)) res))) + (else + (read-char in) + (loop res)))))) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; the parser + +(define *default-entities* + '(("amp" . "&") ("quot" . "\"") ("lt" . "<") + ("gt" . ">") ("apos" . "'") ("nbsp" . " "))) + +(define (get-entity entities name) + (cond + ((number? name) (integer->utf8-string name)) + ((string->number name) + => (lambda (n) (integer->utf8-string n))) + ((assoc name entities) => cdr) + (else #f))) + +;; span's and div's can be used at any level +(define *tag-levels* + '(html (head body) table (thead tbody) tr (th td) p (b i u s))) + +(define *unnestables* + '(p li td tr)) + +#;(define *bodyless* + '(img hr br meta link)) + +#;(define *literals* + '(script xmp)) + +(define *terminators* + '(plaintext)) + +(define (tag-level tag-levels tag) + (let lp ((ls tag-levels) (i 0)) + (if (null? ls) + (+ i 1000) + (if (if (pair? (car ls)) + (memq tag (car ls)) + (eq? tag (car ls))) + i + (lp (cdr ls) (+ i 1)))))) + +(define read-cdata (make-string-reader/ci "]]>")) + +(define (read-html-token . o) + (let ((in (if (pair? o) (car o) (current-input-port))) + (entities (if (and (pair? o) (pair? (cdr o))) (cadr o) '()))) + (let ((c (peek-char in))) + (if (eof-object? c) + (cons 'eof c) + (case c + ((#\<) + (read-char in) + (case (peek-char in) + ((#\!) + (read-char in) + (cond + ((eqv? #\[ (peek-char in)) + (read-char in) + (let lp ((check '(#\C #\D #\A #\T #\A #\[)) + (acc '(#\[ #\! #\<))) + (cond + ((null? check) + (cons 'text (read-cdata in))) + ((let ((c (peek-char in))) + (and (not (eof-object? c)) (char-ci=? c (car check)))) + (lp (cdr check) (cons (read-char in) acc))) + (else + (cons 'text (list->string (reverse acc))))))) + ((and (eqv? #\- (peek-char in)) + (begin (read-char in) + (eqv? #\- (peek-char in)))) + (read-char in) + (cons 'comment (read-comment in))) + (else + (cons 'decl (read-decl in entities))))) + ((#\?) + (read-char in) + (cons 'process (read-pi in))) + ((#\/) + (read-char in) + (cons 'end (read-end in))) + (else + ;; start tags must immediately be followed by an + ;; alphabetic charater, or we just treat the < as text + (if (and (char? (peek-char in)) + (char-alphabetic? (peek-char in))) + (let ((res (read-start in entities))) + (if (cadr res) + (cons 'start/end (cons (car res) (cddr res))) + (cons 'start (cons (car res) (cddr res))))) + (cons 'text "<"))))) + ((#\&) + (read-entity in)) + (else + (cons 'text + (read-until (lambda (c) (or (eqv? c #\<) (eqv? c #\&))) + in)))))))) + +(define (%key-ref ls key default) + (cond ((memq key ls) => cadr) (else default))) + +(define (%make-html-parser . o) + (let* ((start (%key-ref o 'start: (lambda (t a s v) s))) + (end (%key-ref o 'end: (lambda (t a p s v) s))) + (text (%key-ref o 'text: (lambda (t s) s))) + (decl (%key-ref o 'decl: (lambda (t a s) s))) + (process (%key-ref o 'process: (lambda (t s) s))) + (comment (%key-ref o 'comment: (lambda (t s) s))) + (entities (%key-ref o 'entities: *default-entities*)) + (tag-levels (%key-ref o 'tag-levels: *tag-levels*)) + (unnestables (%key-ref o 'unnestables: *unnestables*)) + (bodyless (%key-ref o 'bodyless: (current-html-void-tags))) + (literals + (map (lambda (x) + (cons x (make-string-reader/ci + (string-append "string x) ">")))) + (%key-ref o 'literals: (current-html-raw-tags)))) + (terminators (%key-ref o 'terminators: *terminators*)) + (entity (%key-ref o 'entity: + (lambda (t s) + (text (or (get-entity entities t) + (string-append "&" t ";")) + s))))) + (define (entity->string sxml seed out) + (if (pair? sxml) + (if (eq? 'entity (car sxml)) + (entity->string (entity (cdr sxml) seed) seed out) + (for-each (lambda (x) (entity->string x seed out)) sxml)) + (display sxml out))) + (define (fix-attrs ls seed) + (map + (lambda (x) + (cons (car x) + (if (pair? (cdr x)) + (list + (call-with-output-string + (lambda (out) (entity->string (cadr x) seed out)))) + (cdr x)))) + ls)) + (define (fix-decl ls seed) + (map (lambda (x) + (if (pair? x) + (call-with-output-string + (lambda (out) (entity->string x seed out))) + x)) + ls)) + (lambda (seed . o) + (parameterize ((current-html-void-tags bodyless) + (current-html-raw-tags (map car literals))) + (let* ((src (if (pair? o) (car o) (current-input-port))) + (in (if (string? src) (open-input-string src) src))) + (let lp ((tok (read-html-token in entities)) + (seed seed) + (seeds '()) + (tags '())) + (case (car tok) + ((eof) ; close all open tags + (let lp ((t tags) (s seeds) (seed seed)) + (if (null? t) + seed + (lp (cdr t) (cdr s) + (end (caar t) (cadar t) (car s) seed 'eof))))) + ((start/end) + (let* ((tag (cadr tok)) + (rest (cons (fix-attrs (caddr tok) seed) (cdddr tok))) + (tok (cons tag rest))) + (lp `(end . ,tag) + (start tag (car rest) seed #f) + (cons seed seeds) + (cons tok tags)))) + ((start) + (let* ((tag (cadr tok)) + (rest (cons (fix-attrs (caddr tok) seed) (cdddr tok))) + (tok (cons tag rest))) + (cond + ((memq tag terminators) + (lp `(text . ,(read-until (lambda (c) #f) in)) + (start tag (car rest) seed #f) + (cons seed seeds) + (cons tok tags))) + ((assq tag literals) + => (lambda (lit) + (let ((body ((cdr lit) in)) + (seed2 (start tag (car rest) seed #f))) + (lp `(end . ,tag) + (if (equal? "" body) seed2 (text body seed2)) + (cons seed seeds) + (cons tok tags))))) + ((html-void-tag? tag) + (lp `(end . ,tag) + (start tag (car rest) seed #f) + (cons seed seeds) + (cons tok tags))) + ((and (pair? tags) (eq? tag (caar tags)) + (memq tag unnestables)) + ;;

    ...

    implies siblings, not nesting + (let ((seed2 + (end tag (cadar tags) (car seeds) seed 'sibling))) + (lp (read-html-token in entities) + (start tag (car rest) seed #f) + (cons seed2 (cdr seeds)) + (cons tok (cdr tags))))) + (else + (lp (read-html-token in entities) + (start tag (car rest) seed #f) + (cons seed seeds) + (cons tok tags)))))) + ((end) + (cond + ((not (cdr tok)) ;; nameless closing tag + (lp (read-html-token in entities) seed seeds tags)) + ((and (pair? tags) (eq? (cdr tok) (caar tags))) + (lp (read-html-token in entities) + (end (cdr tok) (fix-attrs (cadar tags) seed) + (car seeds) seed #f) + (cdr seeds) + (cdr tags))) + (else + (let ((this-level (tag-level tag-levels (cdr tok))) + (expected-level + (if (pair? tags) + (tag-level tag-levels (caar tags)) + -1))) + (cond + ((< this-level expected-level) + ;; higher-level tag, forcefully close preceding tags + (lp tok + (end (caar tags) (fix-attrs (cadar tags) seed) + (car seeds) seed 'parent-closed) + (cdr seeds) + (cdr tags))) + ((and (= this-level expected-level) (pair? (cdr tags))) + ;; equal, interleave (close prec tag, close this, + ;; re-open prec) + ;; => + ;; ^^^^ ^^^ + ;; XXXX handle backups > 1 here + (let* ((seed2 (end (caar tags) (cadar tags) + (car seeds) seed 'interleave)) + (seed3 (end (caadr tags) (cadadr tags) + (cadr seeds) seed2 #f))) + (let ((tok2 (read-html-token in entities))) + (cond + ((and (eq? 'end (car tok2)) + (eq? (caar tags) (cdr tok2))) + ;; simple case where the closing tag + ;; immediately follows + (lp (read-html-token in entities) seed3 + (cddr seeds) (cddr tags))) + (else + (lp tok2 + (start (caar tags) (cadar tags) seed3 + 'interleave) + (cons seed3 (cddr seeds)) + (cons (car tags) (cddr tags)))))))) + (else + ;; spurious end for a lower-level tag, add + ;; imaginary start + (let* ((seed2 (start (cdr tok) '() seed 'no-start)) + (seed3 (end (cdr tok) '() seed seed2 #f))) + (lp (read-html-token in entities) seed3 seeds tags)))))))) + ((text) + (lp (read-html-token in entities) (text (cdr tok) seed) seeds tags)) + ((entity) + (lp (read-html-token in entities) (entity (cdr tok) seed) seeds tags)) + ((comment) + (lp (read-html-token in entities) (comment (cdr tok) seed) seeds tags)) + ((decl) + (lp (read-html-token in entities) + (decl (cadr tok) (fix-decl (cddr tok) seed) seed) seeds tags)) + ((process) + (lp (read-html-token in entities) (process (cdr tok) seed) seeds tags)) + (else + (error "invalid token: " tok))))))))) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; simple conversions + +(define %html->sxml + (let ((parse + (%make-html-parser + 'start: (lambda (tag attrs seed virtual?) '()) + 'end: (lambda (tag attrs parent-seed seed virtual?) + `((,tag ,@(if (pair? attrs) + `((@ ,@attrs) ,@(reverse seed)) + (reverse seed))) + ,@parent-seed)) + 'decl: (lambda (tag attrs seed) `((*DECL* ,tag ,@attrs) ,@seed)) + 'process: (lambda (attrs seed) `((*PI* ,@attrs) ,@seed)) + 'comment: (lambda (text seed) `((*COMMENT* ,text) ,@seed)) + 'text: (lambda (text seed) (cons text seed)) + ))) + (lambda o + (cons '*TOP* (reverse (apply parse '() o)))))) + +(define (html-escape-attr str) + (call-with-output-string + (lambda (out) (html-display-escaped-string str out)))) + +(define (html-attr->string attr) + (string-append + (symbol->string (car attr)) "=\"" + (html-escape-attr (if (pair? (cdr attr)) (cadr attr) (cdr attr))) + "\"")) + +(define (html-tag->string tag attrs) + (let lp ((ls attrs) (res (list (symbol->string tag) "<"))) + (if (null? ls) + (apply string-append (reverse (cons ">" res))) + (lp (cdr ls) (cons (html-attr->string (car ls)) (cons " " res)))))) + +(define html-character-escapes + '((#\< . "<") + (#\> . ">") + (#\& . "&") + (#\" . """) + (#\' . "'"))) + +(define (html-display-escaped-string str out . escapes) + (let ((start 0) + (end (string-length str)) + (escape (if (null? escapes) html-character-escapes (car escapes)))) + (let lp ((from start) (to start)) + (if (>= to end) + (display (substring str from to) out) + (cond + ((assq (string-ref str to) escape) + => (lambda (esc) + (display (substring str from to) out) + (display (cdr esc) out) + (lp (+ to 1) (+ to 1)))) + (else + (lp from (+ to 1)))))))) + +(define (%html-escape str . escapes) + (call-with-output-string + (lambda (out) (apply html-display-escaped-string str out escapes)))) + +(define (sxml-display-as-html sxml . o) + (let ((out (if (pair? o) (car o) (current-output-port)))) + (write-sxml sxml port: out xml?: #f))) + + +(define (%sxml->html sxml . o) + (call-with-output-string + (lambda (out) (sxml-display-as-html sxml out)))) + +;; just strips tags, no whitespace handling or formatting +(define (html-strip . o) + (call-with-output-string + (lambda (out) + (let ((parse + (%make-html-parser + 'start: (lambda (tag attrs seed virtual?) seed) + 'end: (lambda (tag attrs parent-seed seed virtual?) seed) + 'text: (lambda (text seed) (display text out))))) + (apply parse (cons #f #f) o))))) + +;) diff --git a/src/std/markup/sxml/html/parser.ss b/src/std/markup/sxml/html/parser.ss new file mode 100644 index 000000000..491249ac3 --- /dev/null +++ b/src/std/markup/sxml/html/parser.ss @@ -0,0 +1,74 @@ +(import :gerbil/gambit :std/error :std/srfi/1 :std/srfi/13 :std/text/utf8 + :std/markup/sxml/print) +(export + html->sxml + default-html->sxml-plist + html-strip + make-html-parser + + sxml->html + html-escape + html-character-escapes) + +(def (integer->utf8-string n) (utf8->string (u8vector n))) + +(def (print-sxml sxml (out (current-output-port))) + (sxml-display-as-html sxml out)) + +(def (sxml->html sxml (port #f)) + (if port (print-sxml sxml port) (%sxml->html sxml))) +(def (html-escape str (port #f) escapes: (esc #f)) + (unless esc (set! esc html-character-escapes)) + (if port (html-display-escaped-string str port esc) (%html-escape str esc))) + +(def default-html->sxml-plist + [start: (lambda (tag attrs seed virtual?) '()) + end: (lambda (tag attrs parent-seed seed virtual?) + `((,tag ,@(if (pair? attrs) + `((@ ,@attrs) ,@(reverse seed)) + (reverse seed))) + ,@parent-seed)) + decl: (lambda (tag attrs seed) `((*DECL* ,tag ,@attrs) ,@seed)) + process: (lambda (attrs seed) `((*PI* ,@attrs) ,@seed)) + comment: (lambda (text seed) `((*COMMENT* ,text) ,@seed)) + text: (lambda (text seed) (cons text seed))]) + +(include "html-parser.scm") + +(defsyntax (%mkref stx) + (syntax-case stx () + ((_ arg ...) + #'(concatenate (list (if arg [(symbol->keyword 'arg) arg] []) ...))))) + +(def (make-html-parser + start: (start #f) end: (end #f) text: (text #f) comment: (comment #f) + decl: (decl #f) process: (process #f) entity: (entity #f) + entities: (entities *default-entities*) + tag-levels: (tag-levels *tag-levels*) + unnestables: (unnestables *unnestables*) + bodyless: (bodyless (current-html-void-tags)) + literals: (literals (current-html-raw-tags)) + terminators: (terminators *terminators*)) + (apply %make-html-parser + (%mkref start end text comment decl process entity entities tag-levels + unnestables literals terminators))) + +(def (html->sxml + port-or-string + start: (start (pgetq start: default-html->sxml-plist)) + end: (end (pgetq end: default-html->sxml-plist)) + decl: (decl (pgetq decl: default-html->sxml-plist)) + process: (process (pgetq process: default-html->sxml-plist)) + comment: (comment (pgetq comment: default-html->sxml-plist)) + text: (text (pgetq text: default-html->sxml-plist)) + entity: (entity #f) entities: (entities *default-entities*) + tag-levels: (tag-levels *tag-levels*) + unnestables: (unnestables *unnestables*) + bodyless: (bodyless (current-html-void-tags)) + literals: (literals (current-html-raw-tags)) + terminators: (terminators *terminators*)) + (let ((parse + (apply %make-html-parser + (%mkref start end text comment decl process entity entities tag-levels + unnestables literals terminators)))) + (cons '*TOP* (reverse (parse '() port-or-string))))) diff --git a/src/std/markup/sxml/html/tal.ss b/src/std/markup/sxml/html/tal.ss new file mode 100644 index 000000000..1c643508a --- /dev/null +++ b/src/std/markup/sxml/html/tal.ss @@ -0,0 +1,5 @@ +;; See ./tal/README.org for the implementation. +(import (only-in :std/html/tal/syntax + define-TAL current-tal-output-port + current-tal:on-error tal:write)) +(export define-TAL current-tal-output-port current-tal:on-error tal:write) diff --git a/src/std/xml/oleg/README.md b/src/std/markup/sxml/oleg/README.md similarity index 100% rename from src/std/xml/oleg/README.md rename to src/std/markup/sxml/oleg/README.md diff --git a/src/std/xml/oleg/SSAX.scm b/src/std/markup/sxml/oleg/SSAX.scm similarity index 100% rename from src/std/xml/oleg/SSAX.scm rename to src/std/markup/sxml/oleg/SSAX.scm diff --git a/src/std/xml/oleg/SXPath.scm b/src/std/markup/sxml/oleg/SXPath.scm similarity index 100% rename from src/std/xml/oleg/SXPath.scm rename to src/std/markup/sxml/oleg/SXPath.scm diff --git a/src/std/xml/oleg/char-encoding.scm b/src/std/markup/sxml/oleg/char-encoding.scm similarity index 100% rename from src/std/xml/oleg/char-encoding.scm rename to src/std/markup/sxml/oleg/char-encoding.scm diff --git a/src/std/xml/oleg/define-opt.scm b/src/std/markup/sxml/oleg/define-opt.scm similarity index 100% rename from src/std/xml/oleg/define-opt.scm rename to src/std/markup/sxml/oleg/define-opt.scm diff --git a/src/std/xml/oleg/input-parse.scm b/src/std/markup/sxml/oleg/input-parse.scm similarity index 100% rename from src/std/xml/oleg/input-parse.scm rename to src/std/markup/sxml/oleg/input-parse.scm diff --git a/src/std/xml/oleg/look-for-str.scm b/src/std/markup/sxml/oleg/look-for-str.scm similarity index 100% rename from src/std/xml/oleg/look-for-str.scm rename to src/std/markup/sxml/oleg/look-for-str.scm diff --git a/src/std/xml/oleg/myenv-gerbil.scm b/src/std/markup/sxml/oleg/myenv-gerbil.scm similarity index 100% rename from src/std/xml/oleg/myenv-gerbil.scm rename to src/std/markup/sxml/oleg/myenv-gerbil.scm diff --git a/src/std/xml/oleg/parser-errors-vanilla.scm b/src/std/markup/sxml/oleg/parser-errors-vanilla.scm similarity index 100% rename from src/std/xml/oleg/parser-errors-vanilla.scm rename to src/std/markup/sxml/oleg/parser-errors-vanilla.scm diff --git a/src/std/markup/sxml/print.ss b/src/std/markup/sxml/print.ss new file mode 100644 index 000000000..84831b73a --- /dev/null +++ b/src/std/markup/sxml/print.ss @@ -0,0 +1,264 @@ +(import :std/srfi/13) +(export #t) + +; * Printer + +;; SXML is, well, sexps! So we know that things are either a "list" or an +;; "atom". + +;; An SXML element is a list that starts with a symbol. If the cadr is a +;; list starting with the =@= the cdr of that is the attributes as a +;; alist. + +(def (sxml-element? t) (and (pair? t) (symbol? (car t)))) +(def (sxml-element-attributes el) + (match el + ([name [(eq? '@) attr ...] _ ...] attr) + (else #f))) + +;; We are outputing/translating SXML into some kind of markup. Possibly XML. +(def current-sxml-output-port (make-parameter (current-output-port))) +(def current-sxml-output-xml? (make-parameter #t)) + +(def (write-sxml + sxml + port: (port (current-sxml-output-port)) + indent: (indent #f) + xml?: (xml? (current-sxml-output-xml?)) + quote-char: (quote-char #\")) + (def args [port: port indent: indent xml?: xml? quote-char: quote-char]) + + (match sxml + ([(? symbol? sym) _ ...] + (if (eqv? (string-ref (symbol->string sym) 0) #\*) + (apply write-sxml-special-tag sxml args) + (apply write-sxml-element sxml args))) + ((? pair?) (for-each (cut apply write-sxml <> args) sxml)) + ((? (or null? not)) (void)) + ((? procedure?) (apply write-sxml (sxml) args)) + (else (write-sxml-atom sxml port: port in-attribute?: #f)))) + + +; ** Atoms + +;; So an atom is simple enough as every atom in (X)(HT)ML is really just +;; a string of text with certain chars escaped. + +;; Inside an attribute the quote character needs escaping as well. + + +(def (write-sxml-atom + thing + port: (port (current-sxml-output-port)) + in-attribute?: (in-attribute? #f) + quote-char: (quote-char #\")) + + (def html-character-escapes + '((#\< . "<") + (#\> . ">") + (#\& . "&") + (#\" . """) + (#\' . "'"))) + + (def (escape-char? char) + (case char + ((#\" #\') in-attribute?) + ((#\< #\> #\&) #t) + (else #f))) + + (cond + ((char? thing) + (case thing + ((#\' #\") (if (and in-attribute? (eqv? thing quote-char)) + (write-string (assget thing html-character-escapes) port) + (write-char thing port))) + ((#\< #\> #\&) + (write-string (assget thing html-character-escapes) port)) + (else (write-char thing port)))) + ((string? thing) + (let* ((str thing) + (start 0) + (end (string-length str))) + (let lp ((from start) (to start)) + (if (>= to end) + (display (substring str from to) port) + (let (char (string-ref str to)) + (cond + ((escape-char? char) + (display (substring str from to) port) + (write-sxml-atom + char port: port in-attribute?: in-attribute? quote-char: quote-char) + (lp (+ to 1) (+ to 1))) + (else + (lp from (+ to 1))))))))) + (else (write-sxml-atom + (if (symbol? thing) + (symbol->string thing) + (call-with-output-string "" (cut write thing <>))) + port: port + in-attribute?: in-attribute? + quote-char: quote-char)))) + + +; ** Attributes + +(def (write-sxml-attribute + attr + port: (port (current-sxml-output-port)) + xml?: (xml? (current-sxml-output-xml?)) + quote-char: (quote-char #\")) + + (def (write-name n) + (match n + ((? symbol?) (write n port)) + ((? string?) (write-string n port)))) + (match attr + ((cons name val) + (when (and (null? val) xml?) + (error "Invalid attribute, XML needs a value." attr)) + (set! val (if (not (pair? val)) val (car val))) + (unless (not val) + (write-name name) + (when (not (null? val)) + (write-char #\= port) + (write-char quote-char port) + (write-sxml-atom + val in-attribute?: #t port: port quote-char: quote-char) + (write-char quote-char port)))) + ((? (or symbol? string?)) + (write-sxml-attribute + [attr] port: port quote-char: quote-char xml?: xml?)) + ((? not) (void)))) + +; ** Printing an HTML/XML element from SXML + +;; There are two types of "elements" in SXML. What I call "special" +;; elements are those whose names start with `#\*` as that's not valid +;; (X)(HT)ML but valid scheme + +;; Special *XML* tags are, case insensitive, `*comment*`, `*cdata*` and +;; `*unencoded*`. + +;; Special *HTML* tags are, case insensitive, `*decl*` (mostly for +;; doctype), `*pi*` (for processing instruction AKA php), `*comment*`, +;; `*unencoded*`. + + +; *** Write Special Elements +(def (sxml-special-tag? t) + (and (pair? t) (symbol? (car t)) (eqv? #\* (string-ref (symbol->string t) 0)))) + +(def (write-sxml-special-tag sxml + port: (port (current-sxml-output-port)) + xml?: (xml? (current-sxml-output-xml?)) + quote-char: (quote-char #\") + indent: (indent #f)) + (def both-specials '("*comment*" "*top*" "*unencoded*")) + (def xml-specials ["*cdata*" both-specials ...]) + (def html-specials ["*decl*" "*pi*" both-specials ...]) + (def tag (string-downcase (symbol->string (car sxml)))) + (def xml-special? (member tag xml-specials)) + (def html-special? (member tag html-specials)) + + (def (display-pi-or-decl bdy) + (when (car bdy) (display (car bdy) port)) + (for-each (lambda (x) (display " " port) (display x port)) + (cdr bdy))) + + + (unless (or (and xml? xml-special?) (and (not xml?) html-special?)) + (if xml? (error "Invalid XML tag" tag) + (error "Invalid HTML tag" tag))) + + (let* ((name (car sxml)) + (attributes (and (pair? (cadr sxml)) (eq? '@ (caadr sxml)) + (sxml-element-attributes sxml))) + (body (if (not attributes) (cdr sxml) (cddr sxml)))) + + (case (string->symbol tag) + ((*top*) + (write-sxml + body port: port xml?: xml? quote-char: quote-char indent: indent)) + ((*comment*) + (write-string "" port)) + ((*cdata*) + (write-string " port) body) + (write-string "]]>" port)) + ((*decl*) + (write-string "" port)) + ((*pi*) + (write-string "" port)) + ((*unencoded*) (for-each (cut display <> port) body)))) + + (void)) + +; *** The Printer for everything + +;; Now the guts. Pretty much self explanatory. + +(def current-indentation-width (make-parameter 0)) + +(def current-html-void-tags + (make-parameter + '(area base br col command embed hr img input keygen + link meta param source track wbr))) + +(def (html-void-tag? name) + (member name (current-html-void-tags) + (lambda xy (apply string-ci= (map symbol->string xy))))) + +(def current-html-raw-tags + (make-parameter '(script style xmp))) + +(def (html-raw-tag? name) + (member name (current-html-raw-tags) + (lambda xy (apply string-ci= (map symbol->string xy))))) + +(def (write-sxml-element + el + port: (port (current-sxml-output-port)) + indent: (maybe-level #f) + xml?: (xml? (current-sxml-output-xml?)) + quote-char: (quote-char #\")) + (def name (car el)) + (def attrs (sxml-element-attributes el)) + (def body (if attrs (cddr el) (cdr el))) + + (def (indent (end #f)) + (def n (if end maybe-level (current-indentation-width))) + (when maybe-level + (write-char #\newline port) + (let lp ((n n)) (write-char #\space port) + (when (>= n 1) (lp (1- n)))))) + + (parameterize ((current-indentation-width + (+ (current-indentation-width) (or maybe-level 0)))) + ;; Open Tag + (write-char #\< port) + (write-string (symbol->string name) port) + (when attrs + (for-each + (lambda (attr) + (write-char #\space port) + (write-sxml-attribute + attr port: port xml?: xml? quote-char: quote-char)) + attrs)) + (unless (html-void-tag? name) (indent)) + (when (and xml? (html-void-tag? name)) + (write-char #\space port) + (write-char #\/ port)) + (write-char #\> port) + ;; Body + (if (and (not xml?) (html-raw-tag? name)) + (for-each (cut display <> port) body) + (write-sxml + body port: port xml?: xml? + quote-char: quote-char + indent: (and maybe-level (current-indentation-width)))) + ;; End Tag + (unless (html-void-tag? name) + (write-char #\< port) (write-char #\/ port) + (write-string (symbol->string name) port) + (indent #t) (write-char #\> port))) + (void)) diff --git a/src/std/xml/ssax.ss b/src/std/markup/sxml/ssax.ss similarity index 100% rename from src/std/xml/ssax.ss rename to src/std/markup/sxml/ssax.ss diff --git a/src/std/xml/sxml.ss b/src/std/markup/sxml/sxml-inf.ss similarity index 100% rename from src/std/xml/sxml.ss rename to src/std/markup/sxml/sxml-inf.ss diff --git a/src/std/xml/sxpath.ss b/src/std/markup/sxml/sxpath.ss similarity index 100% rename from src/std/xml/sxpath.ss rename to src/std/markup/sxml/sxpath.ss diff --git a/src/std/markup/sxml/tal/README.org b/src/std/markup/sxml/tal/README.org new file mode 100644 index 000000000..dab361b4c --- /dev/null +++ b/src/std/markup/sxml/tal/README.org @@ -0,0 +1,1041 @@ +#+TITLE: TAL: The Template Attribute Language +#+EXPORT_FILE_NAME: ../../../../../doc/reference/std/markup/sxml/tal/README.org +#+OPTIONS: toc:nil + +* Contents :noexport: +:PROPERTIES: +:TOC: :include all :depth 3 :ignore this +:CUSTOM_ID: contents +:END: +:CONTENTS: +- [[#tal-the-template-attribute-language][TAL: The Template Attribute Language]] + - [[#attributes][Attributes]] + - [[#define-tal][define-TAL]] + - [[#current-tal-output-port][current-tal-output-port]] + - [[#talattributes--replace-element-attributes][tal:attributes : Replace element attributes]] + - [[#talcondition--conditionally-insert-or-remove-an-element][tal:condition : Conditionally insert or remove an element]] + - [[#talcontent--replace-the-content-of-an-element][tal:content : Replace the content of an element]] + - [[#taldefine][tal:define]] + - [[#talswitch--set-up-a-switch-statement][tal:switch : Set up a switch statement]] + - [[#talomit-tag--remove-an-element-leaving-its-contents][tal:omit-tag : Remove an element leaving its contents]] + - [[#current-talon-error-and-talon-error][current-tal:on-error and tal:on-error.]] + - [[#default-and-current-talon-error][Default and current-tal:on-error]] + - [[#talon-error][tal:on-error]] + - [[#talrepeat--repeat-an-element][tal:repeat : Repeat an element]] + - [[#the-repeat-interface][The Repeat Interface]] +:END: + +* TAL: The Template Attribute Language +:PROPERTIES: +:CUSTOM_ID: tal-the-template-attribute-language +:END: + +I, (drewc ), first got introduced to =TAL= around +'05 by [[https://github.com/segv][@segv]] in his [[https://web.archive.org/web/20160315020505/http://www.3ofcoins.net/2010/01/21/yaclml-in-pictures-part-ii-templating/][Common Lisp YACLML]] library. + +Its usefulness cannot be understated! + +Here is my attempt at the [[https://zope.readthedocs.io/en/latest/zopebook/AppendixC.html][Zope Template Attribute Language]]. + +::: tip To use the bindings from this module: + +#+begin_src scheme +(import :std/markup/tal) +#+end_src + +::: + + + +#+begin_quote +The Template Attribute Language (TAL) is a templating language used to +generate dynamic HTML and XML pages. Its main goal is to simplify the +collaboration between programmers and designers. This is achieved by +embedding TAL statements inside valid HTML (or XML) tags which can +then be worked on using common design tools. + +-- https://en.wikipedia.org/wiki/Template_Attribute_Language +#+end_quote + + +A =TAL Statement= is an attribute in an HTML tag that has a =tal:= +prefix. For the most part the attribute value is *Gerbil* code. + +To define a =TAL= procedure we take =HTML= code that may have Template +Attributes and transform it into a compiled function + +** Attributes +:PROPERTIES: +:CUSTOM_ID: attributes +:END: + + + - define :: creates local variables, valid in the element bearing + the attribute (including contained elements) or sets a "global". + - switch :: Set up a switch statement + - condition :: decides whether or not to render the tag (and all + contained text) + - repeat :: creates a loop variable and repeats the tag iterating a + sequence, e.g. for creating a selection list or a table + - case :: A case in a =tal:switch= statement + - content :: replaces the content of the tag + - replace :: replaces the tag (and therefore is not usable together + with content or attributes) + - attributes :: replaces the given attributes (e. g. by using + ~tal:attributes="(name name) (id name)"~ the name and id attributes of + an input field could be set to the value of the variable "name") + - omit-tag :: allows to omit the start and end tag and only render + the content if the given expression is true. + - on-error :: if an error occurs, this attribute works like the + content tag. + +If a tag has more than one TAL attribute they are evaluated in the +above (fairly logical) order. + +** =define-TAL= +:PROPERTIES: +:CUSTOM_ID: define-tal +:END: + +#+begin_src scheme + (define-TAL (name args ...) /key/ string-or-filename) + + (name args ...) := a definition for a function and parameters + similar to `def` + + /key/ := Optional, default `string:`, can also be file: + + string-or-filename := A literal string if the key is string: + A filename relative to the source if key is file: +#+end_src + + A function that outputs *HTML* makes up a portion of a web + application. Using =define-TAL= makes it easy to use a block of + *HTML/XHTML* as a function. + + #+begin_src scheme + (import :std/markup/tal :std/format :std/text/utf8 :std/sugar) + + (define-TAL (htmlist items) + "

    • + List Item
    ") + #+end_src + + That gives a function that prints the *HTML* to + =(current-tal-output-port)=. + + #+begin_src scheme + > (htmlist ["These" "are" "list" "items"]) +
    • i) These
    • ii) are
    • iii) list
    • iv) items
    + #+end_src + + We can get rid of the nested quotes by using the =#<<= syntax and + hide the ="='s with a quick definition. + + #+begin_src scheme + (def (fmt sym . args) (apply format (symbol->string sym) args)) + (define-TAL (foo item) #< + EOF + ) + > (foo 42) +

    'item:42'

    > + #+end_src + +Even better is the =file:= argument that pulls from a file. + +The following is placed in =foo.html= + +And the gerbil code is simple. + +#+begin_src html :tangle foo.html + + <title></head> + <body><p tal:replace="raw: (body)"></p></body> + </html> +#+end_src + +#+begin_src scheme + (define-TAL (foo.html title body) file: "foo.html") + (define-TAL (bar) "<hr>") +#+end_src + +We can then use it creatively. + +#+begin_src scheme +> (foo.html "Title <hr>" bar) +<html> + <head><title>Title <hr> +
    + +#+end_src + +As you can see it enables a fairly easy interaction between *HTML* +syntax and *Lisp*. + +** current-tal-output-port +:PROPERTIES: +:CUSTOM_ID: current-tal-output-port +:END: + +Output to where? The =current-tal-output-port= parameter of course! + +Most of the time it will be over a socket but for testing purposes +we'll tear it down to a string. + +#+begin_src scheme + (defrule (:> tal ...) + (let ((u8v (call-with-output-u8vector + #u8() (lambda (p) (parameterize ((current-tal-output-port p)) + tal ...))))) + (utf8->string u8v))) +#+end_src + +Now we can see what it outputs and have a testable form as well. + +#+begin_src scheme + > (:> (foo.html "Inside :>" bar)) + "\n Inside :>\n
    \n\n" +#+end_src + +As well as have a more documentation friendly output. + +#+begin_src html +> (display #) + + Inside :> +
    + +#+end_src + +** tal:attributes : Replace element attributes +:PROPERTIES: +:CUSTOM_ID: talattributes--replace-element-attributes +:END: + +/Syntax/: + +#+begin_src bnf +argument ::= attribute_statement [attribute_statement]* +attribute_statement ::= ( attribute_name expression ) +attribute_name ::= Name +#+end_src + +The =tal:attributes= statement is a way of setting the +=attribute_name= to the value of =expression=. + +#+begin_src scheme + (define-TAL (lnk href text) #< text + EOF + ) +#+end_src + +Works as expected + +#+begin_src scheme +> (:> (lnk "https://duckduckgo.com" "Search")) +"Search +#+end_src + +If the =expression= evaluates to =#f= the attribute is omitted. + +#+begin_src scheme + > (:> (lnk #f "No Anchor!")) + "No Anchor!" +#+end_src + +If the =tal:attributes= statement is on an element with a =tal:repeat= +statement, the replacement is made on each repetition of the element, +and the replacement expression is evaluated fresh for each repetition. + +#+begin_src scheme + (define-TAL (sel items) #< + +#+end_src + +If you use =tal:attributes= on an element with an active =tal:replace= +command, the =tal:attributes= statement is ignored because of the +order of operations. + +It can, of course, set more than one attribute. + +#+begin_src scheme + (define-TAL (att-textarea (rows 80) (cols 20)) #< + EOF + ) +#+end_src + +#+begin_src scheme +> (:> (att-textarea)) +"" +> (:> (att-textarea 10 42)) +"" +#+end_src + +** tal:condition : Conditionally insert or remove an element +:PROPERTIES: +:CUSTOM_ID: talcondition--conditionally-insert-or-remove-an-element +:END: + +/Syntax/ + +#+begin_src bnf + argument ::= expression +#+end_src + +The =tal:condition= statement includes the statement element in the +template only if the expression evaluates to a value that's not =#f= +and omits it otherwise. + +#+begin_src scheme + (define-TAL (p-when value) #< P!

    + EOF + ) +#+end_src + +#+begin_src scheme +> (:> (p-when "Lorum Ipsum")) +"

    Lorum Ipsum

    " +> (:> (p-when #f)) +"" +#+end_src + +It can be used for alternate conditions. + +#+begin_src scheme + (define-TAL (p-if) #< +

    Even

    +

    Odd

    + + EOF + ) +#+end_src + +#+begin_src scheme +> (:> (p-if)) +"
    \n

    Even

    \n\n
    \n\n

    Odd

    \n
    \n

    Even

    \n\n
    \n\n

    Odd

    \n
    +#+end_src + +That's a good example of why "in-tag indentation" can be important. + +#+begin_src html +> (display #) +
    +

    Even

    + +
    + +

    Odd

    +
    +

    Even

    + +
    + +

    Odd

    +
    +#+end_src + +Outside of the =tal:condition= but inside the =tal:repeat= are those +newlines. Makes it nice to read but adds things that could mess up the +display and really are not needed. + +#+begin_src scheme + (define-TAL (p-if-in-tag) #< +

    Even

    Odd

    + + EOF + ) +#+end_src + +That gives us something "nicer". + +#+begin_src scheme +> (:> (p-if-in-tag)) +"
    \n

    Even

    \n
    \n

    Odd

    \n
    \n

    Even

    \n
    \n

    Odd

    \n
    " +#+end_src + +Which kinda looks like what I'm trying to portray. + +#+begin_src html + > (display #) +
    +

    Even

    +
    +

    Odd

    +
    +

    Even

    +
    +

    Odd

    +
    +#+end_src + + +** tal:content : Replace the content of an element +:PROPERTIES: +:CUSTOM_ID: talcontent--replace-the-content-of-an-element +:END: + +/Syntax/ + +#+begin_src bnf +argument ::= (['text:'] | 'as-is:' | 'raw:') expression +#+end_src + + +You can insert =text:= or =as-is:= in place of its children with the +=tal:content= statement. The statement argument is exactly like that of +=tal:replace=, and is interpreted in the same fashion. + +If the expression evaluates to =#f= , the statement element is left +childless. f the expression evaluates to default, then the element’s +contents are unchanged. + +#+begin_src scheme + (define-TAL (div-content cnt) #< Default content here + EOF + ) +#+end_src + +#+begin_src scheme +> (:> (div-content default:)) +"
    Default content here
    " +> (:> (div-content "New Content")) +"
    New Content
    " +> (:> (div-content #f)) +"
    " +#+end_src + +The default replacement behavior is =text:= which replaces +angle-brackets and ampersands with their HTML entity equivalents. + +#+begin_src scheme + (define-TAL (div-text-content cnt) #< Default content here + EOF + ) +#+end_src + + +#+begin_src scheme + > (let (txt "Content in a
    ") + [(:> (div-content txt)) (:> (div-text-content txt))]) + ("
    Content in a <div/>
    " + "
    Content in a <div/>
    ") +#+end_src + +The =as-is:= keyword passes the replacement text through unchanged +allowing HTML/XML markup to be inserted. This can break your page if +the text contains unanticipated markup (e.g.. text submitted via a web +form), which is the reason that it is not the default. + + +#+begin_src scheme + (define-TAL (div-html-content cnt) #< Default content here
    + EOF + ) +#+end_src + +The =default:= still works. + +#+begin_src scheme +> (:> (div-html-content default:)) +"
    Default content here
    " +#+end_src + +Finally the =raw:= keyword doesn't do anything with the expression +beyond run it. + +#+begin_src scheme + (define-TAL (div-raw-content cnt) #< Default content here + EOF + ) +#+end_src +#+begin_src scheme +> (:> (div-raw-content default:)) +"
    " +#+end_src + +Running something which outputs to =current-tal-output-port= will do +the right thing, + + +#+begin_src scheme + (define-TAL (div-proc-content cnt) #< + EOF + ) +#+end_src + +#+begin_src scheme + > (:> (div-proc-content + (lambda () + (div-text-content "esc:
    ") + (div-html-content "hr:
    ")))) + "
    esc: <hr>
    hr:
    " +#+end_src + + + +** =tal:define= +:PROPERTIES: +:CUSTOM_ID: taldefine +:END: + +The =tal:define= command either wraps a =with*= around the tag (by +default or with the =local:= keyword) and/or =set!='ing things with +the =set!:= keyword. + + +#+begin_src scheme + (define-TAL (let-and-set x y) #< +

    Number?

    +

    The Answer?

    +

    We can set as well Bar =

    + +

    Setting is what you expect:

    + + EOF + ) +#+end_src + + + +#+begin_src scheme +> (:> (let-and-set 1 2)) +"
    \n +

    Number? 42

    \n

    The Answer? Yes!

    \n

    We can set as well Bar = 42

    \n\n

    Setting is what you expect: 42

    \n
    " +#+end_src +#+begin_src scheme + > (display (html-strip #)) + + Number? 42 + The Answer? Yes! + We can set as well Bar = 42 + + Setting is what you expect: 42 +#+end_src + + +#+begin_src scheme +> (:> (let-and-set 1 3)) +"
    \n

    Number? 63

    \n

    The Answer? No

    \n

    We can set as well Bar = 42

    \n\n

    Setting is what you expect: 42

    \n
    " +#+end_src +#+begin_src scheme +> (display (html-strip #)) + + Number? 63 + The Answer? No + We can set as well Bar = 42 + + Setting is what you expect: 42 + > +#+end_src + +** tal:switch : Set up a switch statement +:PROPERTIES: +:CUSTOM_ID: talswitch--set-up-a-switch-statement +:END: + +If everything is testing the same item, and only one can succeed, a +bunch of conditionals gets hairy. There's a =tal:switch= and some +=tal:case= statements to round it up. + +#+begin_src scheme + (define-TAL (switch-case item) #< + This is why I did not use cond or if. +

    We've got foo!

    Because where would this go? +

    Else is working

    + + EOF + ) + +#+end_src + +#+begin_src scheme + (define-TAL (switch-case item) #< + This is why I did not use cond or if. +

    We've got foo!

    Because where would this go? +

    Else is working

    + + EOF + ) + > (:> (switch-case 'asd)) + "
    \n This is why I did not use cond or if.\n Because where would this go? \n

    Else is working

    \n
    " + > (:> (switch-case 'foo)) + "
    \n This is why I did not use cond or if.\n

    We've got foo!

    Because where would this go? \n \n
    " + > + +#+end_src + + +** tal:omit-tag : Remove an element leaving its contents +:PROPERTIES: +:CUSTOM_ID: talomit-tag--remove-an-element-leaving-its-contents +:END: + +/Syntax/ + +#+begin_src bnf +argument ::= [ expression ] +#+end_src + + +The =tal:omit-tag= statement leaves the contents of an element in +place while omitting the surrounding start and end tags. + +If the expression evaluates to =#f= then normal processing of the +element continues and the tags are not omitted. If the expression +evaluates to a true value, or no expression is provided, the statement +element is replaced with its contents. + +#+begin_src scheme + (define-TAL (notag) #< Just The text! and a link

    + EOF + ) + + (define-TAL (maybe-tag val) #< Is this a Paragraph? Who knows!

    + EOF + ) +#+end_src + +#+begin_src scheme +> (:> (notag)) +" Just The text! and a link " +> (:> (maybe-tag #f)) +" Is this a Paragraph? Who knows!" +> (:> (maybe-tag 42)) +"

    Is this a Paragraph? Who knows!

    " +#+end_src + +** current-tal:on-error and tal:on-error. +:PROPERTIES: +:CUSTOM_ID: current-talon-error-and-talon-error +:END: + +Handling errors in a decent way is built into our =TAL= be +default. This is, from experience, made to make most of the page work +if there is an unwanted and unseen error. + +To update the "outside" error handler outside of the =TAL= form/file +there is a =current-tal:on-error=. For "inside" use the =tal:on-error= +attribute is very useful. + +*** Default and current-tal:on-error +:PROPERTIES: +:CUSTOM_ID: default-and-current-talon-error +:END: + +By default the form that errors will write the error message prefixed +with =ERROR:= in place of what is most likely its contents. + +#+begin_src scheme +(define-TAL (test-no-on-error thunk) #< +
  • + +EOF +) +#+end_src + +In running it we can see it still runs and does not mess up the page +that much. + +#+begin_src scheme +> (:> (test-no-on-error (cut error "This is the error message: "))) +"
      \n
    • ERROR: This is the error message: <escaped>
    • \n
    " +#+end_src + +We can change it. + +#+begin_src scheme + > (:> (parameterize ((current-tal:on-error + (lambda (e) '(log-error e) + (tal:write "Nothing wrong here!")))) + (test-no-on-error (cut error "Something Wrong!")))) + "
      \n
    • Nothing wrong here!
    • \n
    " +#+end_src + +But in reality that abstraction's just there so pages still run with +bugs in them. Even better for all involved is the =tal:on-error= +attribute. + +*** tal:on-error +:PROPERTIES: +:CUSTOM_ID: talon-error +:END: + +/Syntax/ + +#+begin_src bnf + argument ::= (['text:'] | 'as-is:' | 'raw:' | 'ignore:' | 'ignore') expression +#+end_src + + +For a more precise handling of errors the =tal:on-error= +catcher/handler makes it quite easy. When a =TAL Statement= produces +an error if there is a =tal:on-error= on the element or any parent +element the error is caught at that point and handled according to the +expression. + +The first three keywords are treated the same as =tal:content= and on +error the element becomes one of those. + +#+begin_src scheme + ;; No keyword is the same as `text:` + (define-TAL (test-got-error thunk) #< +
  • + + EOF + ) + +#+end_src + +The result differs from the default catcher. + +#+begin_src scheme + > (:> (test-got-error (lambda () "Nice! No error"))) + "
      \n
    • Nice! No error
    • \n
    " + > (:> (test-got-error (cut error "error here"))) + "
      Got an Error!
    " +#+end_src + +Because we catch it on the =
      = the handler does not give us the +=
    • = wrapper and it breaks the valid HTML! We did that on purpose, +of course, and that's the idea behind a much more specific catcher. + + +#+begin_src scheme + (define-TAL (test-got-li-error thunk) #< +
    • +
    + EOF + ) +#+end_src + +That allows us to be much more clinical. + +#+begin_src scheme +> (:> (test-got-li-error (cut error "error here"))) +"
    • Got an Error!
    " +#+end_src + +But these are errors and though informing the is always a good idea +perhaps we also want to handle it outside of the tal forms. + +For that reason the =err= identifier is bound to the exception object +within the =tal:on-error= statement. + +#+begin_src scheme + (define-TAL (error-li) "
  • Got an Error!
  • ") + (def err-log []) + (def (log-err err) (set! err-log (cons err err-log))) + (def (handle-ul-error err) (log-err err) (error-li)) +#+end_src + +#+begin_src scheme + (define-TAL (test-handle-ul-error thunk) #< +
  • + + EOF + ) +#+end_src + +#+begin_src scheme +> (length err-log) +0 +> (:> (test-handle-ul-error (cut error "asd"))) +"
    • Got an Error!
    " +> (length err-log) +1 +#+end_src + +But that may raise the question of: why we need an unordered list that +is an error? + +That that there's the =ignore= and =ignore:= arguments. + + +#+begin_src scheme + (define-TAL (test-ignore-error thunk) #< +
  • + + EOF + ) +#+end_src + +Now there's no =
      = tag if it errors! + +#+begin_src scheme +> (:> (test-ignore-error (lambda () "LI here!"))) +"
        \n
      • LI here!
      • \n
      " +> (:> (test-ignore-error (cut error "No UL here!"))) +"" +#+end_src + +Or, like, if we actually want something that's not an unordered list, +we can do that as well. + + +#+begin_src scheme + (define-TAL (error-div err) #< + EOF + ) + + (define-TAL (test-ignore-div-error thunk) #< +
    • +
    + EOF + ) +#+end_src + +#+begin_src scheme +> (length err-log) +1 +> (:> (test-ignore-div-error (lambda () "No Error"))) +"
      \n
    • No Error
    • \n
    " +> (length err-log) +1 +> (:> (test-ignore-div-error (cut error "Got div"))) +"
    Error Here!
    " +> (length err-log) +2 +#+end_src + + +** tal:repeat : Repeat an element +:PROPERTIES: +:CUSTOM_ID: talrepeat--repeat-an-element +:END: + +/Syntax/ + +#+begin_src bnf +argument ::= '(' variable-name expression ')' +variable-name ::= Identifier +#+end_src + +The =tal:repeat= statement replicates a sub-tree of your document once +for each item in a sequence. The expression should evaluate to +anything acceptable for =:std/iter= to repeat. + +#+begin_src scheme + (define-TAL (test-b-repeat thing) #<
    + EOF + ) +#+end_src + +#+begin_src scheme +> (:> (test-b-repeat '(1 2 3))) +"123" +> (:> (test-b-repeat "asd")) +"asd" +> (:> (test-b-repeat #(v e c))) +"vec" +#+end_src + + +If the iterator is empty then the statement element is deleted, +otherwise it is repeated for each value sequentially. + +#+begin_src scheme +> (:> (test-b-repeat '())) +"" +> (:> (test-b-repeat "")) +"" +#+end_src + +The =variable-name= is used to define a local variable and a +=repeat/variable-name= for a =Repeat= interface variable. For each +repetition, the local variable is set to the current sequence element, +and the repeat variable is set to an interface around the iteration +object. + +*** The Repeat Interface +:PROPERTIES: +:CUSTOM_ID: the-repeat-interface +:END: + +You use the =Repeat= interface to access information about the current +repetition (such as the repeat index). The repeat interface has the +same name as the local variable prefixed with =repeat/= and has the +following methods. + + - index :: repetition number, starting from zero. + + - number :: repetition number, starting from one. + + - even? :: true for even-indexed repetitions (0, 2, 4, …). + + - odd? :: true for odd-indexed repetitions (1, 3, 5, …). + + - start? :: true for the starting repetition (index 0). + + - end? :: true for the ending, or final, repetition. + + - letter :: repetition number as a lower-case letter: “a” - “z”, + “aa” - “az”, “ba” - “bz”, …, “za” - “zz”, “aaa” - “aaz”, and so + forth. + + - Letter :: upper-case version of =letter=. + + - roman :: repetition number as a lower-case roman numeral: “i”, + “ii”, “iii”, “iv”, “v”, etc. + + - Roman :: upper-case version of =roman=. + + +Iterating over a sequence: + +#+begin_src scheme + (define-TAL (rep seq) #< + +

    + EOF + ) +#+end_src + +#+begin_src scheme +> (:> (rep '(foo bar baz))) +"

    \n foo\n

    \n bar\n

    \n baz\n

    " +#+end_src + +Inserting a sequence of table rows, and using the repeat variable to number the rows: + +#+begin_src scheme + (def desc car) + (def price cdr) + + (define-TAL (checkout-table cart) #< + + 1 + Widget + $1.50 + + + EOF + ) +#+end_src + +#+begin_src scheme +> (:> (checkout-table '(("Soilent Green" . "$People") ("Napkins" . "$42.00")))) +"\n \n \n \n \n \n \n \n \n \n
    1Soilent Green$People
    2Napkins$42.00
    " +#+end_src + +That's better to see displayed. + +#+begin_src scheme +> (display #) + + + + + + + + + + +
    1Soilent Green$People
    2Napkins$42.00
    +#+end_src + +Nested repeats: + + +#+begin_src scheme + (define-TAL (nested-repeats rows cols) #< + + + + 1 * 1 = 1 + + + + + EOF + ) +#+end_src + + +#+begin_src scheme + > (:> (nested-repeats '(1 2 3) #(4 6 5))) +"\n \n \n \n \n \n \n \n
    \n 1 * 1 = 1\n \n 1 * 2 = 2\n \n 1 * 3 = 3\n
    \n 2 * 1 = 2\n \n 2 * 2 = 4\n \n 2 * 3 = 6\n
    \n 3 * 1 = 3\n \n 3 * 2 = 6\n \n 3 * 3 = 9\n
    " +#+end_src + +That's also nice to see in long form. +#+begin_src scheme +> (display #) + + + + + + + + +
    + 1 * 1 = 1 + + 1 * 2 = 2 + + 1 * 3 = 3 +
    + 2 * 1 = 2 + + 2 * 2 = 4 + + 2 * 3 = 6 +
    + 3 * 1 = 3 + + 3 * 2 = 6 + + 3 * 3 = 9 +
    +#+end_src + + diff --git a/src/std/markup/sxml/tal/expander.ss b/src/std/markup/sxml/tal/expander.ss new file mode 100644 index 000000000..b4cc318eb --- /dev/null +++ b/src/std/markup/sxml/tal/expander.ss @@ -0,0 +1,254 @@ +(import :std/misc/alist + :std/markup/sxml/tal/toplevel + :std/markup/sxml/html/parser :std/format) +(export #t) + +(def (fmt sym . args) (apply format (symbol->string sym) args)) + +(def tal-end + (lambda (tag attrs parent-seed seed v?) + ;;(displayln "Got " tag seed " and pssed" parent-seed) + (if (tal-attrs? attrs) + (let (el (expand-tal-element tag attrs [] seed v?)) + [(cut push-toplevel-element el) ;] + (identity parent-seed) ...]) + (html-end tag attrs parent-seed seed v?)))) + +(def (html->tal-form html) (sxml->tal-form (html->sxml html end: tal-end))) + +(def (tal-attr? attr attrs) + (let (alist (member attr attrs (lambda (a b) (eq? a (and (pair? b) (car b)))))) + (and alist (car alist)))) + +(def (expand-tal-element tag attrs parent-seed seed v?) + (def els [[tal:define? . expand-tal:define] + [tal:switch? . expand-tal:switch] + [tal:condition? . expand-tal:condition] + [tal:repeat? . expand-tal:repeat] + [tal:case? . expand-tal:case] + [tal:content? . expand-tal:content] + [tal:replace? . expand-tal:replace] + [tal:attributes? . expand-tal:attributes] + [tal:omit-tag? . expand-tal:omit-tag] + [tal:on-error? . expand-tal:on-error]]) + (def (tal?) + (let lp ((els els)) + (if (null? els) #f + (with* (([this rest ...] els) + ([t? . exp] this)) + (if (t? attrs) exp (lp rest)))))) + (cond ((tal?) => (cut <> tag attrs parent-seed seed v?)) + (else + (let (tag (html-end tag attrs parent-seed seed v?)) + (sxml->tal-form tag))))) + +(def (tal:define? attrs) (tal-attr? 'tal:define attrs)) + +(def (expand-tal:define tag attrs parent-seed seed virtual?) + (def attr (tal:define? attrs)) + (with* (([_ bindings] attr) (globals []) (locals []) + (in (open-input-string bindings))) + (def (push-g b) (set! globals (cons b globals))) + (def (push-l b) (set! locals (cons b locals))) + + (let lp () + (let* ((form (read in)) + (binding (if (keyword? form) (read in) form))) + ;(displayln form) + (unless (eof-object? binding) + (if (eq? form set!:) + (push-g binding) + (push-l binding)) + (lp)))) + ['tal:form + (if (null? globals) globals + [(cons 'begin (map (cut cons 'set! <>) globals))]) + ... + (if (null? locals) + (expand-tal-element tag (remove1 attr attrs) parent-seed seed virtual?) + ['with* (reverse locals) + (expand-tal-element tag (remove1 attr attrs) parent-seed seed virtual?)])])) + +(def (tal:switch? attrs) (tal-attr? 'tal:switch attrs)) +(def (tal:case? attrs) (tal-attr? 'tal:case attrs)) + +(def (expand-tal:switch tag attrs parent-seed seed v?) + (def attr (tal:switch? attrs)) + (with* (([_ str] attr) (in (open-input-string str)) (switch (read in))) + ['tal:switch switch (expand-tal-element tag (remove1 attr attrs) parent-seed seed v?)])) + +(def (expand-tal:case tag attrs parent-seed seed v?) + (def attr (tal:case? attrs)) + ;; (displayln "ws" (call-with-output-string (cut write parent-seed <>)) "seed" seed) + (with* (([_ str] attr) (in (open-input-string str)) (case (read in))) + ['tal:case + case (expand-tal-element tag (remove1 attr attrs) parent-seed seed v?)])) + +(def (tal:condition? attrs) (tal-attr? 'tal:condition attrs)) +(def (expand-tal:condition tag attrs parent-seed seed v?) + (def attr (tal:condition? attrs)) + ;; (displayln attrs: attrs " ws" (call-with-output-string (cut write parent-seed <>)) "seed" seed) + (with* (([_ str] attr) (in (open-input-string str)) (case (read in))) + ['when case (expand-tal-element tag (remove1 attr attrs) parent-seed seed v?)])) + +(def (tal:repeat? attrs) (tal-attr? 'tal:repeat attrs)) +(def (expand-tal:repeat tag attrs ps seed v?) + (def attr (tal:repeat? attrs)) + (set! attrs (remove1 attr attrs)) + (with* (([_ str] attr) + ([var expression] (read (open-input-string str))) + (var.repeat (string->symbol + (string-append (symbol->string var) ".repeat"))) + (repeat/var (string->symbol + (string-append "repeat/" (symbol->string var))))) + `(let (,var.repeat (tal:repeat ,expression)) + (using (,repeat/var ,var.repeat : Repeat) + (let tal:loop () + (unless (Repeat-end? ,repeat/var) + (let (,var (tal:repeat-next! ,var.repeat)) + ,(expand-tal-element tag attrs ps seed v?) + (tal:loop)))))))) + +(def (tal:content? attrs) (tal-attr? 'tal:content attrs)) + +(def (expand-tal:content tag attrs parent-seed seed virtual?) + (def cnt (tal:content? attrs)) + (with* (([_ content] cnt) + (type text:) + (body []) + (p (open-input-string content))) + (let lp ((bdy [])) + (def form (read p)) + (if (and (null? bdy) (keyword? form)) + (begin (if (member form [text: as-is: raw:]) + (set! type form) + (error "Unknown content type" form)) + (lp [])) + (if (eof-object? form) + (set! body (reverse bdy)) + (lp (cons form bdy))))) + + (when (null? body) (set! body [""])) + + (set! body (cons 'begin body)) + + (unless (eq? type raw:) + (set! body + `(let ((%body ,body)) + (if (eq? %body default:) + ,(sxml->tal-form (reverse seed)) + ,['tal:write (if (eq? type as-is:) '%body + `(html-escape (tal:stringify %body)))])))) + + + (expand-tal-element + tag (remove1 cnt attrs) parent-seed + [(cut push-toplevel-element body)] virtual?))) + +(def (tal:replace? attrs) (tal-attr? 'tal:replace attrs)) + +(def (expand-tal:replace tag attrs parent-seed seed virtual?) + (def attr (tal:replace? attrs)) + (with* (([_ content] attr) (type text:) (body []) + (p (open-input-string content))) + (let lp ((bdy [])) + (def form (read p)) + (if (and (null? bdy) (keyword? form)) + (begin (if (member form [text: as-is: raw:]) + (set! type form) + (error "Unknown content type for replace" attr)) + (lp [])) + (if (eof-object? form) + (set! body (reverse bdy)) + (lp (cons form bdy))))) + + (when (null? body) (set! body [""])) + + (set! body (cons 'begin body)) + (unless (eq? type raw:) + (set! body ['tal:write (if (eq? type as-is:) body + `(html-escape (tal:stringify ,body)))])) + + body)) + +(def (tal:on-error? attrs) (tal-attr? 'tal:on-error attrs)) + +(def (expand-tal:on-error tag attrs parent-seed seed virtual?) + (def attr (tal:on-error? attrs)) + (with* (([_ handler] attr) (type text:) (body []) + (p (open-input-string handler))) + (let lp ((bdy [])) + (def form (read p)) + (if (and (null? bdy) + (or (keyword? form) (eq? form 'ignore))) + (begin (if (member form [text: as-is: raw: ignore: 'ignore]) + (set! type form) + (error "Unknown content type for replace" attr)) + (lp [])) + (if (eof-object? form) + (set! body (reverse bdy)) + (lp (cons form bdy))))) + ;;(displayln "Type: " type " body: " body) + + (when (null? body) (set! body [""])) + + (set! body (cons 'begin body)) + (unless (member type [raw: 'ignore ignore:]) + (set! body ['tal:write (if (eq? type as-is:) body + `(html-escape (tal:stringify ,body)))])) + `(try + (let (u8v (call-with-output-u8vector + #u8() (lambda (p) (parameterize ((current-tal-output-port p)) + (parameterize ((current-tal-catcher #t)) + ,(expand-tal-element + tag (remove1 attr attrs) parent-seed seed virtual?)))))) + (write-u8vector u8v (current-tal-output-port))) + (catch (err) + ,(if (member type ['ignore ignore:]) + body + (expand-tal-element + tag (remove1 attr attrs) parent-seed + [(cut push-toplevel-element body)] + virtual?)))))) + +(def (tal:attributes? attrs) (tal-attr? 'tal:attributes attrs)) + +(def (expand-tal:attributes tag attrs parent-seed seed virtual?) + (def _attrs (tal:attributes? attrs)) + (set! attrs (remove1 _attrs attrs)) + ;; (error _attrs) + (with* (([_ attrs-str] _attrs) + (alst (read (open-input-string + (string-append "(" attrs-str ")"))))) + ;;(displayln "attrs:" attrs alst) + (if (null? attrs) (set! attrs alst) + (for-each (lambda (kv) (with ([k . v] kv) (aset! attrs k v))) + alst)) + ['tal:html + ['quasiquote [tag + ['@ (map (lambda (kv) `(,(car kv) ,['unquote (cadr kv)])) attrs) ...] + ['unquote + `(lambda () + (begin0 [] + (push-toplevel-element + (call-with-output-u8vector + #u8() (lambda (p) + (parameterize((current-tal-output-port p)) + ,(sxml->tal-form (reverse seed))))))))]]]])) + +(def (tal:omit-tag? attrs) (tal-attr? 'tal:omit-tag attrs)) + +(def (expand-tal:omit-tag tag attrs parent-seed seed virtual?) + (def attr (tal:omit-tag? attrs)) + ;; (error _attrs) + (with* (([_ str] attr) + (in (open-input-string + (string-append "(" str ")"))) + (cnd (read in))) + + (if (null? cnd) + (sxml->tal-form (reverse seed)) + ['if (cons 'begin cnd) + (sxml->tal-form (reverse seed)) + (expand-tal-element + tag (remove1 attr attrs) parent-seed seed virtual?)]))) diff --git a/src/std/markup/sxml/tal/iter.ss b/src/std/markup/sxml/tal/iter.ss new file mode 100644 index 000000000..0e1c33a9c --- /dev/null +++ b/src/std/markup/sxml/tal/iter.ss @@ -0,0 +1,102 @@ +(import :std/interface :std/contract :std/iter :std/generic :std/markup/sxml/tal/toplevel) +(export #t) +(declare (fixnum)) + +(defstruct tal:repeat (iter next-item index) + constructor: :init! transparent: #t) + +(defmethod {:init! tal:repeat} + (lambda (self seq) + (def itr (iter seq)) + (using (self :- tal:repeat) + (set! self.index -1) + (set! self.iter itr) + (set! self.next-item (iter-next! itr))))) + +(def (tal:repeat-next! self) + (if (iterator? self) (set! self (iterator-e self))) + (using (self : tal:repeat) + (def item self.next-item) + (set! self.index (1+ self.index)) + (set! self.next-item (iter-next! self.iter)) + item)) + +(defmethod (:iter (self tal:repeat)) (make-iterator self tal:repeat-next!)) + +(interface Repeat + (index) ;; repetition number, starting from zero. + (number) ;; repetition number, starting from one. + (even? ) ;; true for even-indexed repetitions (0, 2, 4, …). + (odd?) ;; true for odd-indexed repetitions (1, 3, 5, …). + (start?) ;; true for the starting repetition (index 0). + (end?) ;; true for the ending, or final, repetition. + ; (first?) ;; true for the first item in a group - see note below + ;(last?) ;; true for the last item in a group - see note below + ;; length- - length of the sequence, which will be the total number of repetitions " + (letter) ;; repetition number as a lower-case letter: “a” - “z”, + ;; “aa” - “az”, “ba” - “bz”, …, “za” - “zz”, “aaa” - + ;; “aaz”, and so forth. + + (Letter) ;; upper-case version of - letter- . + + (roman) ;; repetition number as a lower-case roman numeral: “i”, + ;; “ii”, “iii”, “iv”, “v”, etc. + + ;; upper-case version of - roman- . + (Roman)) + + +(defmethod {index tal:repeat} tal:repeat-index) +(defmethod {number tal:repeat} (lambda (r) (1+ (tal:repeat-index r)))) +(defmethod {even? tal:repeat} (lambda (r) (even? (tal:repeat-index r)))) +(defmethod {odd? tal:repeat} (lambda (r) (odd? (tal:repeat-index r)))) +(defmethod {start? tal:repeat} (lambda (r) (= 0 (tal:repeat-index r)))) +(defmethod {end? tal:repeat} (lambda (r) (eq? iter-end (tal:repeat-next-item r)))) +(def (integer->letters number (base-char #\a)) + (def bn (char->integer base-char)) + (list->string + (reverse + (let lp ((number number)) + (set! number (1- number)) + ;(displayln "Get Num:" number ) + (if (< number 0) [] + (cons (integer->char (+ bn (modulo number 26))) + (lp (floor (/ number 26))))))))) + +(defmethod {letter tal:repeat} + (lambda (r) (integer->letters (1+ (tal:repeat-index r))))) + +(defmethod {Letter tal:repeat} + (lambda (r) (integer->letters (1+ (tal:repeat-index r)) #\A))) + +(def roman-decimal + '(("M" . 1000) + ("CM" . 900) + ("D" . 500) + ("CD" . 400) + ("C" . 100) + ("XC" . 90) + ("L" . 50) + ("XL" . 40) + ("X" . 10) + ("IX" . 9) + ("V" . 5) + ("IV" . 4) + ("I" . 1))) + +(def (integer->roman value) + (apply string-append + (let loop ((v value) + (decode roman-decimal)) + (let ((r (caar decode)) + (d (cdar decode))) + (cond + ((= v 0) '()) + ((>= v d) (cons r (loop (- v d) decode))) + (else (loop v (cdr decode)))))))) + +(defmethod {roman tal:repeat} + (lambda (r) (string-downcase (integer->roman (1+ (tal:repeat-index r)))))) + +(defmethod {Roman tal:repeat} + (lambda (r) (integer->roman (1+ (tal:repeat-index r))))) diff --git a/src/std/markup/sxml/tal/parser.ss b/src/std/markup/sxml/tal/parser.ss new file mode 100644 index 000000000..c1855b6d9 --- /dev/null +++ b/src/std/markup/sxml/tal/parser.ss @@ -0,0 +1 @@ +(def (tal:repeat? attrs) (tal-attr? 'tal:repeat attrs)) diff --git a/src/std/markup/sxml/tal/syntax.ss b/src/std/markup/sxml/tal/syntax.ss new file mode 100644 index 000000000..e0769ec20 --- /dev/null +++ b/src/std/markup/sxml/tal/syntax.ss @@ -0,0 +1,130 @@ +(import :std/sugar (for-syntax :std/text/utf8) :std/text/utf8 + (for-syntax :std/error) :std/error + (for-syntax :std/misc/ports) :std/misc/ports + (for-syntax :std/source) :std/source + (for-syntax :gerbil/runtime/syntax) + :std/markup/sxml/html/parser :std/markup/sxml/tal/toplevel + (for-syntax :std/markup/sxml/tal/expander) :std/markup/sxml/tal/expander) +(export #t) +(def current-tal-output-port (make-parameter (current-output-port))) + +(defrule (tal:write thing) + (let ((t thing) + (p (current-tal-output-port))) + (if (u8vector? t) (write-u8vector t p) + (if (string? t) (write-u8vector (string->utf8 t) p) + (display t p))) + "")) + +(defrule (tal:stringify thing) + (let ((str thing)) + (if (string? str) str + (if (not str) "" (with-output-to-string "" (cut display str)))))) + +(defstruct tal:switch-value (e)) +(def current-tal-switch (make-parameter #f)) + +(defsyntax (tal:switch-test stx) + (syntax-case stx () + ((_ (value test: test)) + #'(let* ((ts (current-tal-switch)) + (tv (and (tal:switch-value? ts) + (tal:switch-value-e ts)))) + (and ts (test tv value)))) + ((macro else:) #'(macro (#t test: (lambda _ #t)))) + ((macro value) #'(macro (value test: equal?))))) + +(defrule (tal:switch exp body ...) + (parameterize ((current-tal-switch (make-tal:switch-value exp))) + body ...)) + +(defrule (tal:case exp body ...) + (when (tal:switch-test exp) + (current-tal-switch #f) + body ...)) + +(defrule (tal:html sxml) + (tal:write + (u8vector-concatenate + (map (lambda (x) + (cond + ((u8vector? x) x) + ((string? x) (string->utf8 x)) + (else (call-with-output-u8vector #u8() (cut write x <>))))) + (cdr (sxml->tal-form sxml)))))) + +(def current-tal-catcher (make-parameter #f)) + +(extern namespace: #f RuntimeException-exception) +(def (tal-error-to-string e) + (cond ((RuntimeException? e) + (tal-error-to-string (RuntimeException-exception e))) + (else (or (error-message e) + (call-with-output-string + "" + (lambda (p) (parameterize ((current-error-port p)) + (display-exception e)))))))) + +(def current-tal:on-error + (make-parameter + (lambda (e) (tal:write (html-escape (string-append + "ERROR: " (tal-error-to-string e))))))) + +(defsyntax (tal:form stx) + (syntax-case stx (tal:form) + ((_ out rest ...) + (stx-string? #'out) + (let* ((str (stx-e #'out)) + (vec (string->utf8 str))) + (with-syntax ((u8v vec)) + #'(begin (write-u8vector u8v (current-tal-output-port)) + (tal:form rest ...))))) + ((_ (tal:form nest ...) rest ...) + #'(tal:form nest ... rest ...)) + ((_ form rest ...) + #'(begin ;;(tal:write "Where are we") (displayln 'form) + (let (tal-raise? (current-tal-catcher)) + (try + (let (u8v (call-with-output-u8vector + #u8() (lambda (p) (parameterize ((current-tal-output-port p)) + form)))) + (write-u8vector u8v (current-tal-output-port))) + (catch (e) + (if tal-raise? (raise e) + ((current-tal:on-error) e))))) + (tal:form rest ...))) + ((_) #'(void)))) + +(defsyntax (define-TAL stx) + (syntax-case stx () + ((_ (name args ...) str) + (stx-string? #'str) + #'(define-TAL (name args ...) string: str)) + ((_ (name args ...) file: pathname) + (stx-string? #'pathname) + (let* ((file (stx-e #'pathname)) + (locat (stx-source stx)) + (con (##locat-container locat)) + (path (##container->path con)) + (dir (if path (path-directory path) (current-directory))) + (str (read-file-string (path-expand file dir)))) + (with-syntax ((syn str)) + #'(define-TAL (name args ...) string: syn)))) + ((macro (name args ...) string: str) + (let* ((nname (stx-e #'name)) + (aargs (stx-e #'(args ...))) + (mod (make-symbol (gensym) nname '::module)) + (exprt (make-symbol (gensym) nname '::proc))) + (datum->syntax + #'name + `(begin + (module ,mod + (export (rename-out #t (,nname ,exprt))) + (import :std/contract :std/interface :std/sugar :std/text/utf8 + :std/html (rename-in :std/markup/sxml/tal/iter (tal:repeat? tal-iter?)) + :std/markup/sxml/tal/toplevel :std/markup/sxml/tal/expander + :std/markup/sxml/tal/syntax) + (def (,nname ,@aargs) + ,(html->tal-form (stx-e #'str)))) + (import ,mod) + (def ,nname ,exprt))))))) diff --git a/src/std/markup/sxml/tal/toplevel.ss b/src/std/markup/sxml/tal/toplevel.ss new file mode 100644 index 000000000..d9594806c --- /dev/null +++ b/src/std/markup/sxml/tal/toplevel.ss @@ -0,0 +1,38 @@ +(import :std/markup/sxml/html/parser :std/srfi/1) +(export #t) + +(def current-toplevel (make-parameter #f)) +(def current-out-str (make-parameter #f)) + +(def (sxml->tal-form sxml) + (def top [html:]) + (def str (open-output-string "")) + ;; (display str) + (parameterize ((current-toplevel top) + (current-out-str str)) + (sxml->html sxml str) + (push-toplevel (get-output-string (current-out-str))) + (cons 'tal:form (reverse (cdr top)) #; + (concatenate (map (lambda (x) (if (list? x) x [x])) + ))))) +(def (push-toplevel thing) + (def top (current-toplevel)) + ;;(unless (string? thing) (displayln "pushing " thing)) + (match top + ([title . smrof] (set! (cdr top) (cons thing smrof))))) + + +(def (push-toplevel-element el) + (push-toplevel (get-output-string (current-out-str))) + (push-toplevel el) + []) + +(def (tal-attrs? attrs) + (let lp ((ats attrs)) + (if (null? ats) #f + (with ([name . rest] (car ats)) + (if (string-prefix? "tal:" (symbol->string name)) + #t + (lp (cdr ats))))))) + +(def html-end (pgetq end: default-html->sxml-plist)) diff --git a/src/std/markup/sxml/xml.ss b/src/std/markup/sxml/xml.ss new file mode 100644 index 000000000..75f9816ba --- /dev/null +++ b/src/std/markup/sxml/xml.ss @@ -0,0 +1,18 @@ +;;; -*- Gerbil -*- +;;; (C) vyzo at hackzen.org +;;; (C) me at drewc.ca +;;; XML interface + +(import :std/build-config + :std/markup/sxml/ssax + :std/markup/sxml/print) + +(export (import: :std/markup/sxml/ssax) + #t) + +(def (write-xml sxml (port (current-output-port))) + (write-sxml sxml xml?: #t port: port)) + +(def (print-sxml->xml sxml (port (current-output-port)) + indent: (maybe-indent 1)) + (write-sxml sxml xml?: #t port: port indent: maybe-indent)) diff --git a/src/std/markup/tal.ss b/src/std/markup/tal.ss new file mode 100644 index 000000000..2bdd305a3 --- /dev/null +++ b/src/std/markup/tal.ss @@ -0,0 +1,4 @@ +(import (only-in :std/markup/sxml/tal/syntax + define-TAL current-tal-output-port + current-tal:on-error tal:write)) +(export define-TAL current-tal-output-port current-tal:on-error tal:write) diff --git a/src/std/markup/xml.ss b/src/std/markup/xml.ss new file mode 100644 index 000000000..7d8453cd0 --- /dev/null +++ b/src/std/markup/xml.ss @@ -0,0 +1,14 @@ +;;; -*- Gerbil -*- +;;; (C) vyzo at hackzen.org +;;; XML interface + +(import :std/build-config + :std/markup/sxml/sxpath + :std/markup/sxml/sxml-inf + :std/markup/sxml/ssax + :std/markup/sxml/xml) +(export (import: + :std/markup/sxml/ssax + :std/markup/sxml/sxpath + :std/markup/sxml/sxml-inf + :std/markup/sxml/xml)) diff --git a/src/std/net/s3/api.ss b/src/std/net/s3/api.ss index 832efa05a..c077205b4 100644 --- a/src/std/net/s3/api.ss +++ b/src/std/net/s3/api.ss @@ -9,7 +9,7 @@ :std/net/uri :std/crypto/digest :std/text/hex - :std/xml + :std/markup/xml :std/error :std/sugar :std/srfi/19) diff --git a/src/std/net/smtp/README.org b/src/std/net/smtp/README.org index 4e2d39aef..dba13a4bd 100644 --- a/src/std/net/smtp/README.org +++ b/src/std/net/smtp/README.org @@ -1,4 +1,4 @@ -#+TITLE: SMTP: Simple Mail Tranfser Protocol +#+TITLE: SMTP: Simple Mail Transfer Protocol #+begin_src scheme :tangle ../smtp.ss (import ./smtp/api) (export (import: ./smtp/api)) @@ -8,7 +8,7 @@ :PROPERTIES: :EXPORT_FILE_NAME: ../../../../doc/reference/std/net/smtp.md :EXPORT_OPTIONS: toc:nil -:EXPORT_TITLE: SMTP: Simple Mail Tranfser Protocol +:EXPORT_TITLE: SMTP: Simple Mail Transfer Protocol :END: ** SMTP: Simple Mail Tranfser Protocol diff --git a/src/std/xml.ss b/src/std/xml.ss index 4a22d0bef..bd67c2b8b 100644 --- a/src/std/xml.ss +++ b/src/std/xml.ss @@ -2,12 +2,6 @@ ;;; (C) vyzo at hackzen.org ;;; XML interface -(import :std/build-config - :std/xml/ssax - :std/xml/sxpath - :std/xml/sxml - :std/xml/print) -(export (import: :std/xml/ssax - :std/xml/sxpath - :std/xml/sxml - :std/xml/print)) +;;; DEPRECATED: Please use :std/markup/xml +(import :std/markup/xml) +(export (import: :std/markup/xml))