Skip to content

Commit

Permalink
More Parsec Changes
Browse files Browse the repository at this point in the history
  • Loading branch information
drewc committed Jan 13, 2025
1 parent 416fed4 commit ec4ff99
Show file tree
Hide file tree
Showing 3 changed files with 1,051 additions and 119 deletions.
290 changes: 274 additions & 16 deletions src/std/parsec-test.ss
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
:std/error
:std/interactive
:srfi/13
:std/format
:std/parser/stream
:std/parser/base
:std/monad/interface
Expand All @@ -19,7 +20,8 @@
"parsec/char"
"parsec/syntax"
(only-in :std/sugar hash try)
(only-in :gerbil/core error-object? with-catch))
(only-in :gerbil/core error-object? with-catch)
:std/misc/plist)
(export parsec-test)
(begin-foreign (include "~~lib/_gambit#.scm"))
(defsyntax (test-inline stx)
Expand Down Expand Up @@ -51,7 +53,17 @@ This is an object: https://orgmode.org/worg/dev/org-element-api.html
* Heading 2
This is the end") (substring str 1 (string-length str))))
This is a paragraph.
#+NAME: test-block-key
#+begin_src scheme :tangle first-org-test.ss
(displayln '|This is a test|)
#+end_src
This is the second paragraph.
This is the end, also in a paragraph.") (substring str 1 (string-length str))))

;; (set-test-verbose! #f)
(def parsec-test
Expand Down Expand Up @@ -255,6 +267,14 @@ This is the end") (substring str 1 (string-length str))))
"fourty-two"))
() ;; null is one message that represent failure and what zero does by
;; default
> (def-parsec end-block (.>> (.many (.char #\space)) (.string "here")))
;; This should fail because `.or` is LL(1)
> (run-parser (do-parsec (.or end-block (.return []))) " her")
#f
;; This should pass because `.try` makes it LL(+inf.0)
> (run-parser (do-parsec (.or (.try end-block) (.return []))) " her")
()
> (run-parser (do-parsec (.or (.try end-block) (.return []))) " here")
> (defrule (u id body ...) (using (id (current-parser) :- ParsecCombinators) body ...))
> (caar ((u t (let (la #f)
(t.or
Expand Down Expand Up @@ -297,6 +317,18 @@ This is the end") (substring str 1 (string-length str))))
;#\a
;> (caar (u _ (_.run (test-try) "b ::=foo")))
;#\b
> (run-parser (u t (t.not (t.satisfy char-whitespace?)))
"asd")

#t
> (run-parser (u t (t.not (t.eof))) "a")
#t
> (run-parser (u t (t.not (t.eof)) (t.any-token)) "a")
#\a
> (Error? (run-parser (u t (t.not (t.eof))) "" #f))
#t
> (Error? (run-parser (u t (t.not (t.satisfy char-whitespace?))) " " #f))
#t
> (def token
(du (_ (make-parsecT) : ParsecChar)
c <- (_.letter)
Expand Down Expand Up @@ -371,7 +403,11 @@ This is the end") (substring str 1 (string-length str))))
> (run-parser KEYWORD "#+TITLE: Org Mode keyword!")
(keyword key: "TITLE" value: "Org Mode keyword!")
> (def-parsec-bnf
GENERIC-LINE ::= (.many-till (.any-token) EOL)
GENERIC-LINE ::=
kar <- (.any-token)
kdr <- (if (char=? #\newline kar)(.return [])
(.many-till (.any-token) EOL))
(.return (if (char=? #\newline kar) kdr (cons kar kdr)))

LINE-NO-TRY ::= (.or KEYWORD GENERIC-LINE))

Expand Down Expand Up @@ -417,7 +453,7 @@ This is the end") (substring str 1 (string-length str))))
> (run-parser HEADLINE " * bar")
#f
> (def-parsec (element p (indent? #t))
(do-parsec
#; (do-parsec
end <- (.or (.eof) (.return 42))
(if (eof-object? end) (.zero) (.return end)))
off <- (.xoff)
Expand All @@ -429,6 +465,9 @@ This is the end") (substring str 1 (string-length str))))
(.return [(car el) [properties: off: off soff: soff eoff: eoff]
(cdr el) ...]))

> (def (element-ref el prop) (pget prop (cddr el)))
> (def (element-ref-set! el prop val) (pget-set! prop (cddr el) val))

> (run-parser (element KEYWORD) " #+foo: bar")
(keyword (properties: off: 0 soff: 3 eoff: 13) key: "foo" value: "bar")
> (def-parsec GENERIC-TEXT-LINE
Expand All @@ -443,11 +482,11 @@ This is the end") (substring str 1 (string-length str))))

> (run-parser FIRST-INDENTED-ELEMENT " #+foo: bar baz")
(keyword (properties: off: 0 soff: 3 eoff: 17) key: "foo" value: "bar baz")
> (def-parsec
FIRST-ELEMENT (.or (.or FIRST-NON-INDENTED-ELEMENT
FIRST-INDENTED-ELEMENT)
(element GENERIC-TEXT-LINE #f)))
> (def test-few-lines "#+TITLE: asd\n123\n* asd\n*asd\n #+not key \nend\n")
> (def-parsec FIRST-ELEMENT
(.or (.or FIRST-NON-INDENTED-ELEMENT
FIRST-INDENTED-ELEMENT)
(element GENERIC-TEXT-LINE #f)))
> (def test-few-lines "#+TITLE: asd\n123\n* asd\n*asd\n #+not key \n**not headline \n\nend\n")

> (run-parser FIRST-ELEMENT test-few-lines)
(keyword (properties: off: 0 soff: 0 eoff: 13) key: "TITLE" value: "asd")
Expand All @@ -460,7 +499,7 @@ This is the end") (substring str 1 (string-length str))))



> (run-parser
> #;(run-parser
(do-parsec
FIRST-ELEMENT FIRST-ELEMENT
line <- (.liftM location-line (.location))
Expand All @@ -470,15 +509,234 @@ This is the end") (substring str 1 (string-length str))))
port <- (.get)
(.liftM (cut displayln "huh?: "(port-lines port) " line:" line "Col: " col " s:" <>) (.get))
(.liftM (cut displayln "line:" try-line "Col: " try-col " s:" <>) (.try (.get)))
(.try (element HEADLINE #f))
;FIRST-ELEMENT
#;(.try (.read-char))
#; (.any-token) #;FIRST-NON-INDENTED-ELEMENT) test-few-lines)
(.try (element HEADLINE #f))))
(run-parser (do-parsec FIRST-ELEMENT FIRST-ELEMENT FIRST-ELEMENT) test-few-lines)
(headline (properties: off: 17 soff: 17 eoff: 23) "asd")

> (run-parser (do-parsec FIRST-ELEMENT FIRST-ELEMENT FIRST-ELEMENT FIRST-ELEMENT) test-few-lines)
(line (properties: off: 23 soff: 23 eoff: 28) "*asd")

> (run-parser (do-parsec
FIRST-ELEMENT FIRST-ELEMENT
FIRST-ELEMENT FIRST-ELEMENT
FIRST-ELEMENT) test-few-lines)
(line (properties: off: 28 soff: 28 eoff: 40) " #+not key ")
> (run-parser (do-parsec
FIRST-ELEMENT FIRST-ELEMENT
FIRST-ELEMENT FIRST-ELEMENT
FIRST-ELEMENT FIRST-ELEMENT) test-few-lines)
(line (properties: off: 40 soff: 40 eoff: 56) "**not headline ")
> (run-parser (do-parsec
FIRST-ELEMENT FIRST-ELEMENT
FIRST-ELEMENT FIRST-ELEMENT
FIRST-ELEMENT FIRST-ELEMENT
FIRST-ELEMENT) test-few-lines)
(line (properties: off: 56 soff: 56 eoff: 57) "")

> (run-parser (do-parsec (.many FIRST-ELEMENT)) test-few-lines)
((keyword (properties: off: 0 soff: 0 eoff: 13) key: "TITLE" value: "asd")
(line (properties: off: 13 soff: 13 eoff: 17) "123")
(headline (properties: off: 17 soff: 17 eoff: 23) "asd")
(line (properties: off: 23 soff: 23 eoff: 28) "*asd")
(line (properties: off: 28 soff: 28 eoff: 40) " #+not key ")
(line (properties: off: 40 soff: 40 eoff: 56) "**not headline ")
(line (properties: off: 56 soff: 56 eoff: 57) "")
(line (properties: off: 57 soff: 57 eoff: 61) "end"))
> (def lesser-block-names
["comment" "example" "export" "src" "verse"])


> (def-parsec-bnf
LESSER-BLOCK-BEGIN ::=
(.string "#+begin_" char-ci=?)
name? <- (.liftM list->string (.many (.satisfy (? (not char-whitespace?)))))
name <- (let (nm (member name? lesser-block-names string-ci=?))
(if nm (.return (car nm)) (.fail "Not a (lesser block name" name?)))
sep <- (.satisfy (? char-whitespace?))
data <- (if (eqv? sep #\newline) #f (.many-till (.any-token) EOL))
(.return ['lesser-block-begin name: name data: (list->string data)])

(LESSER-BLOCK-END name) ::=
(.string "#+end_" char-ci=?)
(.string name char-ci=?)
(.not (.satisfy (? (not char-whitespace?))))
(.return ['lesser-block-end name: name])

(LESSER-BLOCK-CONTENTS name) ::=
(.liftM (cut map cadr <>)
(.many-till GENERIC-TEXT-LINE
(.try (element (LESSER-BLOCK-END name)))))

LESSER-BLOCK ::=
begin <- (element LESSER-BLOCK-BEGIN)
contents <- (.liftM list->vector (LESSER-BLOCK-CONTENTS (element-ref begin name:)))
end <- (.xoff)
(let (props (cadr begin))
(set! (pget eoff: (cdr props)) end)
(.return ['lesser-block props (cddr begin) ... contents: contents])))

> (def lb-test " #+Begin_src scheme\n (displayln \"test\")\n #f \n #+eNd_src not blocked")

> (run-parser LESSER-BLOCK lb-test)
(lesser-block
(properties: off: 0 soff: 2 eoff: 59)
name: "src" data: "scheme"
contents: #(" (displayln \"test\")" " #f "))
> (def-parsec (debug (item ""))
st <- (.get)
off <- (.xoff)
pk <- (.peek-char)
(begin (displayln (format "Here: ~a ~r ~a" off pk st))
(.return pk)))
> (def-parsec TRY-LESSER-BLOCK (.try LESSER-BLOCK))
> (def lesser-elements [KEYWORD TRY-LESSER-BLOCK])
> (def-parsec %LESSER-ELEMENTS
(let lp ((les lesser-elements))
(if (null? les) (element GENERIC-TEXT-LINE #f)
(.or (car les) (lp (cdr les))))))
> (def (make-paragraph lines)
(let* ((end-props (cdadar lines))
(lines (reverse lines))
(start-props (cdadar lines)))
(set! (pget eoff: start-props) (pget eoff: end-props))
['paragraph start-props (map caddr lines) ...]))
> (def-parsec LESSER-ELEMENTS
lines <- (.return [])
(begin
(def (add-line! ln) (set! lines (cons ln lines)))
(def-parsec PARSE-LESSER-ELEMENTS
el <- (.or %LESSER-ELEMENTS (.return Nothing))
(cond ((Nothing? el)
(if (null? lines) (.fail) (.return [(make-paragraph lines)])))
((eq? (car el) 'line) (add-line! el) PARSE-LESSER-ELEMENTS)
(else (.return (if (null? lines) [el]
[(make-paragraph lines) el])))))
PARSE-LESSER-ELEMENTS))
> (run-parser (do-parsec (.many (do-parsec (debug) LESSER-ELEMENTS))) "#+TITLE: foo\n\n#+bar: baz\n #+begin_src scheme\n#t\n#f\n#+end_src \n here")
(((keyword key: "TITLE" value: "foo")) ((paragraph (off: 13 soff: 13 eoff: 14) "") (keyword key: "bar" value: "baz")) ((lesser-block (properties: off: 25 soff: 26 eoff: 60) name: "src" data: "scheme" contents: #("#t" "#f"))) ((paragraph (off: 60 soff: 60 eoff: 68) " " " here")))
)

)

(test-inline
test-case: "Markdown Syntax Parsing tests"
> (def-parsec (INDENT (n 3))
space <- (.or (.char #\space) (.return #f))
(cond ((not space) (.return []))
((= n 1) (.return [space]))
(else (.liftM (cut cons space <>) (INDENT (1- n))))))
> (def-parsec (block p)
i <- (INDENT)
block <- p
(.return [(car block) pre-space: (length i) (cdr block) ...]))
> (def-parsec ATX-HEADING
hashes <- (.many-till (.char #\#) (.or (.char #\space) (.eof)))
chars <- (let (n (length hashes))
(if (or (> 1 n) (> n 6))
(.throw "Invalid hash number for ATX heading" n)
(.many-till (.any-token) (.or (.char #\newline) (.eof)))))
(.return ['ATX-HEADING level: (length hashes) string: (list->string chars)]))
> (def ATX-BLOCK (block ATX-HEADING))

> (run-parser ATX-BLOCK " ### foo")
(ATX-HEADING pre-space: 1 level: 3 string: "foo")
> (error-message (run-parser ATX-BLOCK " ### foo" #f))
"Invalid hash number for ATX heading"
> (error-irritants (run-parser ATX-BLOCK " ### foo" #f))
(0)
> (run-parser ATX-BLOCK "#+id: foo")
#f
> (error-irritants (run-parser ATX-BLOCK "####### foo" #f))
(7)
> (run-parser ATX-BLOCK "####")
(ATX-HEADING pre-space: 0 level: 4 string: "")
> (def-parsec OPEN-CODE-FENCE
backtick <- (.liftM (cut string-ref <> 0)
(.or (.string "```") (.string "~~~")))
rest <- (.many (.char backtick))
info-string <- (.many-till (.any-token) (.or (.char #\newline) (.eof)))
(.return ['OPEN-CODE-FENCE backtick: backtick ticks: (+ 3 (length rest))
info-string: (string-trim (list->string info-string))]))

> (run-parser OPEN-CODE-FENCE "```scheme")
(OPEN-CODE-FENCE backtick: #\` ticks: 3 info-string: "scheme")
> (def OPEN-CODE-FENCE-BLOCK (block OPEN-CODE-FENCE))

> (run-parser OPEN-CODE-FENCE-BLOCK " ```")
(OPEN-CODE-FENCE pre-space: 3 backtick: #\` ticks: 3 info-string: "")
> (run-parser OPEN-CODE-FENCE-BLOCK " ````scheme")
(OPEN-CODE-FENCE pre-space: 1 backtick: #\` ticks: 4 info-string: "scheme")
> (run-parser OPEN-CODE-FENCE-BLOCK " ```")
#f
> (run-parser OPEN-CODE-FENCE-BLOCK "~~~~~scheme :noweb-ref md-test")
(OPEN-CODE-FENCE pre-space: 0 backtick: #\~ ticks: 5 info-string: "scheme :noweb-ref md-test")
> (def-parsec (debug (where "Here"))
st <- (.get)
off <- (.xoff)
pk <- (.peek-char)
(begin (displayln (format "~a: ~a ~r ~a" where off pk st))
(.return pk)))
> (def-parsec-bnf
(CLOSE-CODE-FENCE char n) ::=
(INDENT)
(.string (make-string n char))
(.many-till (.char #\space) (.or (.char #\newline) (.eof)))

FENCED-CODE-BLOCK-LINE ::=
(.liftM list->string
(.many-till (.any-token) (.or (.char #\newline) (.eof))))

(FENCED-CODE-BLOCK-CONTENT char n) ::=
(.many-till FENCED-CODE-BLOCK-LINE #;(.>> (debug "code block line?") )
(.or (.try (CLOSE-CODE-FENCE char n))#;(.>> (debug "before try clode code") ) (.eof)))

FENCED-CODE ::=
open <- (.liftM cdr OPEN-CODE-FENCE)
content <- (FENCED-CODE-BLOCK-CONTENT (pgetq backtick: open) (pgetq ticks: open))
(.return ['FENCED-CODE open ... content: content]))

> (run-parser FENCED-CODE "```\nasd\n123\n ``` ")
(FENCED-CODE backtick: #\` ticks: 3 info-string: "" content: ("asd" "123"))
> (run-parser (block FENCED-CODE) " ````\nasd\n123\n ```` ")
(FENCED-CODE pre-space: 2 backtick: #\` ticks: 4 info-string: "" content: ("asd" "123"))
> (run-parser (block FENCED-CODE) " ~~~~~~scheme\nasd\n123\n")
(FENCED-CODE pre-space: 3 backtick: #\~ ticks: 6 info-string: "scheme" content: ("asd" "123"))

;; Embededed bakticks
> (run-parser (do-parsec (.or (.try (block FENCED-CODE)) (.any-token))) " ```scheme\n e \n ``\n foo\n ```")
(FENCED-CODE pre-space: 2 backtick: #\` ticks: 3 info-string: "scheme" content: (" e " " ``" " foo"))
> (def-parsec-bnf
OPEN-CONTAINER ::=
(.string "... ")
(.liftM
(lambda (lst)
(let* ((line (list->string lst))
(spc (string-index line #\space))
(name (if spc (substring line 0 spc) line))
(title (and spc (substring line (1+ spc) (string-length line)))))
['OPEN-CONTAINER name: name title: title]))
(.many-till (.any-token) (.or (.char #\newline) (.eof))))

CLOSE-CONTAINER ::=
(INDENT) (.string "...")
(.many-till (.char #\space) (.or (.char #\newline) (.eof)))


CONTAINER ::=
open <- (.liftM cdr OPEN-CONTAINER)
contents <- (.liftM list->string
(.many-till (.any-token)
(.try (.>> (.char #\newline) CLOSE-CONTAINER))))
(.return ['CONTAINER open ... contents: contents]))


> (run-parser OPEN-CONTAINER "... tip Title for Tip!")
(OPEN-CONTAINER name: "tip" title: "Title for Tip!")
> (run-parser CLOSE-CONTAINER " ...")
()
> (run-parser CONTAINER "... tip \nbar \n foo \n ...")
(CONTAINER name: "tip" title: "" contents: "bar \n foo ")
> (run-parser CONTAINER "... warning Don't do this: \n .... \n foo \n ...")
(CONTAINER name: "warning" title: "Don't do this: " contents: " .... \n foo "))




Expand Down
Loading

0 comments on commit ec4ff99

Please sign in to comment.