From f7d6d9e8a36a1d4eea200c2f0b47aa730a3babaf Mon Sep 17 00:00:00 2001
From: Joe Hildebrand \n ").concat(null!==r?"":"","\n ").concat(t,"\n ").concat(null!==r?"":"","\n \n \n \n ").concat((n/1024).toFixed(2),"\n \n kB\n \n \n \n ").concat(o.toFixed(2),"\n \n ms\n \n \n \n ").concat((n/1024/(o/1e3)).toFixed(2),"\n \n kB/s\n \n \n "))}var t=parseInt($("#run-count").val(),10),r={cache:$("#cache").is(":checked")};isNaN(t)||t<=0?alert("Number of runs must be a positive integer."):Runner.run(benchmarks,t,r,{readFile:function(e){return $.ajax({type:"GET",url:"https://raw.githubusercontent.com/peggyjs/peggy/".concat(BRANCH,"/benchmark/").concat(e),dataType:"text",async:!1}).responseText},testStart:function(){},testFinish:function(e,t,r,n){u("individual",t.title,"https://github.com/peggyjs/peggy/blob/".concat(BRANCH,"/benchmark/").concat(e.id,"/").concat(t.file),r,n)},benchmarkStart:function(u){e.append("\n \n "\n '))},benchmarkFinish:function(e,t,r){u("benchmark-total",e.title+" total",null,t,r)},start:function(){$("#run-count, #cache, #run").attr("disabled","disabled"),e.show(),$("#results-table tr").slice(1).remove()},finish:function(e,t){u("total","Total",null,e,t),$.scrollTo("max",{axis:"y",duration:500}),$("#run-count, #cache, #run").removeAttr("disabled")}})})),$(document).ready((function(){return $("#run").focus()})),browser_stub}));
\ No newline at end of file
+!function(e,u){"object"==typeof exports&&"undefined"!=typeof module?module.exports=u():"function"==typeof define&&define.amd?define(u):(e="undefined"!=typeof globalThis?globalThis:e||self).browser=u()}(this,(function(){var commonjsGlobal="undefined"!=typeof globalThis?globalThis:"undefined"!=typeof window?window:"undefined"!=typeof global?global:"undefined"!=typeof self?self:{},browser_stub={},GrammarLocation$4=function(){function e(e,u){this.source=e,this.start=u}return e.prototype.toString=function(){return String(this.source)},e.prototype.offset=function(e){return{line:e.line+this.start.line-1,column:1===e.line?e.column+this.start.column-1:e.column,offset:e.offset+this.start.offset}},e.offsetStart=function(e){return e.source&&"function"==typeof e.source.offset?e.source.offset(e.start):e.start},e.offsetEnd=function(e){return e.source&&"function"==typeof e.source.offset?e.source.offset(e.end):e.end},e}(),grammarLocation=GrammarLocation$4,__extends=commonjsGlobal&&commonjsGlobal.__extends||(extendStatics=function(e,u){return extendStatics=Object.setPrototypeOf||{__proto__:[]}instanceof Array&&function(e,u){e.__proto__=u}||function(e,u){for(var t in u)Object.prototype.hasOwnProperty.call(u,t)&&(e[t]=u[t])},extendStatics(e,u)},function(e,u){if("function"!=typeof u&&null!==u)throw new TypeError("Class extends value "+String(u)+" is not a constructor or null");function t(){this.constructor=e}extendStatics(e,u),e.prototype=null===u?Object.create(u):(t.prototype=u.prototype,new t)}),extendStatics,GrammarLocation$3=grammarLocation,setProtoOf=Object.setPrototypeOf||{__proto__:[]}instanceof Array&&function(e,u){e.__proto__=u}||function(e,u){for(var t in u)Object.prototype.hasOwnProperty.call(u,t)&&(e[t]=u[t])},GrammarError$3=function(e){function u(t,n,r){var o=e.call(this,t)||this;return setProtoOf(o,u.prototype),o.name="GrammarError",o.location=n,void 0===r&&(r=[]),o.diagnostics=r,o.stage=null,o.problems=[["error",t,n,r]],o}return 
__extends(u,e),u.prototype.toString=function(){var u=e.prototype.toString.call(this);this.location&&(u+="\n at ",void 0!==this.location.source&&null!==this.location.source&&(u+="".concat(this.location.source,":")),u+="".concat(this.location.start.line,":").concat(this.location.start.column));for(var t=0,n=this.diagnostics;t\n ").concat(u.title,' \n \n \n ").concat(null!==n?"":"","\n ").concat(t,"\n ").concat(null!==n?"":"","\n \n \n \n ").concat((r/1024).toFixed(2),"\n \n kB\n \n \n \n ").concat(o.toFixed(2),"\n \n ms\n \n \n \n ").concat((r/1024/(o/1e3)).toFixed(2),"\n \n kB/s\n \n \n "))}var t=parseInt($("#run-count").val(),10),n={cache:$("#cache").is(":checked")};isNaN(t)||t<=0?alert("Number of runs must be a positive integer."):Runner.run(benchmarks,t,n,{readFile:function(e){return $.ajax({type:"GET",url:"https://raw.githubusercontent.com/peggyjs/peggy/".concat(BRANCH,"/benchmark/").concat(e),dataType:"text",async:!1}).responseText},testStart:function(){},testFinish:function(e,t,n,r){u("individual",t.title,"https://github.com/peggyjs/peggy/blob/".concat(BRANCH,"/benchmark/").concat(e.id,"/").concat(t.file),n,r)},benchmarkStart:function(u){e.append("\n \n "\n '))},benchmarkFinish:function(e,t,n){u("benchmark-total",e.title+" total",null,t,n)},start:function(){$("#run-count, #cache, #run").attr("disabled","disabled"),e.show(),$("#results-table tr").slice(1).remove()},finish:function(e,t){u("total","Total",null,e,t),$.scrollTo("max",{axis:"y",duration:500}),$("#run-count, #cache, #run").removeAttr("disabled")}})})),$(document).ready((function(){return $("#run").focus()})),browser_stub}));
\ No newline at end of file
diff --git a/docs/js/test-bundle.min.js b/docs/js/test-bundle.min.js
index 874328ff..828fb24f 100644
--- a/docs/js/test-bundle.min.js
+++ b/docs/js/test-bundle.min.js
@@ -5,4 +5,4 @@
// Copyright (c) 2023- the Peggy authors
// Licensed under the MIT License.
-!function(e,t){"object"==typeof exports&&"undefined"!=typeof module?t(exports,require("chai"),require("whatwg-url")):"function"==typeof define&&define.amd?define(["exports","chai","whatwg-url"],t):t((e="undefined"!=typeof globalThis?globalThis:e||self).browser={},e.chai,e.whatwgURL)}(this,(function(exports,require$$0$1,require$$0){var commonjsGlobal="undefined"!=typeof globalThis?globalThis:"undefined"!=typeof window?window:"undefined"!=typeof global?global:"undefined"!=typeof self?self:{},sourceMap$1={},sourceMapGenerator$1={},base64Vlq$1={},base64$6={};const intToCharMap$1="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/".split("");base64$6.encode=function(e){if(0<=e&&e\n ").concat(u.title,' \n \n Command Line
--extra-options-file <file>
, you will need to ensure you
are using the correct types. In particular, you may specify "plugin" as a
string, or "plugins" as an array of objects that have a use
-method. Always use the long (two-dash) form of the option. Options that
-contain dashes should be specified in camel case. You may also specify an
-"input" field instead of using the command line. For example:
+method. Always use the long (two-dash) form of the option, without the
+dashes, as the key. Options that contain internal dashes should be specified
+in camel case. You may also specify an "input" field instead of using the
+command line. For example:
// config.js or config.cjs
@@ -223,18 +224,22 @@ Command Line
-You can test generated parser immediately if you specify the -t/--test
or -T/--test-file
-option. This option conflicts with the option -m/--source-map
unless -o/--output
is
-also specified. This option conflicts with the --ast
option.
+You can test the generated parser immediately if you specify the
+-t/--test
or -T/--test-file
+option. This option conflicts with the
+--ast
option, and also conflicts with the
+-m/--source-map
option unless -o/--output
is also
+specified.
The CLI will exit with the code:
0
if all was success1
if you supply incorrect or conflicting parameters2
if all parameters is correct, you specify the -t/--test
or -T/--test-file
option
-and specified input does not parsed with the specified grammar0
: if successful1
: if you supply incorrect or conflicting parameters2
: if you specified the
+-t/--test
or -T/--test-file
option and the specified
+input fails parsing with the specified grammarExamples:
@@ -280,9 +285,10 @@import * as peggy from "peggy";
-For use in browsers, include the Peggy library in your web page or application using
-the <script>
tag. If Peggy detects an AMD loader, it will
-define itself as a module, otherwise the API will be available in the
+
For use in browsers, include the Peggy library in your web page or
+application using the <script>
tag. If Peggy detects an AMD loader, it will define
+itself as a module, otherwise the API will be available in the
peg
global object.
To generate a parser, call the peggy.generate
method and pass your
@@ -311,7 +317,7 @@
false
).
dependencies
format
is set to "amd"
,
"commonjs"
, "es"
, or "umd"
.
@@ -340,11 +346,13 @@ grammarSource
source
in the location objects, that returned by the
-location()
API function (default: undefined
).location()
API function (default: undefined
). It is
+recommended that if you do not use a string, the object you supply has a
+useful toString()
implementation.info
tracer
trace()
function.
+ trace()
takes a single parameter which is an object containing
+ "type" ("rule.enter", "rule.fail", "rule.match"), "rule" (the rule name as a
+ string), "location", and, if the type is
+ "rule.match", "result" (what the rule returned).
+...
(any others)options
variableAs you can see above, parsers can also support their own custom options. For example:
const parser = peggy.generate(`
-{
-// options are available in the per-parse initializer
-console.log(options.validWords); // outputs "[ 'boo', 'baz', 'boop' ]"
-}
+ {
+ // options are available in the per-parse initializer
+ console.log(options.validWords); // outputs "[ 'boo', 'baz', 'boop' ]"
+ }
-validWord = @word:$[a-z]+ &{ return options.validWords.includes(word) }
+ validWord = @word:$[a-z]+ &{ return options.validWords.includes(word) }
`);
const result = parser.parse("boo", {
-validWords: [ "boo", "baz", "boop" ]
+ validWords: [ "boo", "baz", "boop" ]
});
console.log(result); // outputs "boo"
@@ -477,22 +491,22 @@ Grammar Syntax and Semantics
values.
start
-= additive
+ = additive
additive
-= left:multiplicative "+" right:additive { return left + right; }
-/ multiplicative
+ = left:multiplicative "+" right:additive { return left + right; }
+ / multiplicative
multiplicative
-= left:primary "*" right:multiplicative { return left * right; }
-/ primary
+ = left:primary "*" right:multiplicative { return left * right; }
+ / primary
primary
-= integer
-/ "(" additive:additive ")" { return additive; }
+ = integer
+ / "(" additive:additive ")" { return additive; }
-integer "integer"
-= digits:[0-9]+ { return parseInt(digits.join(""), 10); }
+integer "simple number"
+ = digits:[0-9]+ { return parseInt(digits.join(""), 10); }
On the top level, the grammar consists of rules (in our example, there are five of them). Each rule has a name (e.g. @@ -533,34 +547,34 @@
{{'{{'}}
-function makeInteger(o) {
-return parseInt(o.join(""), 10);
-}
+ function makeInteger(o) {
+ return parseInt(o.join(""), 10);
+ }
}}
{
-if (options.multiplier) {
-input = "(" + input + ")*(" + options.multiplier + ")";
-}
+ if (options.multiplier) {
+ input = `(${input})*(${options.multiplier})`;
+ }
}
start
-= additive
+ = additive
additive
-= left:multiplicative "+" right:additive { return left + right; }
-/ multiplicative
+ = left:multiplicative "+" right:additive { return left + right; }
+ / multiplicative
multiplicative
-= left:primary "*" right:multiplicative { return left * right; }
-/ primary
+ = left:primary "*" right:multiplicative { return left * right; }
+ / primary
primary
-= integer
-/ "(" additive:additive ")" { return additive; }
+ = integer
+ / "(" additive:additive ")" { return additive; }
-integer "integer"
-= digits:[0-9]+ { return makeInteger(digits); }
+integer "simple number"
+ = digits:[0-9]+ { return makeInteger(digits); }
The parsing expressions of the rules are used to match the input text to the grammar. There are various types of expressions — matching characters or @@ -587,8 +601,8 @@
One special case of parser expression is a parser action — a piece of JavaScript code inside curly braces (“{” and “}”) that takes match -results of some of the the preceding expressions and returns a JavaScript value. -This value is considered match result of the preceding expression (in other +results of the preceding expression and returns a JavaScript value. +This value is then considered match result of the preceding expression (in other words, the parser action is a match result transformer).
In our arithmetics example, there are many parser actions. Consider the @@ -700,7 +714,7 @@
rule
Match a parsing expression of a rule recursively and return its match +
Match a parsing expression of a rule (perhaps recursively) and return its match result.
expression |..|
is an equivalent of expression |0..|
+ expression |..|
is equivalent to expression |0..|
and expression *
expression |1..|
is an equivalent of expression +
expression |1..|
is equivalent to expression +
Optionally, delimiter
expression can be specified. Delimiter must appear
- between expressions exactly once and it is not included in the final array.
Optionally, delimiter
expression can be specified. The
+ delimiter is a separate parser expression, its match results are ignored,
+ and it must appear between matched expressions exactly once.
count
, min
and max
can be represented as:
label : expression
Match the expression and remember its match result under given label. -The label must be a JavaScript identifier, but not in the list of reserved words. -By default this is a list of JavaScript reserved words, -but plugins can change it.
+Match the expression and remember its match result under the given label. The +label must be a JavaScript identifier, which includes not being in the list of +reserved words. By default this is a list of JavaScript +reserved words, but plugins can change it.
Labeled expressions are useful together with actions, where saved match results can be accessed by action's JavaScript code.
@@ -1017,7 +1033,7 @@Match the expression and if the label exists, remember its match result under given label. The label must be a JavaScript identifier if it exists, but not in the list of reserved words. -By default this is a list of JavaScript reserved words, +By default this is a list of JavaScript reserved words, but plugins can change it.
Return the value of this expression from the rule, or "pluck" it. You @@ -1181,38 +1197,51 @@
One of the most frequent questions about Peggy grammars is how to parse a delimited list of items. The cleanest current approach is:
-list = word|.., _ "," _|
- word = $[a-z]i+
- _ = [ \t]*
+list
+ = word|.., _ "," _|
+word
+ = $[a-z]i+
+_
+ = [ \t]*
If you want to allow a trailing delimiter, append it to the end of the rule:
-list = word|.., delimiter| delimiter?
- delimiter = _ "," _
- word = $[a-z]i+
- _ = [ \t]*
+list
+ = word|.., delimiter| delimiter?
+delimiter
+ = _ "," _
+word
+ = $[a-z]i+
+_
+ = [ \t]*
In the grammars created before the repetition operator was added to the peggy (in 2.1.0) you could see that approach, which is equivalent of the new approach with the repetition operator, but less efficient on long lists:
-list = head:word tail:(_ "," _ @word)* { return [head, ...tail]; }
-word = $[a-z]i+
-_ = [ \t]*
+list
+ = head:word tail:(_ "," _ @word)* { return [head, ...tail]; }
+word
+ = $[a-z]i+
+_
+ = [ \t]*
Note that the @
in the tail section plucks the word out of the
parentheses, NOT out of the rule itself.
As described above, you can annotate your grammar rules with human-readable names that will be used in error messages. For example, this production:
+As described above, you can annotate your grammar rules with human-readable +names that will be used in error messages. For example, this production:
-integer "integer"
-= digits:[0-9]+
+integer "simple number"
+ = digits:[0-9]+
will produce an error message like:
-Expected integer but "a" found.+
Expected simple number but "a" found.-
when parsing a non-number, referencing the human-readable name "integer." Without the human-readable name, Peggy instead uses a description of the character class that failed to match:
+when parsing a non-number, referencing the human-readable name "simple +number." Without the human-readable name, Peggy instead uses a description of +the character class that failed to match:
Expected [0-9] but "a" found.@@ -1245,46 +1274,46 @@
let source = ...;
try {
-peggy.generate(text, { grammarSource: source, ... }); // throws SyntaxError or GrammarError
-parser.parse(input, { grammarSource: source2, ... }); // throws SyntaxError
+ peggy.generate(text, { grammarSource: source, ... }); // throws SyntaxError or GrammarError
+ parser.parse(input, { grammarSource: source2, ... }); // throws SyntaxError
} catch (e) {
-if (typeof e.format === "function") {
-console.log(e.format([
-{ source, text },
-{ source: source2, text: input },
-...
-]));
-} else {
-throw e;
-}
+ if (typeof e.format === "function") {
+ console.log(e.format([
+ { source, text },
+ { source: source2, text: input },
+ ...
+ ]));
+ } else {
+ throw e;
+ }
}
Messages generated by format()
look like this
Error: Possible infinite loop when parsing (left recursion: start -> proxy -> end -> start)
--> .\recursion.pegjs:1:1
-|
+ |
1 | start = proxy;
-| ^^^^^
+ | ^^^^^
note: Step 1: call of the rule "proxy" without input consumption
--> .\recursion.pegjs:1:9
-|
+ |
1 | start = proxy;
-| ^^^^^
+ | ^^^^^
note: Step 2: call of the rule "end" without input consumption
--> .\recursion.pegjs:2:11
-|
+ |
2 | proxy = a:end { return a; };
-| ^^^
+ | ^^^
note: Step 3: call itself without input consumption - left recursion
--> .\recursion.pegjs:3:8
-|
+ |
3 | end = !start
-| ^^^^^
+ | ^^^^^
A plugin may register additional passes that can generate GrammarError
s to report about
problems, but they shouldn't do that by throwing an instance of GrammarError
. They should
-use a session API instead.
location()
function, which returns you the following object:
{
-source: options.grammarSource,
-start: { offset: 23, line: 5, column: 6 },
-end: { offset: 25, line: 5, column: 8 }
+ source: options.grammarSource,
+ start: { offset: 23, line: 5, column: 6 },
+ end: { offset: 25, line: 5, column: 8 }
}
@@ -1325,9 +1354,9 @@ For the per-parse initializer, the location is the start of the input, i.e.
{
-source: options.grammarSource,
-start: { offset: 0, line: 1, column: 1 },
-end: { offset: 0, line: 1, column: 1 }
+ source: options.grammarSource,
+ start: { offset: 0, line: 1, column: 1 },
+ end: { offset: 0, line: 1, column: 1 }
}
@@ -1338,21 +1367,22 @@ Line and column are somewhat expensive to compute, so if you just need the
-offset, there's also a function offset()
that returns just the start offset,
-and a function range()
that returns the object:
offset()
that returns just the
+start offset, and a function range()
that returns the object:
-
-{
-source: options.grammarSource,
-start: 23,
-end: 25
-}
-
+{
+ source: options.grammarSource,
+ start: 23,
+ end: 25
+}
-(i.e. difference from the location()
result only in type of start
and end
-properties, which contain just an offset instead of the Location
object.)
(i.e. it differs from the location()
result only in the type of the
+start
and end
properties, which contain just an
+offset instead of the Location
+object.)
All notes about values for location()
object is also applicable to the range()
+
All of the notes about values for the location()
object are also
+applicable to the range()
and offset()
calls.
Currently, Peggy only works with the Basic Multilingual Plane (BMP) of Unicode. @@ -1360,13 +1390,17 @@
Changing this behavior may be a breaking change and will not to be done before -Peggy 2.0. You can join to the discussion for this topic on the GitHub Discussions page.
+Changing this behavior might be a breaking change, so it will likely cause +a major version number increase if it happens. You can join the discussion +for this topic on the GitHub Discussions +page.
A plugin is an object with the use(config, options)
method. That method will be
-called for all plugins in the options.plugins
array, supplied to the generate()
+
A plugin is an object with the use(config, options)
method.
+That method will be called for all plugins in the options.plugins
+array, supplied to the generate()
method.
use
accepts these parameters:
config
generate
— passes used for actual code generatingA plugin that implement a pass usually should push it to the end of the correct
-array. Pass is a simple function with signature pass(ast, options, session)
:
A plugin that implements a pass should usually push it to the end of the correct
+array. Each pass is a function with the signature pass(ast, options, session)
:
ast
— the AST created by the config.parser.parse()
methodconfig
Default list contains JavaScript reserved words, and can be found +
Default list contains JavaScript reserved words, and can be found
in the peggy.RESERVED_WORDS
property.
options
Each compilation request is represented by a Session
instance. An object of this class
-is created by the compiler and passed to an each pass as a 3rd parameter. The session
+is created by the compiler and given to each pass as the 3rd parameter. The session
object gives access to the various compiler services. At the present time there is only
one such service: reporting of diagnostics.