diff --git a/HISTORY b/HISTORY index 8ada774..0c10edd 100644 --- a/HISTORY +++ b/HISTORY @@ -1,4 +1,10 @@ -HISTORY for LPeg 0.12 +HISTORY for LPeg 1.0 + +* Changes from version 0.12 to 1.0 + --------------------------------- + + group "names" can be any Lua value + + some bugs fixed + + other small improvements * Changes from version 0.11 to 0.12 --------------------------------- diff --git a/lpcap.c b/lpcap.c index d90b935..c9085de 100644 --- a/lpcap.c +++ b/lpcap.c @@ -1,5 +1,5 @@ /* -** $Id: lpcap.c,v 1.4 2013/03/21 20:25:12 roberto Exp $ +** $Id: lpcap.c,v 1.6 2015/06/15 16:09:57 roberto Exp $ ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) */ @@ -126,7 +126,7 @@ static Capture *findback (CapState *cs, Capture *cap) { continue; /* opening an enclosing capture: skip and get previous */ if (captype(cap) == Cgroup) { getfromktable(cs, cap->idx); /* get group name */ - if (lua_equal(L, -2, -1)) { /* right group? */ + if (lp_equal(L, -2, -1)) { /* right group? */ lua_pop(L, 2); /* remove reference name and group name */ return cap; } @@ -462,7 +462,7 @@ static int pushcapture (CapState *cs) { case Carg: { int arg = (cs->cap++)->idx; if (arg + FIXEDARGS > cs->ptop) - return luaL_error(L, "reference to absent argument #%d", arg); + return luaL_error(L, "reference to absent extra argument #%d", arg); lua_pushvalue(L, arg + FIXEDARGS); return 1; } diff --git a/lpcap.h b/lpcap.h index c0a0e38..d762fdc 100644 --- a/lpcap.h +++ b/lpcap.h @@ -1,5 +1,5 @@ /* -** $Id: lpcap.h,v 1.1 2013/03/21 20:25:12 roberto Exp $ +** $Id: lpcap.h,v 1.2 2015/02/27 17:13:17 roberto Exp $ */ #if !defined(lpcap_h) @@ -18,7 +18,7 @@ typedef enum CapKind { typedef struct Capture { const char *s; /* subject position */ - short idx; /* extra info about capture (group name, arg index, etc.) */ + unsigned short idx; /* extra info (group name, arg index, etc.) */ byte kind; /* kind of capture */ byte siz; /* size of full capture + 1 (0 = not a full capture) */ } Capture; diff --git a/lpcode.c b/lpcode.c index 2cc0e0d..362ec20 100644 --- a/lpcode.c +++ b/lpcode.c @@ -1,5 +1,5 @@ /* -** $Id: lpcode.c,v 1.18 2013/04/12 16:30:33 roberto Exp $ +** $Id: lpcode.c,v 1.23 2015/06/12 18:36:47 roberto Exp $ ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) */ @@ -33,26 +33,30 @@ static const Charset *fullset = &fullset_; */ /* -** Check whether a charset is empty (IFail), singleton (IChar), -** full (IAny), or none of those (ISet). +** Check whether a charset is empty (returns IFail), singleton (IChar), +** full (IAny), or none of those (ISet). When singleton, '*c' returns +** which character it is. (When generic set, the set was the input, +** so there is no need to return it.) */ static Opcode charsettype (const byte *cs, int *c) { - int count = 0; + int count = 0; /* number of characters in the set */ int i; - int candidate = -1; /* candidate position for a char */ - for (i = 0; i < CHARSETSIZE; i++) { + int candidate = -1; /* candidate position for the singleton char */ + for (i = 0; i < CHARSETSIZE; i++) { /* for each byte */ int b = cs[i]; - if (b == 0) { - if (count > 1) return ISet; /* else set is still empty */ + if (b == 0) { /* is byte empty? */ + if (count > 1) /* was set neither empty nor singleton? */ + return ISet; /* neither full nor empty nor singleton */ + /* else set is still empty or singleton */ } - else if (b == 0xFF) { - if (count < (i * BITSPERCHAR)) - return ISet; + else if (b == 0xFF) { /* is byte full? */ + if (count < (i * BITSPERCHAR)) /* was set not full? */ + return ISet; /* neither full nor empty nor singleton */ else count += BITSPERCHAR; /* set is still full */ } - else if ((b & (b - 1)) == 0) { /* byte has only one bit? */ - if (count > 0) - return ISet; /* set is neither full nor empty */ + else if ((b & (b - 1)) == 0) { /* has byte only one bit? */ + if (count > 0) /* was set not empty? */ + return ISet; /* neither full nor empty nor singleton */ else { /* set has only one char till now; track it */ count++; candidate = i; @@ -77,6 +81,7 @@ static Opcode charsettype (const byte *cs, int *c) { } } + /* ** A few basic operations on Charsets */ @@ -84,16 +89,11 @@ static void cs_complement (Charset *cs) { loopset(i, cs->cs[i] = ~cs->cs[i]); } - static int cs_equal (const byte *cs1, const byte *cs2) { loopset(i, if (cs1[i] != cs2[i]) return 0); return 1; } - -/* -** computes whether sets cs1 and cs2 are disjoint -*/ static int cs_disjoint (const Charset *cs1, const Charset *cs2) { loopset(i, if ((cs1->cs[i] & cs2->cs[i]) != 0) return 0;) return 1; @@ -101,7 +101,8 @@ static int cs_disjoint (const Charset *cs1, const Charset *cs2) { /* -** Convert a 'char' pattern (TSet, TChar, TAny) to a charset +** If 'tree' is a 'char' pattern (TSet, TChar, TAny), convert it into a +** charset and return 1; else return 0. */ int tocharset (TTree *tree, Charset *cs) { switch (tree->tag) { @@ -116,7 +117,7 @@ int tocharset (TTree *tree, Charset *cs) { return 1; } case TAny: { - loopset(i, cs->cs[i] = 0xFF); /* add all to the set */ + loopset(i, cs->cs[i] = 0xFF); /* add all characters to the set */ return 1; } default: return 0; @@ -125,13 +126,16 @@ int tocharset (TTree *tree, Charset *cs) { /* -** Checks whether a pattern has captures +** Check whether a pattern tree has captures */ int hascaptures (TTree *tree) { tailcall: switch (tree->tag) { case TCapture: case TRunTime: return 1; + case TCall: + tree = sib2(tree); goto tailcall; /* return hascaptures(sib2(tree)); */ + case TOpenCall: assert(0); default: { switch (numsiblings[tree->tag]) { case 1: /* return hascaptures(sib1(tree)); */ @@ -161,7 +165,7 @@ int hascaptures (TTree *tree) { ** p is nullable => nullable(p) ** nofail(p) => p cannot fail ** The function assumes that TOpenCall is not nullable; -** this will be checked again when the grammar is fixed.) +** this will be checked again when the grammar is fixed. ** Run-time captures can do whatever they want, so the result ** is conservative. */ @@ -198,7 +202,7 @@ int checkaux (TTree *tree, int pred) { case TCall: /* return checkaux(sib2(tree), pred); */ tree = sib2(tree); goto tailcall; default: assert(0); return 0; - }; + } } @@ -245,16 +249,20 @@ int fixedlenx (TTree *tree, int count, int len) { /* ** Computes the 'first set' of a pattern. ** The result is a conservative aproximation: -** match p ax -> x' for some x ==> a in first(p). +** match p ax -> x (for some x) ==> a belongs to first(p) +** or +** a not in first(p) ==> match p ax -> fail (for all x) +** ** The set 'follow' is the first set of what follows the ** pattern (full set if nothing follows it). -** The function returns 0 when this set can be used for -** tests that avoid the pattern altogether. +** +** The function returns 0 when this resulting set can be used for +** test instructions that avoid the pattern altogether. ** A non-zero return can happen for two reasons: -** 1) match p '' -> '' ==> returns 1. -** (tests cannot be used because they always fail for an empty input) -** 2) there is a match-time capture ==> returns 2. -** (match-time captures should not be avoided by optimizations) +** 1) match p '' -> '' ==> return has bit 1 set +** (tests cannot be used because they would always fail for an empty input); +** 2) there is a match-time capture ==> return has bit 2 set +** (optimizations should not bypass match-time captures). */ static int getfirst (TTree *tree, const Charset *follow, Charset *firstset) { tailcall: @@ -265,7 +273,7 @@ static int getfirst (TTree *tree, const Charset *follow, Charset *firstset) { } case TTrue: { loopset(i, firstset->cs[i] = follow->cs[i]); - return 1; + return 1; /* accepts the empty string */ } case TFalse: { loopset(i, firstset->cs[i] = 0); @@ -280,7 +288,8 @@ static int getfirst (TTree *tree, const Charset *follow, Charset *firstset) { } case TSeq: { if (!nullable(sib1(tree))) { - /* return getfirst(sib1(tree), fullset, firstset); */ + /* when p1 is not nullable, p2 has nothing to contribute; + return getfirst(sib1(tree), fullset, firstset); */ tree = sib1(tree); follow = fullset; goto tailcall; } else { /* FIRST(p1 p2, fl) = FIRST(p1, FIRST(p2, fl)) */ @@ -324,7 +333,7 @@ static int getfirst (TTree *tree, const Charset *follow, Charset *firstset) { /* else go through */ } case TBehind: { /* instruction gives no new information */ - /* call 'getfirst' to check for math-time captures */ + /* call 'getfirst' only to check for math-time captures */ int e = getfirst(sib1(tree), follow, firstset); loopset(i, firstset->cs[i] = follow->cs[i]); /* uses follow */ return e | 1; /* always can accept the empty string */ @@ -335,8 +344,8 @@ static int getfirst (TTree *tree, const Charset *follow, Charset *firstset) { /* -** If it returns true, then pattern can fail only depending on the next -** character of the subject +** If 'headfail(tree)' true, then 'tree' can fail only depending on the +** next character of the subject. */ static int headfail (TTree *tree) { tailcall: @@ -403,9 +412,9 @@ int sizei (const Instruction *i) { switch((Opcode)i->i.code) { case ISet: case ISpan: return CHARSETINSTSIZE; case ITestSet: return CHARSETINSTSIZE + 1; - case ITestChar: case ITestAny: case IChoice: case IJmp: - case ICall: case IOpenCall: case ICommit: case IPartialCommit: - case IBackCommit: return 2; + case ITestChar: case ITestAny: case IChoice: case IJmp: case ICall: + case IOpenCall: case ICommit: case IPartialCommit: case IBackCommit: + return 2; default: return 1; } } @@ -422,16 +431,17 @@ typedef struct CompileState { /* -** code generation is recursive; 'opt' indicates that the code is -** being generated under a 'IChoice' operator jumping to its end. -** 'tt' points to a previous test protecting this code. 'fl' is -** the follow set of the pattern. +** code generation is recursive; 'opt' indicates that the code is being +** generated as the last thing inside an optional pattern (so, if that +** code is optional too, it can reuse the 'IChoice' already in place for +** the outer pattern). 'tt' points to a previous test protecting this +** code (or NOINST). 'fl' is the follow set of the pattern. */ static void codegen (CompileState *compst, TTree *tree, int opt, int tt, const Charset *fl); -void reallocprog (lua_State *L, Pattern *p, int nsize) { +void realloccode (lua_State *L, Pattern *p, int nsize) { void *ud; lua_Alloc f = lua_getallocf(L, &ud); void *newblock = f(ud, p->code, p->codesize * sizeof(Instruction), @@ -446,7 +456,7 @@ void reallocprog (lua_State *L, Pattern *p, int nsize) { static int nextinstruction (CompileState *compst) { int size = compst->p->codesize; if (compst->ncode >= size) - reallocprog(compst->L, compst->p, size * 2); + realloccode(compst->L, compst->p, size * 2); return compst->ncode++; } @@ -462,6 +472,9 @@ static int addinstruction (CompileState *compst, Opcode op, int aux) { } +/* +** Add an instruction followed by space for an offset (to be set later) +*/ static int addoffsetinst (CompileState *compst, Opcode op) { int i = addinstruction(compst, op, 0); /* instruction */ addinstruction(compst, (Opcode)0, 0); /* open space for offset */ @@ -470,6 +483,9 @@ static int addoffsetinst (CompileState *compst, Opcode op) { } +/* +** Set the offset of an instruction +*/ static void setoffset (CompileState *compst, int instruction, int offset) { getinstr(compst, instruction + 1).offset = offset; } @@ -478,7 +494,7 @@ static void setoffset (CompileState *compst, int instruction, int offset) { /* ** Add a capture instruction: ** 'op' is the capture instruction; 'cap' the capture kind; -** 'key' the key into ktable; 'aux' is optional offset +** 'key' the key into ktable; 'aux' is the optional capture offset ** */ static int addinstcap (CompileState *compst, Opcode op, int cap, int key, @@ -494,12 +510,18 @@ static int addinstcap (CompileState *compst, Opcode op, int cap, int key, #define target(code,i) ((i) + code[i + 1].offset) +/* +** Patch 'instruction' to jump to 'target' +*/ static void jumptothere (CompileState *compst, int instruction, int target) { if (instruction >= 0) setoffset(compst, instruction, target - instruction); } +/* +** Patch 'instruction' to jump to current position +*/ static void jumptohere (CompileState *compst, int instruction) { jumptothere(compst, instruction, gethere(compst)); } @@ -616,13 +638,13 @@ static void codebehind (CompileState *compst, TTree *tree) { /* ** Choice; optimizations: -** - when p1 is headfail -** - when first(p1) and first(p2) are disjoint; than +** - when p1 is headfail or +** when first(p1) and first(p2) are disjoint, than ** a character not in first(p1) cannot go to p1, and a character ** in first(p1) cannot go to p2 (at it is not in first(p2)). ** (The optimization is not valid if p1 accepts the empty string, ** as then there is no character at all...) -** - when p2 is empty and opt is true; a IPartialCommit can resuse +** - when p2 is empty and opt is true; a IPartialCommit can reuse ** the Choice already active in the stack. */ static void codechoice (CompileState *compst, TTree *p1, TTree *p2, int opt, @@ -649,7 +671,7 @@ static void codechoice (CompileState *compst, TTree *p1, TTree *p2, int opt, } else { /* == - test(fail(p1)) -> L1; choice L1; ; commit L2; L1: ; L2: */ + test(first(p1)) -> L1; choice L1; ; commit L2; L1: ; L2: */ int pcommit; int test = codetestset(compst, &cs1, e1); int pchoice = addoffsetinst(compst, IChoice); @@ -737,7 +759,7 @@ static void coderep (CompileState *compst, TTree *tree, int opt, /* L1: test (fail(p1)) -> L2;

; jmp L1; L2: */ int jmp; int test = codetestset(compst, &st, 0); - codegen(compst, tree, opt, test, fullset); + codegen(compst, tree, 0, test, fullset); jmp = addoffsetinst(compst, IJmp); jumptohere(compst, test); jumptothere(compst, jmp, test); @@ -863,7 +885,8 @@ static int codeseq1 (CompileState *compst, TTree *p1, TTree *p2, /* ** Main code-generation function: dispatch to auxiliar functions -** according to kind of tree +** according to kind of tree. ('needfollow' should return true +** only for consructions that use 'fl'.) */ static void codegen (CompileState *compst, TTree *tree, int opt, int tt, const Charset *fl) { @@ -906,6 +929,7 @@ static void peephole (CompileState *compst) { Instruction *code = compst->p->code; int i; for (i = 0; i < compst->ncode; i += sizei(&code[i])) { + redo: switch (code[i].i.code) { case IChoice: case ICall: case ICommit: case IPartialCommit: case IBackCommit: case ITestChar: case ITestSet: @@ -927,8 +951,7 @@ static void peephole (CompileState *compst) { int fft = finallabel(code, ft); code[i] = code[ft]; /* jump becomes that instruction... */ jumptothere(compst, i, fft); /* but must correct its offset */ - i--; /* reoptimize its label */ - break; + goto redo; /* reoptimize its label */ } default: { jumptothere(compst, i, ft); /* optimize label */ @@ -947,13 +970,13 @@ static void peephole (CompileState *compst) { /* ** Compile a pattern */ -Instruction *compile (lua_State *L, Pattern *p) { +Instruction *lpeg_compile (lua_State *L, Pattern *p) { CompileState compst; compst.p = p; compst.ncode = 0; compst.L = L; - reallocprog(L, p, 2); /* minimum initial size */ + realloccode(L, p, 2); /* minimum initial size */ codegen(&compst, p->tree, 0, NOINST, fullset); addinstruction(&compst, IEnd, 0); - reallocprog(L, p, compst.ncode); /* set final size */ + realloccode(L, p, compst.ncode); /* set final size */ peephole(&compst); return p->code; } diff --git a/lpcode.h b/lpcode.h index 5c9d54f..c31ab1e 100644 --- a/lpcode.h +++ b/lpcode.h @@ -1,5 +1,5 @@ /* -** $Id: lpcode.h,v 1.5 2013/04/04 21:24:45 roberto Exp $ +** $Id: lpcode.h,v 1.7 2015/06/12 18:24:45 roberto Exp $ */ #if !defined(lpcode_h) @@ -16,15 +16,23 @@ int checkaux (TTree *tree, int pred); int fixedlenx (TTree *tree, int count, int len); int hascaptures (TTree *tree); int lp_gc (lua_State *L); -Instruction *compile (lua_State *L, Pattern *p); -void reallocprog (lua_State *L, Pattern *p, int nsize); +Instruction *lpeg_compile (lua_State *L, Pattern *p); +void realloccode (lua_State *L, Pattern *p, int nsize); int sizei (const Instruction *i); #define PEnullable 0 #define PEnofail 1 +/* +** nofail(t) implies that 't' cannot fail with any input +*/ #define nofail(t) checkaux(t, PEnofail) + +/* +** (not nullable(t)) implies 't' cannot match without consuming +** something +*/ #define nullable(t) checkaux(t, PEnullable) #define fixedlen(t) fixedlenx(t, 0, 0) diff --git a/lpeg.html b/lpeg.html index 4747e30..c0a7f09 100644 --- a/lpeg.html +++ b/lpeg.html @@ -10,7 +10,7 @@ - +

@@ -22,7 +22,7 @@
LPeg
- Parsing Expression Grammars For Lua, version 0.12 + Parsing Expression Grammars For Lua, version 1.0
@@ -195,13 +195,16 @@

lpeg.version ()

lpeg.setmaxstack (max)

-Sets the maximum size for the backtrack stack used by LPeg to +Sets a limit for the size of the backtrack stack used by LPeg to track calls and choices. +(The default limit is 400.) Most well-written patterns need little backtrack levels and -therefore you seldom need to change this maximum; -but a few useful patterns may need more space. -Before changing this maximum you should try to rewrite your +therefore you seldom need to change this limit; +before changing it you should try to rewrite your pattern to avoid the need for extra space. +Nevertheless, a few useful patterns may overflow. +Also, with recursive grammars, +subjects with deep recursion may also need larger limits.

@@ -682,7 +685,8 @@

lpeg.Cb (name)

Creates a back capture. This pattern matches the empty string and produces the values produced by the most recent -group capture named name. +group capture named name +(where name can be any Lua value).

@@ -762,7 +766,8 @@

lpeg.Cg (patt [, name])

It groups all values returned by patt into a single capture. The group may be anonymous (if no name is given) -or named with the given name. +or named with the given name +(which can be any non-nil Lua value).

@@ -1375,13 +1380,13 @@

Arithmetic expressions

Download

LPeg -source code.

+source code.

License

-Copyright © 2013 Lua.org, PUC-Rio. +Copyright © 2007-2015 Lua.org, PUC-Rio.

Permission is hereby granted, free of charge, @@ -1419,7 +1424,7 @@

License

-$Id: lpeg.html,v 1.71 2013/04/11 19:17:41 roberto Exp $ +$Id: lpeg.html,v 1.75 2015/09/28 17:17:41 roberto Exp $

diff --git a/lpprint.c b/lpprint.c index 05fa648..174d168 100644 --- a/lpprint.c +++ b/lpprint.c @@ -1,5 +1,5 @@ /* -** $Id: lpprint.c,v 1.7 2013/04/12 16:29:49 roberto Exp $ +** $Id: lpprint.c,v 1.9 2015/06/15 16:09:57 roberto Exp $ ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) */ @@ -52,7 +52,7 @@ static void printjmp (const Instruction *op, const Instruction *p) { } -static void printinst (const Instruction *op, const Instruction *p) { +void printinst (const Instruction *op, const Instruction *p) { const char *const names[] = { "any", "char", "set", "testany", "testchar", "testset", @@ -221,10 +221,10 @@ void printtree (TTree *tree, int ident) { void printktable (lua_State *L, int idx) { int n, i; - lua_getfenv(L, idx); + lua_getuservalue(L, idx); if (lua_isnil(L, -1)) /* no ktable? */ return; - n = lua_objlen(L, -1); + n = lua_rawlen(L, -1); printf("["); for (i = 1; i <= n; i++) { printf("%d = ", i); diff --git a/lpprint.h b/lpprint.h index e640f74..6329760 100644 --- a/lpprint.h +++ b/lpprint.h @@ -1,5 +1,5 @@ /* -** $Id: lpprint.h,v 1.1 2013/03/21 20:25:12 roberto Exp $ +** $Id: lpprint.h,v 1.2 2015/06/12 18:18:08 roberto Exp $ */ @@ -18,6 +18,7 @@ void printtree (TTree *tree, int ident); void printktable (lua_State *L, int idx); void printcharset (const byte *st); void printcaplist (Capture *cap, Capture *limit); +void printinst (const Instruction *op, const Instruction *p); #else diff --git a/lptree.c b/lptree.c index a5dfeb4..f5a16dc 100644 --- a/lptree.c +++ b/lptree.c @@ -1,11 +1,12 @@ /* -** $Id: lptree.c,v 1.10 2013/04/12 16:30:33 roberto Exp $ +** $Id: lptree.c,v 1.21 2015/09/28 17:01:25 roberto Exp $ ** Copyright 2013, Lua.org & PUC-Rio (see 'lpeg.html' for license) */ #include #include #include +#include #include "lua.h" @@ -126,6 +127,189 @@ static void finalfix (lua_State *L, int postable, TTree *g, TTree *t) { } + +/* +** {=================================================================== +** KTable manipulation +** +** - The ktable of a pattern 'p' can be shared by other patterns that +** contain 'p' and no other constants. Because of this sharing, we +** should not add elements to a 'ktable' unless it was freshly created +** for the new pattern. +** +** - The maximum index in a ktable is USHRT_MAX, because trees and +** patterns use unsigned shorts to store those indices. +** ==================================================================== +*/ + +/* +** Create a new 'ktable' to the pattern at the top of the stack. +*/ +static void newktable (lua_State *L, int n) { + lua_createtable(L, n, 0); /* create a fresh table */ + lua_setuservalue(L, -2); /* set it as 'ktable' for pattern */ +} + + +/* +** Add element 'idx' to 'ktable' of pattern at the top of the stack; +** Return index of new element. +** If new element is nil, does not add it to table (as it would be +** useless) and returns 0, as ktable[0] is always nil. +*/ +static int addtoktable (lua_State *L, int idx) { + if (lua_isnil(L, idx)) /* nil value? */ + return 0; + else { + int n; + lua_getuservalue(L, -1); /* get ktable from pattern */ + n = lua_rawlen(L, -1); + if (n >= USHRT_MAX) + luaL_error(L, "too many Lua values in pattern"); + lua_pushvalue(L, idx); /* element to be added */ + lua_rawseti(L, -2, ++n); + lua_pop(L, 1); /* remove 'ktable' */ + return n; + } +} + + +/* +** Return the number of elements in the ktable at 'idx'. +** In Lua 5.2/5.3, default "environment" for patterns is nil, not +** a table. Treat it as an empty table. In Lua 5.1, assumes that +** the environment has no numeric indices (len == 0) +*/ +static int ktablelen (lua_State *L, int idx) { + if (!lua_istable(L, idx)) return 0; + else return lua_rawlen(L, idx); +} + + +/* +** Concatentate the contents of table 'idx1' into table 'idx2'. +** (Assume that both indices are negative.) +** Return the original length of table 'idx2' (or 0, if no +** element was added, as there is no need to correct any index). +*/ +static int concattable (lua_State *L, int idx1, int idx2) { + int i; + int n1 = ktablelen(L, idx1); + int n2 = ktablelen(L, idx2); + if (n1 + n2 > USHRT_MAX) + luaL_error(L, "too many Lua values in pattern"); + if (n1 == 0) return 0; /* nothing to correct */ + for (i = 1; i <= n1; i++) { + lua_rawgeti(L, idx1, i); + lua_rawseti(L, idx2 - 1, n2 + i); /* correct 'idx2' */ + } + return n2; +} + + +/* +** When joining 'ktables', constants from one of the subpatterns must +** be renumbered; 'correctkeys' corrects their indices (adding 'n' +** to each of them) +*/ +static void correctkeys (TTree *tree, int n) { + if (n == 0) return; /* no correction? */ + tailcall: + switch (tree->tag) { + case TOpenCall: case TCall: case TRunTime: case TRule: { + if (tree->key > 0) + tree->key += n; + break; + } + case TCapture: { + if (tree->key > 0 && tree->cap != Carg && tree->cap != Cnum) + tree->key += n; + break; + } + default: break; + } + switch (numsiblings[tree->tag]) { + case 1: /* correctkeys(sib1(tree), n); */ + tree = sib1(tree); goto tailcall; + case 2: + correctkeys(sib1(tree), n); + tree = sib2(tree); goto tailcall; /* correctkeys(sib2(tree), n); */ + default: assert(numsiblings[tree->tag] == 0); break; + } +} + + +/* +** Join the ktables from p1 and p2 the ktable for the new pattern at the +** top of the stack, reusing them when possible. +*/ +static void joinktables (lua_State *L, int p1, TTree *t2, int p2) { + int n1, n2; + lua_getuservalue(L, p1); /* get ktables */ + lua_getuservalue(L, p2); + n1 = ktablelen(L, -2); + n2 = ktablelen(L, -1); + if (n1 == 0 && n2 == 0) /* are both tables empty? */ + lua_pop(L, 2); /* nothing to be done; pop tables */ + else if (n2 == 0 || lp_equal(L, -2, -1)) { /* 2nd table empty or equal? */ + lua_pop(L, 1); /* pop 2nd table */ + lua_setuservalue(L, -2); /* set 1st ktable into new pattern */ + } + else if (n1 == 0) { /* first table is empty? */ + lua_setuservalue(L, -3); /* set 2nd table into new pattern */ + lua_pop(L, 1); /* pop 1st table */ + } + else { + lua_createtable(L, n1 + n2, 0); /* create ktable for new pattern */ + /* stack: new p; ktable p1; ktable p2; new ktable */ + concattable(L, -3, -1); /* from p1 into new ktable */ + concattable(L, -2, -1); /* from p2 into new ktable */ + lua_setuservalue(L, -4); /* new ktable becomes 'p' environment */ + lua_pop(L, 2); /* pop other ktables */ + correctkeys(t2, n1); /* correction for indices from p2 */ + } +} + + +/* +** copy 'ktable' of element 'idx' to new tree (on top of stack) +*/ +static void copyktable (lua_State *L, int idx) { + lua_getuservalue(L, idx); + lua_setuservalue(L, -2); +} + + +/* +** merge 'ktable' from 'stree' at stack index 'idx' into 'ktable' +** from tree at the top of the stack, and correct corresponding +** tree. +*/ +static void mergektable (lua_State *L, int idx, TTree *stree) { + int n; + lua_getuservalue(L, -1); /* get ktables */ + lua_getuservalue(L, idx); + n = concattable(L, -1, -2); + lua_pop(L, 2); /* remove both ktables */ + correctkeys(stree, n); +} + + +/* +** Create a new 'ktable' to the pattern at the top of the stack, adding +** all elements from pattern 'p' (if not 0) plus element 'idx' to it. +** Return index of new element. +*/ +static int addtonewktable (lua_State *L, int p, int idx) { + newktable(L, 1); + if (p) + mergektable(L, p, NULL); + return addtoktable(L, idx); +} + +/* }====================================================== */ + + /* ** {====================================================== ** Tree generation @@ -155,7 +339,7 @@ static Pattern *getpattern (lua_State *L, int idx) { static int getsize (lua_State *L, int idx) { - return (lua_objlen(L, idx) - sizeof(Pattern)) / sizeof(TTree) + 1; + return (lua_rawlen(L, idx) - sizeof(Pattern)) / sizeof(TTree) + 1; } @@ -168,12 +352,16 @@ static TTree *gettree (lua_State *L, int idx, int *len) { /* -** create a pattern +** create a pattern. Set its uservalue (the 'ktable') equal to its +** metatable. (It could be any empty sequence; the metatable is at +** hand here, so we use it.) */ static TTree *newtree (lua_State *L, int len) { size_t size = (len - 1) * sizeof(TTree) + sizeof(Pattern); Pattern *p = (Pattern *)lua_newuserdata(L, size); luaL_getmetatable(L, PATTERN_T); + lua_pushvalue(L, -1); + lua_setuservalue(L, -3); lua_setmetatable(L, -2); p->code = NULL; p->codesize = 0; return p->tree; @@ -206,29 +394,6 @@ static TTree *seqaux (TTree *tree, TTree *sib, int sibsize) { } -/* -** Add element 'idx' to 'ktable' of pattern at the top of the stack; -** create new 'ktable' if necessary. Return index of new element. -*/ -static int addtoktable (lua_State *L, int idx) { - if (idx == 0 || lua_isnil(L, idx)) /* no actual value to insert? */ - return 0; - else { - int n; - lua_getfenv(L, -1); /* get ktable from pattern */ - n = lua_objlen(L, -1); - if (n == 0) { /* is it empty/non-existent? */ - lua_pop(L, 1); /* remove it */ - lua_createtable(L, 1, 0); /* create a fresh table */ - } - lua_pushvalue(L, idx); /* element to be added */ - lua_rawseti(L, -2, n + 1); - lua_setfenv(L, -2); /* set it as ktable for pattern */ - return n + 1; - } -} - - /* ** Build a sequence of 'n' nodes, each with tag 'tag' and 'u.n' got ** from the array 's' (or 0 if array is NULL). (TSeq is binary, so it @@ -304,7 +469,7 @@ static TTree *getpatt (lua_State *L, int idx, int *len) { case LUA_TFUNCTION: { tree = newtree(L, 2); tree->tag = TRunTime; - tree->key = addtoktable(L, idx); + tree->key = addtonewktable(L, 0, idx); sib1(tree)->tag = TTrue; break; } @@ -319,123 +484,6 @@ static TTree *getpatt (lua_State *L, int idx, int *len) { } -/* -** Return the number of elements in the ktable of pattern at 'idx'. -** In Lua 5.2, default "environment" for patterns is nil, not -** a table. Treat it as an empty table. In Lua 5.1, assumes that -** the environment has no numeric indices (len == 0) -*/ -static int ktablelen (lua_State *L, int idx) { - if (!lua_istable(L, idx)) return 0; - else return lua_objlen(L, idx); -} - - -/* -** Concatentate the contents of table 'idx1' into table 'idx2'. -** (Assume that both indices are negative.) -** Return the original length of table 'idx2' -*/ -static int concattable (lua_State *L, int idx1, int idx2) { - int i; - int n1 = ktablelen(L, idx1); - int n2 = ktablelen(L, idx2); - if (n1 == 0) return 0; /* nothing to correct */ - for (i = 1; i <= n1; i++) { - lua_rawgeti(L, idx1, i); - lua_rawseti(L, idx2 - 1, n2 + i); /* correct 'idx2' */ - } - return n2; -} - - -/* -** Make a merge of ktables from p1 and p2 the ktable for the new -** pattern at the top of the stack. -*/ -static int joinktables (lua_State *L, int p1, int p2) { - int n1, n2; - lua_getfenv(L, p1); /* get ktables */ - lua_getfenv(L, p2); - n1 = ktablelen(L, -2); - n2 = ktablelen(L, -1); - if (n1 == 0 && n2 == 0) { /* are both tables empty? */ - lua_pop(L, 2); /* nothing to be done; pop tables */ - return 0; /* nothing to correct */ - } - if (n2 == 0 || lua_equal(L, -2, -1)) { /* second table is empty or equal? */ - lua_pop(L, 1); /* pop 2nd table */ - lua_setfenv(L, -2); /* set 1st ktable into new pattern */ - return 0; /* nothing to correct */ - } - if (n1 == 0) { /* first table is empty? */ - lua_setfenv(L, -3); /* set 2nd table into new pattern */ - lua_pop(L, 1); /* pop 1st table */ - return 0; /* nothing to correct */ - } - else { - lua_createtable(L, n1 + n2, 0); /* create ktable for new pattern */ - /* stack: new p; ktable p1; ktable p2; new ktable */ - concattable(L, -3, -1); /* from p1 into new ktable */ - concattable(L, -2, -1); /* from p2 into new ktable */ - lua_setfenv(L, -4); /* new ktable becomes p env */ - lua_pop(L, 2); /* pop other ktables */ - return n1; /* correction for indices from p2 */ - } -} - - -static void correctkeys (TTree *tree, int n) { - if (n == 0) return; /* no correction? */ - tailcall: - switch (tree->tag) { - case TOpenCall: case TCall: case TRunTime: case TRule: { - if (tree->key > 0) - tree->key += n; - break; - } - case TCapture: { - if (tree->key > 0 && tree->cap != Carg && tree->cap != Cnum) - tree->key += n; - break; - } - default: break; - } - switch (numsiblings[tree->tag]) { - case 1: /* correctkeys(sib1(tree), n); */ - tree = sib1(tree); goto tailcall; - case 2: - correctkeys(sib1(tree), n); - tree = sib2(tree); goto tailcall; /* correctkeys(sib2(tree), n); */ - default: assert(numsiblings[tree->tag] == 0); break; - } -} - - -/* -** copy 'ktable' of element 'idx' to new tree (on top of stack) -*/ -static void copyktable (lua_State *L, int idx) { - lua_getfenv(L, idx); - lua_setfenv(L, -2); -} - - -/* -** merge 'ktable' from rule at stack index 'idx' into 'ktable' -** from tree at the top of the stack, and correct corresponding -** tree. -*/ -static void mergektable (lua_State *L, int idx, TTree *rule) { - int n; - lua_getfenv(L, -1); /* get ktables */ - lua_getfenv(L, idx); - n = concattable(L, -1, -2); - lua_pop(L, 2); /* remove both ktables */ - correctkeys(rule, n); -} - - /* ** create a new tree, whith a new root and one sibling. ** Sibling must be on the Lua stack, at index 1. @@ -464,7 +512,7 @@ static TTree *newroot2sib (lua_State *L, int tag) { tree->u.ps = 1 + s1; memcpy(sib1(tree), tree1, s1 * sizeof(TTree)); memcpy(sib2(tree), tree2, s2 * sizeof(TTree)); - correctkeys(sib2(tree), joinktables(L, 1, 2)); + joinktables(L, 1, sib2(tree), 2); return tree; } @@ -524,8 +572,8 @@ static int lp_choice (lua_State *L) { */ static int lp_star (lua_State *L) { int size1; - int n = luaL_checkint(L, 2); - TTree *tree1 = gettree(L, 1, &size1); + int n = (int)luaL_checkinteger(L, 2); + TTree *tree1 = getpatt(L, 1, &size1); if (n >= 0) { /* seq tree1 (seq tree1 ... (seq tree1 (rep tree1))) */ TTree *tree = newtree(L, (n + 1) * (size1 + 1)); if (nullable(tree1)) @@ -593,7 +641,7 @@ static int lp_sub (lua_State *L) { sib1(tree)->tag = TNot; /* ...not... */ memcpy(sib1(sib1(tree)), t2, s2 * sizeof(TTree)); /* ...t2 */ memcpy(sib2(tree), t1, s1 * sizeof(TTree)); /* ... and t1 */ - correctkeys(sib1(tree), joinktables(L, 1, 2)); + joinktables(L, 1, sib1(tree), 2); } return 1; } @@ -634,8 +682,8 @@ static int lp_behind (lua_State *L) { TTree *tree; TTree *tree1 = getpatt(L, 1, NULL); int n = fixedlen(tree1); + luaL_argcheck(L, n >= 0, 1, "pattern may not have fixed length"); luaL_argcheck(L, !hascaptures(tree1), 1, "pattern have captures"); - luaL_argcheck(L, n > 0, 1, "pattern may not have fixed length"); luaL_argcheck(L, n <= MAXBEHIND, 1, "pattern too long to look behind"); tree = newroot1sib(L, TBehind); tree->u.n = n; @@ -649,7 +697,7 @@ static int lp_behind (lua_State *L) { static int lp_V (lua_State *L) { TTree *tree = newleaf(L, TOpenCall); luaL_argcheck(L, !lua_isnoneornil(L, 1), 1, "non-nil value expected"); - tree->key = addtoktable(L, 1); + tree->key = addtonewktable(L, 0, 1); return 1; } @@ -662,7 +710,7 @@ static int lp_V (lua_State *L) { static int capture_aux (lua_State *L, int cap, int labelidx) { TTree *tree = newroot1sib(L, TCapture); tree->cap = cap; - tree->key = addtoktable(L, labelidx); + tree->key = (labelidx == 0) ? 0 : addtonewktable(L, 1, labelidx); return 1; } @@ -670,10 +718,9 @@ static int capture_aux (lua_State *L, int cap, int labelidx) { /* ** Fill a tree with an empty capture, using an empty (TTrue) sibling. */ -static TTree *auxemptycap (lua_State *L, TTree *tree, int cap, int idx) { +static TTree *auxemptycap (TTree *tree, int cap) { tree->tag = TCapture; tree->cap = cap; - tree->key = addtoktable(L, idx); sib1(tree)->tag = TTrue; return tree; } @@ -682,8 +729,18 @@ static TTree *auxemptycap (lua_State *L, TTree *tree, int cap, int idx) { /* ** Create a tree for an empty capture */ -static TTree *newemptycap (lua_State *L, int cap, int idx) { - return auxemptycap(L, newtree(L, 2), cap, idx); +static TTree *newemptycap (lua_State *L, int cap) { + return auxemptycap(newtree(L, 2), cap); +} + + +/* +** Create a tree for an empty capture with an associated Lua value +*/ +static TTree *newemptycapkey (lua_State *L, int cap, int idx) { + TTree *tree = auxemptycap(newtree(L, 2), cap); + tree->key = addtonewktable(L, 0, idx); + return tree; } @@ -722,10 +779,8 @@ static int lp_tablecapture (lua_State *L) { static int lp_groupcapture (lua_State *L) { if (lua_isnoneornil(L, 2)) return capture_aux(L, Cgroup, 0); - else { - luaL_checkstring(L, 2); + else return capture_aux(L, Cgroup, 2); - } } @@ -741,14 +796,14 @@ static int lp_simplecapture (lua_State *L) { static int lp_poscapture (lua_State *L) { - newemptycap(L, Cposition, 0); + newemptycap(L, Cposition); return 1; } static int lp_argcapture (lua_State *L) { - int n = luaL_checkint(L, 1); - TTree *tree = newemptycap(L, Carg, 0); + int n = (int)luaL_checkinteger(L, 1); + TTree *tree = newemptycap(L, Carg); tree->key = n; luaL_argcheck(L, 0 < n && n <= SHRT_MAX, 1, "invalid argument index"); return 1; @@ -756,8 +811,8 @@ static int lp_argcapture (lua_State *L) { static int lp_backref (lua_State *L) { - luaL_checkstring(L, 1); - newemptycap(L, Cbackref, 1); + luaL_checkany(L, 1); + newemptycapkey(L, Cbackref, 1); return 1; } @@ -771,9 +826,10 @@ static int lp_constcapture (lua_State *L) { if (n == 0) /* no values? */ newleaf(L, TTrue); /* no capture */ else if (n == 1) - newemptycap(L, Cconst, 1); /* single constant capture */ + newemptycapkey(L, Cconst, 1); /* single constant capture */ else { /* create a group capture with all values */ TTree *tree = newtree(L, 1 + 3 * (n - 1) + 2); + newktable(L, n); /* create a 'ktable' for new tree */ tree->tag = TCapture; tree->cap = Cgroup; tree->key = 0; @@ -781,10 +837,12 @@ static int lp_constcapture (lua_State *L) { for (i = 1; i <= n - 1; i++) { tree->tag = TSeq; tree->u.ps = 3; /* skip TCapture and its sibling */ - auxemptycap(L, sib1(tree), Cconst, i); + auxemptycap(sib1(tree), Cconst); + sib1(tree)->key = addtoktable(L, i); tree = sib2(tree); } - auxemptycap(L, tree, Cconst, i); + auxemptycap(tree, Cconst); + tree->key = addtoktable(L, i); } return 1; } @@ -794,7 +852,7 @@ static int lp_matchtime (lua_State *L) { TTree *tree; luaL_checktype(L, 2, LUA_TFUNCTION); tree = newroot1sib(L, TRunTime); - tree->key = addtoktable(L, 2); + tree->key = addtonewktable(L, 1, 2); return 1; } @@ -851,7 +909,7 @@ static int collectrules (lua_State *L, int arg, int *totalsize) { lua_pushnil(L); /* prepare to traverse grammar table */ while (lua_next(L, arg) != 0) { if (lua_tonumber(L, -2) == 1 || - lua_equal(L, -2, postab + 1)) { /* initial rule? */ + lp_equal(L, -2, postab + 1)) { /* initial rule? */ lua_pop(L, 1); /* remove value (keep key for lua_next) */ continue; } @@ -928,36 +986,40 @@ static int verifyerror (lua_State *L, int *passed, int npassed) { /* ** Check whether a rule can be left recursive; raise an error in that -** case; otherwise return 1 iff pattern is nullable. Assume ktable at -** the top of the stack. +** case; otherwise return 1 iff pattern is nullable. +** The return value is used to check sequences, where the second pattern +** is only relevant if the first is nullable. +** Parameter 'nb' works as an accumulator, to allow tail calls in +** choices. ('nb' true makes function returns true.) +** Assume ktable at the top of the stack. */ static int verifyrule (lua_State *L, TTree *tree, int *passed, int npassed, - int nullable) { + int nb) { tailcall: switch (tree->tag) { case TChar: case TSet: case TAny: case TFalse: - return nullable; /* cannot pass from here */ + return nb; /* cannot pass from here */ case TTrue: case TBehind: /* look-behind cannot have calls */ return 1; case TNot: case TAnd: case TRep: /* return verifyrule(L, sib1(tree), passed, npassed, 1); */ - tree = sib1(tree); nullable = 1; goto tailcall; + tree = sib1(tree); nb = 1; goto tailcall; case TCapture: case TRunTime: - /* return verifyrule(L, sib1(tree), passed, npassed); */ + /* return verifyrule(L, sib1(tree), passed, npassed, nb); */ tree = sib1(tree); goto tailcall; case TCall: - /* return verifyrule(L, sib2(tree), passed, npassed); */ + /* return verifyrule(L, sib2(tree), passed, npassed, nb); */ tree = sib2(tree); goto tailcall; - case TSeq: /* only check 2nd child if first is nullable */ + case TSeq: /* only check 2nd child if first is nb */ if (!verifyrule(L, sib1(tree), passed, npassed, 0)) - return nullable; - /* else return verifyrule(L, sib2(tree), passed, npassed); */ + return nb; + /* else return verifyrule(L, sib2(tree), passed, npassed, nb); */ tree = sib2(tree); goto tailcall; case TChoice: /* must check both children */ - nullable = verifyrule(L, sib1(tree), passed, npassed, nullable); - /* return verifyrule(L, sib2(tree), passed, npassed, nullable); */ + nb = verifyrule(L, sib1(tree), passed, npassed, nb); + /* return verifyrule(L, sib2(tree), passed, npassed, nb); */ tree = sib2(tree); goto tailcall; case TRule: if (npassed >= MAXRULES) @@ -1000,7 +1062,7 @@ static void verifygrammar (lua_State *L, TTree *grammar) { */ static void initialrulename (lua_State *L, TTree *grammar, int frule) { if (sib1(grammar)->key == 0) { /* initial rule is not referenced? */ - int n = lua_objlen(L, -1) + 1; /* index for name */ + int n = lua_rawlen(L, -1) + 1; /* index for name */ lua_pushvalue(L, frule); /* rule's name */ lua_rawseti(L, -2, n); /* ktable was on the top of the stack */ sib1(grammar)->key = n; @@ -1016,9 +1078,9 @@ static TTree *newgrammar (lua_State *L, int arg) { luaL_argcheck(L, n <= MAXRULES, arg, "grammar has too many rules"); g->tag = TGrammar; g->u.n = n; lua_newtable(L); /* create 'ktable' */ - lua_setfenv(L, -2); + lua_setuservalue(L, -2); buildgrammar(L, g, frule, n); - lua_getfenv(L, -1); /* get 'ktable' for new tree */ + lua_getuservalue(L, -1); /* get 'ktable' for new tree */ finalfix(L, frule - 1, g, sib1(g)); initialrulename(L, g, frule); verifygrammar(L, g); @@ -1032,10 +1094,10 @@ static TTree *newgrammar (lua_State *L, int arg) { static Instruction *prepcompile (lua_State *L, Pattern *p, int idx) { - lua_getfenv(L, idx); /* push 'ktable' (may be used by 'finalfix') */ + lua_getuservalue(L, idx); /* push 'ktable' (may be used by 'finalfix') */ finalfix(L, 0, NULL, p->tree); lua_pop(L, 1); /* remove 'ktable' */ - return compile(L, p); + return lpeg_compile(L, p); } @@ -1043,7 +1105,7 @@ static int lp_printtree (lua_State *L) { TTree *tree = getpatt(L, 1, NULL); int c = lua_toboolean(L, 2); if (c) { - lua_getfenv(L, 1); /* push 'ktable' (may be used by 'finalfix') */ + lua_getuservalue(L, 1); /* push 'ktable' (may be used by 'finalfix') */ finalfix(L, 0, NULL, tree); lua_pop(L, 1); /* remove 'ktable' */ } @@ -1086,9 +1148,11 @@ static size_t initposition (lua_State *L, size_t len) { ** Main match function */ static int lp_match (lua_State *L) { - Capture capture[INITCAPSIZE]; + Capture *capture = calloc(INITCAPSIZE, sizeof(Capture)); + assert(capture); const char *r; size_t l; + int rv; Pattern *p = (getpatt(L, 1, NULL), getpattern(L, 1)); Instruction *code = (p->code != NULL) ? p->code : prepcompile(L, p, 1); const char *s = luaL_checklstring(L, SUBJIDX, &l); @@ -1096,13 +1160,15 @@ static int lp_match (lua_State *L) { int ptop = lua_gettop(L); lua_pushnil(L); /* initialize subscache */ lua_pushlightuserdata(L, capture); /* initialize caplistidx */ - lua_getfenv(L, 1); /* initialize penvidx */ + lua_getuservalue(L, 1); /* initialize penvidx */ r = match(L, s, s + i, s + l, code, capture, ptop); if (r == NULL) { lua_pushnil(L); return 1; } - return getcaptures(L, s, r, ptop); + rv = getcaptures(L, s, r, ptop); + free(capture); + return rv; } @@ -1113,8 +1179,12 @@ static int lp_match (lua_State *L) { ** ======================================================= */ +/* maximum limit for stack size */ +#define MAXLIM (INT_MAX / 100) + static int lp_setmax (lua_State *L) { - luaL_optinteger(L, 1, -1); + lua_Integer lim = luaL_checkinteger(L, 1); + luaL_argcheck(L, 0 < lim && lim <= MAXLIM, 1, "out of range"); lua_settop(L, 1); lua_setfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX); return 0; @@ -1138,8 +1208,7 @@ static int lp_type (lua_State *L) { int lp_gc (lua_State *L) { Pattern *p = getpattern(L, 1); - if (p->codesize > 0) - reallocprog(L, p, 0); + realloccode(L, p, 0); /* delete code block */ return 0; } @@ -1222,8 +1291,8 @@ int luaopen_lpeg (lua_State *L) { luaL_newmetatable(L, PATTERN_T); lua_pushnumber(L, MAXBACK); /* initialize maximum backtracking */ lua_setfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX); - luaL_register(L, NULL, metareg); - luaL_register(L, "lpeg", pattreg); + luaL_setfuncs(L, metareg, 0); + luaL_newlib(L, pattreg); lua_pushvalue(L, -1); lua_setfield(L, -3, "__index"); return 1; diff --git a/lptypes.h b/lptypes.h index 7ace545..5eb7987 100644 --- a/lptypes.h +++ b/lptypes.h @@ -1,7 +1,7 @@ /* -** $Id: lptypes.h,v 1.8 2013/04/12 16:26:38 roberto Exp $ +** $Id: lptypes.h,v 1.14 2015/09/28 17:17:41 roberto Exp $ ** LPeg - PEG pattern matching for Lua -** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) +** Copyright 2007-2015, Lua.org & PUC-Rio (see 'lpeg.html' for license) ** written by Roberto Ierusalimschy */ @@ -19,7 +19,7 @@ #include "lua.h" -#define VERSION "0.12" +#define VERSION "1.0.0" #define PATTERN_T "lpeg-pattern" @@ -27,36 +27,38 @@ /* -** compatibility with Lua 5.2 +** compatibility with Lua 5.1 */ -#if (LUA_VERSION_NUM == 502) +#if (LUA_VERSION_NUM == 501) -#undef lua_equal -#define lua_equal(L,idx1,idx2) lua_compare(L,(idx1),(idx2),LUA_OPEQ) +#define lp_equal lua_equal -#undef lua_getfenv -#define lua_getfenv lua_getuservalue -#undef lua_setfenv -#define lua_setfenv lua_setuservalue +#define lua_getuservalue lua_getfenv +#define lua_setuservalue lua_setfenv -#undef lua_objlen -#define lua_objlen lua_rawlen +#define lua_rawlen lua_objlen -#undef luaL_register -#define luaL_register(L,n,f) \ - { if ((n) == NULL) luaL_setfuncs(L,f,0); else luaL_newlib(L,f); } +#define luaL_setfuncs(L,f,n) luaL_register(L,NULL,f) +#define luaL_newlib(L,f) luaL_register(L,"lpeg",f) #endif +#if !defined(lp_equal) +#define lp_equal(L,idx1,idx2) lua_compare(L,(idx1),(idx2),LUA_OPEQ) +#endif + + /* default maximum size for call/backtrack stack */ #if !defined(MAXBACK) -#define MAXBACK 100 +#define MAXBACK 400 #endif /* maximum number of rules in a grammar */ -#define MAXRULES 200 +#if !defined(MAXRULES) +#define MAXRULES 1000 +#endif diff --git a/lpvm.c b/lpvm.c index cd893ed..b29d21f 100644 --- a/lpvm.c +++ b/lpvm.c @@ -1,10 +1,11 @@ /* -** $Id: lpvm.c,v 1.5 2013/04/12 16:29:49 roberto Exp $ +** $Id: lpvm.c,v 1.6 2015/09/28 17:01:25 roberto Exp $ ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) */ #include #include +#include #include "lua.h" @@ -18,7 +19,7 @@ /* initial size for call/backtrack stack */ #if !defined(INITBACK) -#define INITBACK 100 +#define INITBACK MAXBACK #endif @@ -70,7 +71,7 @@ static Stack *doublestack (lua_State *L, Stack **stacklimit, int ptop) { max = lua_tointeger(L, -1); /* maximum allowed size */ lua_pop(L, 1); if (n >= max) /* already at maximum size? */ - luaL_error(L, "too many pending calls/choices"); + luaL_error(L, "backtrack stack overflow (current limit is %d)", max); newn = 2 * n; /* new size */ if (newn > max) newn = max; newstack = (Stack *)lua_newuserdata(L, newn * sizeof(Stack)); @@ -146,7 +147,8 @@ static int removedyncap (lua_State *L, Capture *capture, */ const char *match (lua_State *L, const char *o, const char *s, const char *e, Instruction *op, Capture *capture, int ptop) { - Stack stackbase[INITBACK]; + Stack *stackbase = calloc(INITBACK, sizeof(Stack)); + assert(stackbase); Stack *stacklimit = stackbase + INITBACK; Stack *stack = stackbase; /* point to first empty slot in stack */ int capsize = INITCAPSIZE; @@ -168,10 +170,12 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e, assert(stack == getstackbase(L, ptop) + 1); capture[captop].kind = Cclose; capture[captop].s = NULL; + free(stackbase); return s; } case IGiveup: { assert(stack == getstackbase(L, ptop)); + free(stackbase); return NULL; } case IRet: { @@ -345,7 +349,7 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e, p++; continue; } - default: assert(0); return NULL; + default: assert(0); free(stackbase); return NULL; } } } diff --git a/lpvm.h b/lpvm.h index 6a2a558..757b9e1 100644 --- a/lpvm.h +++ b/lpvm.h @@ -1,5 +1,5 @@ /* -** $Id: lpvm.h,v 1.2 2013/04/03 20:37:18 roberto Exp $ +** $Id: lpvm.h,v 1.3 2014/02/21 13:06:41 roberto Exp $ */ #if !defined(lpvm_h) @@ -49,14 +49,9 @@ typedef union Instruction { } Instruction; -int getposition (lua_State *L, int t, int i); void printpatt (Instruction *p, int n); const char *match (lua_State *L, const char *o, const char *s, const char *e, Instruction *op, Capture *capture, int ptop); -int verify (lua_State *L, Instruction *op, const Instruction *p, - Instruction *e, int postable, int rule); -void checkrule (lua_State *L, Instruction *op, int from, int to, - int postable, int rule); #endif diff --git a/makefile b/makefile index 57a18fb..7a8463e 100644 --- a/makefile +++ b/makefile @@ -1,5 +1,5 @@ LIBNAME = lpeg -LUADIR = /usr/include/lua5.1/ +LUADIR = ../lua/ COPT = -O2 # COPT = -DLPEG_DEBUG -g @@ -22,7 +22,7 @@ CWARNS = -Wall -Wextra -pedantic \ # -Wunreachable-code \ -CFLAGS = $(CWARNS) $(COPT) -ansi -I$(LUADIR) -fPIC +CFLAGS = $(CWARNS) $(COPT) -std=c99 -I$(LUADIR) -fPIC CC = gcc FILES = lpvm.o lpcap.o lptree.o lpcode.o lpprint.o diff --git a/re.html b/re.html index 4717ec2..d0d9744 100644 --- a/re.html +++ b/re.html @@ -10,7 +10,7 @@ - +
@@ -296,7 +296,7 @@

Abstract Syntax Trees

a tag field telling what non terminal that table represents. We can add such a tag using -named group captures: +named group captures:

 x = re.compile[[
@@ -450,7 +450,7 @@ 

Patterns

License

-Copyright © 2008-2010 Lua.org, PUC-Rio. +Copyright © 2008-2015 Lua.org, PUC-Rio.

Permission is hereby granted, free of charge, @@ -488,7 +488,7 @@

License

-$Id: re.html,v 1.21 2013/03/28 20:43:30 roberto Exp $ +$Id: re.html,v 1.23 2015/09/28 17:17:41 roberto Exp $

diff --git a/test.lua b/test.lua index 1d107ca..017a3ab 100755 --- a/test.lua +++ b/test.lua @@ -1,6 +1,6 @@ -#!/usr/bin/env lua5.1 +#!/usr/bin/env lua --- $Id: test.lua,v 1.101 2013/04/12 16:30:33 roberto Exp $ +-- $Id: test.lua,v 1.109 2015/09/28 17:01:25 roberto Exp $ -- require"strict" -- just to be pedantic @@ -16,9 +16,6 @@ local unpack = rawget(table, "unpack") or unpack local loadstring = rawget(_G, "loadstring") or load --- most tests here do not need much stack space -m.setmaxstack(5) - local any = m.P(1) local space = m.S" \t\n"^0 @@ -170,8 +167,8 @@ assert(m.match( basiclookfor((#m.P(b) * 1) * m.Cp()), " ( (a)") == 7) a = {m.match(m.C(digit^1 * m.Cc"d") + m.C(letter^1 * m.Cc"l"), "123")} checkeq(a, {"123", "d"}) -a = {m.match(m.C(digit^1) * "d" * -1 + m.C(letter^1 * m.Cc"l"), "123d")} -checkeq(a, {"123"}) +-- bug in LPeg 0.12 (nil value does not create a 'ktable') +assert(m.match(m.Cc(nil), "") == nil) a = {m.match(m.C(digit^1 * m.Cc"d") + m.C(letter^1 * m.Cc"l"), "abcd")} checkeq(a, {"abcd", "l"}) @@ -194,6 +191,16 @@ checkeq(a, {1, 5}) t = {m.match({[1] = m.C(m.C(1) * m.V(1) + -1)}, "abc")} checkeq(t, {"abc", "a", "bc", "b", "c", "c", ""}) +-- bug in 0.12 ('hascapture' did not check for captures inside a rule) +do + local pat = m.P{ + 'S'; + S1 = m.C('abc') + 3, + S = #m.V('S1') -- rule has capture, but '#' must ignore it + } + assert(pat:match'abc' == 1) +end + -- test for small capture boundary for i = 250,260 do @@ -201,9 +208,8 @@ for i = 250,260 do assert(#m.match(m.C(m.C(i)), string.rep('a', i)) == i) end - -- tests for any*n and any*-n -for n = 1, 550 do +for n = 1, 550, 13 do local x_1 = string.rep('x', n - 1) local x = x_1 .. 'a' assert(not m.P(n):match(x_1)) @@ -282,6 +288,13 @@ assert(m.match(m.P"ab"^-1 - "c", "abcd") == 3) p = ('Aa' * ('Bb' * ('Cc' * m.P'Dd'^0)^0)^0)^-1 assert(p:match("AaBbCcDdBbCcDdDdDdBb") == 21) + + +-- bug in 0.12.2 +-- p = { ('ab' ('c' 'ef'?)*)? } +p = m.C(('ab' * ('c' * m.P'ef'^-1)^0)^-1) +s = "abcefccefc" +assert(s == p:match(s)) pi = "3.14159 26535 89793 23846 26433 83279 50288 41971 69399 37510" @@ -343,10 +356,16 @@ checkeq(t, {hi = 10, ho = 20}) t = p:match'abc' checkeq(t, {hi = 10, ho = 20, 'a', 'b', 'c'}) +-- non-string group names +p = m.Ct(m.Cg(1, print) * m.Cg(1, 23.5) * m.Cg(1, io)) +t = p:match('abcdefghij') +assert(t[print] == 'a' and t[23.5] == 'b' and t[io] == 'c') + -- test for error messages -local function checkerr (msg, ...) - assert(m.match({ m.P(msg) + 1 * m.V(1) }, select(2, pcall(...)))) +local function checkerr (msg, f, ...) + local st, err = pcall(f, ...) + assert(not st and m.match({ m.P(msg) + 1 * m.V(1) }, err)) end checkerr("rule '1' may be left recursive", m.match, { m.V(1) * 'a' }, "a") @@ -370,6 +389,32 @@ p = {'a', } checkerr("rule 'a' may be left recursive", m.match, p, "a") +-- Bug in peephole optimization of LPeg 0.12 (IJmp -> ICommit) +-- the next grammar has an original sequence IJmp -> ICommit -> IJmp L1 +-- that is optimized to ICommit L1 + +p = m.P { (m.P {m.P'abc'} + 'ayz') * m.V'y'; y = m.P'x' } +assert(p:match('abcx') == 5 and p:match('ayzx') == 5 and not p:match'abc') + + +do + -- large dynamic Cc + local lim = 2^16 - 1 + local c = 0 + local function seq (n) + if n == 1 then c = c + 1; return m.Cc(c) + else + local m = math.floor(n / 2) + return seq(m) * seq(n - m) + end + end + p = m.Ct(seq(lim)) + t = p:match('') + assert(t[lim] == lim) + checkerr("too many", function () p = p / print end) + checkerr("too many", seq, lim + 1) +end + -- tests for non-pattern as arguments to pattern functions @@ -488,7 +533,10 @@ assert(m.match(1 * m.B(1), 'a') == 2) assert(m.match(-m.B(1), 'a') == 1) assert(m.match(m.B(250), string.rep('a', 250)) == nil) assert(m.match(250 * m.B(250), string.rep('a', 250)) == 251) -assert(not pcall(m.B, 260)) + +-- look-behind with an open call +checkerr("pattern may not have fixed length", m.B, m.V'S1') +checkerr("too long to look behind", m.B, 260) B = #letter * -m.B(letter) + -letter * m.B(letter) x = m.Ct({ (B * m.Cp())^-1 * (1 * m.V(1) + m.P(true)) }) @@ -555,18 +603,18 @@ assert(not p:match(string.rep("011", 10001))) -- this grammar does need backtracking info. local lim = 10000 p = m.P{ '0' * m.V(1) + '0' } -assert(not pcall(m.match, p, string.rep("0", lim))) +checkerr("stack overflow", m.match, p, string.rep("0", lim)) m.setmaxstack(2*lim) -assert(not pcall(m.match, p, string.rep("0", lim))) +checkerr("stack overflow", m.match, p, string.rep("0", lim)) m.setmaxstack(2*lim + 4) -assert(pcall(m.match, p, string.rep("0", lim))) +assert(m.match(p, string.rep("0", lim)) == lim + 1) -- this repetition should not need stack space (only the call does) p = m.P{ ('a' * m.V(1))^0 * 'b' + 'c' } m.setmaxstack(200) assert(p:match(string.rep('a', 180) .. 'c' .. string.rep('b', 180)) == 362) -m.setmaxstack(5) -- restore original limit +m.setmaxstack(100) -- restore low limit -- tests for optional start position assert(m.match("a", "abc", 1)) @@ -588,10 +636,10 @@ print("+") -- tests for argument captures -assert(not pcall(m.Carg, 0)) -assert(not pcall(m.Carg, -1)) -assert(not pcall(m.Carg, 2^18)) -assert(not pcall(m.match, m.Carg(1), 'a', 1)) +checkerr("invalid argument", m.Carg, 0) +checkerr("invalid argument", m.Carg, -1) +checkerr("invalid argument", m.Carg, 2^18) +checkerr("absent extra argument #1", m.match, m.Carg(1), 'a', 1) assert(m.match(m.Carg(1), 'a', 1, print) == print) x = {m.match(m.Carg(1) * m.Carg(2), '', 1, 10, 20)} checkeq(x, {10, 20}) @@ -644,14 +692,16 @@ assert(m.match(p, "aaaa") == 5) assert(m.match(p, "abaa") == 2) assert(not m.match(p, "baaa")) -assert(not pcall(m.match, function () return 2^20 end, s)) -assert(not pcall(m.match, function () return 0 end, s)) -assert(not pcall(m.match, function (s, i) return i - 1 end, s)) -assert(not pcall(m.match, m.P(1)^0 * function (_, i) return i - 1 end, s)) +checkerr("invalid position", m.match, function () return 2^20 end, s) +checkerr("invalid position", m.match, function () return 0 end, s) +checkerr("invalid position", m.match, function (s, i) return i - 1 end, s) +checkerr("invalid position", m.match, + m.P(1)^0 * function (_, i) return i - 1 end, s) assert(m.match(m.P(1)^0 * function (_, i) return i end * -1, s)) -assert(not pcall(m.match, m.P(1)^0 * function (_, i) return i + 1 end, s)) +checkerr("invalid position", m.match, + m.P(1)^0 * function (_, i) return i + 1 end, s) assert(m.match(m.P(function (s, i) return s:len() + 1 end) * -1, s)) -assert(not pcall(m.match, m.P(function (s, i) return s:len() + 2 end) * -1, s)) +checkerr("invalid position", m.match, m.P(function (s, i) return s:len() + 2 end) * -1, s) assert(not m.match(m.P(function (s, i) return s:len() end) * -1, s)) assert(m.match(m.P(1)^0 * function (_, i) return true end, s) == string.len(s) + 1) @@ -696,6 +746,10 @@ t = {m.match(m.Cc(nil,nil,4) * m.Cc(nil,3) * m.Cc(nil, nil) / g / g, "")} t1 = {1,1,nil,nil,4,nil,3,nil,nil} for i=1,10 do assert(t[i] == t1[i]) end +-- bug in 0.12.2: ktable with only nil could be eliminated when joining +-- with a pattern without ktable +assert((m.P"aaa" * m.Cc(nil)):match"aaa" == nil) + t = {m.match((m.C(1) / function (x) return x, x.."x" end)^0, "abc")} checkeq(t, {"a", "ax", "b", "bx", "c", "cx"}) @@ -734,9 +788,9 @@ assert(m.match(m.Cs((m.P(1) / ".xx")^0), "abcd") == ".xx.xx.xx.xx") assert(m.match(m.Cp() * m.P(3) * m.Cp()/"%2%1%1 - %0 ", "abcde") == "411 - abc ") -assert(pcall(m.match, m.P(1)/"%0", "abc")) -assert(not pcall(m.match, m.P(1)/"%1", "abc")) -- out of range -assert(not pcall(m.match, m.P(1)/"%9", "abc")) -- out of range +assert(m.match(m.P(1)/"%0", "abc") == "a") +checkerr("invalid capture index", m.match, m.P(1)/"%1", "abc") +checkerr("invalid capture index", m.match, m.P(1)/"%9", "abc") p = m.C(1) p = p * p; p = p * p; p = p * p * m.C(1) / "%9 - %1" @@ -754,7 +808,7 @@ assert(m.match(m.C(1)^0 / "%9-%1-%0-%3", s) == "9-1-" .. s .. "-3") p = m.Cc('alo') * m.C(1) / "%1 - %2 - %1" assert(p:match'x' == 'alo - x - alo') -assert(not pcall(m.match, m.Cc(true) / "%1", "a")) +checkerr("invalid capture value (a boolean)", m.match, m.Cc(true) / "%1", "a") -- long strings for string capture l = 10000 @@ -782,35 +836,37 @@ checkeq(t, {a="b", c="du", xux="yuy"}) -- errors in accumulator capture --- very long match (forces fold to be a pair open-close) producing with -- no initial capture -assert(not pcall(m.match, m.Cf(m.P(500), print), string.rep('a', 600))) +checkerr("no initial value", m.match, m.Cf(m.P(5), print), 'aaaaaa') +-- no initial capture (very long match forces fold to be a pair open-close) +checkerr("no initial value", m.match, m.Cf(m.P(500), print), + string.rep('a', 600)) -- nested capture produces no initial value -assert(not pcall(m.match, m.Cf(m.P(1) / {}, print), "alo")) +checkerr("no initial value", m.match, m.Cf(m.P(1) / {}, print), "alo") -- tests for loop checker -local function haveloop (p) - assert(not pcall(function (p) return p^0 end, m.P(p))) +local function isnullable (p) + checkerr("may accept empty string", function (p) return p^0 end, m.P(p)) end -haveloop(m.P("x")^-4) +isnullable(m.P("x")^-4) assert(m.match(((m.P(0) + 1) * m.S"al")^0, "alo") == 3) assert(m.match((("x" + #m.P(1))^-4 * m.S"al")^0, "alo") == 3) -haveloop("") -haveloop(m.P("x")^0) -haveloop(m.P("x")^-1) -haveloop(m.P("x") + 1 + 2 + m.P("a")^-1) -haveloop(-m.P("ab")) -haveloop(- -m.P("ab")) -haveloop(# #(m.P("ab") + "xy")) -haveloop(- #m.P("ab")^0) -haveloop(# -m.P("ab")^1) -haveloop(#m.V(3)) -haveloop(m.V(3) + m.V(1) + m.P('a')^-1) -haveloop({[1] = m.V(2) * m.V(3), [2] = m.V(3), [3] = m.P(0)}) +isnullable("") +isnullable(m.P("x")^0) +isnullable(m.P("x")^-1) +isnullable(m.P("x") + 1 + 2 + m.P("a")^-1) +isnullable(-m.P("ab")) +isnullable(- -m.P("ab")) +isnullable(# #(m.P("ab") + "xy")) +isnullable(- #m.P("ab")^0) +isnullable(# -m.P("ab")^1) +isnullable(#m.V(3)) +isnullable(m.V(3) + m.V(1) + m.P('a')^-1) +isnullable({[1] = m.V(2) * m.V(3), [2] = m.V(3), [3] = m.P(0)}) assert(m.match(m.P{[1] = m.V(2) * m.V(3), [2] = m.V(3), [3] = m.P(1)}^0, "abc") == 3) assert(m.match(m.P""^-3, "a") == 1) @@ -894,13 +950,20 @@ print"+" -- tests for back references -assert(not pcall(m.match, m.Cb('x'), '')) -assert(not pcall(m.match, m.Cg(1, 'a') * m.Cb('b'), 'a')) +checkerr("back reference 'x' not found", m.match, m.Cb('x'), '') +checkerr("back reference 'b' not found", m.match, m.Cg(1, 'a') * m.Cb('b'), 'a') p = m.Cg(m.C(1) * m.C(1), "k") * m.Ct(m.Cb("k")) t = p:match("ab") checkeq(t, {"a", "b"}) +p = m.P(true) +for i = 1, 10 do p = p * m.Cg(1, i) end +for i = 1, 10 do + local p = p * m.Cb(i) + assert(p:match('abcdefghij') == string.sub('abcdefghij', i, i)) +end + t = {} function foo (p) t[#t + 1] = p; return p .. "x" end @@ -1370,8 +1433,7 @@ assert(rev:match"0123456789" == "9876543210") -- testing error messages in re local function errmsg (p, err) - local s, msg = pcall(re.compile, p) - assert(not s and string.find(msg, err)) + checkerr(err, re.compile, p) end errmsg('aaaa', "rule 'aaaa'")