== behind n; (where n = fixedlen(p))
-*/
-static void codebehind (CompileState *compst, TTree *tree) { /* emit code for a TBehind node */
- if (tree->u.n > 0) /* nothing to step back over when the stored length is not positive */
- addinstruction(compst, IBehind, tree->u.n);
- codegen(compst, sib1(tree), 0, NOINST, fullset); /* then code the sub-pattern, with no pending test */
-}
-
-
-/*
-** Choice; optimizations:
-** - when p1 is headfail or
-** when first(p1) and first(p2) are disjoint, then
-** a character not in first(p1) cannot go to p1, and a character
-** in first(p1) cannot go to p2 (as it is not in first(p2)).
-** (The optimization is not valid if p1 accepts the empty string,
-** as then there is no character at all...)
-** - when p2 is empty and opt is true; a IPartialCommit can reuse
-** the Choice already active in the stack.
-** 'fl' is the follow set passed on to the sub-patterns coded in
-** tail position; p2 counts as empty when it is a TTrue node.
-*/
-static void codechoice (CompileState *compst, TTree *p1, TTree *p2, int opt,
- const Charset *fl) {
- int emptyp2 = (p2->tag == TTrue);
- Charset cs1, cs2;
- int e1 = getfirst(p1, fullset, &cs1); /* presumably non-zero when p1 may match the empty string */
- if (headfail(p1) ||
- (!e1 && (getfirst(p2, fl, &cs2), cs_disjoint(&cs1, &cs2)))) {
- /* == test (fail(p1)) -> L1 ; p1 ; jmp L2; L1: p2; L2: */
- int test = codetestset(compst, &cs1, 0);
- int jmp = NOINST;
- codegen(compst, p1, 0, test, fl);
- if (!emptyp2) /* an empty p2 needs no jump over it */
- jmp = addoffsetinst(compst, IJmp);
- jumptohere(compst, test);
- codegen(compst, p2, opt, NOINST, fl);
- jumptohere(compst, jmp); /* 'jmp' may still be NOINST here; presumably ignored by jumptohere */
- }
- else if (opt && emptyp2) {
- /* p1? == IPartialCommit; p1 */
- jumptohere(compst, addoffsetinst(compst, IPartialCommit));
- codegen(compst, p1, 1, NOINST, fullset);
- }
- else {
- /* ==
-    test(first(p1)) -> L1; choice L1; <p1>; commit L2; L1: <p2>; L2: */
- int pcommit;
- int test = codetestset(compst, &cs1, e1);
- int pchoice = addoffsetinst(compst, IChoice);
- codegen(compst, p1, emptyp2, test, fullset); /* p1 may reuse the new Choice only when p2 is empty */
- pcommit = addoffsetinst(compst, ICommit);
- jumptohere(compst, pchoice);
- jumptohere(compst, test);
- codegen(compst, p2, opt, NOINST, fl);
- jumptohere(compst, pcommit);
- }
-}
-
-
-/*
-** And predicate
-** optimization: fixedlen(p) = n ==> <&p> == <p>; behind n
-** (valid only when 'p' has no captures)
-** The optimized form is used only when 0 <= n <= MAXBEHIND; any
-** other pattern gets the generic Choice/BackCommit code.
-*/
-static void codeand (CompileState *compst, TTree *tree, int tt) {
- int n = fixedlen(tree);
- if (n >= 0 && n <= MAXBEHIND && !hascaptures(tree)) {
- codegen(compst, tree, 0, tt, fullset);
- if (n > 0) /* a zero-length match leaves nothing to step back over */
- addinstruction(compst, IBehind, n);
- }
- else { /* default: Choice L1; p1; BackCommit L2; L1: Fail; L2: */
- int pcommit;
- int pchoice = addoffsetinst(compst, IChoice);
- codegen(compst, tree, 0, tt, fullset);
- pcommit = addoffsetinst(compst, IBackCommit);
- jumptohere(compst, pchoice);
- addinstruction(compst, IFail, 0);
- jumptohere(compst, pcommit);
- }
-}
-
-
-/*
-** Captures: if pattern has fixed (and not too big) length, and it
-** has no nested captures, use a single IFullCapture instruction
-** after the match; otherwise, enclose the pattern with OpenCapture -
-** CloseCapture.
-** ("Not too big" means a fixed length of at most MAXOFF. The captured
-** pattern is sib1(tree); the capture kind and key come from 'tree'.)
-*/
-static void codecapture (CompileState *compst, TTree *tree, int tt,
- const Charset *fl) {
- int len = fixedlen(sib1(tree));
- if (len >= 0 && len <= MAXOFF && !hascaptures(sib1(tree))) {
- codegen(compst, sib1(tree), 0, tt, fl);
- addinstcap(compst, IFullCapture, tree->cap, tree->key, len); /* the fixed length goes in the instruction */
- }
- else {
- addinstcap(compst, IOpenCapture, tree->cap, tree->key, 0);
- codegen(compst, sib1(tree), 0, tt, fl);
- addinstcap(compst, ICloseCapture, Cclose, 0, 0);
- }
-}
-
-
-static void coderuntime (CompileState *compst, TTree *tree, int tt) { /* emit code for a TRunTime node */
- addinstcap(compst, IOpenCapture, Cgroup, tree->key, 0); /* opened as a group capture keyed by tree->key */
- codegen(compst, sib1(tree), 0, tt, fullset);
- addinstcap(compst, ICloseRunTime, Cclose, 0, 0); /* closed with ICloseRunTime rather than ICloseCapture */
-}
-
-
-/*
-** Repetition; optimizations:
-** When pattern is a charset, can use special instruction ISpan.
-** When pattern is head fail, or if it starts with characters that
-** are disjoint from what follows the repetitions, a simple test
-** is enough (a fail inside the repetition would backtrack to fail
-** again in the following pattern, so there is no need for a choice).
-** When 'opt' is true, the repetition can reuse the Choice already
-** active in the stack.
-*/
-static void coderep (CompileState *compst, TTree *tree, int opt,
- const Charset *fl) {
- Charset st;
- if (tocharset(tree, &st)) {
- addinstruction(compst, ISpan, 0);
- addcharset(compst, st.cs);
- }
- else {
- int e1 = getfirst(tree, fullset, &st);
- if (headfail(tree) || (!e1 && cs_disjoint(&st, fl))) {
- /* L1: test (fail(p1)) -> L2; <p>; jmp L1; L2: */
- int jmp;
- int test = codetestset(compst, &st, 0);
- codegen(compst, tree, 0, test, fullset);
- jmp = addoffsetinst(compst, IJmp);
- jumptohere(compst, test);
- jumptothere(compst, jmp, test); /* loop back to the test (L1) */
- }
- else {
- /* test(fail(p1)) -> L2; choice L2; L1: <p>; partialcommit L1; L2: */
- /* or (if 'opt'): partialcommit L1; L1: <p>; partialcommit L1; */
- int commit, l2;
- int test = codetestset(compst, &st, e1);
- int pchoice = NOINST;
- if (opt)
- jumptohere(compst, addoffsetinst(compst, IPartialCommit));
- else
- pchoice = addoffsetinst(compst, IChoice);
- l2 = gethere(compst); /* start of the loop body (label L1 in the layouts above) */
- codegen(compst, tree, 0, NOINST, fullset);
- commit = addoffsetinst(compst, IPartialCommit);
- jumptothere(compst, commit, l2);
- jumptohere(compst, pchoice); /* 'pchoice' is NOINST when 'opt' is set; presumably ignored by jumptohere */
- jumptohere(compst, test);
- }
- }
-}
-
-
-/*
-** Not predicate; optimizations:
-** In any case, if first test fails, 'not' succeeds, so it can jump to
-** the end. If pattern is headfail, that is all (it cannot fail
-** in other parts); this case includes 'not' of simple sets. Otherwise,
-** use the default code (a choice plus a failtwice).
-*/
-static void codenot (CompileState *compst, TTree *tree) {
- Charset st;
- int e = getfirst(tree, fullset, &st);
- int test = codetestset(compst, &st, e);
- if (headfail(tree)) /* test (fail(p1)) -> L1; fail; L1: */
- addinstruction(compst, IFail, 0);
- else {
- /* test(fail(p))-> L1; choice L1; <p>; failtwice; L1: */
- int pchoice = addoffsetinst(compst, IChoice);
- codegen(compst, tree, 0, NOINST, fullset);
- addinstruction(compst, IFailTwice, 0);
- jumptohere(compst, pchoice);
- }
- jumptohere(compst, test); /* both variants end at L1 */
-}
-
-
-/*
-** change open calls to calls, using list 'positions' to find
-** correct offsets; also optimize tail calls
-** ('positions[n]' is the code position of rule number n; only the
-** instructions in the range [from, to) are visited.)
-*/
-static void correctcalls (CompileState *compst, int *positions,
- int from, int to) {
- int i;
- Instruction *code = compst->p->code;
- for (i = from; i < to; i += sizei(&code[i])) { /* step by each instruction's own size */
- if (code[i].i.code == IOpenCall) {
- int n = code[i].i.key; /* rule number */
- int rule = positions[n]; /* rule position */
- assert(rule == from || code[rule - 1].i.code == IRet);
- if (code[finaltarget(code, i + 2)].i.code == IRet) /* call; ret ? */
- code[i].i.code = IJmp; /* tail call */
- else
- code[i].i.code = ICall;
- jumptothere(compst, i, rule); /* call jumps to respective rule */
- }
- }
- assert(i == to); /* the walk must end exactly on an instruction boundary */
-}
-
-
-/*
-** Code for a grammar:
-** call L1; jmp L2; L1: rule 1; ret; rule 2; ret; ...; L2:
-** Calls inside the rules are emitted as open calls and resolved by
-** 'correctcalls' once the position of every rule is known.
-*/
-static void codegrammar (CompileState *compst, TTree *grammar) {
- int positions[MAXRULES];
- int rulenumber = 0;
- TTree *rule;
- int firstcall = addoffsetinst(compst, ICall); /* call initial rule */
- int jumptoend = addoffsetinst(compst, IJmp); /* jump to the end */
- int start = gethere(compst); /* here starts the initial rule */
- jumptohere(compst, firstcall);
- for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) { /* rules are chained through sib2 */
- positions[rulenumber++] = gethere(compst); /* save rule position */
- codegen(compst, sib1(rule), 0, NOINST, fullset); /* code rule */
- addinstruction(compst, IRet, 0);
- }
- assert(rule->tag == TTrue); /* the rule list is terminated by a TTrue node */
- jumptohere(compst, jumptoend);
- correctcalls(compst, positions, start, gethere(compst));
-}
-
-
-static void codecall (CompileState *compst, TTree *call) { /* emit a placeholder call for a TCall node */
- int c = addoffsetinst(compst, IOpenCall); /* to be corrected later */
- getinstr(compst, c).i.key = sib2(call)->cap; /* rule number */
- assert(sib2(call)->tag == TRule); /* the call's second sibling must be the called rule */
-}
-
-
-/*
-** Code first child of a sequence
-** (second child is called in-place to allow tail call)
-** Return 'tt' for second child
-** (NOINST unless fixedlen(p1) is 0, since only then does the pending
-** test still describe the input seen by the second child)
-*/
-static int codeseq1 (CompileState *compst, TTree *p1, TTree *p2,
- int tt, const Charset *fl) {
- if (needfollow(p1)) {
- Charset fl1;
- getfirst(p2, fl, &fl1); /* p1 follow is p2 first */
- codegen(compst, p1, 0, tt, &fl1);
- }
- else /* use 'fullset' as follow */
- codegen(compst, p1, 0, tt, fullset);
- if (fixedlen(p1) != 0) /* can 'p1' consume anything? */
- return NOINST; /* invalidate test */
- else return tt; /* else 'tt' still protects sib2 */
-}
-
-
-/*
-** Main code-generation function: dispatch to auxiliary functions
-** according to kind of tree. ('needfollow' should return true
-** only for constructions that use 'fl'.)
-** 'opt' is forwarded only to choices and repetitions, 'tt' is the
-** index of a pending test instruction (or NOINST), and 'fl' is the
-** follow set of the pattern being coded.
-*/
-static void codegen (CompileState *compst, TTree *tree, int opt, int tt,
- const Charset *fl) {
- tailcall:
- switch (tree->tag) {
- case TChar: codechar(compst, tree->u.n, tt); break;
- case TAny: addinstruction(compst, IAny, 0); break;
- case TSet: codecharset(compst, treebuffer(tree), tt); break;
- case TTrue: break; /* the empty pattern generates no code */
- case TFalse: addinstruction(compst, IFail, 0); break;
- case TChoice: codechoice(compst, sib1(tree), sib2(tree), opt, fl); break;
- case TRep: coderep(compst, sib1(tree), opt, fl); break;
- case TBehind: codebehind(compst, tree); break;
- case TNot: codenot(compst, sib1(tree)); break;
- case TAnd: codeand(compst, sib1(tree), tt); break;
- case TCapture: codecapture(compst, tree, tt, fl); break;
- case TRunTime: coderuntime(compst, tree, tt); break;
- case TGrammar: codegrammar(compst, tree); break;
- case TCall: codecall(compst, tree); break;
- case TSeq: {
- tt = codeseq1(compst, sib1(tree), sib2(tree), tt, fl); /* code 'p1' */
- /* codegen(compst, p2, opt, tt, fl); */
- tree = sib2(tree); goto tailcall; /* loop instead of recursing on the second child */
- }
- default: assert(0); /* unknown tree tag */
- }
-}
-
-
-/*
-** Optimize jumps and other jump-like instructions.
-** * Update labels of instructions with labels to their final
-** destinations (e.g., choice L1; ... L1: jmp L2: becomes
-** choice L2)
-** * Jumps to other instructions that do jumps become those
-** instructions (e.g., jump to return becomes a return; jump
-** to commit becomes a commit)
-** The code is rewritten in place and must end with an IEnd.
-*/
-static void peephole (CompileState *compst) {
- Instruction *code = compst->p->code;
- int i;
- for (i = 0; i < compst->ncode; i += sizei(&code[i])) { /* step by each instruction's own size */
- redo:
- switch (code[i].i.code) {
- case IChoice: case ICall: case ICommit: case IPartialCommit:
- case IBackCommit: case ITestChar: case ITestSet:
- case ITestAny: { /* instructions with labels */
- jumptothere(compst, i, finallabel(code, i)); /* optimize label */
- break;
- }
- case IJmp: {
- int ft = finaltarget(code, i);
- switch (code[ft].i.code) { /* jumping to what? */
- case IRet: case IFail: case IFailTwice:
- case IEnd: { /* instructions with unconditional implicit jumps */
- code[i] = code[ft]; /* jump becomes that instruction */
- code[i + 1].i.code = IAny; /* 'no-op' for target position */
- break;
- }
- case ICommit: case IPartialCommit:
- case IBackCommit: { /* inst. with unconditional explicit jumps */
- int fft = finallabel(code, ft);
- code[i] = code[ft]; /* jump becomes that instruction... */
- jumptothere(compst, i, fft); /* but must correct its offset */
- goto redo; /* reoptimize its label */
- }
- default: {
- jumptothere(compst, i, ft); /* optimize label */
- break;
- }
- }
- break;
- }
- default: break; /* all other instructions are left untouched */
- }
- }
- assert(code[i - 1].i.code == IEnd);
-}
-
-
-/*
-** Compile a pattern
-** Generates code for p->tree, appends a final IEnd, trims the code
-** array to the number of instructions emitted, runs the peephole
-** pass, and returns p->code.
-*/
-Instruction *compile (lua_State *L, Pattern *p) {
- CompileState compst;
- compst.p = p; compst.ncode = 0; compst.L = L;
- realloccode(L, p, 2); /* minimum initial size */
- codegen(&compst, p->tree, 0, NOINST, fullset); /* top level: no reusable choice, no pending test */
- addinstruction(&compst, IEnd, 0);
- realloccode(L, p, compst.ncode); /* set final size */
- peephole(&compst);
- return p->code;
-}
-
-
-/* }====================================================== */
-
diff --git a/src/ext/lpeg/lpcode.h b/src/ext/lpeg/lpcode.h
deleted file mode 100644
index 2a5861e..0000000
--- a/src/ext/lpeg/lpcode.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
-** $Id: lpcode.h,v 1.8 2016/09/15 17:46:13 roberto Exp $
-** Interface of the pattern compiler: tree queries and code generation.
-*/
-
-#if !defined(lpcode_h)
-#define lpcode_h
-
-#include "lua.h"
-
-#include "lptypes.h"
-#include "lptree.h"
-#include "lpvm.h"
-
-int tocharset (TTree *tree, Charset *cs); /* non-zero result presumably means 'tree' is a plain charset, stored in 'cs' */
-int checkaux (TTree *tree, int pred); /* evaluates predicate 'pred' (PEnullable or PEnofail) on 'tree' */
-int fixedlen (TTree *tree); /* callers treat a negative result as "no fixed length" */
-int hascaptures (TTree *tree);
-int lp_gc (lua_State *L);
-Instruction *compile (lua_State *L, Pattern *p); /* returns p->code */
-void realloccode (lua_State *L, Pattern *p, int nsize); /* presumably resizes p->code to 'nsize' instructions */
-int sizei (const Instruction *i); /* used to step from one instruction to the next */
-
-
-#define PEnullable 0
-#define PEnofail 1
-
-/*
-** nofail(t) implies that 't' cannot fail with any input
-*/
-#define nofail(t) checkaux(t, PEnofail)
-
-/*
-** (not nullable(t)) implies 't' cannot match without consuming
-** something
-*/
-#define nullable(t) checkaux(t, PEnullable)
-
-
-
-#endif
diff --git a/src/ext/lpeg/lpeg-128.gif b/src/ext/lpeg/lpeg-128.gif
deleted file mode 100644
index bbf5e78..0000000
Binary files a/src/ext/lpeg/lpeg-128.gif and /dev/null differ
diff --git a/src/ext/lpeg/lpeg.html b/src/ext/lpeg/lpeg.html
deleted file mode 100644
index 5c9535f..0000000
--- a/src/ext/lpeg/lpeg.html
+++ /dev/null
@@ -1,1445 +0,0 @@
-
-
-
- LPeg - Parsing Expression Grammars For Lua
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
LPeg
-
- Parsing Expression Grammars For Lua, version 1.0
-
-
-
-
-
-
-
-
-
-
-
-
-
-LPeg is a new pattern-matching library for Lua,
-based on
-
-Parsing Expression Grammars (PEGs).
-This text is a reference manual for the library.
-For a more formal treatment of LPeg,
-as well as some discussion about its implementation,
-see
-
-A Text Pattern-Matching Tool based on Parsing Expression Grammars .
-(You may also be interested in my
-talk about LPeg
-given at the III Lua Workshop.)
-
-
-
-Following the Snobol tradition,
-LPeg defines patterns as first-class objects.
-That is, patterns are regular Lua values
-(represented by userdata).
-The library offers several functions to create
-and compose patterns.
-With the use of metamethods,
-several of these functions are provided as infix or prefix
-operators.
-On the one hand,
-the result is usually much more verbose than the typical
-encoding of patterns using the so called
-regular expressions
-(which typically are not regular expressions in the formal sense).
-On the other hand,
-first-class patterns allow much better documentation
-(as it is easy to comment the code,
-to break complex definitions in smaller parts, etc.)
-and are extensible,
-as we can define new functions to create and compose patterns.
-
-
-
-For a quick glance of the library,
-the following table summarizes its basic operations
-for creating patterns:
-
-
-Operator Description
-lpeg.P(string)
- Matches string
literally
-lpeg.P(n)
- Matches exactly n
characters
-lpeg.S(string)
- Matches any character in string
(Set)
-lpeg.R("xy")
- Matches any character between x and y (Range)
-patt^n
- Matches at least n
repetitions of patt
-patt^-n
- Matches at most n
repetitions of patt
-patt1 * patt2
- Matches patt1
followed by patt2
-patt1 + patt2
- Matches patt1
or patt2
- (ordered choice)
-patt1 - patt2
- Matches patt1
if patt2
does not match
--patt
- Equivalent to ("" - patt)
-#patt
- Matches patt
but consumes no input
-lpeg.B(patt)
- Matches patt
behind the current position,
- consuming no input
-
-
-
As a very simple example,
-lpeg.R("09")^1
creates a pattern that
-matches a non-empty sequence of digits.
-As a not so simple example,
--lpeg.P(1)
-(which can be written as lpeg.P(-1)
,
-or simply -1
for operations expecting a pattern)
-matches an empty string only if it cannot match a single character;
-so, it succeeds only at the end of the subject.
-
-
-
-LPeg also offers the re
module ,
-which implements patterns following a regular-expression style
-(e.g., [09]+
).
-(This module is 260 lines of Lua code,
-and of course it uses LPeg to parse regular expressions and
-translate them to regular LPeg patterns.)
-
-
-
-
-
-
-
lpeg.match (pattern, subject [, init])
-
-The matching function.
-It attempts to match the given pattern against the subject string.
-If the match succeeds,
-returns the index in the subject of the first character after the match,
-or the captured values
-(if the pattern captured any value).
-
-
-
-An optional numeric argument init
makes the match
-start at that position in the subject string.
-As usual in Lua libraries,
-a negative value counts from the end.
-
-
-
-Unlike typical pattern-matching functions,
-match
works only in anchored mode;
-that is, it tries to match the pattern with a prefix of
-the given subject string (at position init
),
-not with an arbitrary substring of the subject.
-So, if we want to find a pattern anywhere in a string,
-we must either write a loop in Lua or write a pattern that
-matches anywhere.
-This second approach is easy and quite efficient;
-see examples .
-
-
-
lpeg.type (value)
-
-If the given value is a pattern,
-returns the string "pattern"
.
-Otherwise returns nil.
-
-
-
lpeg.version ()
-
-Returns a string with the running version of LPeg.
-
-
-
lpeg.setmaxstack (max)
-
-Sets a limit for the size of the backtrack stack used by LPeg to
-track calls and choices.
-(The default limit is 400.)
-Most well-written patterns need few backtrack levels and
-therefore you seldom need to change this limit;
-before changing it you should try to rewrite your
-pattern to avoid the need for extra space.
-Nevertheless, a few useful patterns may overflow.
-Also, with recursive grammars,
-subjects with deep recursion may also need larger limits.
-
-
-
-
-
-
-The following operations build patterns.
-All operations that expect a pattern as an argument
-may receive also strings, tables, numbers, booleans, or functions,
-which are translated to patterns according to
-the rules of function lpeg.P
.
-
-
-
-
-
lpeg.P (value)
-
-Converts the given value into a proper pattern,
-according to the following rules:
-
-
-
-
-If the argument is a pattern,
-it is returned unmodified.
-
-
-
-If the argument is a string,
-it is translated to a pattern that matches the string literally.
-
-
-
-If the argument is a non-negative number n ,
-the result is a pattern that matches exactly n characters.
-
-
-
-If the argument is a negative number -n ,
-the result is a pattern that
-succeeds only if the input string has fewer than n characters left:
-lpeg.P(-n)
-is equivalent to -lpeg.P(n)
-(see the unary minus operation ).
-
-
-
-If the argument is a boolean,
-the result is a pattern that always succeeds or always fails
-(according to the boolean value),
-without consuming any input.
-
-
-
-If the argument is a table,
-it is interpreted as a grammar
-(see Grammars ).
-
-
-
-If the argument is a function,
-returns a pattern equivalent to a
-match-time capture over the empty string.
-
-
-
-
-
-
lpeg.B(patt)
-
-Returns a pattern that
-matches only if the input string at the current position
-is preceded by patt
.
-Pattern patt
must match only strings
-with some fixed length,
-and it cannot contain captures.
-
-
-
-Like the and predicate ,
-this pattern never consumes any input,
-independently of success or failure.
-
-
-
-
lpeg.R ({range})
-
-Returns a pattern that matches any single character
-belonging to one of the given ranges .
-Each range
is a string xy of length 2,
-representing all characters with code
-between the codes of x and y
-(both inclusive).
-
-
-
-As an example, the pattern
-lpeg.R("09")
matches any digit,
-and lpeg.R("az", "AZ")
matches any ASCII letter.
-
-
-
-
lpeg.S (string)
-
-Returns a pattern that matches any single character that
-appears in the given string.
-(The S
stands for Set .)
-
-
-
-As an example, the pattern
-lpeg.S("+-*/")
matches any arithmetic operator.
-
-
-
-Note that, if s
is a character
-(that is, a string of length 1),
-then lpeg.P(s)
is equivalent to lpeg.S(s)
-which is equivalent to lpeg.R(s..s)
.
-Note also that both lpeg.S("")
and lpeg.R()
-are patterns that always fail.
-
-
-
-
lpeg.V (v)
-
-This operation creates a non-terminal (a variable )
-for a grammar.
-The created non-terminal refers to the rule indexed by v
-in the enclosing grammar.
-(See Grammars for details.)
-
-
-
-
lpeg.locale ([table])
-
-Returns a table with patterns for matching some character classes
-according to the current locale.
-The table has fields named
-alnum
,
-alpha
,
-cntrl
,
-digit
,
-graph
,
-lower
,
-print
,
-punct
,
-space
,
-upper
, and
-xdigit
,
-each one containing a correspondent pattern.
-Each pattern matches any single character that belongs to its class.
-
-
-
-If called with an argument table
,
-then it creates those fields inside the given table and
-returns that table.
-
-
-
-
#patt
-
-Returns a pattern that
-matches only if the input string matches patt
,
-but without consuming any input,
-independently of success or failure.
-(This pattern is called an and predicate
-and it is equivalent to
-&patt in the original PEG notation.)
-
-
-
-
-This pattern never produces any capture.
-
-
-
-
-patt
-
-Returns a pattern that
-matches only if the input string does not match patt
.
-It does not consume any input,
-independently of success or failure.
-(This pattern is equivalent to
-!patt in the original PEG notation.)
-
-
-
-As an example, the pattern
--lpeg.P(1)
matches only the end of string.
-
-
-
-This pattern never produces any captures,
-because either patt
fails
-or -patt
fails.
-(A failing pattern never produces captures.)
-
-
-
-
patt1 + patt2
-
-Returns a pattern equivalent to an ordered choice
-of patt1
and patt2
.
-(This is denoted by patt1 / patt2 in the original PEG notation,
-not to be confused with the /
operation in LPeg.)
-It matches either patt1
or patt2
,
-with no backtracking once one of them succeeds.
-The identity element for this operation is the pattern
-lpeg.P(false)
,
-which always fails.
-
-
-
-If both patt1
and patt2
are
-character sets,
-this operation is equivalent to set union.
-
-
-lower = lpeg.R("az")
-upper = lpeg.R("AZ")
-letter = lower + upper
-
-
-
-
patt1 - patt2
-
-Returns a pattern equivalent to !patt2 patt1 .
-This pattern asserts that the input does not match
-patt2
and then matches patt1
.
-
-
-
-When successful,
-this pattern produces all captures from patt1
.
-It never produces any capture from patt2
-(as either patt2
fails or
-patt1 - patt2
fails).
-
-
-
-If both patt1
and patt2
are
-character sets,
-this operation is equivalent to set difference.
-Note that -patt
is equivalent to "" - patt
-(or 0 - patt
).
-If patt
is a character set,
-1 - patt
is its complement.
-
-
-
-
patt1 * patt2
-
-Returns a pattern that matches patt1
-and then matches patt2
,
-starting where patt1
finished.
-The identity element for this operation is the
-pattern lpeg.P(true)
,
-which always succeeds.
-
-
-
-(LPeg uses the *
operator
-[instead of the more obvious ..
]
-both because it has
-the right priority and because in formal languages it is
-common to use a dot for denoting concatenation.)
-
-
-
-
patt^n
-
-If n
is nonnegative,
-this pattern is
-equivalent to patt<sup>n</sup> patt*:
-It matches n
or more occurrences of patt
.
-
-
-
-Otherwise, when n
is negative,
-this pattern is equivalent to (patt?)<sup>-n</sup>:
-It matches at most |n|
-occurrences of patt
.
-
-
-
-In particular, patt^0
is equivalent to patt* ,
-patt^1
is equivalent to patt+ ,
-and patt^-1
is equivalent to patt?
-in the original PEG notation.
-
-
-
-In all cases,
-the resulting pattern is greedy with no backtracking
-(also called a possessive repetition).
-That is, it matches only the longest possible sequence
-of matches for patt
.
-
-
-
-
-
-
-
-With the use of Lua variables,
-it is possible to define patterns incrementally,
-with each new pattern using previously defined ones.
-However, this technique does not allow the definition of
-recursive patterns.
-For recursive patterns,
-we need real grammars.
-
-
-
-LPeg represents grammars with tables,
-where each entry is a rule.
-
-
-
-The call lpeg.V(v)
-creates a pattern that represents the nonterminal
-(or variable ) with index v
in a grammar.
-Because the grammar still does not exist when
-this function is evaluated,
-the result is an open reference to the respective rule.
-
-
-
-A table is fixed when it is converted to a pattern
-(either by calling lpeg.P
or by using it wherein a
-pattern is expected).
-Then every open reference created by lpeg.V(v)
-is corrected to refer to the rule indexed by v
in the table.
-
-
-
-When a table is fixed,
-the result is a pattern that matches its initial rule .
-The entry with index 1 in the table defines its initial rule.
-If that entry is a string,
-it is assumed to be the name of the initial rule.
-Otherwise, LPeg assumes that the entry 1 itself is the initial rule.
-
-
-
-As an example,
-the following grammar matches strings of a's and b's that
-have the same number of a's and b's:
-
-
-equalcount = lpeg.P{
- "S"; -- initial rule name
- S = "a" * lpeg.V"B" + "b" * lpeg.V"A" + "",
- A = "a" * lpeg.V"S" + "b" * lpeg.V"A" * lpeg.V"A",
- B = "b" * lpeg.V"S" + "a" * lpeg.V"B" * lpeg.V"B",
-} * -1
-
-
-It is equivalent to the following grammar in standard PEG notation:
-
-
- S <- 'a' B / 'b' A / ''
- A <- 'a' S / 'b' A A
- B <- 'b' S / 'a' B B
-
-
-
-
-
-
-A capture is a pattern that produces values
-(the so called semantic information )
-according to what it matches.
-LPeg offers several kinds of captures,
-which produce values based on matches and combine these values to
-produce new values.
-Each capture may produce zero or more values.
-
-
-
-The following table summarizes the basic captures:
-
-
-Operation What it Produces
-lpeg.C(patt)
- the match for patt
plus all captures
- made by patt
-lpeg.Carg(n)
- the value of the nth extra argument to
- lpeg.match
(matches the empty string)
-lpeg.Cb(name)
- the values produced by the previous
- group capture named name
- (matches the empty string)
-lpeg.Cc(values)
- the given values (matches the empty string)
-lpeg.Cf(patt, func)
- a folding of the captures from patt
-lpeg.Cg(patt [, name])
- the values produced by patt
,
- optionally tagged with name
-lpeg.Cp()
- the current position (matches the empty string)
-lpeg.Cs(patt)
- the match for patt
- with the values from nested captures replacing their matches
-lpeg.Ct(patt)
- a table with all captures from patt
-patt / string
- string
, with some marks replaced by captures
- of patt
-patt / number
- the n-th value captured by patt
,
-or no value when number
is zero.
-patt / table
- table[c]
, where c
is the (first)
- capture of patt
-patt / function
- the returns of function
applied to the captures
- of patt
-lpeg.Cmt(patt, function)
- the returns of function
applied to the captures
- of patt
; the application is done at match time
-
-
-
-A capture pattern produces its values only when it succeeds.
-For instance,
-the pattern lpeg.C(lpeg.P"a"^-1)
-produces the empty string when there is no "a"
-(because the pattern "a"?
succeeds),
-while the pattern lpeg.C("a")^-1
-does not produce any value when there is no "a"
-(because the pattern "a"
fails).
-A pattern inside a loop or inside a recursive structure
-produces values for each match.
-
-
-
-Usually,
-LPeg does not specify when (and if) it evaluates its captures.
-(As an example,
-consider the pattern lpeg.P"a" / func / 0
.
-Because the "division" by 0 instructs LPeg to throw away the
-results from the pattern,
-LPeg may or may not call func
.)
-Therefore, captures should avoid side effects.
-Moreover,
-most captures cannot affect the way a pattern matches a subject.
-The only exception to this rule is the
-so-called match-time capture .
-When a match-time capture matches,
-it forces the immediate evaluation of all its nested captures
-and then calls its corresponding function,
-which defines whether the match succeeds and also
-what values are produced.
-
-
-
lpeg.C (patt)
-
-Creates a simple capture ,
-which captures the substring of the subject that matches patt
.
-The captured value is a string.
-If patt
has other captures,
-their values are returned after this one.
-
-
-
-
lpeg.Carg (n)
-
-Creates an argument capture .
-This pattern matches the empty string and
-produces the value given as the nth extra
-argument given in the call to lpeg.match
.
-
-
-
-
lpeg.Cb (name)
-
-Creates a back capture .
-This pattern matches the empty string and
-produces the values produced by the most recent
-group capture named name
-(where name
can be any Lua value).
-
-
-
-Most recent means the last
-complete
-outermost
-group capture with the given name.
-A Complete capture means that the entire pattern
-corresponding to the capture has matched.
-An Outermost capture means that the capture is not inside
-another complete capture.
-
-
-
-In the same way that LPeg does not specify when it evaluates captures,
-it does not specify whether it reuses
-values previously produced by the group
-or re-evaluates them.
-
-
-
lpeg.Cc ([value, ...])
-
-Creates a constant capture .
-This pattern matches the empty string and
-produces all given values as its captured values.
-
-
-
-
lpeg.Cf (patt, func)
-
-Creates a fold capture .
-If patt
produces a list of captures
-C<sub>1</sub> C<sub>2</sub> ... C<sub>n</sub>,
-this capture will produce the value
-func(...func(func(C<sub>1</sub>, C<sub>2</sub>), C<sub>3</sub>)...,
- C<sub>n</sub>),
-that is, it will fold
-(or accumulate , or reduce )
-the captures from patt
using function func
.
-
-
-
-This capture assumes that patt
should produce
-at least one capture with at least one value (of any type),
-which becomes the initial value of an accumulator .
-(If you need a specific initial value,
-you may prefix a constant capture to patt
.)
-For each subsequent capture,
-LPeg calls func
-with this accumulator as the first argument and all values produced
-by the capture as extra arguments;
-the first result from this call
-becomes the new value for the accumulator.
-The final value of the accumulator becomes the captured value.
-
-
-
-As an example,
-the following pattern matches a list of numbers separated
-by commas and returns their addition:
-
-
--- matches a numeral and captures its numerical value
-number = lpeg.R"09"^1 / tonumber
-
--- matches a list of numbers, capturing their values
-list = number * ("," * number)^0
-
--- auxiliary function to add two numbers
-function add (acc, newvalue) return acc + newvalue end
-
--- folds the list of numbers adding them
-sum = lpeg.Cf(list, add)
-
--- example of use
-print(sum:match("10,30,43")) --> 83
-
-
-
-
lpeg.Cg (patt [, name])
-
-Creates a group capture .
-It groups all values returned by patt
-into a single capture.
-The group may be anonymous (if no name is given)
-or named with the given name
-(which can be any non-nil Lua value).
-
-
-
-An anonymous group serves to join values from several captures into
-a single capture.
-A named group has a different behavior.
-In most situations, a named group returns no values at all.
-Its values are only relevant for a following
-back capture or when used
-inside a table capture .
-
-
-
-
lpeg.Cp ()
-
-Creates a position capture .
-It matches the empty string and
-captures the position in the subject where the match occurs.
-The captured value is a number.
-
-
-
-
lpeg.Cs (patt)
-
-Creates a substitution capture ,
-which captures the substring of the subject that matches patt
,
-with substitutions .
-For any capture inside patt
with a value,
-the substring that matched the capture is replaced by the capture value
-(which should be a string).
-The final captured value is the string resulting from
-all replacements.
-
-
-
-
lpeg.Ct (patt)
-
-Creates a table capture .
-This capture returns a table with all values from all anonymous captures
-made by patt
inside this table in successive integer keys,
-starting at 1.
-Moreover,
-for each named capture group created by patt
,
-the first value of the group is put into the table
-with the group name as its key.
-The captured value is only the table.
-
-
-
-
patt / string
-
-Creates a string capture .
-It creates a capture string based on string
.
-The captured value is a copy of string
,
-except that the character %
works as an escape character:
-any sequence in string
of the form %n
,
-with n between 1 and 9,
-stands for the match of the n -th capture in patt
.
-The sequence %0
stands for the whole match.
-The sequence %%
stands for a single %
.
-
-
-
-
patt / number
-
-Creates a numbered capture .
-For a non-zero number,
-the captured value is the n-th value
-captured by patt
.
-When number
is zero,
-there are no captured values.
-
-
-
-
patt / table
-
-Creates a query capture .
-It indexes the given table using as key the first value captured by
-patt
,
-or the whole match if patt
produced no value.
-The value at that index is the final value of the capture.
-If the table does not have that key,
-there is no captured value.
-
-
-
-
patt / function
-
-Creates a function capture .
-It calls the given function passing all captures made by
-patt
as arguments,
-or the whole match if patt
made no capture.
-The values returned by the function
-are the final values of the capture.
-In particular,
-if function
returns no value,
-there is no captured value.
-
-
-
-
lpeg.Cmt(patt, function)
-
-Creates a match-time capture .
-Unlike all other captures,
-this one is evaluated immediately when a match occurs
-(even if it is part of a larger pattern that fails later).
-It forces the immediate evaluation of all its nested captures
-and then calls function
.
-
-
-
-The given function gets as arguments the entire subject,
-the current position (after the match of patt
),
-plus any capture values produced by patt
.
-
-
-
-The first value returned by function
-defines how the match happens.
-If the call returns a number,
-the match succeeds
-and the returned number becomes the new current position.
-(Assuming a subject s and current position i ,
-the returned number must be in the range [i, len(s) + 1] .)
-If the call returns true ,
-the match succeeds without consuming any input.
-(So, to return true is equivalent to return i .)
-If the call returns false , nil , or no value,
-the match fails.
-
-
-
-Any extra values returned by the function become the
-values produced by the capture.
-
-
-
-
-
-
-
-
Using a Pattern
-
-This example shows a very simple but complete program
-that builds and uses a pattern:
-
-
-local lpeg = require "lpeg"
-
--- matches a word followed by end-of-string
-p = lpeg.R"az"^1 * -1
-
-print(p:match("hello")) --> 6
-print(lpeg.match(p, "hello")) --> 6
-print(p:match("1 hello")) --> nil
-
-
-The pattern is simply a sequence of one or more lower-case letters
-followed by the end of string (-1).
-The program calls match
both as a method
-and as a function.
-In both sucessful cases,
-the match returns
-the index of the first character after the match,
-which is the string length plus one.
-
-
-
-
Name-value lists
-
-This example parses a list of name-value pairs and returns a table
-with those pairs:
-
-
-lpeg.locale(lpeg) -- adds locale entries into 'lpeg' table
-
-local space = lpeg.space^0
-local name = lpeg.C(lpeg.alpha^1) * space
-local sep = lpeg.S(",;") * space
-local pair = lpeg.Cg(name * "=" * space * name) * sep^-1
-local list = lpeg.Cf(lpeg.Ct("") * pair^0, rawset)
-t = list:match("a=b, c = hi; next = pi") --> { a = "b", c = "hi", next = "pi" }
-
-
-Each pair has the format name = name
followed by
-an optional separator (a comma or a semicolon).
-The pair
pattern encloses the pair in a group pattern,
-so that the names become the values of a single capture.
-The list
pattern then folds these captures.
-It starts with an empty table,
-created by a table capture matching an empty string;
-then for each capture (a pair of names) it applies rawset
-over the accumulator (the table) and the capture values (the pair of names).
-rawset
returns the table itself,
-so the accumulator is always the table.
-
-
-
Splitting a string
-
-The following code builds a pattern that
-splits a string using a given pattern
-sep
as a separator:
-
-
-function split (s, sep)
- sep = lpeg.P(sep)
- local elem = lpeg.C((1 - sep)^0)
- local p = elem * (sep * elem)^0
- return lpeg.match(p, s)
-end
-
-
-First the function ensures that sep
is a proper pattern.
-The pattern elem
is a repetition of zero of more
-arbitrary characters as long as there is not a match against
-the separator.
-It also captures its match.
-The pattern p
matches a list of elements separated
-by sep
.
-
-
-
-If the split results in too many values,
-it may overflow the maximum number of values
-that can be returned by a Lua function.
-In this case,
-we can collect these values in a table:
-
-
-function split (s, sep)
- sep = lpeg.P(sep)
- local elem = lpeg.C((1 - sep)^0)
- local p = lpeg.Ct(elem * (sep * elem)^0) -- make a table capture
- return lpeg.match(p, s)
-end
-
-
-
-
Searching for a pattern
-
-The primitive match
works only in anchored mode.
-If we want to find a pattern anywhere in a string,
-we must write a pattern that matches anywhere.
-
-
-
-Because patterns are composable,
-we can write a function that,
-given any arbitrary pattern p
,
-returns a new pattern that searches for p
-anywhere in a string.
-There are several ways to do the search.
-One way is like this:
-
-
-function anywhere (p)
- return lpeg.P{ p + 1 * lpeg.V(1) }
-end
-
-
-This grammar has a straight reading:
-it matches p
or skips one character and tries again.
-
-
-
-If we want to know where the pattern is in the string
-(instead of knowing only that it is there somewhere),
-we can add position captures to the pattern:
-
-
-local I = lpeg.Cp()
-function anywhere (p)
- return lpeg.P{ I * p * I + 1 * lpeg.V(1) }
-end
-
-print(anywhere("world"):match("hello world!")) -> 7 12
-
-
-
-Another option for the search is like this:
-
-
-local I = lpeg.Cp()
-function anywhere (p)
- return (1 - lpeg.P(p))^0 * I * p * I
-end
-
-
-Again the pattern has a straight reading:
-it skips as many characters as possible while not matching p
,
-and then matches p
(plus appropriate captures).
-
-
-
-If we want to look for a pattern only at word boundaries,
-we can use the following transformer:
-
-
-
-local t = lpeg.locale()
-
-function atwordboundary (p)
- return lpeg.P{
- [1] = p + t.alpha^0 * (1 - t.alpha)^1 * lpeg.V(1)
- }
-end
-
-
-
-
Balanced parentheses
-
-The following pattern matches only strings with balanced parentheses:
-
-
-b = lpeg.P{ "(" * ((1 - lpeg.S"()") + lpeg.V(1))^0 * ")" }
-
-
-Reading the first (and only) rule of the given grammar,
-we have that a balanced string is
-an open parenthesis,
-followed by zero or more repetitions of either
-a non-parenthesis character or
-a balanced string (lpeg.V(1)
),
-followed by a closing parenthesis.
-
-
-
-
Global substitution
-
-The next example does a job somewhat similar to string.gsub
.
-It receives a pattern and a replacement value,
-and substitutes the replacement value for all occurrences of the pattern
-in a given string:
-
-
-function gsub (s, patt, repl)
- patt = lpeg.P(patt)
- patt = lpeg.Cs((patt / repl + 1)^0)
- return lpeg.match(patt, s)
-end
-
-
-As in string.gsub
,
-the replacement value can be a string,
-a function, or a table.
-
-
-
-
Comma-Separated Values (CSV)
-
-This example breaks a string into comma-separated values,
-returning all fields:
-
-
-local field = '"' * lpeg.Cs(((lpeg.P(1) - '"') + lpeg.P'""' / '"')^0) * '"' +
- lpeg.C((1 - lpeg.S',\n"')^0)
-
-local record = field * (',' * field)^0 * (lpeg.P'\n' + -1)
-
-function csv (s)
- return lpeg.match(record, s)
-end
-
-
-A field is either a quoted field
-(which may contain any character except an individual quote,
-which may be written as two quotes that are replaced by one)
-or an unquoted field
-(which cannot contain commas, newlines, or quotes).
-A record is a list of fields separated by commas,
-ending with a newline or the string end (-1).
-
-
-
-As it is,
-the previous pattern returns each field as a separated result.
-If we add a table capture in the definition of record
,
-the pattern will return instead a single table
-containing all fields:
-
-
-local record = lpeg.Ct(field * (',' * field)^0) * (lpeg.P'\n' + -1)
-
-
-
-
UTF-8 and Latin 1
-
-It is not difficult to use LPeg to convert a string from
-UTF-8 encoding to Latin 1 (ISO 8859-1):
-
-
-
--- convert a two-byte UTF-8 sequence to a Latin 1 character
-local function f2 (s)
- local c1, c2 = string.byte(s, 1, 2)
- return string.char(c1 * 64 + c2 - 12416)
-end
-
-local utf8 = lpeg.R("\0\127")
- + lpeg.R("\194\195") * lpeg.R("\128\191") / f2
-
-local decode_pattern = lpeg.Cs(utf8^0) * -1
-
-
-In this code,
-the definition of UTF-8 is already restricted to the
-Latin 1 range (from 0 to 255).
-Any encoding outside this range (as well as any invalid encoding)
-will not match that pattern.
-
-
-
-As the definition of decode_pattern
demands that
-the pattern matches the whole input (because of the -1 at its end),
-any invalid string will simply fail to match,
-without any useful information about the problem.
-We can improve this situation redefining decode_pattern
-as follows:
-
-
-local function er (_, i) error("invalid encoding at position " .. i) end
-
-local decode_pattern = lpeg.Cs(utf8^0) * (-1 + lpeg.P(er))
-
-
-Now, if the pattern utf8^0
stops
-before the end of the string,
-an appropriate error function is called.
-
-
-
-
UTF-8 and Unicode
-
-We can extend the previous patterns to handle all Unicode code points.
-Of course,
-we cannot translate them to Latin 1 or any other one-byte encoding.
-Instead, our translation results in a array with the code points
-represented as numbers.
-The full code is here:
-
-
--- decode a two-byte UTF-8 sequence
-local function f2 (s)
- local c1, c2 = string.byte(s, 1, 2)
- return c1 * 64 + c2 - 12416
-end
-
--- decode a three-byte UTF-8 sequence
-local function f3 (s)
- local c1, c2, c3 = string.byte(s, 1, 3)
- return (c1 * 64 + c2) * 64 + c3 - 925824
-end
-
--- decode a four-byte UTF-8 sequence
-local function f4 (s)
- local c1, c2, c3, c4 = string.byte(s, 1, 4)
- return ((c1 * 64 + c2) * 64 + c3) * 64 + c4 - 63447168
-end
-
-local cont = lpeg.R("\128\191") -- continuation byte
-
-local utf8 = lpeg.R("\0\127") / string.byte
- + lpeg.R("\194\223") * cont / f2
- + lpeg.R("\224\239") * cont * cont / f3
- + lpeg.R("\240\244") * cont * cont * cont / f4
-
-local decode_pattern = lpeg.Ct(utf8^0) * -1
-
-
-
-
Lua's long strings
-
-A long string in Lua starts with the pattern [=*[
-and ends at the first occurrence of ]=*]
with
-exactly the same number of equal signs.
-If the opening brackets are followed by a newline,
-this newline is discarded
-(that is, it is not part of the string).
-
-
-
-To match a long string in Lua,
-the pattern must capture the first repetition of equal signs and then,
-whenever it finds a candidate for closing the string,
-check whether it has the same number of equal signs.
-
-
-
-equals = lpeg.P"="^0
-open = "[" * lpeg.Cg(equals, "init") * "[" * lpeg.P"\n"^-1
-close = "]" * lpeg.C(equals) * "]"
-closeeq = lpeg.Cmt(close * lpeg.Cb("init"), function (s, i, a, b) return a == b end)
-string = open * lpeg.C((lpeg.P(1) - closeeq)^0) * close / 1
-
-
-
-The open
pattern matches [=*[
,
-capturing the repetitions of equal signs in a group named init
;
-it also discharges an optional newline, if present.
-The close
pattern matches ]=*]
,
-also capturing the repetitions of equal signs.
-The closeeq
pattern first matches close
;
-then it uses a back capture to recover the capture made
-by the previous open
,
-which is named init
;
-finally it uses a match-time capture to check
-whether both captures are equal.
-The string
pattern starts with an open
,
-then it goes as far as possible until matching closeeq
,
-and then matches the final close
.
-The final numbered capture simply discards
-the capture made by close
.
-
-
-
-
Arithmetic expressions
-
-This example is a complete parser and evaluator for simple
-arithmetic expressions.
-We write it in two styles.
-The first approach first builds a syntax tree and then
-traverses this tree to compute the expression value:
-
-
--- Lexical Elements
-local Space = lpeg.S(" \n\t")^0
-local Number = lpeg.C(lpeg.P"-"^-1 * lpeg.R("09")^1) * Space
-local TermOp = lpeg.C(lpeg.S("+-")) * Space
-local FactorOp = lpeg.C(lpeg.S("*/")) * Space
-local Open = "(" * Space
-local Close = ")" * Space
-
--- Grammar
-local Exp, Term, Factor = lpeg.V"Exp", lpeg.V"Term", lpeg.V"Factor"
-G = lpeg.P{ Exp,
- Exp = lpeg.Ct(Term * (TermOp * Term)^0);
- Term = lpeg.Ct(Factor * (FactorOp * Factor)^0);
- Factor = Number + Open * Exp * Close;
-}
-
-G = Space * G * -1
-
--- Evaluator
-function eval (x)
- if type(x) == "string" then
- return tonumber(x)
- else
- local op1 = eval(x[1])
- for i = 2, #x, 2 do
- local op = x[i]
- local op2 = eval(x[i + 1])
- if (op == "+") then op1 = op1 + op2
- elseif (op == "-") then op1 = op1 - op2
- elseif (op == "*") then op1 = op1 * op2
- elseif (op == "/") then op1 = op1 / op2
- end
- end
- return op1
- end
-end
-
--- Parser/Evaluator
-function evalExp (s)
- local t = lpeg.match(G, s)
- if not t then error("syntax error", 2) end
- return eval(t)
-end
-
--- small example
-print(evalExp"3 + 5*9 / (1+1) - 12") --> 13.5
-
-
-
-The second style computes the expression value on the fly,
-without building the syntax tree.
-The following grammar takes this approach.
-(It assumes the same lexical elements as before.)
-
-
--- Auxiliary function
-function eval (v1, op, v2)
- if (op == "+") then return v1 + v2
- elseif (op == "-") then return v1 - v2
- elseif (op == "*") then return v1 * v2
- elseif (op == "/") then return v1 / v2
- end
-end
-
--- Grammar
-local V = lpeg.V
-G = lpeg.P{ "Exp",
- Exp = lpeg.Cf(V"Term" * lpeg.Cg(TermOp * V"Term")^0, eval);
- Term = lpeg.Cf(V"Factor" * lpeg.Cg(FactorOp * V"Factor")^0, eval);
- Factor = Number / tonumber + Open * V"Exp" * Close;
-}
-
--- small example
-print(lpeg.match(G, "3 + 5*9 / (1+1) - 12")) --> 13.5
-
-
-Note the use of the fold (accumulator) capture.
-To compute the value of an expression,
-the accumulator starts with the value of the first term,
-and then applies eval
over
-the accumulator, the operator,
-and the new term for each repetition.
-
-
-
-
-
Download
-
-
LPeg
-source code .
-
-
-
-
-
-Copyright © 2007-2017 Lua.org, PUC-Rio.
-
-
-Permission is hereby granted, free of charge,
-to any person obtaining a copy of this software and
-associated documentation files (the "Software"),
-to deal in the Software without restriction,
-including without limitation the rights to use,
-copy, modify, merge, publish, distribute, sublicense,
-and/or sell copies of the Software,
-and to permit persons to whom the Software is
-furnished to do so,
-subject to the following conditions:
-
-
-
-The above copyright notice and this permission notice
-shall be included in all copies or substantial portions of the Software.
-
-
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED,
-INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
-DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
-TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-
-
-
-
-
-
-
-
-$Id: lpeg.html,v 1.77 2017/01/13 13:40:05 roberto Exp $
-
-
-
-
-
-
-
diff --git a/src/ext/lpeg/lpprint.c b/src/ext/lpeg/lpprint.c
deleted file mode 100644
index f7be408..0000000
--- a/src/ext/lpeg/lpprint.c
+++ /dev/null
@@ -1,244 +0,0 @@
-/*
-** $Id: lpprint.c,v 1.10 2016/09/13 16:06:03 roberto Exp $
-** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license)
-*/
-
-#include
-#include
-#include
-
-
-#include "lptypes.h"
-#include "lpprint.h"
-#include "lpcode.h"
-
-
-#if defined(LPEG_DEBUG)
-
-/*
-** {======================================================
-** Printing patterns (for debugging)
-** =======================================================
-*/
-
-
-void printcharset (const byte *st) {
- int i;
- printf("[");
- for (i = 0; i <= UCHAR_MAX; i++) {
- int first = i;
- while (testchar(st, i) && i <= UCHAR_MAX) i++;
- if (i - 1 == first) /* unary range? */
- printf("(%02x)", first);
- else if (i - 1 > first) /* non-empty range? */
- printf("(%02x-%02x)", first, i - 1);
- }
- printf("]");
-}
-
-
-static const char *capkind (int kind) {
- const char *const modes[] = {
- "close", "position", "constant", "backref",
- "argument", "simple", "table", "function",
- "query", "string", "num", "substitution", "fold",
- "runtime", "group"};
- return modes[kind];
-}
-
-
-static void printjmp (const Instruction *op, const Instruction *p) {
- printf("-> %d", (int)(p + (p + 1)->offset - op));
-}
-
-
-void printinst (const Instruction *op, const Instruction *p) {
- const char *const names[] = {
- "any", "char", "set",
- "testany", "testchar", "testset",
- "span", "behind",
- "ret", "end",
- "choice", "jmp", "call", "open_call",
- "commit", "partial_commit", "back_commit", "failtwice", "fail", "giveup",
- "fullcapture", "opencapture", "closecapture", "closeruntime"
- };
- printf("%02ld: %s ", (long)(p - op), names[p->i.code]);
- switch ((Opcode)p->i.code) {
- case IChar: {
- printf("'%c'", p->i.aux);
- break;
- }
- case ITestChar: {
- printf("'%c'", p->i.aux); printjmp(op, p);
- break;
- }
- case IFullCapture: {
- printf("%s (size = %d) (idx = %d)",
- capkind(getkind(p)), getoff(p), p->i.key);
- break;
- }
- case IOpenCapture: {
- printf("%s (idx = %d)", capkind(getkind(p)), p->i.key);
- break;
- }
- case ISet: {
- printcharset((p+1)->buff);
- break;
- }
- case ITestSet: {
- printcharset((p+2)->buff); printjmp(op, p);
- break;
- }
- case ISpan: {
- printcharset((p+1)->buff);
- break;
- }
- case IOpenCall: {
- printf("-> %d", (p + 1)->offset);
- break;
- }
- case IBehind: {
- printf("%d", p->i.aux);
- break;
- }
- case IJmp: case ICall: case ICommit: case IChoice:
- case IPartialCommit: case IBackCommit: case ITestAny: {
- printjmp(op, p);
- break;
- }
- default: break;
- }
- printf("\n");
-}
-
-
-void printpatt (Instruction *p, int n) {
- Instruction *op = p;
- while (p < op + n) {
- printinst(op, p);
- p += sizei(p);
- }
-}
-
-
-#if defined(LPEG_DEBUG)
-static void printcap (Capture *cap) {
- printf("%s (idx: %d - size: %d) -> %p\n",
- capkind(cap->kind), cap->idx, cap->siz, cap->s);
-}
-
-
-void printcaplist (Capture *cap, Capture *limit) {
- printf(">======\n");
- for (; cap->s && (limit == NULL || cap < limit); cap++)
- printcap(cap);
- printf("=======\n");
-}
-#endif
-
-/* }====================================================== */
-
-
-/*
-** {======================================================
-** Printing trees (for debugging)
-** =======================================================
-*/
-
-static const char *tagnames[] = {
- "char", "set", "any",
- "true", "false",
- "rep",
- "seq", "choice",
- "not", "and",
- "call", "opencall", "rule", "grammar",
- "behind",
- "capture", "run-time"
-};
-
-
-void printtree (TTree *tree, int ident) {
- int i;
- for (i = 0; i < ident; i++) printf(" ");
- printf("%s", tagnames[tree->tag]);
- switch (tree->tag) {
- case TChar: {
- int c = tree->u.n;
- if (isprint(c))
- printf(" '%c'\n", c);
- else
- printf(" (%02X)\n", c);
- break;
- }
- case TSet: {
- printcharset(treebuffer(tree));
- printf("\n");
- break;
- }
- case TOpenCall: case TCall: {
- assert(sib2(tree)->tag == TRule);
- printf(" key: %d (rule: %d)\n", tree->key, sib2(tree)->cap);
- break;
- }
- case TBehind: {
- printf(" %d\n", tree->u.n);
- printtree(sib1(tree), ident + 2);
- break;
- }
- case TCapture: {
- printf(" kind: '%s' key: %d\n", capkind(tree->cap), tree->key);
- printtree(sib1(tree), ident + 2);
- break;
- }
- case TRule: {
- printf(" n: %d key: %d\n", tree->cap, tree->key);
- printtree(sib1(tree), ident + 2);
- break; /* do not print next rule as a sibling */
- }
- case TGrammar: {
- TTree *rule = sib1(tree);
- printf(" %d\n", tree->u.n); /* number of rules */
- for (i = 0; i < tree->u.n; i++) {
- printtree(rule, ident + 2);
- rule = sib2(rule);
- }
- assert(rule->tag == TTrue); /* sentinel */
- break;
- }
- default: {
- int sibs = numsiblings[tree->tag];
- printf("\n");
- if (sibs >= 1) {
- printtree(sib1(tree), ident + 2);
- if (sibs >= 2)
- printtree(sib2(tree), ident + 2);
- }
- break;
- }
- }
-}
-
-
-void printktable (lua_State *L, int idx) {
- int n, i;
- lua_getuservalue(L, idx);
- if (lua_isnil(L, -1)) /* no ktable? */
- return;
- n = lua_rawlen(L, -1);
- printf("[");
- for (i = 1; i <= n; i++) {
- printf("%d = ", i);
- lua_rawgeti(L, -1, i);
- if (lua_isstring(L, -1))
- printf("%s ", lua_tostring(L, -1));
- else
- printf("%s ", lua_typename(L, lua_type(L, -1)));
- lua_pop(L, 1);
- }
- printf("]\n");
- /* leave ktable at the stack */
-}
-
-/* }====================================================== */
-
-#endif
diff --git a/src/ext/lpeg/lpprint.h b/src/ext/lpeg/lpprint.h
deleted file mode 100644
index 6329760..0000000
--- a/src/ext/lpeg/lpprint.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
-** $Id: lpprint.h,v 1.2 2015/06/12 18:18:08 roberto Exp $
-*/
-
-
-#if !defined(lpprint_h)
-#define lpprint_h
-
-
-#include "lptree.h"
-#include "lpvm.h"
-
-
-#if defined(LPEG_DEBUG)
-
-void printpatt (Instruction *p, int n);
-void printtree (TTree *tree, int ident);
-void printktable (lua_State *L, int idx);
-void printcharset (const byte *st);
-void printcaplist (Capture *cap, Capture *limit);
-void printinst (const Instruction *op, const Instruction *p);
-
-#else
-
-#define printktable(L,idx) \
- luaL_error(L, "function only implemented in debug mode")
-#define printtree(tree,i) \
- luaL_error(L, "function only implemented in debug mode")
-#define printpatt(p,n) \
- luaL_error(L, "function only implemented in debug mode")
-
-#endif
-
-
-#endif
-
diff --git a/src/ext/lpeg/lptree.c b/src/ext/lpeg/lptree.c
deleted file mode 100644
index bda61b9..0000000
--- a/src/ext/lpeg/lptree.c
+++ /dev/null
@@ -1,1303 +0,0 @@
-/*
-** $Id: lptree.c,v 1.22 2016/09/13 18:10:22 roberto Exp $
-** Copyright 2013, Lua.org & PUC-Rio (see 'lpeg.html' for license)
-*/
-
-#include
-#include
-#include
-
-
-#include "lua.h"
-#include "lauxlib.h"
-
-#include "lptypes.h"
-#include "lpcap.h"
-#include "lpcode.h"
-#include "lpprint.h"
-#include "lptree.h"
-
-
-/* number of siblings for each tree */
-const byte numsiblings[] = {
- 0, 0, 0, /* char, set, any */
- 0, 0, /* true, false */
- 1, /* rep */
- 2, 2, /* seq, choice */
- 1, 1, /* not, and */
- 0, 0, 2, 1, /* call, opencall, rule, grammar */
- 1, /* behind */
- 1, 1 /* capture, runtime capture */
-};
-
-
-static TTree *newgrammar (lua_State *L, int arg);
-
-
-/*
-** returns a reasonable name for value at index 'idx' on the stack
-*/
-static const char *val2str (lua_State *L, int idx) {
- const char *k = lua_tostring(L, idx);
- if (k != NULL)
- return lua_pushfstring(L, "%s", k);
- else
- return lua_pushfstring(L, "(a %s)", luaL_typename(L, idx));
-}
-
-
-/*
-** Fix a TOpenCall into a TCall node, using table 'postable' to
-** translate a key to its rule address in the tree. Raises an
-** error if key does not exist.
-*/
-static void fixonecall (lua_State *L, int postable, TTree *g, TTree *t) {
- int n;
- lua_rawgeti(L, -1, t->key); /* get rule's name */
- lua_gettable(L, postable); /* query name in position table */
- n = lua_tonumber(L, -1); /* get (absolute) position */
- lua_pop(L, 1); /* remove position */
- if (n == 0) { /* no position? */
- lua_rawgeti(L, -1, t->key); /* get rule's name again */
- luaL_error(L, "rule '%s' undefined in given grammar", val2str(L, -1));
- }
- t->tag = TCall;
- t->u.ps = n - (t - g); /* position relative to node */
- assert(sib2(t)->tag == TRule);
- sib2(t)->key = t->key; /* fix rule's key */
-}
-
-
-/*
-** Transform left associative constructions into right
-** associative ones, for sequence and choice; that is:
-** (t11 + t12) + t2 => t11 + (t12 + t2)
-** (t11 * t12) * t2 => t11 * (t12 * t2)
-** (that is, Op (Op t11 t12) t2 => Op t11 (Op t12 t2))
-*/
-static void correctassociativity (TTree *tree) {
- TTree *t1 = sib1(tree);
- assert(tree->tag == TChoice || tree->tag == TSeq);
- while (t1->tag == tree->tag) {
- int n1size = tree->u.ps - 1; /* t1 == Op t11 t12 */
- int n11size = t1->u.ps - 1;
- int n12size = n1size - n11size - 1;
- memmove(sib1(tree), sib1(t1), n11size * sizeof(TTree)); /* move t11 */
- tree->u.ps = n11size + 1;
- sib2(tree)->tag = tree->tag;
- sib2(tree)->u.ps = n12size + 1;
- }
-}
-
-
-/*
-** Make final adjustments in a tree. Fix open calls in tree 't',
-** making them refer to their respective rules or raising appropriate
-** errors (if not inside a grammar). Correct associativity of associative
-** constructions (making them right associative). Assume that tree's
-** ktable is at the top of the stack (for error messages).
-*/
-static void finalfix (lua_State *L, int postable, TTree *g, TTree *t) {
- tailcall:
- switch (t->tag) {
- case TGrammar: /* subgrammars were already fixed */
- return;
- case TOpenCall: {
- if (g != NULL) /* inside a grammar? */
- fixonecall(L, postable, g, t);
- else { /* open call outside grammar */
- lua_rawgeti(L, -1, t->key);
- luaL_error(L, "rule '%s' used outside a grammar", val2str(L, -1));
- }
- break;
- }
- case TSeq: case TChoice:
- correctassociativity(t);
- break;
- }
- switch (numsiblings[t->tag]) {
- case 1: /* finalfix(L, postable, g, sib1(t)); */
- t = sib1(t); goto tailcall;
- case 2:
- finalfix(L, postable, g, sib1(t));
- t = sib2(t); goto tailcall; /* finalfix(L, postable, g, sib2(t)); */
- default: assert(numsiblings[t->tag] == 0); break;
- }
-}
-
-
-
-/*
-** {===================================================================
-** KTable manipulation
-**
-** - The ktable of a pattern 'p' can be shared by other patterns that
-** contain 'p' and no other constants. Because of this sharing, we
-** should not add elements to a 'ktable' unless it was freshly created
-** for the new pattern.
-**
-** - The maximum index in a ktable is USHRT_MAX, because trees and
-** patterns use unsigned shorts to store those indices.
-** ====================================================================
-*/
-
-/*
-** Create a new 'ktable' to the pattern at the top of the stack.
-*/
-static void newktable (lua_State *L, int n) {
- lua_createtable(L, n, 0); /* create a fresh table */
- lua_setuservalue(L, -2); /* set it as 'ktable' for pattern */
-}
-
-
-/*
-** Add element 'idx' to 'ktable' of pattern at the top of the stack;
-** Return index of new element.
-** If new element is nil, does not add it to table (as it would be
-** useless) and returns 0, as ktable[0] is always nil.
-*/
-static int addtoktable (lua_State *L, int idx) {
- if (lua_isnil(L, idx)) /* nil value? */
- return 0;
- else {
- int n;
- lua_getuservalue(L, -1); /* get ktable from pattern */
- n = lua_rawlen(L, -1);
- if (n >= USHRT_MAX)
- luaL_error(L, "too many Lua values in pattern");
- lua_pushvalue(L, idx); /* element to be added */
- lua_rawseti(L, -2, ++n);
- lua_pop(L, 1); /* remove 'ktable' */
- return n;
- }
-}
-
-
-/*
-** Return the number of elements in the ktable at 'idx'.
-** In Lua 5.2/5.3, default "environment" for patterns is nil, not
-** a table. Treat it as an empty table. In Lua 5.1, assumes that
-** the environment has no numeric indices (len == 0)
-*/
-static int ktablelen (lua_State *L, int idx) {
- if (!lua_istable(L, idx)) return 0;
- else return lua_rawlen(L, idx);
-}
-
-
-/*
-** Concatentate the contents of table 'idx1' into table 'idx2'.
-** (Assume that both indices are negative.)
-** Return the original length of table 'idx2' (or 0, if no
-** element was added, as there is no need to correct any index).
-*/
-static int concattable (lua_State *L, int idx1, int idx2) {
- int i;
- int n1 = ktablelen(L, idx1);
- int n2 = ktablelen(L, idx2);
- if (n1 + n2 > USHRT_MAX)
- luaL_error(L, "too many Lua values in pattern");
- if (n1 == 0) return 0; /* nothing to correct */
- for (i = 1; i <= n1; i++) {
- lua_rawgeti(L, idx1, i);
- lua_rawseti(L, idx2 - 1, n2 + i); /* correct 'idx2' */
- }
- return n2;
-}
-
-
-/*
-** When joining 'ktables', constants from one of the subpatterns must
-** be renumbered; 'correctkeys' corrects their indices (adding 'n'
-** to each of them)
-*/
-static void correctkeys (TTree *tree, int n) {
- if (n == 0) return; /* no correction? */
- tailcall:
- switch (tree->tag) {
- case TOpenCall: case TCall: case TRunTime: case TRule: {
- if (tree->key > 0)
- tree->key += n;
- break;
- }
- case TCapture: {
- if (tree->key > 0 && tree->cap != Carg && tree->cap != Cnum)
- tree->key += n;
- break;
- }
- default: break;
- }
- switch (numsiblings[tree->tag]) {
- case 1: /* correctkeys(sib1(tree), n); */
- tree = sib1(tree); goto tailcall;
- case 2:
- correctkeys(sib1(tree), n);
- tree = sib2(tree); goto tailcall; /* correctkeys(sib2(tree), n); */
- default: assert(numsiblings[tree->tag] == 0); break;
- }
-}
-
-
-/*
-** Join the ktables from p1 and p2 the ktable for the new pattern at the
-** top of the stack, reusing them when possible.
-*/
-static void joinktables (lua_State *L, int p1, TTree *t2, int p2) {
- int n1, n2;
- lua_getuservalue(L, p1); /* get ktables */
- lua_getuservalue(L, p2);
- n1 = ktablelen(L, -2);
- n2 = ktablelen(L, -1);
- if (n1 == 0 && n2 == 0) /* are both tables empty? */
- lua_pop(L, 2); /* nothing to be done; pop tables */
- else if (n2 == 0 || lp_equal(L, -2, -1)) { /* 2nd table empty or equal? */
- lua_pop(L, 1); /* pop 2nd table */
- lua_setuservalue(L, -2); /* set 1st ktable into new pattern */
- }
- else if (n1 == 0) { /* first table is empty? */
- lua_setuservalue(L, -3); /* set 2nd table into new pattern */
- lua_pop(L, 1); /* pop 1st table */
- }
- else {
- lua_createtable(L, n1 + n2, 0); /* create ktable for new pattern */
- /* stack: new p; ktable p1; ktable p2; new ktable */
- concattable(L, -3, -1); /* from p1 into new ktable */
- concattable(L, -2, -1); /* from p2 into new ktable */
- lua_setuservalue(L, -4); /* new ktable becomes 'p' environment */
- lua_pop(L, 2); /* pop other ktables */
- correctkeys(t2, n1); /* correction for indices from p2 */
- }
-}
-
-
-/*
-** copy 'ktable' of element 'idx' to new tree (on top of stack)
-*/
-static void copyktable (lua_State *L, int idx) {
- lua_getuservalue(L, idx);
- lua_setuservalue(L, -2);
-}
-
-
-/*
-** merge 'ktable' from 'stree' at stack index 'idx' into 'ktable'
-** from tree at the top of the stack, and correct corresponding
-** tree.
-*/
-static void mergektable (lua_State *L, int idx, TTree *stree) {
- int n;
- lua_getuservalue(L, -1); /* get ktables */
- lua_getuservalue(L, idx);
- n = concattable(L, -1, -2);
- lua_pop(L, 2); /* remove both ktables */
- correctkeys(stree, n);
-}
-
-
-/*
-** Create a new 'ktable' to the pattern at the top of the stack, adding
-** all elements from pattern 'p' (if not 0) plus element 'idx' to it.
-** Return index of new element.
-*/
-static int addtonewktable (lua_State *L, int p, int idx) {
- newktable(L, 1);
- if (p)
- mergektable(L, p, NULL);
- return addtoktable(L, idx);
-}
-
-/* }====================================================== */
-
-
-/*
-** {======================================================
-** Tree generation
-** =======================================================
-*/
-
-/*
-** In 5.2, could use 'luaL_testudata'...
-*/
-static int testpattern (lua_State *L, int idx) {
- if (lua_touserdata(L, idx)) { /* value is a userdata? */
- if (lua_getmetatable(L, idx)) { /* does it have a metatable? */
- luaL_getmetatable(L, PATTERN_T);
- if (lua_rawequal(L, -1, -2)) { /* does it have the correct mt? */
- lua_pop(L, 2); /* remove both metatables */
- return 1;
- }
- }
- }
- return 0;
-}
-
-
-static Pattern *getpattern (lua_State *L, int idx) {
- return (Pattern *)luaL_checkudata(L, idx, PATTERN_T);
-}
-
-
-static int getsize (lua_State *L, int idx) {
- return (lua_rawlen(L, idx) - sizeof(Pattern)) / sizeof(TTree) + 1;
-}
-
-
-static TTree *gettree (lua_State *L, int idx, int *len) {
- Pattern *p = getpattern(L, idx);
- if (len)
- *len = getsize(L, idx);
- return p->tree;
-}
-
-
-/*
-** create a pattern. Set its uservalue (the 'ktable') equal to its
-** metatable. (It could be any empty sequence; the metatable is at
-** hand here, so we use it.)
-*/
-static TTree *newtree (lua_State *L, int len) {
- size_t size = (len - 1) * sizeof(TTree) + sizeof(Pattern);
- Pattern *p = (Pattern *)lua_newuserdata(L, size);
- luaL_getmetatable(L, PATTERN_T);
- lua_pushvalue(L, -1);
- lua_setuservalue(L, -3);
- lua_setmetatable(L, -2);
- p->code = NULL; p->codesize = 0;
- return p->tree;
-}
-
-
-static TTree *newleaf (lua_State *L, int tag) {
- TTree *tree = newtree(L, 1);
- tree->tag = tag;
- return tree;
-}
-
-
-static TTree *newcharset (lua_State *L) {
- TTree *tree = newtree(L, bytes2slots(CHARSETSIZE) + 1);
- tree->tag = TSet;
- loopset(i, treebuffer(tree)[i] = 0);
- return tree;
-}
-
-
-/*
-** add to tree a sequence where first sibling is 'sib' (with size
-** 'sibsize'); returns position for second sibling
-*/
-static TTree *seqaux (TTree *tree, TTree *sib, int sibsize) {
- tree->tag = TSeq; tree->u.ps = sibsize + 1;
- memcpy(sib1(tree), sib, sibsize * sizeof(TTree));
- return sib2(tree);
-}
-
-
-/*
-** Build a sequence of 'n' nodes, each with tag 'tag' and 'u.n' got
-** from the array 's' (or 0 if array is NULL). (TSeq is binary, so it
-** must build a sequence of sequence of sequence...)
-*/
-static void fillseq (TTree *tree, int tag, int n, const char *s) {
- int i;
- for (i = 0; i < n - 1; i++) { /* initial n-1 copies of Seq tag; Seq ... */
- tree->tag = TSeq; tree->u.ps = 2;
- sib1(tree)->tag = tag;
- sib1(tree)->u.n = s ? (byte)s[i] : 0;
- tree = sib2(tree);
- }
- tree->tag = tag; /* last one does not need TSeq */
- tree->u.n = s ? (byte)s[i] : 0;
-}
-
-
-/*
-** Numbers as patterns:
-** 0 == true (always match); n == TAny repeated 'n' times;
-** -n == not (TAny repeated 'n' times)
-*/
-static TTree *numtree (lua_State *L, int n) {
- if (n == 0)
- return newleaf(L, TTrue);
- else {
- TTree *tree, *nd;
- if (n > 0)
- tree = nd = newtree(L, 2 * n - 1);
- else { /* negative: code it as !(-n) */
- n = -n;
- tree = newtree(L, 2 * n);
- tree->tag = TNot;
- nd = sib1(tree);
- }
- fillseq(nd, TAny, n, NULL); /* sequence of 'n' any's */
- return tree;
- }
-}
-
-
-/*
-** Convert value at index 'idx' to a pattern
-*/
-static TTree *getpatt (lua_State *L, int idx, int *len) {
- TTree *tree;
- switch (lua_type(L, idx)) {
- case LUA_TSTRING: {
- size_t slen;
- const char *s = lua_tolstring(L, idx, &slen); /* get string */
- if (slen == 0) /* empty? */
- tree = newleaf(L, TTrue); /* always match */
- else {
- tree = newtree(L, 2 * (slen - 1) + 1);
- fillseq(tree, TChar, slen, s); /* sequence of 'slen' chars */
- }
- break;
- }
- case LUA_TNUMBER: {
- int n = lua_tointeger(L, idx);
- tree = numtree(L, n);
- break;
- }
- case LUA_TBOOLEAN: {
- tree = (lua_toboolean(L, idx) ? newleaf(L, TTrue) : newleaf(L, TFalse));
- break;
- }
- case LUA_TTABLE: {
- tree = newgrammar(L, idx);
- break;
- }
- case LUA_TFUNCTION: {
- tree = newtree(L, 2);
- tree->tag = TRunTime;
- tree->key = addtonewktable(L, 0, idx);
- sib1(tree)->tag = TTrue;
- break;
- }
- default: {
- return gettree(L, idx, len);
- }
- }
- lua_replace(L, idx); /* put new tree into 'idx' slot */
- if (len)
- *len = getsize(L, idx);
- return tree;
-}
-
-
-/*
-** create a new tree, whith a new root and one sibling.
-** Sibling must be on the Lua stack, at index 1.
-*/
-static TTree *newroot1sib (lua_State *L, int tag) {
- int s1;
- TTree *tree1 = getpatt(L, 1, &s1);
- TTree *tree = newtree(L, 1 + s1); /* create new tree */
- tree->tag = tag;
- memcpy(sib1(tree), tree1, s1 * sizeof(TTree));
- copyktable(L, 1);
- return tree;
-}
-
-
-/*
-** create a new tree, whith a new root and 2 siblings.
-** Siblings must be on the Lua stack, first one at index 1.
-*/
-static TTree *newroot2sib (lua_State *L, int tag) {
- int s1, s2;
- TTree *tree1 = getpatt(L, 1, &s1);
- TTree *tree2 = getpatt(L, 2, &s2);
- TTree *tree = newtree(L, 1 + s1 + s2); /* create new tree */
- tree->tag = tag;
- tree->u.ps = 1 + s1;
- memcpy(sib1(tree), tree1, s1 * sizeof(TTree));
- memcpy(sib2(tree), tree2, s2 * sizeof(TTree));
- joinktables(L, 1, sib2(tree), 2);
- return tree;
-}
-
-
-static int lp_P (lua_State *L) {
- luaL_checkany(L, 1);
- getpatt(L, 1, NULL);
- lua_settop(L, 1);
- return 1;
-}
-
-
-/*
-** sequence operator; optimizations:
-** false x => false, x true => x, true x => x
-** (cannot do x . false => false because x may have runtime captures)
-*/
-static int lp_seq (lua_State *L) {
- TTree *tree1 = getpatt(L, 1, NULL);
- TTree *tree2 = getpatt(L, 2, NULL);
- if (tree1->tag == TFalse || tree2->tag == TTrue)
- lua_pushvalue(L, 1); /* false . x == false, x . true = x */
- else if (tree1->tag == TTrue)
- lua_pushvalue(L, 2); /* true . x = x */
- else
- newroot2sib(L, TSeq);
- return 1;
-}
-
-
-/*
-** choice operator; optimizations:
-** charset / charset => charset
-** true / x => true, x / false => x, false / x => x
-** (x / true is not equivalent to true)
-*/
-static int lp_choice (lua_State *L) {
- Charset st1, st2;
- TTree *t1 = getpatt(L, 1, NULL);
- TTree *t2 = getpatt(L, 2, NULL);
- if (tocharset(t1, &st1) && tocharset(t2, &st2)) {
- TTree *t = newcharset(L);
- loopset(i, treebuffer(t)[i] = st1.cs[i] | st2.cs[i]);
- }
- else if (nofail(t1) || t2->tag == TFalse)
- lua_pushvalue(L, 1); /* true / x => true, x / false => x */
- else if (t1->tag == TFalse)
- lua_pushvalue(L, 2); /* false / x => x */
- else
- newroot2sib(L, TChoice);
- return 1;
-}
-
-
-/*
-** p^n
-*/
-static int lp_star (lua_State *L) {
- int size1;
- int n = (int)luaL_checkinteger(L, 2);
- TTree *tree1 = getpatt(L, 1, &size1);
- if (n >= 0) { /* seq tree1 (seq tree1 ... (seq tree1 (rep tree1))) */
- TTree *tree = newtree(L, (n + 1) * (size1 + 1));
- if (nullable(tree1))
- luaL_error(L, "loop body may accept empty string");
- while (n--) /* repeat 'n' times */
- tree = seqaux(tree, tree1, size1);
- tree->tag = TRep;
- memcpy(sib1(tree), tree1, size1 * sizeof(TTree));
- }
- else { /* choice (seq tree1 ... choice tree1 true ...) true */
- TTree *tree;
- n = -n;
- /* size = (choice + seq + tree1 + true) * n, but the last has no seq */
- tree = newtree(L, n * (size1 + 3) - 1);
- for (; n > 1; n--) { /* repeat (n - 1) times */
- tree->tag = TChoice; tree->u.ps = n * (size1 + 3) - 2;
- sib2(tree)->tag = TTrue;
- tree = sib1(tree);
- tree = seqaux(tree, tree1, size1);
- }
- tree->tag = TChoice; tree->u.ps = size1 + 1;
- sib2(tree)->tag = TTrue;
- memcpy(sib1(tree), tree1, size1 * sizeof(TTree));
- }
- copyktable(L, 1);
- return 1;
-}
-
-
-/*
-** #p == &p
-*/
-static int lp_and (lua_State *L) {
- newroot1sib(L, TAnd);
- return 1;
-}
-
-
-/*
-** -p == !p
-*/
-static int lp_not (lua_State *L) {
- newroot1sib(L, TNot);
- return 1;
-}
-
-
-/*
-** [t1 - t2] == Seq (Not t2) t1
-** If t1 and t2 are charsets, make their difference.
-*/
-static int lp_sub (lua_State *L) {
- Charset st1, st2;
- int s1, s2;
- TTree *t1 = getpatt(L, 1, &s1);
- TTree *t2 = getpatt(L, 2, &s2);
- if (tocharset(t1, &st1) && tocharset(t2, &st2)) {
- TTree *t = newcharset(L);
- loopset(i, treebuffer(t)[i] = st1.cs[i] & ~st2.cs[i]);
- }
- else {
- TTree *tree = newtree(L, 2 + s1 + s2);
- tree->tag = TSeq; /* sequence of... */
- tree->u.ps = 2 + s2;
- sib1(tree)->tag = TNot; /* ...not... */
- memcpy(sib1(sib1(tree)), t2, s2 * sizeof(TTree)); /* ...t2 */
- memcpy(sib2(tree), t1, s1 * sizeof(TTree)); /* ... and t1 */
- joinktables(L, 1, sib1(tree), 2);
- }
- return 1;
-}
-
-
-static int lp_set (lua_State *L) {
- size_t l;
- const char *s = luaL_checklstring(L, 1, &l);
- TTree *tree = newcharset(L);
- while (l--) {
- setchar(treebuffer(tree), (byte)(*s));
- s++;
- }
- return 1;
-}
-
-
-static int lp_range (lua_State *L) {
- int arg;
- int top = lua_gettop(L);
- TTree *tree = newcharset(L);
- for (arg = 1; arg <= top; arg++) {
- int c;
- size_t l;
- const char *r = luaL_checklstring(L, arg, &l);
- luaL_argcheck(L, l == 2, arg, "range must have two characters");
- for (c = (byte)r[0]; c <= (byte)r[1]; c++)
- setchar(treebuffer(tree), c);
- }
- return 1;
-}
-
-
-/*
-** Look-behind predicate
-*/
-static int lp_behind (lua_State *L) {
- TTree *tree;
- TTree *tree1 = getpatt(L, 1, NULL);
- int n = fixedlen(tree1);
- luaL_argcheck(L, n >= 0, 1, "pattern may not have fixed length");
- luaL_argcheck(L, !hascaptures(tree1), 1, "pattern have captures");
- luaL_argcheck(L, n <= MAXBEHIND, 1, "pattern too long to look behind");
- tree = newroot1sib(L, TBehind);
- tree->u.n = n;
- return 1;
-}
-
-
-/*
-** Create a non-terminal
-*/
-static int lp_V (lua_State *L) {
- TTree *tree = newleaf(L, TOpenCall);
- luaL_argcheck(L, !lua_isnoneornil(L, 1), 1, "non-nil value expected");
- tree->key = addtonewktable(L, 0, 1);
- return 1;
-}
-
-
-/*
-** Create a tree for a non-empty capture, with a body and
-** optionally with an associated Lua value (at index 'labelidx' in the
-** stack)
-*/
-static int capture_aux (lua_State *L, int cap, int labelidx) {
- TTree *tree = newroot1sib(L, TCapture);
- tree->cap = cap;
- tree->key = (labelidx == 0) ? 0 : addtonewktable(L, 1, labelidx);
- return 1;
-}
-
-
-/*
-** Fill a tree with an empty capture, using an empty (TTrue) sibling.
-*/
-static TTree *auxemptycap (TTree *tree, int cap) {
- tree->tag = TCapture;
- tree->cap = cap;
- sib1(tree)->tag = TTrue;
- return tree;
-}
-
-
-/*
-** Create a tree for an empty capture
-*/
-static TTree *newemptycap (lua_State *L, int cap) {
- return auxemptycap(newtree(L, 2), cap);
-}
-
-
-/*
-** Create a tree for an empty capture with an associated Lua value
-*/
-static TTree *newemptycapkey (lua_State *L, int cap, int idx) {
- TTree *tree = auxemptycap(newtree(L, 2), cap);
- tree->key = addtonewktable(L, 0, idx);
- return tree;
-}
-
-
-/*
-** Captures with syntax p / v
-** (function capture, query capture, string capture, or number capture)
-*/
-static int lp_divcapture (lua_State *L) {
- switch (lua_type(L, 2)) {
- case LUA_TFUNCTION: return capture_aux(L, Cfunction, 2);
- case LUA_TTABLE: return capture_aux(L, Cquery, 2);
- case LUA_TSTRING: return capture_aux(L, Cstring, 2);
- case LUA_TNUMBER: {
- int n = lua_tointeger(L, 2);
- TTree *tree = newroot1sib(L, TCapture);
- luaL_argcheck(L, 0 <= n && n <= SHRT_MAX, 1, "invalid number");
- tree->cap = Cnum;
- tree->key = n;
- return 1;
- }
- default: return luaL_argerror(L, 2, "invalid replacement value");
- }
-}
-
-
-static int lp_substcapture (lua_State *L) {
- return capture_aux(L, Csubst, 0);
-}
-
-
-static int lp_tablecapture (lua_State *L) {
- return capture_aux(L, Ctable, 0);
-}
-
-
-static int lp_groupcapture (lua_State *L) {
- if (lua_isnoneornil(L, 2))
- return capture_aux(L, Cgroup, 0);
- else
- return capture_aux(L, Cgroup, 2);
-}
-
-
-static int lp_foldcapture (lua_State *L) {
- luaL_checktype(L, 2, LUA_TFUNCTION);
- return capture_aux(L, Cfold, 2);
-}
-
-
-static int lp_simplecapture (lua_State *L) {
- return capture_aux(L, Csimple, 0);
-}
-
-
-static int lp_poscapture (lua_State *L) {
- newemptycap(L, Cposition);
- return 1;
-}
-
-
-static int lp_argcapture (lua_State *L) {
- int n = (int)luaL_checkinteger(L, 1);
- TTree *tree = newemptycap(L, Carg);
- tree->key = n;
- luaL_argcheck(L, 0 < n && n <= SHRT_MAX, 1, "invalid argument index");
- return 1;
-}
-
-
-static int lp_backref (lua_State *L) {
- luaL_checkany(L, 1);
- newemptycapkey(L, Cbackref, 1);
- return 1;
-}
-
-
-/*
-** Constant capture
-*/
-static int lp_constcapture (lua_State *L) {
- int i;
- int n = lua_gettop(L); /* number of values */
- if (n == 0) /* no values? */
- newleaf(L, TTrue); /* no capture */
- else if (n == 1)
- newemptycapkey(L, Cconst, 1); /* single constant capture */
- else { /* create a group capture with all values */
- TTree *tree = newtree(L, 1 + 3 * (n - 1) + 2);
- newktable(L, n); /* create a 'ktable' for new tree */
- tree->tag = TCapture;
- tree->cap = Cgroup;
- tree->key = 0;
- tree = sib1(tree);
- for (i = 1; i <= n - 1; i++) {
- tree->tag = TSeq;
- tree->u.ps = 3; /* skip TCapture and its sibling */
- auxemptycap(sib1(tree), Cconst);
- sib1(tree)->key = addtoktable(L, i);
- tree = sib2(tree);
- }
- auxemptycap(tree, Cconst);
- tree->key = addtoktable(L, i);
- }
- return 1;
-}
-
-
-static int lp_matchtime (lua_State *L) {
- TTree *tree;
- luaL_checktype(L, 2, LUA_TFUNCTION);
- tree = newroot1sib(L, TRunTime);
- tree->key = addtonewktable(L, 1, 2);
- return 1;
-}
-
-/* }====================================================== */
-
-
-/*
-** {======================================================
-** Grammar - Tree generation
-** =======================================================
-*/
-
-/*
-** push on the stack the index and the pattern for the
-** initial rule of grammar at index 'arg' in the stack;
-** also add that index into position table.
-*/
-static void getfirstrule (lua_State *L, int arg, int postab) {
- lua_rawgeti(L, arg, 1); /* access first element */
- if (lua_isstring(L, -1)) { /* is it the name of initial rule? */
- lua_pushvalue(L, -1); /* duplicate it to use as key */
- lua_gettable(L, arg); /* get associated rule */
- }
- else {
- lua_pushinteger(L, 1); /* key for initial rule */
- lua_insert(L, -2); /* put it before rule */
- }
- if (!testpattern(L, -1)) { /* initial rule not a pattern? */
- if (lua_isnil(L, -1))
- luaL_error(L, "grammar has no initial rule");
- else
- luaL_error(L, "initial rule '%s' is not a pattern", lua_tostring(L, -2));
- }
- lua_pushvalue(L, -2); /* push key */
- lua_pushinteger(L, 1); /* push rule position (after TGrammar) */
- lua_settable(L, postab); /* insert pair at position table */
-}
-
-/*
-** traverse grammar at index 'arg', pushing all its keys and patterns
-** into the stack. Create a new table (before all pairs key-pattern) to
-** collect all keys and their associated positions in the final tree
-** (the "position table").
-** Return the number of rules and (in 'totalsize') the total size
-** for the new tree.
-*/
-static int collectrules (lua_State *L, int arg, int *totalsize) {
- int n = 1; /* to count number of rules */
- int postab = lua_gettop(L) + 1; /* index of position table */
- int size; /* accumulator for total size */
- lua_newtable(L); /* create position table */
- getfirstrule(L, arg, postab);
- size = 2 + getsize(L, postab + 2); /* TGrammar + TRule + rule */
- lua_pushnil(L); /* prepare to traverse grammar table */
- while (lua_next(L, arg) != 0) {
- if (lua_tonumber(L, -2) == 1 ||
- lp_equal(L, -2, postab + 1)) { /* initial rule? */
- lua_pop(L, 1); /* remove value (keep key for lua_next) */
- continue;
- }
- if (!testpattern(L, -1)) /* value is not a pattern? */
- luaL_error(L, "rule '%s' is not a pattern", val2str(L, -2));
- luaL_checkstack(L, LUA_MINSTACK, "grammar has too many rules");
- lua_pushvalue(L, -2); /* push key (to insert into position table) */
- lua_pushinteger(L, size);
- lua_settable(L, postab);
- size += 1 + getsize(L, -1); /* update size */
- lua_pushvalue(L, -2); /* push key (for next lua_next) */
- n++;
- }
- *totalsize = size + 1; /* TTrue to finish list of rules */
- return n;
-}
-
-
-static void buildgrammar (lua_State *L, TTree *grammar, int frule, int n) {
- int i;
- TTree *nd = sib1(grammar); /* auxiliary pointer to traverse the tree */
- for (i = 0; i < n; i++) { /* add each rule into new tree */
- int ridx = frule + 2*i + 1; /* index of i-th rule */
- int rulesize;
- TTree *rn = gettree(L, ridx, &rulesize);
- nd->tag = TRule;
- nd->key = 0; /* will be fixed when rule is used */
- nd->cap = i; /* rule number */
- nd->u.ps = rulesize + 1; /* point to next rule */
- memcpy(sib1(nd), rn, rulesize * sizeof(TTree)); /* copy rule */
- mergektable(L, ridx, sib1(nd)); /* merge its ktable into new one */
- nd = sib2(nd); /* move to next rule */
- }
- nd->tag = TTrue; /* finish list of rules */
-}
-
-
-/*
-** Check whether a tree has potential infinite loops
-*/
-static int checkloops (TTree *tree) {
- tailcall:
- if (tree->tag == TRep && nullable(sib1(tree)))
- return 1;
- else if (tree->tag == TGrammar)
- return 0; /* sub-grammars already checked */
- else {
- switch (numsiblings[tree->tag]) {
- case 1: /* return checkloops(sib1(tree)); */
- tree = sib1(tree); goto tailcall;
- case 2:
- if (checkloops(sib1(tree))) return 1;
- /* else return checkloops(sib2(tree)); */
- tree = sib2(tree); goto tailcall;
- default: assert(numsiblings[tree->tag] == 0); return 0;
- }
- }
-}
-
-
-/*
-** Give appropriate error message for 'verifyrule'. If a rule appears
-** twice in 'passed', there is path from it back to itself without
-** advancing the subject.
-*/
-static int verifyerror (lua_State *L, int *passed, int npassed) {
- int i, j;
- for (i = npassed - 1; i >= 0; i--) { /* search for a repetition */
- for (j = i - 1; j >= 0; j--) {
- if (passed[i] == passed[j]) {
- lua_rawgeti(L, -1, passed[i]); /* get rule's key */
- return luaL_error(L, "rule '%s' may be left recursive", val2str(L, -1));
- }
- }
- }
- return luaL_error(L, "too many left calls in grammar");
-}
-
-
-/*
-** Check whether a rule can be left recursive; raise an error in that
-** case; otherwise return 1 iff pattern is nullable.
-** The return value is used to check sequences, where the second pattern
-** is only relevant if the first is nullable.
-** Parameter 'nb' works as an accumulator, to allow tail calls in
-** choices. ('nb' true makes function returns true.)
-** Parameter 'passed' is a list of already visited rules, 'npassed'
-** counts the elements in 'passed'.
-** Assume ktable at the top of the stack.
-*/
-static int verifyrule (lua_State *L, TTree *tree, int *passed, int npassed,
- int nb) {
- tailcall:
- switch (tree->tag) {
- case TChar: case TSet: case TAny:
- case TFalse:
- return nb; /* cannot pass from here */
- case TTrue:
- case TBehind: /* look-behind cannot have calls */
- return 1;
- case TNot: case TAnd: case TRep:
- /* return verifyrule(L, sib1(tree), passed, npassed, 1); */
- tree = sib1(tree); nb = 1; goto tailcall;
- case TCapture: case TRunTime:
- /* return verifyrule(L, sib1(tree), passed, npassed, nb); */
- tree = sib1(tree); goto tailcall;
- case TCall:
- /* return verifyrule(L, sib2(tree), passed, npassed, nb); */
- tree = sib2(tree); goto tailcall;
- case TSeq: /* only check 2nd child if first is nb */
- if (!verifyrule(L, sib1(tree), passed, npassed, 0))
- return nb;
- /* else return verifyrule(L, sib2(tree), passed, npassed, nb); */
- tree = sib2(tree); goto tailcall;
- case TChoice: /* must check both children */
- nb = verifyrule(L, sib1(tree), passed, npassed, nb);
- /* return verifyrule(L, sib2(tree), passed, npassed, nb); */
- tree = sib2(tree); goto tailcall;
- case TRule:
- if (npassed >= MAXRULES)
- return verifyerror(L, passed, npassed);
- else {
- passed[npassed++] = tree->key;
- /* return verifyrule(L, sib1(tree), passed, npassed); */
- tree = sib1(tree); goto tailcall;
- }
- case TGrammar:
- return nullable(tree); /* sub-grammar cannot be left recursive */
- default: assert(0); return 0;
- }
-}
-
-
-static void verifygrammar (lua_State *L, TTree *grammar) {
- int passed[MAXRULES];
- TTree *rule;
- /* check left-recursive rules */
- for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) {
- if (rule->key == 0) continue; /* unused rule */
- verifyrule(L, sib1(rule), passed, 0, 0);
- }
- assert(rule->tag == TTrue);
- /* check infinite loops inside rules */
- for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) {
- if (rule->key == 0) continue; /* unused rule */
- if (checkloops(sib1(rule))) {
- lua_rawgeti(L, -1, rule->key); /* get rule's key */
- luaL_error(L, "empty loop in rule '%s'", val2str(L, -1));
- }
- }
- assert(rule->tag == TTrue);
-}
-
-
-/*
-** Give a name for the initial rule if it is not referenced
-*/
-static void initialrulename (lua_State *L, TTree *grammar, int frule) {
- if (sib1(grammar)->key == 0) { /* initial rule is not referenced? */
- int n = lua_rawlen(L, -1) + 1; /* index for name */
- lua_pushvalue(L, frule); /* rule's name */
- lua_rawseti(L, -2, n); /* ktable was on the top of the stack */
- sib1(grammar)->key = n;
- }
-}
-
-
-static TTree *newgrammar (lua_State *L, int arg) {
- int treesize;
- int frule = lua_gettop(L) + 2; /* position of first rule's key */
- int n = collectrules(L, arg, &treesize);
- TTree *g = newtree(L, treesize);
- luaL_argcheck(L, n <= MAXRULES, arg, "grammar has too many rules");
- g->tag = TGrammar; g->u.n = n;
- lua_newtable(L); /* create 'ktable' */
- lua_setuservalue(L, -2);
- buildgrammar(L, g, frule, n);
- lua_getuservalue(L, -1); /* get 'ktable' for new tree */
- finalfix(L, frule - 1, g, sib1(g));
- initialrulename(L, g, frule);
- verifygrammar(L, g);
- lua_pop(L, 1); /* remove 'ktable' */
- lua_insert(L, -(n * 2 + 2)); /* move new table to proper position */
- lua_pop(L, n * 2 + 1); /* remove position table + rule pairs */
- return g; /* new table at the top of the stack */
-}
-
-/* }====================================================== */
-
-
-static Instruction *prepcompile (lua_State *L, Pattern *p, int idx) {
- lua_getuservalue(L, idx); /* push 'ktable' (may be used by 'finalfix') */
- finalfix(L, 0, NULL, p->tree);
- lua_pop(L, 1); /* remove 'ktable' */
- return compile(L, p);
-}
-
-
-static int lp_printtree (lua_State *L) {
- TTree *tree = getpatt(L, 1, NULL);
- int c = lua_toboolean(L, 2);
- if (c) {
- lua_getuservalue(L, 1); /* push 'ktable' (may be used by 'finalfix') */
- finalfix(L, 0, NULL, tree);
- lua_pop(L, 1); /* remove 'ktable' */
- }
- printktable(L, 1);
- printtree(tree, 0);
- return 0;
-}
-
-
-static int lp_printcode (lua_State *L) {
- Pattern *p = getpattern(L, 1);
- printktable(L, 1);
- if (p->code == NULL) /* not compiled yet? */
- prepcompile(L, p, 1);
- printpatt(p->code, p->codesize);
- return 0;
-}
-
-
-/*
-** Get the initial position for the match, interpreting negative
-** values from the end of the subject
-*/
-static size_t initposition (lua_State *L, size_t len) {
- lua_Integer ii = luaL_optinteger(L, 3, 1);
- if (ii > 0) { /* positive index? */
- if ((size_t)ii <= len) /* inside the string? */
- return (size_t)ii - 1; /* return it (corrected to 0-base) */
- else return len; /* crop at the end */
- }
- else { /* negative index */
- if ((size_t)(-ii) <= len) /* inside the string? */
- return len - ((size_t)(-ii)); /* return position from the end */
- else return 0; /* crop at the beginning */
- }
-}
-
-
-/*
-** Main match function
-*/
-static int lp_match (lua_State *L) {
- Capture capture[INITCAPSIZE];
- const char *r;
- size_t l;
- Pattern *p = (getpatt(L, 1, NULL), getpattern(L, 1));
- Instruction *code = (p->code != NULL) ? p->code : prepcompile(L, p, 1);
- const char *s = luaL_checklstring(L, SUBJIDX, &l);
- size_t i = initposition(L, l);
- int ptop = lua_gettop(L);
- lua_pushnil(L); /* initialize subscache */
- lua_pushlightuserdata(L, capture); /* initialize caplistidx */
- lua_getuservalue(L, 1); /* initialize penvidx */
- r = match(L, s, s + i, s + l, code, capture, ptop);
- if (r == NULL) {
- lua_pushnil(L);
- return 1;
- }
- return getcaptures(L, s, r, ptop);
-}
-
-
-
-/*
-** {======================================================
-** Library creation and functions not related to matching
-** =======================================================
-*/
-
-/* maximum limit for stack size */
-#define MAXLIM (INT_MAX / 100)
-
-static int lp_setmax (lua_State *L) {
- lua_Integer lim = luaL_checkinteger(L, 1);
- luaL_argcheck(L, 0 < lim && lim <= MAXLIM, 1, "out of range");
- lua_settop(L, 1);
- lua_setfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX);
- return 0;
-}
-
-
-static int lp_version (lua_State *L) {
- lua_pushstring(L, VERSION);
- return 1;
-}
-
-
-static int lp_type (lua_State *L) {
- if (testpattern(L, 1))
- lua_pushliteral(L, "pattern");
- else
- lua_pushnil(L);
- return 1;
-}
-
-
-int lp_gc (lua_State *L) {
- Pattern *p = getpattern(L, 1);
- realloccode(L, p, 0); /* delete code block */
- return 0;
-}
-
-
-static void createcat (lua_State *L, const char *catname, int (catf) (int)) {
- TTree *t = newcharset(L);
- int i;
- for (i = 0; i <= UCHAR_MAX; i++)
- if (catf(i)) setchar(treebuffer(t), i);
- lua_setfield(L, -2, catname);
-}
-
-
-static int lp_locale (lua_State *L) {
- if (lua_isnoneornil(L, 1)) {
- lua_settop(L, 0);
- lua_createtable(L, 0, 12);
- }
- else {
- luaL_checktype(L, 1, LUA_TTABLE);
- lua_settop(L, 1);
- }
- createcat(L, "alnum", isalnum);
- createcat(L, "alpha", isalpha);
- createcat(L, "cntrl", iscntrl);
- createcat(L, "digit", isdigit);
- createcat(L, "graph", isgraph);
- createcat(L, "lower", islower);
- createcat(L, "print", isprint);
- createcat(L, "punct", ispunct);
- createcat(L, "space", isspace);
- createcat(L, "upper", isupper);
- createcat(L, "xdigit", isxdigit);
- return 1;
-}
-
-
-static struct luaL_Reg pattreg[] = {
- {"ptree", lp_printtree},
- {"pcode", lp_printcode},
- {"match", lp_match},
- {"B", lp_behind},
- {"V", lp_V},
- {"C", lp_simplecapture},
- {"Cc", lp_constcapture},
- {"Cmt", lp_matchtime},
- {"Cb", lp_backref},
- {"Carg", lp_argcapture},
- {"Cp", lp_poscapture},
- {"Cs", lp_substcapture},
- {"Ct", lp_tablecapture},
- {"Cf", lp_foldcapture},
- {"Cg", lp_groupcapture},
- {"P", lp_P},
- {"S", lp_set},
- {"R", lp_range},
- {"locale", lp_locale},
- {"version", lp_version},
- {"setmaxstack", lp_setmax},
- {"type", lp_type},
- {NULL, NULL}
-};
-
-
-static struct luaL_Reg metareg[] = {
- {"__mul", lp_seq},
- {"__add", lp_choice},
- {"__pow", lp_star},
- {"__gc", lp_gc},
- {"__len", lp_and},
- {"__div", lp_divcapture},
- {"__unm", lp_not},
- {"__sub", lp_sub},
- {NULL, NULL}
-};
-
-
-int luaopen_lpeg (lua_State *L);
-int luaopen_lpeg (lua_State *L) {
- luaL_newmetatable(L, PATTERN_T);
- lua_pushnumber(L, MAXBACK); /* initialize maximum backtracking */
- lua_setfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX);
- luaL_setfuncs(L, metareg, 0);
- luaL_newlib(L, pattreg);
- lua_pushvalue(L, -1);
- lua_setfield(L, -3, "__index");
- return 1;
-}
-
-/* }====================================================== */
diff --git a/src/ext/lpeg/lptree.h b/src/ext/lpeg/lptree.h
deleted file mode 100644
index 34ee15c..0000000
--- a/src/ext/lpeg/lptree.h
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
-** $Id: lptree.h,v 1.3 2016/09/13 18:07:51 roberto Exp $
-*/
-
-#if !defined(lptree_h)
-#define lptree_h
-
-
-#include "lptypes.h"
-
-
-/*
-** types of trees
-*/
-typedef enum TTag {
- TChar = 0, /* 'n' = char */
- TSet, /* the set is stored in next CHARSETSIZE bytes */
- TAny,
- TTrue,
- TFalse,
- TRep, /* 'sib1'* */
- TSeq, /* 'sib1' 'sib2' */
- TChoice, /* 'sib1' / 'sib2' */
- TNot, /* !'sib1' */
- TAnd, /* &'sib1' */
- TCall, /* ktable[key] is rule's key; 'sib2' is rule being called */
- TOpenCall, /* ktable[key] is rule's key */
- TRule, /* ktable[key] is rule's key (but key == 0 for unused rules);
- 'sib1' is rule's pattern;
- 'sib2' is next rule; 'cap' is rule's sequential number */
- TGrammar, /* 'sib1' is initial (and first) rule */
- TBehind, /* 'sib1' is pattern, 'n' is how much to go back */
- TCapture, /* captures: 'cap' is kind of capture (enum 'CapKind');
- ktable[key] is Lua value associated with capture;
- 'sib1' is capture body */
- TRunTime /* run-time capture: 'key' is Lua function;
- 'sib1' is capture body */
-} TTag;
-
-
-/*
-** Tree trees
-** The first child of a tree (if there is one) is immediately after
-** the tree. A reference to a second child (ps) is its position
-** relative to the position of the tree itself.
-*/
-typedef struct TTree {
- byte tag;
- byte cap; /* kind of capture (if it is a capture) */
- unsigned short key; /* key in ktable for Lua data (0 if no key) */
- union {
- int ps; /* occasional second child */
- int n; /* occasional counter */
- } u;
-} TTree;
-
-
-/*
-** A complete pattern has its tree plus, if already compiled,
-** its corresponding code
-*/
-typedef struct Pattern {
- union Instruction *code;
- int codesize;
- TTree tree[1];
-} Pattern;
-
-
-/* number of children for each tree */
-extern const byte numsiblings[];
-
-/* access to children */
-#define sib1(t) ((t) + 1)
-#define sib2(t) ((t) + (t)->u.ps)
-
-
-
-
-
-
-#endif
-
diff --git a/src/ext/lpeg/lptypes.h b/src/ext/lpeg/lptypes.h
deleted file mode 100644
index 8e78bc8..0000000
--- a/src/ext/lpeg/lptypes.h
+++ /dev/null
@@ -1,149 +0,0 @@
-/*
-** $Id: lptypes.h,v 1.16 2017/01/13 13:33:17 roberto Exp $
-** LPeg - PEG pattern matching for Lua
-** Copyright 2007-2017, Lua.org & PUC-Rio (see 'lpeg.html' for license)
-** written by Roberto Ierusalimschy
-*/
-
-#if !defined(lptypes_h)
-#define lptypes_h
-
-
-#if !defined(LPEG_DEBUG)
-#define NDEBUG
-#endif
-
-#include
-#include
-
-#include "lua.h"
-
-
-#define VERSION "1.0.1"
-
-
-#define PATTERN_T "lpeg-pattern"
-#define MAXSTACKIDX "lpeg-maxstack"
-
-
-/*
-** compatibility with Lua 5.1
-*/
-#if (LUA_VERSION_NUM == 501)
-
-#define lp_equal lua_equal
-
-#define lua_getuservalue lua_getfenv
-#define lua_setuservalue lua_setfenv
-
-#define lua_rawlen lua_objlen
-
-#define luaL_setfuncs(L,f,n) luaL_register(L,NULL,f)
-#define luaL_newlib(L,f) luaL_register(L,"lpeg",f)
-
-#endif
-
-
-#if !defined(lp_equal)
-#define lp_equal(L,idx1,idx2) lua_compare(L,(idx1),(idx2),LUA_OPEQ)
-#endif
-
-
-/* default maximum size for call/backtrack stack */
-#if !defined(MAXBACK)
-#define MAXBACK 400
-#endif
-
-
-/* maximum number of rules in a grammar (limited by 'unsigned char') */
-#if !defined(MAXRULES)
-#define MAXRULES 250
-#endif
-
-
-
-/* initial size for capture's list */
-#define INITCAPSIZE 32
-
-
-/* index, on Lua stack, for subject */
-#define SUBJIDX 2
-
-/* number of fixed arguments to 'match' (before capture arguments) */
-#define FIXEDARGS 3
-
-/* index, on Lua stack, for capture list */
-#define caplistidx(ptop) ((ptop) + 2)
-
-/* index, on Lua stack, for pattern's ktable */
-#define ktableidx(ptop) ((ptop) + 3)
-
-/* index, on Lua stack, for backtracking stack */
-#define stackidx(ptop) ((ptop) + 4)
-
-
-
-typedef unsigned char byte;
-
-
-#define BITSPERCHAR 8
-
-#define CHARSETSIZE ((UCHAR_MAX/BITSPERCHAR) + 1)
-
-
-
-typedef struct Charset {
- byte cs[CHARSETSIZE];
-} Charset;
-
-
-
-#define loopset(v,b) { int v; for (v = 0; v < CHARSETSIZE; v++) {b;} }
-
-/* access to charset */
-#define treebuffer(t) ((byte *)((t) + 1))
-
-/* number of slots needed for 'n' bytes */
-#define bytes2slots(n) (((n) - 1) / sizeof(TTree) + 1)
-
-/* set 'b' bit in charset 'cs' */
-#define setchar(cs,b) ((cs)[(b) >> 3] |= (1 << ((b) & 7)))
-
-
-/*
-** in capture instructions, 'kind' of capture and its offset are
-** packed in field 'aux', 4 bits for each
-*/
-#define getkind(op) ((op)->i.aux & 0xF)
-#define getoff(op) (((op)->i.aux >> 4) & 0xF)
-#define joinkindoff(k,o) ((k) | ((o) << 4))
-
-#define MAXOFF 0xF
-#define MAXAUX 0xFF
-
-
-/* maximum number of bytes to look behind */
-#define MAXBEHIND MAXAUX
-
-
-/* maximum size (in elements) for a pattern */
-#define MAXPATTSIZE (SHRT_MAX - 10)
-
-
-/* size (in elements) for an instruction plus extra l bytes */
-#define instsize(l) (((l) + sizeof(Instruction) - 1)/sizeof(Instruction) + 1)
-
-
-/* size (in elements) for a ISet instruction */
-#define CHARSETINSTSIZE instsize(CHARSETSIZE)
-
-/* size (in elements) for a IFunc instruction */
-#define funcinstsize(p) ((p)->i.aux + 2)
-
-
-
-#define testchar(st,c) (((int)(st)[((c) >> 3)] & (1 << ((c) & 7))))
-
-
-#endif
-
diff --git a/src/ext/lpeg/lpvm.c b/src/ext/lpeg/lpvm.c
deleted file mode 100644
index 05a5f68..0000000
--- a/src/ext/lpeg/lpvm.c
+++ /dev/null
@@ -1,364 +0,0 @@
-/*
-** $Id: lpvm.c,v 1.9 2016/06/03 20:11:18 roberto Exp $
-** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license)
-*/
-
-#include
-#include
-
-
-#include "lua.h"
-#include "lauxlib.h"
-
-#include "lpcap.h"
-#include "lptypes.h"
-#include "lpvm.h"
-#include "lpprint.h"
-
-
-/* initial size for call/backtrack stack */
-#if !defined(INITBACK)
-#define INITBACK MAXBACK
-#endif
-
-
-#define getoffset(p) (((p) + 1)->offset)
-
-static const Instruction giveup = {{IGiveup, 0, 0}};
-
-
-/*
-** {======================================================
-** Virtual Machine
-** =======================================================
-*/
-
-
-typedef struct Stack {
- const char *s; /* saved position (or NULL for calls) */
- const Instruction *p; /* next instruction */
- int caplevel;
-} Stack;
-
-
-#define getstackbase(L, ptop) ((Stack *)lua_touserdata(L, stackidx(ptop)))
-
-
-/*
-** Make the size of the array of captures 'cap' twice as large as needed
-** (which is 'captop'). ('n' is the number of new elements.)
-*/
-static Capture *doublecap (lua_State *L, Capture *cap, int captop,
- int n, int ptop) {
- Capture *newc;
- if (captop >= INT_MAX/((int)sizeof(Capture) * 2))
- luaL_error(L, "too many captures");
- newc = (Capture *)lua_newuserdata(L, captop * 2 * sizeof(Capture));
- memcpy(newc, cap, (captop - n) * sizeof(Capture));
- lua_replace(L, caplistidx(ptop));
- return newc;
-}
-
-
-/*
-** Double the size of the stack
-*/
-static Stack *doublestack (lua_State *L, Stack **stacklimit, int ptop) {
- Stack *stack = getstackbase(L, ptop);
- Stack *newstack;
- int n = *stacklimit - stack; /* current stack size */
- int max, newn;
- lua_getfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX);
- max = lua_tointeger(L, -1); /* maximum allowed size */
- lua_pop(L, 1);
- if (n >= max) /* already at maximum size? */
- luaL_error(L, "backtrack stack overflow (current limit is %d)", max);
- newn = 2 * n; /* new size */
- if (newn > max) newn = max;
- newstack = (Stack *)lua_newuserdata(L, newn * sizeof(Stack));
- memcpy(newstack, stack, n * sizeof(Stack));
- lua_replace(L, stackidx(ptop));
- *stacklimit = newstack + newn;
- return newstack + n; /* return next position */
-}
-
-
-/*
-** Interpret the result of a dynamic capture: false -> fail;
-** true -> keep current position; number -> next position.
-** Return new subject position. 'fr' is stack index where
-** is the result; 'curr' is current subject position; 'limit'
-** is subject's size.
-*/
-static int resdyncaptures (lua_State *L, int fr, int curr, int limit) {
- lua_Integer res;
- if (!lua_toboolean(L, fr)) { /* false value? */
- lua_settop(L, fr - 1); /* remove results */
- return -1; /* and fail */
- }
- else if (lua_isboolean(L, fr)) /* true? */
- res = curr; /* keep current position */
- else {
- res = lua_tointeger(L, fr) - 1; /* new position */
- if (res < curr || res > limit)
- luaL_error(L, "invalid position returned by match-time capture");
- }
- lua_remove(L, fr); /* remove first result (offset) */
- return res;
-}
-
-
-/*
-** Add capture values returned by a dynamic capture to the capture list
-** 'base', nested inside a group capture. 'fd' indexes the first capture
-** value, 'n' is the number of values (at least 1).
-*/
-static void adddyncaptures (const char *s, Capture *base, int n, int fd) {
- int i;
- base[0].kind = Cgroup; /* create group capture */
- base[0].siz = 0;
- base[0].idx = 0; /* make it an anonymous group */
- for (i = 1; i <= n; i++) { /* add runtime captures */
- base[i].kind = Cruntime;
- base[i].siz = 1; /* mark it as closed */
- base[i].idx = fd + i - 1; /* stack index of capture value */
- base[i].s = s;
- }
- base[i].kind = Cclose; /* close group */
- base[i].siz = 1;
- base[i].s = s;
-}
-
-
-/*
-** Remove dynamic captures from the Lua stack (called in case of failure)
-*/
-static int removedyncap (lua_State *L, Capture *capture,
- int level, int last) {
- int id = finddyncap(capture + level, capture + last); /* index of 1st cap. */
- int top = lua_gettop(L);
- if (id == 0) return 0; /* no dynamic captures? */
- lua_settop(L, id - 1); /* remove captures */
- return top - id + 1; /* number of values removed */
-}
-
-
-/*
-** Opcode interpreter
-*/
-const char *match (lua_State *L, const char *o, const char *s, const char *e,
- Instruction *op, Capture *capture, int ptop) {
- Stack stackbase[INITBACK];
- Stack *stacklimit = stackbase + INITBACK;
- Stack *stack = stackbase; /* point to first empty slot in stack */
- int capsize = INITCAPSIZE;
- int captop = 0; /* point to first empty slot in captures */
- int ndyncap = 0; /* number of dynamic captures (in Lua stack) */
- const Instruction *p = op; /* current instruction */
- stack->p = &giveup; stack->s = s; stack->caplevel = 0; stack++;
- lua_pushlightuserdata(L, stackbase);
- for (;;) {
-#if defined(DEBUG)
- printf("-------------------------------------\n");
- printcaplist(capture, capture + captop);
- printf("s: |%s| stck:%d, dyncaps:%d, caps:%d ",
- s, (int)(stack - getstackbase(L, ptop)), ndyncap, captop);
- printinst(op, p);
-#endif
- assert(stackidx(ptop) + ndyncap == lua_gettop(L) && ndyncap <= captop);
- switch ((Opcode)p->i.code) {
- case IEnd: {
- assert(stack == getstackbase(L, ptop) + 1);
- capture[captop].kind = Cclose;
- capture[captop].s = NULL;
- return s;
- }
- case IGiveup: {
- assert(stack == getstackbase(L, ptop));
- return NULL;
- }
- case IRet: {
- assert(stack > getstackbase(L, ptop) && (stack - 1)->s == NULL);
- p = (--stack)->p;
- continue;
- }
- case IAny: {
- if (s < e) { p++; s++; }
- else goto fail;
- continue;
- }
- case ITestAny: {
- if (s < e) p += 2;
- else p += getoffset(p);
- continue;
- }
- case IChar: {
- if ((byte)*s == p->i.aux && s < e) { p++; s++; }
- else goto fail;
- continue;
- }
- case ITestChar: {
- if ((byte)*s == p->i.aux && s < e) p += 2;
- else p += getoffset(p);
- continue;
- }
- case ISet: {
- int c = (byte)*s;
- if (testchar((p+1)->buff, c) && s < e)
- { p += CHARSETINSTSIZE; s++; }
- else goto fail;
- continue;
- }
- case ITestSet: {
- int c = (byte)*s;
- if (testchar((p + 2)->buff, c) && s < e)
- p += 1 + CHARSETINSTSIZE;
- else p += getoffset(p);
- continue;
- }
- case IBehind: {
- int n = p->i.aux;
- if (n > s - o) goto fail;
- s -= n; p++;
- continue;
- }
- case ISpan: {
- for (; s < e; s++) {
- int c = (byte)*s;
- if (!testchar((p+1)->buff, c)) break;
- }
- p += CHARSETINSTSIZE;
- continue;
- }
- case IJmp: {
- p += getoffset(p);
- continue;
- }
- case IChoice: {
- if (stack == stacklimit)
- stack = doublestack(L, &stacklimit, ptop);
- stack->p = p + getoffset(p);
- stack->s = s;
- stack->caplevel = captop;
- stack++;
- p += 2;
- continue;
- }
- case ICall: {
- if (stack == stacklimit)
- stack = doublestack(L, &stacklimit, ptop);
- stack->s = NULL;
- stack->p = p + 2; /* save return address */
- stack++;
- p += getoffset(p);
- continue;
- }
- case ICommit: {
- assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL);
- stack--;
- p += getoffset(p);
- continue;
- }
- case IPartialCommit: {
- assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL);
- (stack - 1)->s = s;
- (stack - 1)->caplevel = captop;
- p += getoffset(p);
- continue;
- }
- case IBackCommit: {
- assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL);
- s = (--stack)->s;
- captop = stack->caplevel;
- p += getoffset(p);
- continue;
- }
- case IFailTwice:
- assert(stack > getstackbase(L, ptop));
- stack--;
- /* go through */
- case IFail:
- fail: { /* pattern failed: try to backtrack */
- do { /* remove pending calls */
- assert(stack > getstackbase(L, ptop));
- s = (--stack)->s;
- } while (s == NULL);
- if (ndyncap > 0) /* is there matchtime captures? */
- ndyncap -= removedyncap(L, capture, stack->caplevel, captop);
- captop = stack->caplevel;
- p = stack->p;
-#if defined(DEBUG)
- printf("**FAIL**\n");
-#endif
- continue;
- }
- case ICloseRunTime: {
- CapState cs;
- int rem, res, n;
- int fr = lua_gettop(L) + 1; /* stack index of first result */
- cs.s = o; cs.L = L; cs.ocap = capture; cs.ptop = ptop;
- n = runtimecap(&cs, capture + captop, s, &rem); /* call function */
- captop -= n; /* remove nested captures */
- ndyncap -= rem; /* update number of dynamic captures */
- fr -= rem; /* 'rem' items were popped from Lua stack */
- res = resdyncaptures(L, fr, s - o, e - o); /* get result */
- if (res == -1) /* fail? */
- goto fail;
- s = o + res; /* else update current position */
- n = lua_gettop(L) - fr + 1; /* number of new captures */
- ndyncap += n; /* update number of dynamic captures */
- if (n > 0) { /* any new capture? */
- if (fr + n >= SHRT_MAX)
- luaL_error(L, "too many results in match-time capture");
- if ((captop += n + 2) >= capsize) {
- capture = doublecap(L, capture, captop, n + 2, ptop);
- capsize = 2 * captop;
- }
- /* add new captures to 'capture' list */
- adddyncaptures(s, capture + captop - n - 2, n, fr);
- }
- p++;
- continue;
- }
- case ICloseCapture: {
- const char *s1 = s;
- assert(captop > 0);
- /* if possible, turn capture into a full capture */
- if (capture[captop - 1].siz == 0 &&
- s1 - capture[captop - 1].s < UCHAR_MAX) {
- capture[captop - 1].siz = s1 - capture[captop - 1].s + 1;
- p++;
- continue;
- }
- else {
- capture[captop].siz = 1; /* mark entry as closed */
- capture[captop].s = s;
- goto pushcapture;
- }
- }
- case IOpenCapture:
- capture[captop].siz = 0; /* mark entry as open */
- capture[captop].s = s;
- goto pushcapture;
- case IFullCapture:
- capture[captop].siz = getoff(p) + 1; /* save capture size */
- capture[captop].s = s - getoff(p);
- /* goto pushcapture; */
- pushcapture: {
- capture[captop].idx = p->i.key;
- capture[captop].kind = getkind(p);
- if (++captop >= capsize) {
- capture = doublecap(L, capture, captop, 0, ptop);
- capsize = 2 * captop;
- }
- p++;
- continue;
- }
- default: assert(0); return NULL;
- }
- }
-}
-
-/* }====================================================== */
-
-
diff --git a/src/ext/lpeg/lpvm.h b/src/ext/lpeg/lpvm.h
deleted file mode 100644
index 757b9e1..0000000
--- a/src/ext/lpeg/lpvm.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
-** $Id: lpvm.h,v 1.3 2014/02/21 13:06:41 roberto Exp $
-*/
-
-#if !defined(lpvm_h)
-#define lpvm_h
-
-#include "lpcap.h"
-
-
-/* Virtual Machine's instructions */
-typedef enum Opcode {
- IAny, /* if no char, fail */
- IChar, /* if char != aux, fail */
- ISet, /* if char not in buff, fail */
- ITestAny, /* in no char, jump to 'offset' */
- ITestChar, /* if char != aux, jump to 'offset' */
- ITestSet, /* if char not in buff, jump to 'offset' */
- ISpan, /* read a span of chars in buff */
- IBehind, /* walk back 'aux' characters (fail if not possible) */
- IRet, /* return from a rule */
- IEnd, /* end of pattern */
- IChoice, /* stack a choice; next fail will jump to 'offset' */
- IJmp, /* jump to 'offset' */
- ICall, /* call rule at 'offset' */
- IOpenCall, /* call rule number 'key' (must be closed to a ICall) */
- ICommit, /* pop choice and jump to 'offset' */
- IPartialCommit, /* update top choice to current position and jump */
- IBackCommit, /* "fails" but jump to its own 'offset' */
- IFailTwice, /* pop one choice and then fail */
- IFail, /* go back to saved state on choice and jump to saved offset */
- IGiveup, /* internal use */
- IFullCapture, /* complete capture of last 'off' chars */
- IOpenCapture, /* start a capture */
- ICloseCapture,
- ICloseRunTime
-} Opcode;
-
-
-
-typedef union Instruction {
- struct Inst {
- byte code;
- byte aux;
- short key;
- } i;
- int offset;
- byte buff[1];
-} Instruction;
-
-
-void printpatt (Instruction *p, int n);
-const char *match (lua_State *L, const char *o, const char *s, const char *e,
- Instruction *op, Capture *capture, int ptop);
-
-
-#endif
-
diff --git a/src/ext/lpeg/makefile b/src/ext/lpeg/makefile
deleted file mode 100644
index 7a8463e..0000000
--- a/src/ext/lpeg/makefile
+++ /dev/null
@@ -1,55 +0,0 @@
-LIBNAME = lpeg
-LUADIR = ../lua/
-
-COPT = -O2
-# COPT = -DLPEG_DEBUG -g
-
-CWARNS = -Wall -Wextra -pedantic \
- -Waggregate-return \
- -Wcast-align \
- -Wcast-qual \
- -Wdisabled-optimization \
- -Wpointer-arith \
- -Wshadow \
- -Wsign-compare \
- -Wundef \
- -Wwrite-strings \
- -Wbad-function-cast \
- -Wdeclaration-after-statement \
- -Wmissing-prototypes \
- -Wnested-externs \
- -Wstrict-prototypes \
-# -Wunreachable-code \
-
-
-CFLAGS = $(CWARNS) $(COPT) -std=c99 -I$(LUADIR) -fPIC
-CC = gcc
-
-FILES = lpvm.o lpcap.o lptree.o lpcode.o lpprint.o
-
-# For Linux
-linux:
- make lpeg.so "DLLFLAGS = -shared -fPIC"
-
-# For Mac OS
-macosx:
- make lpeg.so "DLLFLAGS = -bundle -undefined dynamic_lookup"
-
-lpeg.so: $(FILES)
- env $(CC) $(DLLFLAGS) $(FILES) -o lpeg.so
-
-$(FILES): makefile
-
-test: test.lua re.lua lpeg.so
- ./test.lua
-
-clean:
- rm -f $(FILES) lpeg.so
-
-
-lpcap.o: lpcap.c lpcap.h lptypes.h
-lpcode.o: lpcode.c lptypes.h lpcode.h lptree.h lpvm.h lpcap.h
-lpprint.o: lpprint.c lptypes.h lpprint.h lptree.h lpvm.h lpcap.h
-lptree.o: lptree.c lptypes.h lpcap.h lpcode.h lptree.h lpvm.h lpprint.h
-lpvm.o: lpvm.c lpcap.h lptypes.h lpvm.h lpprint.h lptree.h
-
diff --git a/src/ext/lpeg/re.html b/src/ext/lpeg/re.html
deleted file mode 100644
index 32f0a45..0000000
--- a/src/ext/lpeg/re.html
+++ /dev/null
@@ -1,498 +0,0 @@
-
-
-
- LPeg.re - Regex syntax for LPEG
-
-
-
-
-
-
-
-
-
-
-
-
LPeg.re
-
- Regex syntax for LPEG
-
-
-
-
-
-
-
-
-
-
The re
Module
-
-
-The re
module
-(provided by file re.lua
in the distribution)
-supports a somewhat conventional regex syntax
-for pattern usage within LPeg .
-
-
-
-The next table summarizes re
's syntax.
-A p
represents an arbitrary pattern;
-num
represents a number ([0-9]+
);
-name
represents an identifier
-([a-zA-Z][a-zA-Z0-9_]*
).
-Constructions are listed in order of decreasing precedence.
-
-Syntax Description
-( p )
grouping
-'string'
literal string
-"string"
literal string
-[class]
character class
-.
any character
-%name
- pattern defs[name]
or a pre-defined pattern
-name
non terminal
-<name>
non terminal
-{}
position capture
-{ p }
simple capture
-{: p :}
anonymous group capture
-{:name: p :}
named group capture
-{~ p ~}
substitution capture
-{| p |}
table capture
-=name
back reference
-
-p ?
optional match
-p *
zero or more repetitions
-p +
one or more repetitions
-p^num
exactly n
repetitions
-p^+num
- at least n
repetitions
-p^-num
- at most n
repetitions
-p -> 'string'
string capture
-p -> "string"
string capture
-p -> num
numbered capture
-p -> name
function/query/string capture
-equivalent to p / defs[name]
-p => name
match-time capture
-equivalent to lpeg.Cmt(p, defs[name])
-& p
and predicate
-! p
not predicate
-p1 p2
concatenation
-p1 / p2
ordered choice
-(name <- p
)+ grammar
-
-
-Any space appearing in a syntax description can be
-replaced by zero or more space characters and Lua-style comments
-(--
until end of line).
-
-
-
-Character classes define sets of characters.
-An initial ^
complements the resulting set.
-A range x -
y includes in the set
-all characters with codes between the codes of x and y .
-A pre-defined class %
name includes all
-characters of that class.
-A simple character includes itself in the set.
-The only special characters inside a class are ^
-(special only if it is the first character);
-]
-(can be included in the set as the first character,
-after the optional ^
);
-%
(special only if followed by a letter);
-and -
-(can be included in the set as the first or the last character).
-
-
-
-Currently the pre-defined classes are similar to those from the
-Lua's string library
-(%a
for letters,
-%A
for non letters, etc.).
-There is also a class %nl
-containing only the newline character,
-which is particularly handy for grammars written inside long strings,
-as long strings do not interpret escape sequences like \n
.
-
-
-
-
-
-
re.compile (string, [, defs])
-
-Compiles the given string and
-returns an equivalent LPeg pattern.
-The given string may define either an expression or a grammar.
-The optional defs
table provides extra Lua values
-to be used by the pattern.
-
-
-
re.find (subject, pattern [, init])
-
-Searches the given pattern in the given subject.
-If it finds a match,
-returns the index where this occurrence starts and
-the index where it ends.
-Otherwise, returns nil.
-
-
-
-An optional numeric argument init
makes the search
-starts at that position in the subject string.
-As usual in Lua libraries,
-a negative value counts from the end.
-
-
-
re.gsub (subject, pattern, replacement)
-
-Does a global substitution ,
-replacing all occurrences of pattern
-in the given subject
by replacement
.
-
-
re.match (subject, pattern)
-
-Matches the given pattern against the given subject,
-returning all captures.
-
-
-
re.updatelocale ()
-
-Updates the pre-defined character classes to the current locale.
-
-
-
-
-
-
A complete simple program
-
-The next code shows a simple complete Lua program using
-the re
module:
-
-
-local re = require"re"
-
--- find the position of the first numeral in a string
-print(re.find("the number 423 is odd", "[0-9]+")) --> 12 14
-
--- returns all words in a string
-print(re.match("the number 423 is odd", "({%a+} / .)*"))
---> the number is odd
-
--- returns the first numeral in a string
-print(re.match("the number 423 is odd", "s <- {%d+} / . s"))
---> 423
-
-print(re.gsub("hello World", "[aeiou]", "."))
---> h.ll. W.rld
-
-
-
-
Balanced parentheses
-
-The following call will produce the same pattern produced by the
-Lua expression in the
-balanced parentheses example:
-
-
-b = re.compile[[ balanced <- "(" ([^()] / balanced)* ")" ]]
-
-
-
String reversal
-
-The next example reverses a string:
-
-
-rev = re.compile[[ R <- (!.) -> '' / ({.} R) -> '%2%1']]
-print(rev:match"0123456789") --> 9876543210
-
-
-
CSV decoder
-
-The next example replicates the CSV decoder :
-
-
-record = re.compile[[
- record <- {| field (',' field)* |} (%nl / !.)
- field <- escaped / nonescaped
- nonescaped <- { [^,"%nl]* }
- escaped <- '"' {~ ([^"] / '""' -> '"')* ~} '"'
-]]
-
-
-
Lua's long strings
-
-The next example matches Lua long strings:
-
-
-c = re.compile([[
- longstring <- ('[' {:eq: '='* :} '[' close)
- close <- ']' =eq ']' / . close
-]])
-
-print(c:match'[==[]]===]]]]==]===[]') --> 17
-
-
-
Abstract Syntax Trees
-
-This example shows a simple way to build an
-abstract syntax tree (AST) for a given grammar.
-To keep our example simple,
-let us consider the following grammar
-for lists of names:
-
-
-p = re.compile[[
- listname <- (name s)*
- name <- [a-z][a-z]*
- s <- %s*
-]]
-
-
-Now, we will add captures to build a corresponding AST.
-As a first step, the pattern will build a table to
-represent each non terminal;
-terminals will be represented by their corresponding strings:
-
-
-c = re.compile[[
- listname <- {| (name s)* |}
- name <- {| {[a-z][a-z]*} |}
- s <- %s*
-]]
-
-
-Now, a match against "hi hello bye"
-results in the table
-{{"hi"}, {"hello"}, {"bye"}}
.
-
-
-For such a simple grammar,
-this AST is more than enough;
-actually, the tables around each single name
-are already overkilling.
-More complex grammars,
-however, may need some more structure.
-Specifically,
-it would be useful if each table had
-a tag
field telling what non terminal
-that table represents.
-We can add such a tag using
-named group captures :
-
-
-x = re.compile[[
- listname <- {| {:tag: '' -> 'list':} (name s)* |}
- name <- {| {:tag: '' -> 'id':} {[a-z][a-z]*} |}
- s <- ' '*
-]]
-
-
-With these group captures,
-a match against "hi hello bye"
-results in the following table:
-
-
-{tag="list",
- {tag="id", "hi"},
- {tag="id", "hello"},
- {tag="id", "bye"}
-}
-
-
-
-
Indented blocks
-
-This example breaks indented blocks into tables,
-respecting the indentation:
-
-
-p = re.compile[[
- block <- {| {:ident:' '*:} line
- ((=ident !' ' line) / &(=ident ' ') block)* |}
- line <- {[^%nl]*} %nl
-]]
-
-
-As an example,
-consider the following text:
-
-
-t = p:match[[
-first line
- subline 1
- subline 2
-second line
-third line
- subline 3.1
- subline 3.1.1
- subline 3.2
-]]
-
-
-The resulting table t
will be like this:
-
-
- {'first line'; {'subline 1'; 'subline 2'; ident = ' '};
- 'second line';
- 'third line'; { 'subline 3.1'; {'subline 3.1.1'; ident = ' '};
- 'subline 3.2'; ident = ' '};
- ident = ''}
-
-
-
Macro expander
-
-This example implements a simple macro expander.
-Macros must be defined as part of the pattern,
-following some simple rules:
-
-
-p = re.compile[[
- text <- {~ item* ~}
- item <- macro / [^()] / '(' item* ')'
- arg <- ' '* {~ (!',' item)* ~}
- args <- '(' arg (',' arg)* ')'
- -- now we define some macros
- macro <- ('apply' args) -> '%1(%2)'
- / ('add' args) -> '%1 + %2'
- / ('mul' args) -> '%1 * %2'
-]]
-
-print(p:match"add(mul(a,b), apply(f,x))") --> a * b + f(x)
-
-
-A text
is a sequence of items,
-wherein we apply a substitution capture to expand any macros.
-An item
is either a macro,
-any character different from parentheses,
-or a parenthesized expression.
-A macro argument (arg
) is a sequence
-of items different from a comma.
-(Note that a comma may appear inside an item,
-e.g., inside a parenthesized expression.)
-Again we do a substitution capture to expand any macro
-in the argument before expanding the outer macro.
-args
is a list of arguments separated by commas.
-Finally we define the macros.
-Each macro is a string substitution;
-it replaces the macro name and its arguments by its corresponding string,
-with each %
n replaced by the n -th argument.
-
-
-
Patterns
-
-This example shows the complete syntax
-of patterns accepted by re
.
-
-
-p = [=[
-
-pattern <- exp !.
-exp <- S (grammar / alternative)
-
-alternative <- seq ('/' S seq)*
-seq <- prefix*
-prefix <- '&' S prefix / '!' S prefix / suffix
-suffix <- primary S (([+*?]
- / '^' [+-]? num
- / '->' S (string / '{}' / name)
- / '=>' S name) S)*
-
-primary <- '(' exp ')' / string / class / defined
- / '{:' (name ':')? exp ':}'
- / '=' name
- / '{}'
- / '{~' exp '~}'
- / '{' exp '}'
- / '.'
- / name S !arrow
- / '<' name '>' -- old-style non terminals
-
-grammar <- definition+
-definition <- name S arrow exp
-
-class <- '[' '^'? item (!']' item)* ']'
-item <- defined / range / .
-range <- . '-' [^]]
-
-S <- (%s / '--' [^%nl]*)* -- spaces and comments
-name <- [A-Za-z][A-Za-z0-9_]*
-arrow <- '<-'
-num <- [0-9]+
-string <- '"' [^"]* '"' / "'" [^']* "'"
-defined <- '%' name
-
-]=]
-
-print(re.match(p, p)) -- a self description must match itself
-
-
-
-
-
-
-
-Copyright © 2008-2015 Lua.org, PUC-Rio.
-
-
-Permission is hereby granted, free of charge,
-to any person obtaining a copy of this software and
-associated documentation files (the "Software"),
-to deal in the Software without restriction,
-including without limitation the rights to use,
-copy, modify, merge, publish, distribute, sublicense,
-and/or sell copies of the Software,
-and to permit persons to whom the Software is
-furnished to do so,
-subject to the following conditions:
-
-
-
-The above copyright notice and this permission notice
-shall be included in all copies or substantial portions of the Software.
-
-
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED,
-INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
-DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
-TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-
-
-
-
-
-
-
-
-$Id: re.html,v 1.24 2016/09/20 17:41:27 roberto Exp $
-
-
-
-
-
-
-
diff --git a/src/ext/lpeg/re.lua b/src/ext/lpeg/re.lua
deleted file mode 100644
index 3b9974f..0000000
--- a/src/ext/lpeg/re.lua
+++ /dev/null
@@ -1,259 +0,0 @@
--- $Id: re.lua,v 1.44 2013/03/26 20:11:40 roberto Exp $
-
--- imported functions and modules
-local tonumber, type, print, error = tonumber, type, print, error
-local setmetatable = setmetatable
-local m = require"lpeg"
-
--- 'm' will be used to parse expressions, and 'mm' will be used to
--- create expressions; that is, 're' runs on 'm', creating patterns
--- on 'mm'
-local mm = m
-
--- pattern's metatable
-local mt = getmetatable(mm.P(0))
-
-
-
--- No more global accesses after this point
-local version = _VERSION
-if version == "Lua 5.2" then _ENV = nil end
-
-
-local any = m.P(1)
-
-
--- Pre-defined names
-local Predef = { nl = m.P"\n" }
-
-
-local mem
-local fmem
-local gmem
-
-
-local function updatelocale ()
- mm.locale(Predef)
- Predef.a = Predef.alpha
- Predef.c = Predef.cntrl
- Predef.d = Predef.digit
- Predef.g = Predef.graph
- Predef.l = Predef.lower
- Predef.p = Predef.punct
- Predef.s = Predef.space
- Predef.u = Predef.upper
- Predef.w = Predef.alnum
- Predef.x = Predef.xdigit
- Predef.A = any - Predef.a
- Predef.C = any - Predef.c
- Predef.D = any - Predef.d
- Predef.G = any - Predef.g
- Predef.L = any - Predef.l
- Predef.P = any - Predef.p
- Predef.S = any - Predef.s
- Predef.U = any - Predef.u
- Predef.W = any - Predef.w
- Predef.X = any - Predef.x
- mem = {} -- restart memoization
- fmem = {}
- gmem = {}
- local mt = {__mode = "v"}
- setmetatable(mem, mt)
- setmetatable(fmem, mt)
- setmetatable(gmem, mt)
-end
-
-
-updatelocale()
-
-
-
-local I = m.P(function (s,i) print(i, s:sub(1, i-1)); return i end)
-
-
-local function getdef (id, defs)
- local c = defs and defs[id]
- if not c then error("undefined name: " .. id) end
- return c
-end
-
-
-local function patt_error (s, i)
- local msg = (#s < i + 20) and s:sub(i)
- or s:sub(i,i+20) .. "..."
- msg = ("pattern error near '%s'"):format(msg)
- error(msg, 2)
-end
-
-local function mult (p, n)
- local np = mm.P(true)
- while n >= 1 do
- if n%2 >= 1 then np = np * p end
- p = p * p
- n = n/2
- end
- return np
-end
-
-local function equalcap (s, i, c)
- if type(c) ~= "string" then return nil end
- local e = #c + i
- if s:sub(i, e - 1) == c then return e else return nil end
-end
-
-
-local S = (Predef.space + "--" * (any - Predef.nl)^0)^0
-
-local name = m.R("AZ", "az", "__") * m.R("AZ", "az", "__", "09")^0
-
-local arrow = S * "<-"
-
-local seq_follow = m.P"/" + ")" + "}" + ":}" + "~}" + "|}" + (name * arrow) + -1
-
-name = m.C(name)
-
-
--- a defined name only have meaning in a given environment
-local Def = name * m.Carg(1)
-
-local num = m.C(m.R"09"^1) * S / tonumber
-
-local String = "'" * m.C((any - "'")^0) * "'" +
- '"' * m.C((any - '"')^0) * '"'
-
-
-local defined = "%" * Def / function (c,Defs)
- local cat = Defs and Defs[c] or Predef[c]
- if not cat then error ("name '" .. c .. "' undefined") end
- return cat
-end
-
-local Range = m.Cs(any * (m.P"-"/"") * (any - "]")) / mm.R
-
-local item = defined + Range + m.C(any)
-
-local Class =
- "["
- * (m.C(m.P"^"^-1)) -- optional complement symbol
- * m.Cf(item * (item - "]")^0, mt.__add) /
- function (c, p) return c == "^" and any - p or p end
- * "]"
-
-local function adddef (t, k, exp)
- if t[k] then
- error("'"..k.."' already defined as a rule")
- else
- t[k] = exp
- end
- return t
-end
-
-local function firstdef (n, r) return adddef({n}, n, r) end
-
-
-local function NT (n, b)
- if not b then
- error("rule '"..n.."' used outside a grammar")
- else return mm.V(n)
- end
-end
-
-
-local exp = m.P{ "Exp",
- Exp = S * ( m.V"Grammar"
- + m.Cf(m.V"Seq" * ("/" * S * m.V"Seq")^0, mt.__add) );
- Seq = m.Cf(m.Cc(m.P"") * m.V"Prefix"^0 , mt.__mul)
- * (#seq_follow + patt_error);
- Prefix = "&" * S * m.V"Prefix" / mt.__len
- + "!" * S * m.V"Prefix" / mt.__unm
- + m.V"Suffix";
- Suffix = m.Cf(m.V"Primary" * S *
- ( ( m.P"+" * m.Cc(1, mt.__pow)
- + m.P"*" * m.Cc(0, mt.__pow)
- + m.P"?" * m.Cc(-1, mt.__pow)
- + "^" * ( m.Cg(num * m.Cc(mult))
- + m.Cg(m.C(m.S"+-" * m.R"09"^1) * m.Cc(mt.__pow))
- )
- + "->" * S * ( m.Cg((String + num) * m.Cc(mt.__div))
- + m.P"{}" * m.Cc(nil, m.Ct)
- + m.Cg(Def / getdef * m.Cc(mt.__div))
- )
- + "=>" * S * m.Cg(Def / getdef * m.Cc(m.Cmt))
- ) * S
- )^0, function (a,b,f) return f(a,b) end );
- Primary = "(" * m.V"Exp" * ")"
- + String / mm.P
- + Class
- + defined
- + "{:" * (name * ":" + m.Cc(nil)) * m.V"Exp" * ":}" /
- function (n, p) return mm.Cg(p, n) end
- + "=" * name / function (n) return mm.Cmt(mm.Cb(n), equalcap) end
- + m.P"{}" / mm.Cp
- + "{~" * m.V"Exp" * "~}" / mm.Cs
- + "{|" * m.V"Exp" * "|}" / mm.Ct
- + "{" * m.V"Exp" * "}" / mm.C
- + m.P"." * m.Cc(any)
- + (name * -arrow + "<" * name * ">") * m.Cb("G") / NT;
- Definition = name * arrow * m.V"Exp";
- Grammar = m.Cg(m.Cc(true), "G") *
- m.Cf(m.V"Definition" / firstdef * m.Cg(m.V"Definition")^0,
- adddef) / mm.P
-}
-
-local pattern = S * m.Cg(m.Cc(false), "G") * exp / mm.P * (-any + patt_error)
-
-
-local function compile (p, defs)
- if mm.type(p) == "pattern" then return p end -- already compiled
- local cp = pattern:match(p, 1, defs)
- if not cp then error("incorrect pattern", 3) end
- return cp
-end
-
-local function match (s, p, i)
- local cp = mem[p]
- if not cp then
- cp = compile(p)
- mem[p] = cp
- end
- return cp:match(s, i or 1)
-end
-
-local function find (s, p, i)
- local cp = fmem[p]
- if not cp then
- cp = compile(p) / 0
- cp = mm.P{ mm.Cp() * cp * mm.Cp() + 1 * mm.V(1) }
- fmem[p] = cp
- end
- local i, e = cp:match(s, i or 1)
- if i then return i, e - 1
- else return i
- end
-end
-
-local function gsub (s, p, rep)
- local g = gmem[p] or {} -- ensure gmem[p] is not collected while here
- gmem[p] = g
- local cp = g[rep]
- if not cp then
- cp = compile(p)
- cp = mm.Cs((cp / rep + 1)^0)
- g[rep] = cp
- end
- return cp:match(s)
-end
-
-
--- exported names
-local re = {
- compile = compile,
- match = match,
- find = find,
- gsub = gsub,
- updatelocale = updatelocale,
-}
-
-if version == "Lua 5.1" then _G.re = re end
-
-return re
diff --git a/src/ext/lpeg/test.lua b/src/ext/lpeg/test.lua
deleted file mode 100644
index 20ad07f..0000000
--- a/src/ext/lpeg/test.lua
+++ /dev/null
@@ -1,1503 +0,0 @@
-#!/usr/bin/env lua
-
--- $Id: test.lua,v 1.112 2017/01/14 18:55:22 roberto Exp $
-
--- require"strict" -- just to be pedantic
-
-local m = require"lpeg"
-
-
--- for general use
-local a, b, c, d, e, f, g, p, t
-
-
--- compatibility with Lua 5.2
-local unpack = rawget(table, "unpack") or unpack
-local loadstring = rawget(_G, "loadstring") or load
-
-
-local any = m.P(1)
-local space = m.S" \t\n"^0
-
-local function checkeq (x, y, p)
-if p then print(x,y) end
- if type(x) ~= "table" then assert(x == y)
- else
- for k,v in pairs(x) do checkeq(v, y[k], p) end
- for k,v in pairs(y) do checkeq(v, x[k], p) end
- end
-end
-
-
-local mt = getmetatable(m.P(1))
-
-
-local allchar = {}
-for i=0,255 do allchar[i + 1] = i end
-allchar = string.char(unpack(allchar))
-assert(#allchar == 256)
-
-local function cs2str (c)
- return m.match(m.Cs((c + m.P(1)/"")^0), allchar)
-end
-
-local function eqcharset (c1, c2)
- assert(cs2str(c1) == cs2str(c2))
-end
-
-
-print"General tests for LPeg library"
-
-assert(type(m.version()) == "string")
-print("version " .. m.version())
-assert(m.type("alo") ~= "pattern")
-assert(m.type(io.input) ~= "pattern")
-assert(m.type(m.P"alo") == "pattern")
-
--- tests for some basic optimizations
-assert(m.match(m.P(false) + "a", "a") == 2)
-assert(m.match(m.P(true) + "a", "a") == 1)
-assert(m.match("a" + m.P(false), "b") == nil)
-assert(m.match("a" + m.P(true), "b") == 1)
-
-assert(m.match(m.P(false) * "a", "a") == nil)
-assert(m.match(m.P(true) * "a", "a") == 2)
-assert(m.match("a" * m.P(false), "a") == nil)
-assert(m.match("a" * m.P(true), "a") == 2)
-
-assert(m.match(#m.P(false) * "a", "a") == nil)
-assert(m.match(#m.P(true) * "a", "a") == 2)
-assert(m.match("a" * #m.P(false), "a") == nil)
-assert(m.match("a" * #m.P(true), "a") == 2)
-
-
--- tests for locale
-do
- assert(m.locale(m) == m)
- local t = {}
- assert(m.locale(t, m) == t)
- local x = m.locale()
- for n,v in pairs(x) do
- assert(type(n) == "string")
- eqcharset(v, m[n])
- end
-end
-
-
-assert(m.match(3, "aaaa"))
-assert(m.match(4, "aaaa"))
-assert(not m.match(5, "aaaa"))
-assert(m.match(-3, "aa"))
-assert(not m.match(-3, "aaa"))
-assert(not m.match(-3, "aaaa"))
-assert(not m.match(-4, "aaaa"))
-assert(m.P(-5):match"aaaa")
-
-assert(m.match("a", "alo") == 2)
-assert(m.match("al", "alo") == 3)
-assert(not m.match("alu", "alo"))
-assert(m.match(true, "") == 1)
-
-local digit = m.S"0123456789"
-local upper = m.S"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
-local lower = m.S"abcdefghijklmnopqrstuvwxyz"
-local letter = m.S"" + upper + lower
-local alpha = letter + digit + m.R()
-
-eqcharset(m.S"", m.P(false))
-eqcharset(upper, m.R("AZ"))
-eqcharset(lower, m.R("az"))
-eqcharset(upper + lower, m.R("AZ", "az"))
-eqcharset(upper + lower, m.R("AZ", "cz", "aa", "bb", "90"))
-eqcharset(digit, m.S"01234567" + "8" + "9")
-eqcharset(upper, letter - lower)
-eqcharset(m.S(""), m.R())
-assert(cs2str(m.S("")) == "")
-
-eqcharset(m.S"\0", "\0")
-eqcharset(m.S"\1\0\2", m.R"\0\2")
-eqcharset(m.S"\1\0\2", m.R"\1\2" + "\0")
-eqcharset(m.S"\1\0\2" - "\0", m.R"\1\2")
-
-local word = alpha^1 * (1 - alpha)^0
-
-assert((word^0 * -1):match"alo alo")
-assert(m.match(word^1 * -1, "alo alo"))
-assert(m.match(word^2 * -1, "alo alo"))
-assert(not m.match(word^3 * -1, "alo alo"))
-
-assert(not m.match(word^-1 * -1, "alo alo"))
-assert(m.match(word^-2 * -1, "alo alo"))
-assert(m.match(word^-3 * -1, "alo alo"))
-
-local eos = m.P(-1)
-
-assert(m.match(digit^0 * letter * digit * eos, "1298a1"))
-assert(not m.match(digit^0 * letter * eos, "1257a1"))
-
-b = {
- [1] = "(" * (((1 - m.S"()") + #m.P"(" * m.V(1))^0) * ")"
-}
-
-assert(m.match(b, "(al())()"))
-assert(not m.match(b * eos, "(al())()"))
-assert(m.match(b * eos, "((al())()(é))"))
-assert(not m.match(b, "(al()()"))
-
-assert(not m.match(letter^1 - "for", "foreach"))
-assert(m.match(letter^1 - ("for" * eos), "foreach"))
-assert(not m.match(letter^1 - ("for" * eos), "for"))
-
-function basiclookfor (p)
- return m.P {
- [1] = p + (1 * m.V(1))
- }
-end
-
-function caplookfor (p)
- return basiclookfor(p:C())
-end
-
-assert(m.match(caplookfor(letter^1), " 4achou123...") == "achou")
-a = {m.match(caplookfor(letter^1)^0, " two words, one more ")}
-checkeq(a, {"two", "words", "one", "more"})
-
-assert(m.match( basiclookfor((#m.P(b) * 1) * m.Cp()), " ( (a)") == 7)
-
-a = {m.match(m.C(digit^1 * m.Cc"d") + m.C(letter^1 * m.Cc"l"), "123")}
-checkeq(a, {"123", "d"})
-
--- bug in LPeg 0.12 (nil value does not create a 'ktable')
-assert(m.match(m.Cc(nil), "") == nil)
-
-a = {m.match(m.C(digit^1 * m.Cc"d") + m.C(letter^1 * m.Cc"l"), "abcd")}
-checkeq(a, {"abcd", "l"})
-
-a = {m.match(m.Cc(10,20,30) * 'a' * m.Cp(), 'aaa')}
-checkeq(a, {10,20,30,2})
-a = {m.match(m.Cp() * m.Cc(10,20,30) * 'a' * m.Cp(), 'aaa')}
-checkeq(a, {1,10,20,30,2})
-a = m.match(m.Ct(m.Cp() * m.Cc(10,20,30) * 'a' * m.Cp()), 'aaa')
-checkeq(a, {1,10,20,30,2})
-a = m.match(m.Ct(m.Cp() * m.Cc(7,8) * m.Cc(10,20,30) * 'a' * m.Cp()), 'aaa')
-checkeq(a, {1,7,8,10,20,30,2})
-a = {m.match(m.Cc() * m.Cc() * m.Cc(1) * m.Cc(2,3,4) * m.Cc() * 'a', 'aaa')}
-checkeq(a, {1,2,3,4})
-
-a = {m.match(m.Cp() * letter^1 * m.Cp(), "abcd")}
-checkeq(a, {1, 5})
-
-
-t = {m.match({[1] = m.C(m.C(1) * m.V(1) + -1)}, "abc")}
-checkeq(t, {"abc", "a", "bc", "b", "c", "c", ""})
-
--- bug in 0.12 ('hascapture' did not check for captures inside a rule)
-do
- local pat = m.P{
- 'S';
- S1 = m.C('abc') + 3,
- S = #m.V('S1') -- rule has capture, but '#' must ignore it
- }
- assert(pat:match'abc' == 1)
-end
-
-
--- bug: loop in 'hascaptures'
-do
- local p = m.C(-m.P{m.P'x' * m.V(1) + m.P'y'})
- assert(p:match("xxx") == "")
-end
-
-
-
--- test for small capture boundary
-for i = 250,260 do
- assert(#m.match(m.C(i), string.rep('a', i)) == i)
- assert(#m.match(m.C(m.C(i)), string.rep('a', i)) == i)
-end
-
--- tests for any*n and any*-n
-for n = 1, 550, 13 do
- local x_1 = string.rep('x', n - 1)
- local x = x_1 .. 'a'
- assert(not m.P(n):match(x_1))
- assert(m.P(n):match(x) == n + 1)
- assert(n < 4 or m.match(m.P(n) + "xxx", x_1) == 4)
- assert(m.C(n):match(x) == x)
- assert(m.C(m.C(n)):match(x) == x)
- assert(m.P(-n):match(x_1) == 1)
- assert(not m.P(-n):match(x))
- assert(n < 13 or m.match(m.Cc(20) * ((n - 13) * m.P(10)) * 3, x) == 20)
- local n3 = math.floor(n/3)
- assert(m.match(n3 * m.Cp() * n3 * n3, x) == n3 + 1)
-end
-
--- true values
-assert(m.P(0):match("x") == 1)
-assert(m.P(0):match("") == 1)
-assert(m.C(0):match("x") == "")
-
-assert(m.match(m.Cc(0) * m.P(10) + m.Cc(1) * "xuxu", "xuxu") == 1)
-assert(m.match(m.Cc(0) * m.P(10) + m.Cc(1) * "xuxu", "xuxuxuxuxu") == 0)
-assert(m.match(m.C(m.P(2)^1), "abcde") == "abcd")
-p = m.Cc(0) * 1 + m.Cc(1) * 2 + m.Cc(2) * 3 + m.Cc(3) * 4
-
-
--- test for alternation optimization
-assert(m.match(m.P"a"^1 + "ab" + m.P"x"^0, "ab") == 2)
-assert(m.match((m.P"a"^1 + "ab" + m.P"x"^0 * 1)^0, "ab") == 3)
-assert(m.match(m.P"ab" + "cd" + "" + "cy" + "ak", "98") == 1)
-assert(m.match(m.P"ab" + "cd" + "ax" + "cy", "ax") == 3)
-assert(m.match("a" * m.P"b"^0 * "c" + "cd" + "ax" + "cy", "ax") == 3)
-assert(m.match((m.P"ab" + "cd" + "ax" + "cy")^0, "ax") == 3)
-assert(m.match(m.P(1) * "x" + m.S"" * "xu" + "ay", "ay") == 3)
-assert(m.match(m.P"abc" + "cde" + "aka", "aka") == 4)
-assert(m.match(m.S"abc" * "x" + "cde" + "aka", "ax") == 3)
-assert(m.match(m.S"abc" * "x" + "cde" + "aka", "aka") == 4)
-assert(m.match(m.S"abc" * "x" + "cde" + "aka", "cde") == 4)
-assert(m.match(m.S"abc" * "x" + "ide" + m.S"ab" * "ka", "aka") == 4)
-assert(m.match("ab" + m.S"abc" * m.P"y"^0 * "x" + "cde" + "aka", "ax") == 3)
-assert(m.match("ab" + m.S"abc" * m.P"y"^0 * "x" + "cde" + "aka", "aka") == 4)
-assert(m.match("ab" + m.S"abc" * m.P"y"^0 * "x" + "cde" + "aka", "cde") == 4)
-assert(m.match("ab" + m.S"abc" * m.P"y"^0 * "x" + "ide" + m.S"ab" * "ka", "aka") == 4)
-assert(m.match("ab" + m.S"abc" * m.P"y"^0 * "x" + "ide" + m.S"ab" * "ka", "ax") == 3)
-assert(m.match(m.P(1) * "x" + "cde" + m.S"ab" * "ka", "aka") == 4)
-assert(m.match(m.P(1) * "x" + "cde" + m.P(1) * "ka", "aka") == 4)
-assert(m.match(m.P(1) * "x" + "cde" + m.P(1) * "ka", "cde") == 4)
-assert(m.match(m.P"eb" + "cd" + m.P"e"^0 + "x", "ee") == 3)
-assert(m.match(m.P"ab" + "cd" + m.P"e"^0 + "x", "abcd") == 3)
-assert(m.match(m.P"ab" + "cd" + m.P"e"^0 + "x", "eeex") == 4)
-assert(m.match(m.P"ab" + "cd" + m.P"e"^0 + "x", "cd") == 3)
-assert(m.match(m.P"ab" + "cd" + m.P"e"^0 + "x", "x") == 1)
-assert(m.match(m.P"ab" + "cd" + m.P"e"^0 + "x" + "", "zee") == 1)
-assert(m.match(m.P"ab" + "cd" + m.P"e"^1 + "x", "abcd") == 3)
-assert(m.match(m.P"ab" + "cd" + m.P"e"^1 + "x", "eeex") == 4)
-assert(m.match(m.P"ab" + "cd" + m.P"e"^1 + "x", "cd") == 3)
-assert(m.match(m.P"ab" + "cd" + m.P"e"^1 + "x", "x") == 2)
-assert(m.match(m.P"ab" + "cd" + m.P"e"^1 + "x" + "", "zee") == 1)
-assert(not m.match(("aa" * m.P"bc"^-1 + "aab") * "e", "aabe"))
-
-assert(m.match("alo" * (m.P"\n" + -1), "alo") == 4)
-
-
--- bug in 0.12 (rc1)
-assert(m.match((m.P"\128\187\191" + m.S"abc")^0, "\128\187\191") == 4)
-
-assert(m.match(m.S"\0\128\255\127"^0, string.rep("\0\128\255\127", 10)) ==
- 4*10 + 1)
-
--- optimizations with optional parts
-assert(m.match(("ab" * -m.P"c")^-1, "abc") == 1)
-assert(m.match(("ab" * #m.P"c")^-1, "abd") == 1)
-assert(m.match(("ab" * m.B"c")^-1, "ab") == 1)
-assert(m.match(("ab" * m.P"cd"^0)^-1, "abcdcdc") == 7)
-
-assert(m.match(m.P"ab"^-1 - "c", "abcd") == 3)
-
-p = ('Aa' * ('Bb' * ('Cc' * m.P'Dd'^0)^0)^0)^-1
-assert(p:match("AaBbCcDdBbCcDdDdDdBb") == 21)
-
-
--- bug in 0.12.2
--- p = { ('ab' ('c' 'ef'?)*)? }
-p = m.C(('ab' * ('c' * m.P'ef'^-1)^0)^-1)
-s = "abcefccefc"
-assert(s == p:match(s))
-
-
-pi = "3.14159 26535 89793 23846 26433 83279 50288 41971 69399 37510"
-assert(m.match(m.Cs((m.P"1" / "a" + m.P"5" / "b" + m.P"9" / "c" + 1)^0), pi) ==
- m.match(m.Cs((m.P(1) / {["1"] = "a", ["5"] = "b", ["9"] = "c"})^0), pi))
-print"+"
-
-
--- tests for capture optimizations
-assert(m.match((m.P(3) + 4 * m.Cp()) * "a", "abca") == 5)
-t = {m.match(((m.P"a" + m.Cp()) * m.P"x")^0, "axxaxx")}
-checkeq(t, {3, 6})
-
-
--- tests for numbered captures
-p = m.C(1)
-assert(m.match(m.C(m.C(p * m.C(2)) * m.C(3)) / 3, "abcdefgh") == "a")
-assert(m.match(m.C(m.C(p * m.C(2)) * m.C(3)) / 1, "abcdefgh") == "abcdef")
-assert(m.match(m.C(m.C(p * m.C(2)) * m.C(3)) / 4, "abcdefgh") == "bc")
-assert(m.match(m.C(m.C(p * m.C(2)) * m.C(3)) / 0, "abcdefgh") == 7)
-
-a, b, c = m.match(p * (m.C(p * m.C(2)) * m.C(3) / 4) * p, "abcdefgh")
-assert(a == "a" and b == "efg" and c == "h")
-
--- test for table captures
-t = m.match(m.Ct(letter^1), "alo")
-checkeq(t, {})
-
-t, n = m.match(m.Ct(m.C(letter)^1) * m.Cc"t", "alo")
-assert(n == "t" and table.concat(t) == "alo")
-
-t = m.match(m.Ct(m.C(m.C(letter)^1)), "alo")
-assert(table.concat(t, ";") == "alo;a;l;o")
-
-t = m.match(m.Ct(m.C(m.C(letter)^1)), "alo")
-assert(table.concat(t, ";") == "alo;a;l;o")
-
-t = m.match(m.Ct(m.Ct((m.Cp() * letter * m.Cp())^1)), "alo")
-assert(table.concat(t[1], ";") == "1;2;2;3;3;4")
-
-t = m.match(m.Ct(m.C(m.C(1) * 1 * m.C(1))), "alo")
-checkeq(t, {"alo", "a", "o"})
-
-
--- tests for groups
-p = m.Cg(1) -- no capture
-assert(p:match('x') == 'x')
-p = m.Cg(m.P(true)/function () end * 1) -- no value
-assert(p:match('x') == 'x')
-p = m.Cg(m.Cg(m.Cg(m.C(1))))
-assert(p:match('x') == 'x')
-p = m.Cg(m.Cg(m.Cg(m.C(1))^0) * m.Cg(m.Cc(1) * m.Cc(2)))
-t = {p:match'abc'}
-checkeq(t, {'a', 'b', 'c', 1, 2})
-
-p = m.Ct(m.Cg(m.Cc(10), "hi") * m.C(1)^0 * m.Cg(m.Cc(20), "ho"))
-t = p:match''
-checkeq(t, {hi = 10, ho = 20})
-t = p:match'abc'
-checkeq(t, {hi = 10, ho = 20, 'a', 'b', 'c'})
-
--- non-string group names
-p = m.Ct(m.Cg(1, print) * m.Cg(1, 23.5) * m.Cg(1, io))
-t = p:match('abcdefghij')
-assert(t[print] == 'a' and t[23.5] == 'b' and t[io] == 'c')
-
-
--- test for error messages
-local function checkerr (msg, f, ...)
- local st, err = pcall(f, ...)
- assert(not st and m.match({ m.P(msg) + 1 * m.V(1) }, err))
-end
-
-checkerr("rule '1' may be left recursive", m.match, { m.V(1) * 'a' }, "a")
-checkerr("rule '1' used outside a grammar", m.match, m.V(1), "")
-checkerr("rule 'hiii' used outside a grammar", m.match, m.V('hiii'), "")
-checkerr("rule 'hiii' undefined in given grammar", m.match, { m.V('hiii') }, "")
-checkerr("undefined in given grammar", m.match, { m.V{} }, "")
-
-checkerr("rule 'A' is not a pattern", m.P, { m.P(1), A = {} })
-checkerr("grammar has no initial rule", m.P, { [print] = {} })
-
--- grammar with a long call chain before left recursion
-p = {'a',
- a = m.V'b' * m.V'c' * m.V'd' * m.V'a',
- b = m.V'c',
- c = m.V'd',
- d = m.V'e',
- e = m.V'f',
- f = m.V'g',
- g = m.P''
-}
-checkerr("rule 'a' may be left recursive", m.match, p, "a")
-
--- Bug in peephole optimization of LPeg 0.12 (IJmp -> ICommit)
--- the next grammar has an original sequence IJmp -> ICommit -> IJmp L1
--- that is optimized to ICommit L1
-
-p = m.P { (m.P {m.P'abc'} + 'ayz') * m.V'y'; y = m.P'x' }
-assert(p:match('abcx') == 5 and p:match('ayzx') == 5 and not p:match'abc')
-
-
-do
- -- large dynamic Cc
- local lim = 2^16 - 1
- local c = 0
- local function seq (n)
- if n == 1 then c = c + 1; return m.Cc(c)
- else
- local m = math.floor(n / 2)
- return seq(m) * seq(n - m)
- end
- end
- p = m.Ct(seq(lim))
- t = p:match('')
- assert(t[lim] == lim)
- checkerr("too many", function () p = p / print end)
- checkerr("too many", seq, lim + 1)
-end
-
-
--- tests for non-pattern as arguments to pattern functions
-
-p = { ('a' * m.V(1))^-1 } * m.P'b' * { 'a' * m.V(2); m.V(1)^-1 }
-assert(m.match(p, "aaabaac") == 7)
-
-p = m.P'abc' * 2 * -5 * true * 'de' -- mix of numbers and strings and booleans
-
-assert(p:match("abc01de") == 8)
-assert(p:match("abc01de3456") == nil)
-
-p = 'abc' * (2 * (-5 * (true * m.P'de')))
-
-assert(p:match("abc01de") == 8)
-assert(p:match("abc01de3456") == nil)
-
-p = { m.V(2), m.P"abc" } *
- (m.P{ "xx", xx = m.P"xx" } + { "x", x = m.P"a" * m.V"x" + "" })
-assert(p:match("abcaaaxx") == 7)
-assert(p:match("abcxx") == 6)
-
-
--- a large table capture
-t = m.match(m.Ct(m.C('a')^0), string.rep("a", 10000))
-assert(#t == 10000 and t[1] == 'a' and t[#t] == 'a')
-
-print('+')
-
-
--- bug in 0.10 (rechecking a grammar, after tail-call optimization)
-m.P{ m.P { (m.P(3) + "xuxu")^0 * m.V"xuxu", xuxu = m.P(1) } }
-
-local V = m.V
-
-local Space = m.S(" \n\t")^0
-local Number = m.C(m.R("09")^1) * Space
-local FactorOp = m.C(m.S("+-")) * Space
-local TermOp = m.C(m.S("*/")) * Space
-local Open = "(" * Space
-local Close = ")" * Space
-
-
-local function f_factor (v1, op, v2, d)
- assert(d == nil)
- if op == "+" then return v1 + v2
- else return v1 - v2
- end
-end
-
-
-local function f_term (v1, op, v2, d)
- assert(d == nil)
- if op == "*" then return v1 * v2
- else return v1 / v2
- end
-end
-
-G = m.P{ "Exp",
- Exp = m.Cf(V"Factor" * m.Cg(FactorOp * V"Factor")^0, f_factor);
- Factor = m.Cf(V"Term" * m.Cg(TermOp * V"Term")^0, f_term);
- Term = Number / tonumber + Open * V"Exp" * Close;
-}
-
-G = Space * G * -1
-
-for _, s in ipairs{" 3 + 5*9 / (1+1) ", "3+4/2", "3+3-3- 9*2+3*9/1- 8"} do
- assert(m.match(G, s) == loadstring("return "..s)())
-end
-
-
--- test for grammars (errors deep in calling non-terminals)
-g = m.P{
- [1] = m.V(2) + "a",
- [2] = "a" * m.V(3) * "x",
- [3] = "b" * m.V(3) + "c"
-}
-
-assert(m.match(g, "abbbcx") == 7)
-assert(m.match(g, "abbbbx") == 2)
-
-
--- tests for \0
-assert(m.match(m.R("\0\1")^1, "\0\1\0") == 4)
-assert(m.match(m.S("\0\1ab")^1, "\0\1\0a") == 5)
-assert(m.match(m.P(1)^3, "\0\1\0a") == 5)
-assert(not m.match(-4, "\0\1\0a"))
-assert(m.match("\0\1\0a", "\0\1\0a") == 5)
-assert(m.match("\0\0\0", "\0\0\0") == 4)
-assert(not m.match("\0\0\0", "\0\0"))
-
-
--- tests for predicates
-assert(not m.match(-m.P("a") * 2, "alo"))
-assert(m.match(- -m.P("a") * 2, "alo") == 3)
-assert(m.match(#m.P("a") * 2, "alo") == 3)
-assert(m.match(##m.P("a") * 2, "alo") == 3)
-assert(not m.match(##m.P("c") * 2, "alo"))
-assert(m.match(m.Cs((##m.P("a") * 1 + m.P(1)/".")^0), "aloal") == "a..a.")
-assert(m.match(m.Cs((#((#m.P"a")/"") * 1 + m.P(1)/".")^0), "aloal") == "a..a.")
-assert(m.match(m.Cs((- -m.P("a") * 1 + m.P(1)/".")^0), "aloal") == "a..a.")
-assert(m.match(m.Cs((-((-m.P"a")/"") * 1 + m.P(1)/".")^0), "aloal") == "a..a.")
-
-
--- fixed length
-do
- -- 'and' predicate using fixed length
- local p = m.C(#("a" * (m.P("bd") + "cd")) * 2)
- assert(p:match("acd") == "ac")
-
- p = #m.P{ "a" * m.V(2), m.P"b" } * 2
- assert(p:match("abc") == 3)
-
- p = #(m.P"abc" * m.B"c")
- assert(p:match("abc") == 1 and not p:match("ab"))
-
- p = m.P{ "a" * m.V(2), m.P"b"^1 }
- checkerr("pattern may not have fixed length", m.B, p)
-
- p = "abc" * (m.P"b"^1 + m.P"a"^0)
- checkerr("pattern may not have fixed length", m.B, p)
-end
-
-
-p = -m.P'a' * m.Cc(1) + -m.P'b' * m.Cc(2) + -m.P'c' * m.Cc(3)
-assert(p:match('a') == 2 and p:match('') == 1 and p:match('b') == 1)
-
-p = -m.P'a' * m.Cc(10) + #m.P'a' * m.Cc(20)
-assert(p:match('a') == 20 and p:match('') == 10 and p:match('b') == 10)
-
-
-
--- look-behind predicate
-assert(not m.match(m.B'a', 'a'))
-assert(m.match(1 * m.B'a', 'a') == 2)
-assert(not m.match(m.B(1), 'a'))
-assert(m.match(1 * m.B(1), 'a') == 2)
-assert(m.match(-m.B(1), 'a') == 1)
-assert(m.match(m.B(250), string.rep('a', 250)) == nil)
-assert(m.match(250 * m.B(250), string.rep('a', 250)) == 251)
-
--- look-behind with an open call
-checkerr("pattern may not have fixed length", m.B, m.V'S1')
-checkerr("too long to look behind", m.B, 260)
-
-B = #letter * -m.B(letter) + -letter * m.B(letter)
-x = m.Ct({ (B * m.Cp())^-1 * (1 * m.V(1) + m.P(true)) })
-checkeq(m.match(x, 'ar cal c'), {1,3,4,7,9,10})
-checkeq(m.match(x, ' ar cal '), {2,4,5,8})
-checkeq(m.match(x, ' '), {})
-checkeq(m.match(x, 'aloalo'), {1,7})
-
-assert(m.match(B, "a") == 1)
-assert(m.match(1 * B, "a") == 2)
-assert(not m.B(1 - letter):match(""))
-assert((-m.B(letter)):match("") == 1)
-
-assert((4 * m.B(letter, 4)):match("aaaaaaaa") == 5)
-assert(not (4 * m.B(#letter * 5)):match("aaaaaaaa"))
-assert((4 * -m.B(#letter * 5)):match("aaaaaaaa") == 5)
-
--- look-behind with grammars
-assert(m.match('a' * m.B{'x', x = m.P(3)}, 'aaa') == nil)
-assert(m.match('aa' * m.B{'x', x = m.P('aaa')}, 'aaaa') == nil)
-assert(m.match('aaa' * m.B{'x', x = m.P('aaa')}, 'aaaaa') == 4)
-
-
-
--- bug in 0.9
-assert(m.match(('a' * #m.P'b'), "ab") == 2)
-assert(not m.match(('a' * #m.P'b'), "a"))
-
-assert(not m.match(#m.S'567', ""))
-assert(m.match(#m.S'567' * 1, "6") == 2)
-
-
--- tests for Tail Calls
-
-p = m.P{ 'a' * m.V(1) + '' }
-assert(p:match(string.rep('a', 1000)) == 1001)
-
--- create a grammar for a simple DFA for even number of 0s and 1s
---
--- ->1 <---0---> 2
--- ^ ^
--- | |
--- 1 1
--- | |
--- V V
--- 3 <---0---> 4
---
--- this grammar should keep no backtracking information
-
-p = m.P{
- [1] = '0' * m.V(2) + '1' * m.V(3) + -1,
- [2] = '0' * m.V(1) + '1' * m.V(4),
- [3] = '0' * m.V(4) + '1' * m.V(1),
- [4] = '0' * m.V(3) + '1' * m.V(2),
-}
-
-assert(p:match(string.rep("00", 10000)))
-assert(p:match(string.rep("01", 10000)))
-assert(p:match(string.rep("011", 10000)))
-assert(not p:match(string.rep("011", 10000) .. "1"))
-assert(not p:match(string.rep("011", 10001)))
-
-
--- this grammar does need backtracking info.
-local lim = 10000
-p = m.P{ '0' * m.V(1) + '0' }
-checkerr("stack overflow", m.match, p, string.rep("0", lim))
-m.setmaxstack(2*lim)
-checkerr("stack overflow", m.match, p, string.rep("0", lim))
-m.setmaxstack(2*lim + 4)
-assert(m.match(p, string.rep("0", lim)) == lim + 1)
-
--- this repetition should not need stack space (only the call does)
-p = m.P{ ('a' * m.V(1))^0 * 'b' + 'c' }
-m.setmaxstack(200)
-assert(p:match(string.rep('a', 180) .. 'c' .. string.rep('b', 180)) == 362)
-
-m.setmaxstack(100) -- restore low limit
-
--- tests for optional start position
-assert(m.match("a", "abc", 1))
-assert(m.match("b", "abc", 2))
-assert(m.match("c", "abc", 3))
-assert(not m.match(1, "abc", 4))
-assert(m.match("a", "abc", -3))
-assert(m.match("b", "abc", -2))
-assert(m.match("c", "abc", -1))
-assert(m.match("abc", "abc", -4)) -- truncate to position 1
-
-assert(m.match("", "abc", 10)) -- empty string is everywhere!
-assert(m.match("", "", 10))
-assert(not m.match(1, "", 1))
-assert(not m.match(1, "", -1))
-assert(not m.match(1, "", 0))
-
-print("+")
-
-
--- tests for argument captures
-checkerr("invalid argument", m.Carg, 0)
-checkerr("invalid argument", m.Carg, -1)
-checkerr("invalid argument", m.Carg, 2^18)
-checkerr("absent extra argument #1", m.match, m.Carg(1), 'a', 1)
-assert(m.match(m.Carg(1), 'a', 1, print) == print)
-x = {m.match(m.Carg(1) * m.Carg(2), '', 1, 10, 20)}
-checkeq(x, {10, 20})
-
-assert(m.match(m.Cmt(m.Cg(m.Carg(3), "a") *
- m.Cmt(m.Cb("a"), function (s,i,x)
- assert(s == "a" and i == 1);
- return i, x+1
- end) *
- m.Carg(2), function (s,i,a,b,c)
- assert(s == "a" and i == 1 and c == nil);
- return i, 2*a + 3*b
- end) * "a",
- "a", 1, false, 100, 1000) == 2*1001 + 3*100)
-
-
--- tests for Lua functions
-
-t = {}
-s = ""
-p = m.P(function (s1, i) assert(s == s1); t[#t + 1] = i; return nil end) * false
-s = "hi, this is a test"
-assert(m.match(((p - m.P(-1)) + 2)^0, s) == string.len(s) + 1)
-assert(#t == string.len(s)/2 and t[1] == 1 and t[2] == 3)
-
-assert(not m.match(p, s))
-
-p = mt.__add(function (s, i) return i end, function (s, i) return nil end)
-assert(m.match(p, "alo"))
-
-p = mt.__mul(function (s, i) return i end, function (s, i) return nil end)
-assert(not m.match(p, "alo"))
-
-
-t = {}
-p = function (s1, i) assert(s == s1); t[#t + 1] = i; return i end
-s = "hi, this is a test"
-assert(m.match((m.P(1) * p)^0, s) == string.len(s) + 1)
-assert(#t == string.len(s) and t[1] == 2 and t[2] == 3)
-
-t = {}
-p = m.P(function (s1, i) assert(s == s1); t[#t + 1] = i;
- return i <= s1:len() and i end) * 1
-s = "hi, this is a test"
-assert(m.match(p^0, s) == string.len(s) + 1)
-assert(#t == string.len(s) + 1 and t[1] == 1 and t[2] == 2)
-
-p = function (s1, i) return m.match(m.P"a"^1, s1, i) end
-assert(m.match(p, "aaaa") == 5)
-assert(m.match(p, "abaa") == 2)
-assert(not m.match(p, "baaa"))
-
-checkerr("invalid position", m.match, function () return 2^20 end, s)
-checkerr("invalid position", m.match, function () return 0 end, s)
-checkerr("invalid position", m.match, function (s, i) return i - 1 end, s)
-checkerr("invalid position", m.match,
- m.P(1)^0 * function (_, i) return i - 1 end, s)
-assert(m.match(m.P(1)^0 * function (_, i) return i end * -1, s))
-checkerr("invalid position", m.match,
- m.P(1)^0 * function (_, i) return i + 1 end, s)
-assert(m.match(m.P(function (s, i) return s:len() + 1 end) * -1, s))
-checkerr("invalid position", m.match, m.P(function (s, i) return s:len() + 2 end) * -1, s)
-assert(not m.match(m.P(function (s, i) return s:len() end) * -1, s))
-assert(m.match(m.P(1)^0 * function (_, i) return true end, s) ==
- string.len(s) + 1)
-for i = 1, string.len(s) + 1 do
- assert(m.match(function (_, _) return i end, s) == i)
-end
-
-p = (m.P(function (s, i) return i%2 == 0 and i end) * 1
- + m.P(function (s, i) return i%2 ~= 0 and i + 2 <= s:len() and i end) * 3)^0
- * -1
-assert(p:match(string.rep('a', 14000)))
-
--- tests for Function Replacements
-f = function (a, ...) if a ~= "x" then return {a, ...} end end
-
-t = m.match(m.C(1)^0/f, "abc")
-checkeq(t, {"a", "b", "c"})
-
-t = m.match(m.C(1)^0/f/f, "abc")
-checkeq(t, {{"a", "b", "c"}})
-
-t = m.match(m.P(1)^0/f/f, "abc") -- no capture
-checkeq(t, {{"abc"}})
-
-t = m.match((m.P(1)^0/f * m.Cp())/f, "abc")
-checkeq(t, {{"abc"}, 4})
-
-t = m.match((m.C(1)^0/f * m.Cp())/f, "abc")
-checkeq(t, {{"a", "b", "c"}, 4})
-
-t = m.match((m.C(1)^0/f * m.Cp())/f, "xbc")
-checkeq(t, {4})
-
-t = m.match(m.C(m.C(1)^0)/f, "abc")
-checkeq(t, {"abc", "a", "b", "c"})
-
-g = function (...) return 1, ... end
-t = {m.match(m.C(1)^0/g/g, "abc")}
-checkeq(t, {1, 1, "a", "b", "c"})
-
-t = {m.match(m.Cc(nil,nil,4) * m.Cc(nil,3) * m.Cc(nil, nil) / g / g, "")}
-t1 = {1,1,nil,nil,4,nil,3,nil,nil}
-for i=1,10 do assert(t[i] == t1[i]) end
-
--- bug in 0.12.2: ktable with only nil could be eliminated when joining
--- with a pattern without ktable
-assert((m.P"aaa" * m.Cc(nil)):match"aaa" == nil)
-
-t = {m.match((m.C(1) / function (x) return x, x.."x" end)^0, "abc")}
-checkeq(t, {"a", "ax", "b", "bx", "c", "cx"})
-
-t = m.match(m.Ct((m.C(1) / function (x,y) return y, x end * m.Cc(1))^0), "abc")
-checkeq(t, {nil, "a", 1, nil, "b", 1, nil, "c", 1})
-
--- tests for Query Replacements
-
-assert(m.match(m.C(m.C(1)^0)/{abc = 10}, "abc") == 10)
-assert(m.match(m.C(1)^0/{a = 10}, "abc") == 10)
-assert(m.match(m.S("ba")^0/{ab = 40}, "abc") == 40)
-t = m.match(m.Ct((m.S("ba")/{a = 40})^0), "abc")
-checkeq(t, {40})
-
-assert(m.match(m.Cs((m.C(1)/{a=".", d=".."})^0), "abcdde") == ".bc....e")
-assert(m.match(m.Cs((m.C(1)/{f="."})^0), "abcdde") == "abcdde")
-assert(m.match(m.Cs((m.C(1)/{d="."})^0), "abcdde") == "abc..e")
-assert(m.match(m.Cs((m.C(1)/{e="."})^0), "abcdde") == "abcdd.")
-assert(m.match(m.Cs((m.C(1)/{e=".", f="+"})^0), "eefef") == "..+.+")
-assert(m.match(m.Cs((m.C(1))^0), "abcdde") == "abcdde")
-assert(m.match(m.Cs(m.C(m.C(1)^0)), "abcdde") == "abcdde")
-assert(m.match(1 * m.Cs(m.P(1)^0), "abcdde") == "bcdde")
-assert(m.match(m.Cs((m.C('0')/'x' + 1)^0), "abcdde") == "abcdde")
-assert(m.match(m.Cs((m.C('0')/'x' + 1)^0), "0ab0b0") == "xabxbx")
-assert(m.match(m.Cs((m.C('0')/'x' + m.P(1)/{b=3})^0), "b0a0b") == "3xax3")
-assert(m.match(m.P(1)/'%0%0'/{aa = -3} * 'x', 'ax') == -3)
-assert(m.match(m.C(1)/'%0%1'/{aa = 'z'}/{z = -3} * 'x', 'ax') == -3)
-
-assert(m.match(m.Cs(m.Cc(0) * (m.P(1)/"")), "4321") == "0")
-
-assert(m.match(m.Cs((m.P(1) / "%0")^0), "abcd") == "abcd")
-assert(m.match(m.Cs((m.P(1) / "%0.%0")^0), "abcd") == "a.ab.bc.cd.d")
-assert(m.match(m.Cs((m.P("a") / "%0.%0" + 1)^0), "abcad") == "a.abca.ad")
-assert(m.match(m.C("a") / "%1%%%0", "a") == "a%a")
-assert(m.match(m.Cs((m.P(1) / ".xx")^0), "abcd") == ".xx.xx.xx.xx")
-assert(m.match(m.Cp() * m.P(3) * m.Cp()/"%2%1%1 - %0 ", "abcde") ==
- "411 - abc ")
-
-assert(m.match(m.P(1)/"%0", "abc") == "a")
-checkerr("invalid capture index", m.match, m.P(1)/"%1", "abc")
-checkerr("invalid capture index", m.match, m.P(1)/"%9", "abc")
-
-p = m.C(1)
-p = p * p; p = p * p; p = p * p * m.C(1) / "%9 - %1"
-assert(p:match("1234567890") == "9 - 1")
-
-assert(m.match(m.Cc(print), "") == print)
-
--- too many captures (just ignore extra ones)
-p = m.C(1)^0 / "%2-%9-%0-%9"
-assert(p:match"01234567890123456789" == "1-8-01234567890123456789-8")
-s = string.rep("12345678901234567890", 20)
-assert(m.match(m.C(1)^0 / "%9-%1-%0-%3", s) == "9-1-" .. s .. "-3")
-
--- string captures with non-string subcaptures
-p = m.Cc('alo') * m.C(1) / "%1 - %2 - %1"
-assert(p:match'x' == 'alo - x - alo')
-
-checkerr("invalid capture value (a boolean)", m.match, m.Cc(true) / "%1", "a")
-
--- long strings for string capture
-l = 10000
-s = string.rep('a', l) .. string.rep('b', l) .. string.rep('c', l)
-
-p = (m.C(m.P'a'^1) * m.C(m.P'b'^1) * m.C(m.P'c'^1)) / '%3%2%1'
-
-assert(p:match(s) == string.rep('c', l) ..
- string.rep('b', l) ..
- string.rep('a', l))
-
-print"+"
-
--- accumulator capture
-function f (x) return x + 1 end
-assert(m.match(m.Cf(m.Cc(0) * m.C(1)^0, f), "alo alo") == 7)
-
-t = {m.match(m.Cf(m.Cc(1,2,3), error), "")}
-checkeq(t, {1})
-p = m.Cf(m.Ct(true) * m.Cg(m.C(m.R"az"^1) * "=" * m.C(m.R"az"^1) * ";")^0,
- rawset)
-t = p:match("a=b;c=du;xux=yuy;")
-checkeq(t, {a="b", c="du", xux="yuy"})
-
-
--- errors in accumulator capture
-
--- no initial capture
-checkerr("no initial value", m.match, m.Cf(m.P(5), print), 'aaaaaa')
--- no initial capture (very long match forces fold to be a pair open-close)
-checkerr("no initial value", m.match, m.Cf(m.P(500), print),
- string.rep('a', 600))
-
--- nested capture produces no initial value
-checkerr("no initial value", m.match, m.Cf(m.P(1) / {}, print), "alo")
-
-
--- tests for loop checker
-
-local function isnullable (p)
- checkerr("may accept empty string", function (p) return p^0 end, m.P(p))
-end
-
-isnullable(m.P("x")^-4)
-assert(m.match(((m.P(0) + 1) * m.S"al")^0, "alo") == 3)
-assert(m.match((("x" + #m.P(1))^-4 * m.S"al")^0, "alo") == 3)
-isnullable("")
-isnullable(m.P("x")^0)
-isnullable(m.P("x")^-1)
-isnullable(m.P("x") + 1 + 2 + m.P("a")^-1)
-isnullable(-m.P("ab"))
-isnullable(- -m.P("ab"))
-isnullable(# #(m.P("ab") + "xy"))
-isnullable(- #m.P("ab")^0)
-isnullable(# -m.P("ab")^1)
-isnullable(#m.V(3))
-isnullable(m.V(3) + m.V(1) + m.P('a')^-1)
-isnullable({[1] = m.V(2) * m.V(3), [2] = m.V(3), [3] = m.P(0)})
-assert(m.match(m.P{[1] = m.V(2) * m.V(3), [2] = m.V(3), [3] = m.P(1)}^0, "abc")
- == 3)
-assert(m.match(m.P""^-3, "a") == 1)
-
-local function find (p, s)
- return m.match(basiclookfor(p), s)
-end
-
-
-local function badgrammar (g, expected)
- local stat, msg = pcall(m.P, g)
- assert(not stat)
- if expected then assert(find(expected, msg)) end
-end
-
-badgrammar({[1] = m.V(1)}, "rule '1'")
-badgrammar({[1] = m.V(2)}, "rule '2'") -- invalid non-terminal
-badgrammar({[1] = m.V"x"}, "rule 'x'") -- invalid non-terminal
-badgrammar({[1] = m.V{}}, "rule '(a table)'") -- invalid non-terminal
-badgrammar({[1] = #m.P("a") * m.V(1)}, "rule '1'") -- left-recursive
-badgrammar({[1] = -m.P("a") * m.V(1)}, "rule '1'") -- left-recursive
-badgrammar({[1] = -1 * m.V(1)}, "rule '1'") -- left-recursive
-badgrammar({[1] = -1 + m.V(1)}, "rule '1'") -- left-recursive
-badgrammar({[1] = 1 * m.V(2), [2] = m.V(2)}, "rule '2'") -- left-recursive
-badgrammar({[1] = 1 * m.V(2)^0, [2] = m.P(0)}, "rule '1'") -- inf. loop
-badgrammar({ m.V(2), m.V(3)^0, m.P"" }, "rule '2'") -- inf. loop
-badgrammar({ m.V(2) * m.V(3)^0, m.V(3)^0, m.P"" }, "rule '1'") -- inf. loop
-badgrammar({"x", x = #(m.V(1) * 'a') }, "rule '1'") -- inf. loop
-badgrammar({ -(m.V(1) * 'a') }, "rule '1'") -- inf. loop
-badgrammar({"x", x = m.P'a'^-1 * m.V"x"}, "rule 'x'") -- left recursive
-badgrammar({"x", x = m.P'a' * m.V"y"^1, y = #m.P(1)}, "rule 'x'")
-
-assert(m.match({'a' * -m.V(1)}, "aaa") == 2)
-assert(m.match({'a' * -m.V(1)}, "aaaa") == nil)
-
-
--- good x bad grammars
-m.P{ ('a' * m.V(1))^-1 }
-m.P{ -('a' * m.V(1)) }
-m.P{ ('abc' * m.V(1))^-1 }
-m.P{ -('abc' * m.V(1)) }
-badgrammar{ #m.P('abc') * m.V(1) }
-badgrammar{ -('a' + m.V(1)) }
-m.P{ #('a' * m.V(1)) }
-badgrammar{ #('a' + m.V(1)) }
-m.P{ m.B{ m.P'abc' } * 'a' * m.V(1) }
-badgrammar{ m.B{ m.P'abc' } * m.V(1) }
-badgrammar{ ('a' + m.P'bcd')^-1 * m.V(1) }
-
-
--- simple tests for maximum sizes:
-local p = m.P"a"
-for i=1,14 do p = p * p end
-
-p = {}
-for i=1,100 do p[i] = m.P"a" end
-p = m.P(p)
-
-
--- strange values for rule labels
-
-p = m.P{ "print",
- print = m.V(print),
- [print] = m.V(_G),
- [_G] = m.P"a",
- }
-
-assert(p:match("a"))
-
--- initial rule
-g = {}
-for i = 1, 10 do g["i"..i] = "a" * m.V("i"..i+1) end
-g.i11 = m.P""
-for i = 1, 10 do
- g[1] = "i"..i
- local p = m.P(g)
- assert(p:match("aaaaaaaaaaa") == 11 - i + 1)
-end
-
-print"+"
-
-
--- tests for back references
-checkerr("back reference 'x' not found", m.match, m.Cb('x'), '')
-checkerr("back reference 'b' not found", m.match, m.Cg(1, 'a') * m.Cb('b'), 'a')
-
-p = m.Cg(m.C(1) * m.C(1), "k") * m.Ct(m.Cb("k"))
-t = p:match("ab")
-checkeq(t, {"a", "b"})
-
-p = m.P(true)
-for i = 1, 10 do p = p * m.Cg(1, i) end
-for i = 1, 10 do
- local p = p * m.Cb(i)
- assert(p:match('abcdefghij') == string.sub('abcdefghij', i, i))
-end
-
-
-t = {}
-function foo (p) t[#t + 1] = p; return p .. "x" end
-
-p = m.Cg(m.C(2) / foo, "x") * m.Cb"x" *
- m.Cg(m.Cb('x') / foo, "x") * m.Cb"x" *
- m.Cg(m.Cb('x') / foo, "x") * m.Cb"x" *
- m.Cg(m.Cb('x') / foo, "x") * m.Cb"x"
-x = {p:match'ab'}
-checkeq(x, {'abx', 'abxx', 'abxxx', 'abxxxx'})
-checkeq(t, {'ab',
- 'ab', 'abx',
- 'ab', 'abx', 'abxx',
- 'ab', 'abx', 'abxx', 'abxxx'})
-
-
-
--- tests for match-time captures
-
-p = m.P'a' * (function (s, i) return (s:sub(i, i) == 'b') and i + 1 end)
- + 'acd'
-
-assert(p:match('abc') == 3)
-assert(p:match('acd') == 4)
-
-local function id (s, i, ...)
- return true, ...
-end
-
-assert(m.Cmt(m.Cs((m.Cmt(m.S'abc' / { a = 'x', c = 'y' }, id) +
- m.R'09'^1 / string.char +
- m.P(1))^0), id):match"acb98+68c" == "xyb\98+\68y")
-
-p = m.P{'S',
- S = m.V'atom' * space
- + m.Cmt(m.Ct("(" * space * (m.Cmt(m.V'S'^1, id) + m.P(true)) * ")" * space), id),
- atom = m.Cmt(m.C(m.R("AZ", "az", "09")^1), id)
-}
-x = p:match"(a g () ((b) c) (d (e)))"
-checkeq(x, {'a', 'g', {}, {{'b'}, 'c'}, {'d', {'e'}}});
-
-x = {(m.Cmt(1, id)^0):match(string.rep('a', 500))}
-assert(#x == 500)
-
-local function id(s, i, x)
- if x == 'a' then return i, 1, 3, 7
- else return nil, 2, 4, 6, 8
- end
-end
-
-p = ((m.P(id) * 1 + m.Cmt(2, id) * 1 + m.Cmt(1, id) * 1))^0
-assert(table.concat{p:match('abababab')} == string.rep('137', 4))
-
-local function ref (s, i, x)
- return m.match(x, s, i - x:len())
-end
-
-assert(m.Cmt(m.P(1)^0, ref):match('alo') == 4)
-assert((m.P(1) * m.Cmt(m.P(1)^0, ref)):match('alo') == 4)
-assert(not (m.P(1) * m.Cmt(m.C(1)^0, ref)):match('alo'))
-
-ref = function (s,i,x) return i == tonumber(x) and i, 'xuxu' end
-
-assert(m.Cmt(1, ref):match'2')
-assert(not m.Cmt(1, ref):match'1')
-assert(m.Cmt(m.P(1)^0, ref):match'03')
-
-function ref (s, i, a, b)
- if a == b then return i, a:upper() end
-end
-
-p = m.Cmt(m.C(m.R"az"^1) * "-" * m.C(m.R"az"^1), ref)
-p = (any - p)^0 * p * any^0 * -1
-
-assert(p:match'abbbc-bc ddaa' == 'BC')
-
-do -- match-time captures cannot be optimized away
- local touch = 0
- f = m.P(function () touch = touch + 1; return true end)
-
- local function check(n) n = n or 1; assert(touch == n); touch = 0 end
-
- assert(m.match(f * false + 'b', 'a') == nil); check()
- assert(m.match(f * false + 'b', '') == nil); check()
- assert(m.match( (f * 'a')^0 * 'b', 'b') == 2); check()
- assert(m.match( (f * 'a')^0 * 'b', '') == nil); check()
- assert(m.match( (f * 'a')^-1 * 'b', 'b') == 2); check()
- assert(m.match( (f * 'a')^-1 * 'b', '') == nil); check()
- assert(m.match( ('b' + f * 'a')^-1 * 'b', '') == nil); check()
- assert(m.match( (m.P'b'^-1 * f * 'a')^-1 * 'b', '') == nil); check()
- assert(m.match( (-m.P(1) * m.P'b'^-1 * f * 'a')^-1 * 'b', '') == nil);
- check()
- assert(m.match( (f * 'a' + 'b')^-1 * 'b', '') == nil); check()
- assert(m.match(f * 'a' + f * 'b', 'b') == 2); check(2)
- assert(m.match(f * 'a' + f * 'b', 'a') == 2); check(1)
- assert(m.match(-f * 'a' + 'b', 'b') == 2); check(1)
- assert(m.match(-f * 'a' + 'b', '') == nil); check(1)
-end
-
-c = '[' * m.Cg(m.P'='^0, "init") * '[' *
- { m.Cmt(']' * m.C(m.P'='^0) * ']' * m.Cb("init"), function (_, _, s1, s2)
- return s1 == s2 end)
- + 1 * m.V(1) } / 0
-
-assert(c:match'[==[]]====]]]]==]===[]' == 18)
-assert(c:match'[[]=]====]=]]]==]===[]' == 14)
-assert(not c:match'[[]=]====]=]=]==]===[]')
-
-
--- old bug: optimization of concat with fail removed match-time capture
-p = m.Cmt(0, function (s) p = s end) * m.P(false)
-assert(not p:match('alo'))
-assert(p == 'alo')
-
-
--- ensure that failed match-time captures are not kept on Lua stack
-do
- local t = {__mode = "kv"}; setmetatable(t,t)
- local c = 0
-
- local function foo (s,i)
- collectgarbage();
- assert(next(t) == "__mode" and next(t, "__mode") == nil)
- local x = {}
- t[x] = true
- c = c + 1
- return i, x
- end
-
- local p = m.P{ m.Cmt(0, foo) * m.P(false) + m.P(1) * m.V(1) + m.P"" }
- p:match(string.rep('1', 10))
- assert(c == 11)
-end
-
-
--- Return a match-time capture that returns 'n' captures
-local function manyCmt (n)
- return m.Cmt("a", function ()
- local a = {}; for i = 1, n do a[i] = n - i end
- return true, unpack(a)
- end)
-end
-
--- bug in 1.0: failed match-time that used previous match-time results
-do
- local x
- local function aux (...) x = #{...}; return false end
- local res = {m.match(m.Cmt(manyCmt(20), aux) + manyCmt(10), "a")}
- assert(#res == 10 and res[1] == 9 and res[10] == 0)
-end
-
-
--- bug in 1.0: problems with math-times returning too many captures
-do
- local lim = 2^11 - 10
- local res = {m.match(manyCmt(lim), "a")}
- assert(#res == lim and res[1] == lim - 1 and res[lim] == 0)
- checkerr("too many", m.match, manyCmt(2^15), "a")
-end
-
-p = (m.P(function () return true, "a" end) * 'a'
- + m.P(function (s, i) return i, "aa", 20 end) * 'b'
- + m.P(function (s,i) if i <= #s then return i, "aaa" end end) * 1)^0
-
-t = {p:match('abacc')}
-checkeq(t, {'a', 'aa', 20, 'a', 'aaa', 'aaa'})
-
-
--------------------------------------------------------------------
--- Tests for 're' module
--------------------------------------------------------------------
-
-local re = require "re"
-
-local match, compile = re.match, re.compile
-
-
-
-assert(match("a", ".") == 2)
-assert(match("a", "''") == 1)
-assert(match("", " ! . ") == 1)
-assert(not match("a", " ! . "))
-assert(match("abcde", " ( . . ) * ") == 5)
-assert(match("abbcde", " [a-c] +") == 5)
-assert(match("0abbc1de", "'0' [a-c]+ '1'") == 7)
-assert(match("0zz1dda", "'0' [^a-c]+ 'a'") == 8)
-assert(match("abbc--", " [a-c] + +") == 5)
-assert(match("abbc--", " [ac-] +") == 2)
-assert(match("abbc--", " [-acb] + ") == 7)
-assert(not match("abbcde", " [b-z] + "))
-assert(match("abb\"de", '"abb"["]"de"') == 7)
-assert(match("abceeef", "'ac' ? 'ab' * 'c' { 'e' * } / 'abceeef' ") == "eee")
-assert(match("abceeef", "'ac'? 'ab'* 'c' { 'f'+ } / 'abceeef' ") == 8)
-local t = {match("abceefe", "( ( & 'e' {} ) ? . ) * ")}
-checkeq(t, {4, 5, 7})
-local t = {match("abceefe", "((&&'e' {})? .)*")}
-checkeq(t, {4, 5, 7})
-local t = {match("abceefe", "( ( ! ! 'e' {} ) ? . ) *")}
-checkeq(t, {4, 5, 7})
-local t = {match("abceefe", "(( & ! & ! 'e' {})? .)*")}
-checkeq(t, {4, 5, 7})
-
-assert(match("cccx" , "'ab'? ('ccc' / ('cde' / 'cd'*)? / 'ccc') 'x'+") == 5)
-assert(match("cdx" , "'ab'? ('ccc' / ('cde' / 'cd'*)? / 'ccc') 'x'+") == 4)
-assert(match("abcdcdx" , "'ab'? ('ccc' / ('cde' / 'cd'*)? / 'ccc') 'x'+") == 8)
-
-assert(match("abc", "a <- (. a)?") == 4)
-b = "balanced <- '(' ([^()] / balanced)* ')'"
-assert(match("(abc)", b))
-assert(match("(a(b)((c) (d)))", b))
-assert(not match("(a(b ((c) (d)))", b))
-
-b = compile[[ balanced <- "(" ([^()] / balanced)* ")" ]]
-assert(b == m.P(b))
-assert(b:match"((((a))(b)))")
-
-local g = [[
- S <- "0" B / "1" A / "" -- balanced strings
- A <- "0" S / "1" A A -- one more 0
- B <- "1" S / "0" B B -- one more 1
-]]
-assert(match("00011011", g) == 9)
-
-local g = [[
- S <- ("0" B / "1" A)*
- A <- "0" / "1" A A
- B <- "1" / "0" B B
-]]
-assert(match("00011011", g) == 9)
-assert(match("000110110", g) == 9)
-assert(match("011110110", g) == 3)
-assert(match("000110010", g) == 1)
-
-s = "aaaaaaaaaaaaaaaaaaaaaaaa"
-assert(match(s, "'a'^3") == 4)
-assert(match(s, "'a'^0") == 1)
-assert(match(s, "'a'^+3") == s:len() + 1)
-assert(not match(s, "'a'^+30"))
-assert(match(s, "'a'^-30") == s:len() + 1)
-assert(match(s, "'a'^-5") == 6)
-for i = 1, s:len() do
- assert(match(s, string.format("'a'^+%d", i)) >= i + 1)
- assert(match(s, string.format("'a'^-%d", i)) <= i + 1)
- assert(match(s, string.format("'a'^%d", i)) == i + 1)
-end
-assert(match("01234567890123456789", "[0-9]^3+") == 19)
-
-
-assert(match("01234567890123456789", "({....}{...}) -> '%2%1'") == "4560123")
-t = match("0123456789", "{| {.}* |}")
-checkeq(t, {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9"})
-assert(match("012345", "{| (..) -> '%0%0' |}")[1] == "0101")
-
-assert(match("abcdef", "( {.} {.} {.} {.} {.} ) -> 3") == "c")
-assert(match("abcdef", "( {:x: . :} {.} {.} {.} {.} ) -> 3") == "d")
-assert(match("abcdef", "( {:x: . :} {.} {.} {.} {.} ) -> 0") == 6)
-
-assert(not match("abcdef", "{:x: ({.} {.} {.}) -> 2 :} =x"))
-assert(match("abcbef", "{:x: ({.} {.} {.}) -> 2 :} =x"))
-
-eqcharset(compile"[]]", "]")
-eqcharset(compile"[][]", m.S"[]")
-eqcharset(compile"[]-]", m.S"-]")
-eqcharset(compile"[-]", m.S"-")
-eqcharset(compile"[az-]", m.S"a-z")
-eqcharset(compile"[-az]", m.S"a-z")
-eqcharset(compile"[a-z]", m.R"az")
-eqcharset(compile"[]['\"]", m.S[[]['"]])
-
-eqcharset(compile"[^]]", any - "]")
-eqcharset(compile"[^][]", any - m.S"[]")
-eqcharset(compile"[^]-]", any - m.S"-]")
-eqcharset(compile"[^]-]", any - m.S"-]")
-eqcharset(compile"[^-]", any - m.S"-")
-eqcharset(compile"[^az-]", any - m.S"a-z")
-eqcharset(compile"[^-az]", any - m.S"a-z")
-eqcharset(compile"[^a-z]", any - m.R"az")
-eqcharset(compile"[^]['\"]", any - m.S[[]['"]])
-
--- tests for comments in 're'
-e = compile[[
-A <- _B -- \t \n %nl .<> <- -> --
-_B <- 'x' --]]
-assert(e:match'xy' == 2)
-
--- tests for 're' with pre-definitions
-defs = {digits = m.R"09", letters = m.R"az", _=m.P"__"}
-e = compile("%letters (%letters / %digits)*", defs)
-assert(e:match"x123" == 5)
-e = compile("%_", defs)
-assert(e:match"__" == 3)
-
-e = compile([[
- S <- A+
- A <- %letters+ B
- B <- %digits+
-]], defs)
-
-e = compile("{[0-9]+'.'?[0-9]*} -> sin", math)
-assert(e:match("2.34") == math.sin(2.34))
-
-
-function eq (_, _, a, b) return a == b end
-
-c = re.compile([[
- longstring <- '[' {:init: '='* :} '[' close
- close <- ']' =init ']' / . close
-]])
-
-assert(c:match'[==[]]===]]]]==]===[]' == 17)
-assert(c:match'[[]=]====]=]]]==]===[]' == 14)
-assert(not c:match'[[]=]====]=]=]==]===[]')
-
-c = re.compile" '[' {:init: '='* :} '[' (!(']' =init ']') .)* ']' =init ']' !. "
-
-assert(c:match'[==[]]===]]]]==]')
-assert(c:match'[[]=]====]=][]==]===[]]')
-assert(not c:match'[[]=]====]=]=]==]===[]')
-
-assert(re.find("hi alalo", "{:x:..:} =x") == 4)
-assert(re.find("hi alalo", "{:x:..:} =x", 4) == 4)
-assert(not re.find("hi alalo", "{:x:..:} =x", 5))
-assert(re.find("hi alalo", "{'al'}", 5) == 6)
-assert(re.find("hi aloalolo", "{:x:..:} =x") == 8)
-assert(re.find("alo alohi x x", "{:word:%w+:}%W*(=word)!%w") == 11)
-
--- re.find discards any captures
-local a,b,c = re.find("alo", "{.}{'o'}")
-assert(a == 2 and b == 3 and c == nil)
-
-local function match (s,p)
- local i,e = re.find(s,p)
- if i then return s:sub(i, e) end
-end
-assert(match("alo alo", '[a-z]+') == "alo")
-assert(match("alo alo", '{:x: [a-z]+ :} =x') == nil)
-assert(match("alo alo", "{:x: [a-z]+ :} ' ' =x") == "alo alo")
-
-assert(re.gsub("alo alo", "[abc]", "x") == "xlo xlo")
-assert(re.gsub("alo alo", "%w+", ".") == ". .")
-assert(re.gsub("hi, how are you", "[aeiou]", string.upper) ==
- "hI, hOw ArE yOU")
-
-s = 'hi [[a comment[=]=] ending here]] and [=[another]]=]]'
-c = re.compile" '[' {:i: '='* :} '[' (!(']' =i ']') .)* ']' { =i } ']' "
-assert(re.gsub(s, c, "%2") == 'hi and =]')
-assert(re.gsub(s, c, "%0") == s)
-assert(re.gsub('[=[hi]=]', c, "%2") == '=')
-
-assert(re.find("", "!.") == 1)
-assert(re.find("alo", "!.") == 4)
-
-function addtag (s, i, t, tag) t.tag = tag; return i, t end
-
-c = re.compile([[
- doc <- block !.
- block <- (start {| (block / { [^<]+ })* |} end?) => addtag
- start <- '<' {:tag: [a-z]+ :} '>'
- end <- '' { =tag } '>'
-]], {addtag = addtag})
-
-x = c:match[[
-hihello buttotheend ]]
-checkeq(x, {tag='x', 'hi', {tag = 'b', 'hello'}, 'but',
- {'totheend'}})
-
-
--- tests for look-ahead captures
-x = {re.match("alo", "&(&{.}) !{'b'} {&(...)} &{..} {...} {!.}")}
-checkeq(x, {"", "alo", ""})
-
-assert(re.match("aloalo",
- "{~ (((&'al' {.}) -> 'A%1' / (&%l {.}) -> '%1%1') / .)* ~}")
- == "AallooAalloo")
-
--- bug in 0.9 (and older versions), due to captures in look-aheads
-x = re.compile[[ {~ (&(. ([a-z]* -> '*')) ([a-z]+ -> '+') ' '*)* ~} ]]
-assert(x:match"alo alo" == "+ +")
-
--- valid capture in look-ahead (used inside the look-ahead itself)
-x = re.compile[[
- S <- &({:two: .. :} . =two) {[a-z]+} / . S
-]]
-assert(x:match("hello aloaLo aloalo xuxu") == "aloalo")
-
-
-p = re.compile[[
- block <- {| {:ident:space*:} line
- ((=ident !space line) / &(=ident space) block)* |}
- line <- {[^%nl]*} %nl
- space <- '_' -- should be ' ', but '_' is simpler for editors
-]]
-
-t= p:match[[
-1
-__1.1
-__1.2
-____1.2.1
-____
-2
-__2.1
-]]
-checkeq(t, {"1", {"1.1", "1.2", {"1.2.1", "", ident = "____"}, ident = "__"},
- "2", {"2.1", ident = "__"}, ident = ""})
-
-
--- nested grammars
-p = re.compile[[
- s <- a b !.
- b <- ( x <- ('b' x)? )
- a <- ( x <- 'a' x? )
-]]
-
-assert(p:match'aaabbb')
-assert(p:match'aaa')
-assert(not p:match'bbb')
-assert(not p:match'aaabbba')
-
--- testing groups
-t = {re.match("abc", "{:S <- {:.:} {S} / '':}")}
-checkeq(t, {"a", "bc", "b", "c", "c", ""})
-
-t = re.match("1234", "{| {:a:.:} {:b:.:} {:c:.{.}:} |}")
-checkeq(t, {a="1", b="2", c="4"})
-t = re.match("1234", "{|{:a:.:} {:b:{.}{.}:} {:c:{.}:}|}")
-checkeq(t, {a="1", b="2", c="4"})
-t = re.match("12345", "{| {:.:} {:b:{.}{.}:} {:{.}{.}:} |}")
-checkeq(t, {"1", b="2", "4", "5"})
-t = re.match("12345", "{| {:.:} {:{:b:{.}{.}:}:} {:{.}{.}:} |}")
-checkeq(t, {"1", "23", "4", "5"})
-t = re.match("12345", "{| {:.:} {{:b:{.}{.}:}} {:{.}{.}:} |}")
-checkeq(t, {"1", "23", "4", "5"})
-
-
--- testing pre-defined names
-assert(os.setlocale("C") == "C")
-
-function eqlpeggsub (p1, p2)
- local s1 = cs2str(re.compile(p1))
- local s2 = string.gsub(allchar, "[^" .. p2 .. "]", "")
- -- if s1 ~= s2 then print(#s1,#s2) end
- assert(s1 == s2)
-end
-
-
-eqlpeggsub("%w", "%w")
-eqlpeggsub("%a", "%a")
-eqlpeggsub("%l", "%l")
-eqlpeggsub("%u", "%u")
-eqlpeggsub("%p", "%p")
-eqlpeggsub("%d", "%d")
-eqlpeggsub("%x", "%x")
-eqlpeggsub("%s", "%s")
-eqlpeggsub("%c", "%c")
-
-eqlpeggsub("%W", "%W")
-eqlpeggsub("%A", "%A")
-eqlpeggsub("%L", "%L")
-eqlpeggsub("%U", "%U")
-eqlpeggsub("%P", "%P")
-eqlpeggsub("%D", "%D")
-eqlpeggsub("%X", "%X")
-eqlpeggsub("%S", "%S")
-eqlpeggsub("%C", "%C")
-
-eqlpeggsub("[%w]", "%w")
-eqlpeggsub("[_%w]", "_%w")
-eqlpeggsub("[^%w]", "%W")
-eqlpeggsub("[%W%S]", "%W%S")
-
-re.updatelocale()
-
-
--- testing nested substitutions x string captures
-
-p = re.compile[[
- text <- {~ item* ~}
- item <- macro / [^()] / '(' item* ')'
- arg <- ' '* {~ (!',' item)* ~}
- args <- '(' arg (',' arg)* ')'
- macro <- ('apply' args) -> '%1(%2)'
- / ('add' args) -> '%1 + %2'
- / ('mul' args) -> '%1 * %2'
-]]
-
-assert(p:match"add(mul(a,b), apply(f,x))" == "a * b + f(x)")
-
-rev = re.compile[[ R <- (!.) -> '' / ({.} R) -> '%2%1']]
-
-assert(rev:match"0123456789" == "9876543210")
-
-
--- testing error messages in re
-
-local function errmsg (p, err)
- checkerr(err, re.compile, p)
-end
-
-errmsg('aaaa', "rule 'aaaa'")
-errmsg('a', 'outside')
-errmsg('b <- a', 'undefined')
-errmsg("x <- 'a' x <- 'b'", 'already defined')
-errmsg("'a' -", "near '-'")
-
-
-print"OK"
-
-