This commit is contained in:
parent
7d52c1da99
commit
7035731624
20
Makefile
20
Makefile
|
@ -26,12 +26,15 @@ MINGW_LINKER_FLAGS= \
|
|||
LINUX_INCLUDES= \
|
||||
`pkg-config --cflags gtk+-3.0`
|
||||
|
||||
LINUX_LIBS= \
|
||||
`pkg-config --libs gtk+-3.0`
|
||||
|
||||
LINUX64_LIBS= \
|
||||
`pkg-config --libs gtk+-3.0` \
|
||||
$(LINUX_LIBS) \
|
||||
-Llib/linux
|
||||
|
||||
LINUX32_LIBS= \
|
||||
`pkg-config --libs gtk+-3.0` \
|
||||
$(LINUX_LIBS) \
|
||||
-Llib/linux32
|
||||
|
||||
LINUX_ARM_LIBS= \
|
||||
|
@ -40,7 +43,7 @@ LINUX_ARM_LIBS= \
|
|||
LINUX_LINKER_FLAGS= \
|
||||
-D_GNU_SOURCE \
|
||||
-lSDL2 \
|
||||
-llua \
|
||||
-llua5.3 \
|
||||
-ldl \
|
||||
-lm \
|
||||
-lpthread \
|
||||
|
@ -114,6 +117,8 @@ SOURCES=\
|
|||
SOURCES_EXT= \
|
||||
src/html.c
|
||||
|
||||
LPEG_SRC= src/ext/lpeg/*.c
|
||||
|
||||
DEMO_ASSETS= \
|
||||
bin/assets/fire.tic.dat \
|
||||
bin/assets/p3d.tic.dat \
|
||||
|
@ -272,15 +277,18 @@ mingw: $(DEMO_ASSETS) $(TIC80_DLL) $(TIC_O) bin/html.o bin/res.o
|
|||
run: mingw
|
||||
$(MINGW_OUTPUT)
|
||||
|
||||
linux64:
|
||||
linux64-flto:
|
||||
$(CC) $(LINUX_INCLUDES) $(SOURCES) $(TIC80_SRC) $(SOURCES_EXT) $(OPT) $(INCLUDES) $(LINUX64_LIBS) $(LINUX_LINKER_FLAGS) -flto -o bin/tic
|
||||
|
||||
linux32:
|
||||
linux32-flto:
|
||||
$(CC) $(LINUX_INCLUDES) $(SOURCES) $(TIC80_SRC) $(SOURCES_EXT) $(OPT) $(INCLUDES) $(LINUX32_LIBS) $(LINUX_LINKER_FLAGS) -flto -o bin/tic
|
||||
|
||||
arm:
|
||||
arm-flto:
|
||||
$(CC) $(OPT_ARM) $(SOURCES) $(TIC80_SRC) $(OPT) $(INCLUDES) $(LINUX_ARM_LIBS) $(LINUX_LINKER_FLAGS) -flto -o bin/tic
|
||||
|
||||
linux:
|
||||
$(CC) $(LINUX_INCLUDES) $(SOURCES) $(LPEG_SRC) $(SOURCES_EXT) $(TIC80_SRC) $(OPT) $(INCLUDES) $(LINUX_LIBS) $(LINUX_LINKER_FLAGS) -o bin/tic
|
||||
|
||||
macosx:
|
||||
$(CC) $(SOURCES) $(TIC80_SRC) $(SOURCES_EXT) src/ext/file_dialog.m $(OPT) $(MACOSX_OPT) $(INCLUDES) $(MACOSX_LIBS) -o bin/tic
|
||||
|
||||
|
|
|
@ -31,10 +31,10 @@ made by [@matheuslessarodrigues](https://github.com/matheuslessarodrigues)
|
|||
## Linux
|
||||
run the following commands in the Terminal
|
||||
```
|
||||
sudo apt-get install git build-essential libgtk-3-dev
|
||||
sudo apt-get install git build-essential libgtk-3-dev libsdl2-dev lua5.3-dev libgif-dev lua-lpeg-dev zlib1g-dev
|
||||
git clone https://github.com/nesbox/TIC-80
|
||||
cd TIC-80
|
||||
make linux32 (or linux64/arm depending on your system)
|
||||
make linux
|
||||
```
|
||||
|
||||
## iOS / tvOS
|
||||
|
|
|
@ -0,0 +1,96 @@
|
|||
HISTORY for LPeg 1.0
|
||||
|
||||
* Changes from version 0.12 to 1.0
|
||||
---------------------------------
|
||||
+ group "names" can be any Lua value
|
||||
+ some bugs fixed
|
||||
+ other small improvements
|
||||
|
||||
* Changes from version 0.11 to 0.12
|
||||
---------------------------------
|
||||
+ no "unsigned short" limit for pattern sizes
|
||||
+ mathtime captures considered nullable
|
||||
+ some bugs fixed
|
||||
|
||||
* Changes from version 0.10 to 0.11
|
||||
-------------------------------
|
||||
+ complete reimplementation of the code generator
|
||||
+ new syntax for table captures
|
||||
+ new functions in module 're'
|
||||
+ other small improvements
|
||||
|
||||
* Changes from version 0.9 to 0.10
|
||||
-------------------------------
|
||||
+ backtrack stack has configurable size
|
||||
+ better error messages
|
||||
+ Notation for non-terminals in 're' back to A instead o <A>
|
||||
+ experimental look-behind pattern
|
||||
+ support for external extensions
|
||||
+ works with Lua 5.2
|
||||
+ consumes less C stack
|
||||
|
||||
- "and" predicates do not keep captures
|
||||
|
||||
* Changes from version 0.8 to 0.9
|
||||
-------------------------------
|
||||
+ The accumulator capture was replaced by a fold capture;
|
||||
programs that used the old 'lpeg.Ca' will need small changes.
|
||||
+ Some support for character classes from old C locales.
|
||||
+ A new named-group capture.
|
||||
|
||||
* Changes from version 0.7 to 0.8
|
||||
-------------------------------
|
||||
+ New "match-time" capture.
|
||||
+ New "argument capture" that allows passing arguments into the pattern.
|
||||
+ Better documentation for 're'.
|
||||
+ Several small improvements for 're'.
|
||||
+ The 're' module has an incompatibility with previous versions:
|
||||
now, any use of a non-terminal must be enclosed in angle brackets
|
||||
(like <B>).
|
||||
|
||||
* Changes from version 0.6 to 0.7
|
||||
-------------------------------
|
||||
+ Several improvements in module 're':
|
||||
- better documentation;
|
||||
- support for most captures (all but accumulator);
|
||||
- limited repetitions p{n,m}.
|
||||
+ Small improvements in efficiency.
|
||||
+ Several small bugs corrected (special thanks to Hans Hagen
|
||||
and Taco Hoekwater).
|
||||
|
||||
* Changes from version 0.5 to 0.6
|
||||
-------------------------------
|
||||
+ Support for non-numeric indices in grammars.
|
||||
+ Some bug fixes (thanks to the luatex team).
|
||||
+ Some new optimizations; (thanks to Mike Pall).
|
||||
+ A new page layout (thanks to Andre Carregal).
|
||||
+ Minimal documentation for module 're'.
|
||||
|
||||
* Changes from version 0.4 to 0.5
|
||||
-------------------------------
|
||||
+ Several optimizations.
|
||||
+ lpeg.P now accepts booleans.
|
||||
+ Some new examples.
|
||||
+ A proper license.
|
||||
+ Several small improvements.
|
||||
|
||||
* Changes from version 0.3 to 0.4
|
||||
-------------------------------
|
||||
+ Static check for loops in repetitions and grammars.
|
||||
+ Removed label option in captures.
|
||||
+ The implementation of captures uses less memory.
|
||||
|
||||
* Changes from version 0.2 to 0.3
|
||||
-------------------------------
|
||||
+ User-defined patterns in Lua.
|
||||
+ Several new captures.
|
||||
|
||||
* Changes from version 0.1 to 0.2
|
||||
-------------------------------
|
||||
+ Several small corrections.
|
||||
+ Handles embedded zeros like any other character.
|
||||
+ Capture "name" can be any Lua value.
|
||||
+ Unlimited number of captures.
|
||||
+ Match gets an optional initial position.
|
||||
|
||||
(end of HISTORY)
|
|
@ -0,0 +1,537 @@
|
|||
/*
|
||||
** $Id: lpcap.c,v 1.6 2015/06/15 16:09:57 roberto Exp $
|
||||
** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license)
|
||||
*/
|
||||
|
||||
#include "lua.h"
|
||||
#include "lauxlib.h"
|
||||
|
||||
#include "lpcap.h"
|
||||
#include "lptypes.h"
|
||||
|
||||
|
||||
#define captype(cap) ((cap)->kind)
|
||||
|
||||
#define isclosecap(cap) (captype(cap) == Cclose)
|
||||
|
||||
#define closeaddr(c) ((c)->s + (c)->siz - 1)
|
||||
|
||||
#define isfullcap(cap) ((cap)->siz != 0)
|
||||
|
||||
#define getfromktable(cs,v) lua_rawgeti((cs)->L, ktableidx((cs)->ptop), v)
|
||||
|
||||
#define pushluaval(cs) getfromktable(cs, (cs)->cap->idx)
|
||||
|
||||
|
||||
|
||||
/*
|
||||
** Put at the cache for Lua values the value indexed by 'v' in ktable
|
||||
** of the running pattern (if it is not there yet); returns its index.
|
||||
*/
|
||||
static int updatecache (CapState *cs, int v) {
|
||||
int idx = cs->ptop + 1; /* stack index of cache for Lua values */
|
||||
if (v != cs->valuecached) { /* not there? */
|
||||
getfromktable(cs, v); /* get value from 'ktable' */
|
||||
lua_replace(cs->L, idx); /* put it at reserved stack position */
|
||||
cs->valuecached = v; /* keep track of what is there */
|
||||
}
|
||||
return idx;
|
||||
}
|
||||
|
||||
|
||||
static int pushcapture (CapState *cs);
|
||||
|
||||
|
||||
/*
|
||||
** Goes back in a list of captures looking for an open capture
|
||||
** corresponding to a close
|
||||
*/
|
||||
static Capture *findopen (Capture *cap) {
|
||||
int n = 0; /* number of closes waiting an open */
|
||||
for (;;) {
|
||||
cap--;
|
||||
if (isclosecap(cap)) n++; /* one more open to skip */
|
||||
else if (!isfullcap(cap))
|
||||
if (n-- == 0) return cap;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Go to the next capture
|
||||
*/
|
||||
static void nextcap (CapState *cs) {
|
||||
Capture *cap = cs->cap;
|
||||
if (!isfullcap(cap)) { /* not a single capture? */
|
||||
int n = 0; /* number of opens waiting a close */
|
||||
for (;;) { /* look for corresponding close */
|
||||
cap++;
|
||||
if (isclosecap(cap)) {
|
||||
if (n-- == 0) break;
|
||||
}
|
||||
else if (!isfullcap(cap)) n++;
|
||||
}
|
||||
}
|
||||
cs->cap = cap + 1; /* + 1 to skip last close (or entire single capture) */
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Push on the Lua stack all values generated by nested captures inside
|
||||
** the current capture. Returns number of values pushed. 'addextra'
|
||||
** makes it push the entire match after all captured values. The
|
||||
** entire match is pushed also if there are no other nested values,
|
||||
** so the function never returns zero.
|
||||
*/
|
||||
static int pushnestedvalues (CapState *cs, int addextra) {
|
||||
Capture *co = cs->cap;
|
||||
if (isfullcap(cs->cap++)) { /* no nested captures? */
|
||||
lua_pushlstring(cs->L, co->s, co->siz - 1); /* push whole match */
|
||||
return 1; /* that is it */
|
||||
}
|
||||
else {
|
||||
int n = 0;
|
||||
while (!isclosecap(cs->cap)) /* repeat for all nested patterns */
|
||||
n += pushcapture(cs);
|
||||
if (addextra || n == 0) { /* need extra? */
|
||||
lua_pushlstring(cs->L, co->s, cs->cap->s - co->s); /* push whole match */
|
||||
n++;
|
||||
}
|
||||
cs->cap++; /* skip close entry */
|
||||
return n;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Push only the first value generated by nested captures
|
||||
*/
|
||||
static void pushonenestedvalue (CapState *cs) {
|
||||
int n = pushnestedvalues(cs, 0);
|
||||
if (n > 1)
|
||||
lua_pop(cs->L, n - 1); /* pop extra values */
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Try to find a named group capture with the name given at the top of
|
||||
** the stack; goes backward from 'cap'.
|
||||
*/
|
||||
static Capture *findback (CapState *cs, Capture *cap) {
|
||||
lua_State *L = cs->L;
|
||||
while (cap-- > cs->ocap) { /* repeat until end of list */
|
||||
if (isclosecap(cap))
|
||||
cap = findopen(cap); /* skip nested captures */
|
||||
else if (!isfullcap(cap))
|
||||
continue; /* opening an enclosing capture: skip and get previous */
|
||||
if (captype(cap) == Cgroup) {
|
||||
getfromktable(cs, cap->idx); /* get group name */
|
||||
if (lp_equal(L, -2, -1)) { /* right group? */
|
||||
lua_pop(L, 2); /* remove reference name and group name */
|
||||
return cap;
|
||||
}
|
||||
else lua_pop(L, 1); /* remove group name */
|
||||
}
|
||||
}
|
||||
luaL_error(L, "back reference '%s' not found", lua_tostring(L, -1));
|
||||
return NULL; /* to avoid warnings */
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Back-reference capture. Return number of values pushed.
|
||||
*/
|
||||
static int backrefcap (CapState *cs) {
|
||||
int n;
|
||||
Capture *curr = cs->cap;
|
||||
pushluaval(cs); /* reference name */
|
||||
cs->cap = findback(cs, curr); /* find corresponding group */
|
||||
n = pushnestedvalues(cs, 0); /* push group's values */
|
||||
cs->cap = curr + 1;
|
||||
return n;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Table capture: creates a new table and populates it with nested
|
||||
** captures.
|
||||
*/
|
||||
static int tablecap (CapState *cs) {
|
||||
lua_State *L = cs->L;
|
||||
int n = 0;
|
||||
lua_newtable(L);
|
||||
if (isfullcap(cs->cap++))
|
||||
return 1; /* table is empty */
|
||||
while (!isclosecap(cs->cap)) {
|
||||
if (captype(cs->cap) == Cgroup && cs->cap->idx != 0) { /* named group? */
|
||||
pushluaval(cs); /* push group name */
|
||||
pushonenestedvalue(cs);
|
||||
lua_settable(L, -3);
|
||||
}
|
||||
else { /* not a named group */
|
||||
int i;
|
||||
int k = pushcapture(cs);
|
||||
for (i = k; i > 0; i--) /* store all values into table */
|
||||
lua_rawseti(L, -(i + 1), n + i);
|
||||
n += k;
|
||||
}
|
||||
}
|
||||
cs->cap++; /* skip close entry */
|
||||
return 1; /* number of values pushed (only the table) */
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Table-query capture
|
||||
*/
|
||||
static int querycap (CapState *cs) {
|
||||
int idx = cs->cap->idx;
|
||||
pushonenestedvalue(cs); /* get nested capture */
|
||||
lua_gettable(cs->L, updatecache(cs, idx)); /* query cap. value at table */
|
||||
if (!lua_isnil(cs->L, -1))
|
||||
return 1;
|
||||
else { /* no value */
|
||||
lua_pop(cs->L, 1); /* remove nil */
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Fold capture
|
||||
*/
|
||||
static int foldcap (CapState *cs) {
|
||||
int n;
|
||||
lua_State *L = cs->L;
|
||||
int idx = cs->cap->idx;
|
||||
if (isfullcap(cs->cap++) || /* no nested captures? */
|
||||
isclosecap(cs->cap) || /* no nested captures (large subject)? */
|
||||
(n = pushcapture(cs)) == 0) /* nested captures with no values? */
|
||||
return luaL_error(L, "no initial value for fold capture");
|
||||
if (n > 1)
|
||||
lua_pop(L, n - 1); /* leave only one result for accumulator */
|
||||
while (!isclosecap(cs->cap)) {
|
||||
lua_pushvalue(L, updatecache(cs, idx)); /* get folding function */
|
||||
lua_insert(L, -2); /* put it before accumulator */
|
||||
n = pushcapture(cs); /* get next capture's values */
|
||||
lua_call(L, n + 1, 1); /* call folding function */
|
||||
}
|
||||
cs->cap++; /* skip close entry */
|
||||
return 1; /* only accumulator left on the stack */
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Function capture
|
||||
*/
|
||||
static int functioncap (CapState *cs) {
|
||||
int n;
|
||||
int top = lua_gettop(cs->L);
|
||||
pushluaval(cs); /* push function */
|
||||
n = pushnestedvalues(cs, 0); /* push nested captures */
|
||||
lua_call(cs->L, n, LUA_MULTRET); /* call function */
|
||||
return lua_gettop(cs->L) - top; /* return function's results */
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Select capture
|
||||
*/
|
||||
static int numcap (CapState *cs) {
|
||||
int idx = cs->cap->idx; /* value to select */
|
||||
if (idx == 0) { /* no values? */
|
||||
nextcap(cs); /* skip entire capture */
|
||||
return 0; /* no value produced */
|
||||
}
|
||||
else {
|
||||
int n = pushnestedvalues(cs, 0);
|
||||
if (n < idx) /* invalid index? */
|
||||
return luaL_error(cs->L, "no capture '%d'", idx);
|
||||
else {
|
||||
lua_pushvalue(cs->L, -(n - idx + 1)); /* get selected capture */
|
||||
lua_replace(cs->L, -(n + 1)); /* put it in place of 1st capture */
|
||||
lua_pop(cs->L, n - 1); /* remove other captures */
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Return the stack index of the first runtime capture in the given
|
||||
** list of captures (or zero if no runtime captures)
|
||||
*/
|
||||
int finddyncap (Capture *cap, Capture *last) {
|
||||
for (; cap < last; cap++) {
|
||||
if (cap->kind == Cruntime)
|
||||
return cap->idx; /* stack position of first capture */
|
||||
}
|
||||
return 0; /* no dynamic captures in this segment */
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Calls a runtime capture. Returns number of captures removed by
|
||||
** the call, including the initial Cgroup. (Captures to be added are
|
||||
** on the Lua stack.)
|
||||
*/
|
||||
int runtimecap (CapState *cs, Capture *close, const char *s, int *rem) {
|
||||
int n, id;
|
||||
lua_State *L = cs->L;
|
||||
int otop = lua_gettop(L);
|
||||
Capture *open = findopen(close);
|
||||
assert(captype(open) == Cgroup);
|
||||
id = finddyncap(open, close); /* get first dynamic capture argument */
|
||||
close->kind = Cclose; /* closes the group */
|
||||
close->s = s;
|
||||
cs->cap = open; cs->valuecached = 0; /* prepare capture state */
|
||||
luaL_checkstack(L, 4, "too many runtime captures");
|
||||
pushluaval(cs); /* push function to be called */
|
||||
lua_pushvalue(L, SUBJIDX); /* push original subject */
|
||||
lua_pushinteger(L, s - cs->s + 1); /* push current position */
|
||||
n = pushnestedvalues(cs, 0); /* push nested captures */
|
||||
lua_call(L, n + 2, LUA_MULTRET); /* call dynamic function */
|
||||
if (id > 0) { /* are there old dynamic captures to be removed? */
|
||||
int i;
|
||||
for (i = id; i <= otop; i++)
|
||||
lua_remove(L, id); /* remove old dynamic captures */
|
||||
*rem = otop - id + 1; /* total number of dynamic captures removed */
|
||||
}
|
||||
else
|
||||
*rem = 0; /* no dynamic captures removed */
|
||||
return close - open; /* number of captures of all kinds removed */
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Auxiliary structure for substitution and string captures: keep
|
||||
** information about nested captures for future use, avoiding to push
|
||||
** string results into Lua
|
||||
*/
|
||||
typedef struct StrAux {
|
||||
int isstring; /* whether capture is a string */
|
||||
union {
|
||||
Capture *cp; /* if not a string, respective capture */
|
||||
struct { /* if it is a string... */
|
||||
const char *s; /* ... starts here */
|
||||
const char *e; /* ... ends here */
|
||||
} s;
|
||||
} u;
|
||||
} StrAux;
|
||||
|
||||
#define MAXSTRCAPS 10
|
||||
|
||||
/*
|
||||
** Collect values from current capture into array 'cps'. Current
|
||||
** capture must be Cstring (first call) or Csimple (recursive calls).
|
||||
** (In first call, fills %0 with whole match for Cstring.)
|
||||
** Returns number of elements in the array that were filled.
|
||||
*/
|
||||
static int getstrcaps (CapState *cs, StrAux *cps, int n) {
|
||||
int k = n++;
|
||||
cps[k].isstring = 1; /* get string value */
|
||||
cps[k].u.s.s = cs->cap->s; /* starts here */
|
||||
if (!isfullcap(cs->cap++)) { /* nested captures? */
|
||||
while (!isclosecap(cs->cap)) { /* traverse them */
|
||||
if (n >= MAXSTRCAPS) /* too many captures? */
|
||||
nextcap(cs); /* skip extra captures (will not need them) */
|
||||
else if (captype(cs->cap) == Csimple) /* string? */
|
||||
n = getstrcaps(cs, cps, n); /* put info. into array */
|
||||
else {
|
||||
cps[n].isstring = 0; /* not a string */
|
||||
cps[n].u.cp = cs->cap; /* keep original capture */
|
||||
nextcap(cs);
|
||||
n++;
|
||||
}
|
||||
}
|
||||
cs->cap++; /* skip close */
|
||||
}
|
||||
cps[k].u.s.e = closeaddr(cs->cap - 1); /* ends here */
|
||||
return n;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** add next capture value (which should be a string) to buffer 'b'
|
||||
*/
|
||||
static int addonestring (luaL_Buffer *b, CapState *cs, const char *what);
|
||||
|
||||
|
||||
/*
|
||||
** String capture: add result to buffer 'b' (instead of pushing
|
||||
** it into the stack)
|
||||
*/
|
||||
static void stringcap (luaL_Buffer *b, CapState *cs) {
|
||||
StrAux cps[MAXSTRCAPS];
|
||||
int n;
|
||||
size_t len, i;
|
||||
const char *fmt; /* format string */
|
||||
fmt = lua_tolstring(cs->L, updatecache(cs, cs->cap->idx), &len);
|
||||
n = getstrcaps(cs, cps, 0) - 1; /* collect nested captures */
|
||||
for (i = 0; i < len; i++) { /* traverse them */
|
||||
if (fmt[i] != '%') /* not an escape? */
|
||||
luaL_addchar(b, fmt[i]); /* add it to buffer */
|
||||
else if (fmt[++i] < '0' || fmt[i] > '9') /* not followed by a digit? */
|
||||
luaL_addchar(b, fmt[i]); /* add to buffer */
|
||||
else {
|
||||
int l = fmt[i] - '0'; /* capture index */
|
||||
if (l > n)
|
||||
luaL_error(cs->L, "invalid capture index (%d)", l);
|
||||
else if (cps[l].isstring)
|
||||
luaL_addlstring(b, cps[l].u.s.s, cps[l].u.s.e - cps[l].u.s.s);
|
||||
else {
|
||||
Capture *curr = cs->cap;
|
||||
cs->cap = cps[l].u.cp; /* go back to evaluate that nested capture */
|
||||
if (!addonestring(b, cs, "capture"))
|
||||
luaL_error(cs->L, "no values in capture index %d", l);
|
||||
cs->cap = curr; /* continue from where it stopped */
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Substitution capture: add result to buffer 'b'
|
||||
*/
|
||||
static void substcap (luaL_Buffer *b, CapState *cs) {
|
||||
const char *curr = cs->cap->s;
|
||||
if (isfullcap(cs->cap)) /* no nested captures? */
|
||||
luaL_addlstring(b, curr, cs->cap->siz - 1); /* keep original text */
|
||||
else {
|
||||
cs->cap++; /* skip open entry */
|
||||
while (!isclosecap(cs->cap)) { /* traverse nested captures */
|
||||
const char *next = cs->cap->s;
|
||||
luaL_addlstring(b, curr, next - curr); /* add text up to capture */
|
||||
if (addonestring(b, cs, "replacement"))
|
||||
curr = closeaddr(cs->cap - 1); /* continue after match */
|
||||
else /* no capture value */
|
||||
curr = next; /* keep original text in final result */
|
||||
}
|
||||
luaL_addlstring(b, curr, cs->cap->s - curr); /* add last piece of text */
|
||||
}
|
||||
cs->cap++; /* go to next capture */
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Evaluates a capture and adds its first value to buffer 'b'; returns
|
||||
** whether there was a value
|
||||
*/
|
||||
static int addonestring (luaL_Buffer *b, CapState *cs, const char *what) {
|
||||
switch (captype(cs->cap)) {
|
||||
case Cstring:
|
||||
stringcap(b, cs); /* add capture directly to buffer */
|
||||
return 1;
|
||||
case Csubst:
|
||||
substcap(b, cs); /* add capture directly to buffer */
|
||||
return 1;
|
||||
default: {
|
||||
lua_State *L = cs->L;
|
||||
int n = pushcapture(cs);
|
||||
if (n > 0) {
|
||||
if (n > 1) lua_pop(L, n - 1); /* only one result */
|
||||
if (!lua_isstring(L, -1))
|
||||
luaL_error(L, "invalid %s value (a %s)", what, luaL_typename(L, -1));
|
||||
luaL_addvalue(b);
|
||||
}
|
||||
return n;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Push all values of the current capture into the stack; returns
|
||||
** number of values pushed
|
||||
*/
|
||||
static int pushcapture (CapState *cs) {
|
||||
lua_State *L = cs->L;
|
||||
luaL_checkstack(L, 4, "too many captures");
|
||||
switch (captype(cs->cap)) {
|
||||
case Cposition: {
|
||||
lua_pushinteger(L, cs->cap->s - cs->s + 1);
|
||||
cs->cap++;
|
||||
return 1;
|
||||
}
|
||||
case Cconst: {
|
||||
pushluaval(cs);
|
||||
cs->cap++;
|
||||
return 1;
|
||||
}
|
||||
case Carg: {
|
||||
int arg = (cs->cap++)->idx;
|
||||
if (arg + FIXEDARGS > cs->ptop)
|
||||
return luaL_error(L, "reference to absent extra argument #%d", arg);
|
||||
lua_pushvalue(L, arg + FIXEDARGS);
|
||||
return 1;
|
||||
}
|
||||
case Csimple: {
|
||||
int k = pushnestedvalues(cs, 1);
|
||||
lua_insert(L, -k); /* make whole match be first result */
|
||||
return k;
|
||||
}
|
||||
case Cruntime: {
|
||||
lua_pushvalue(L, (cs->cap++)->idx); /* value is in the stack */
|
||||
return 1;
|
||||
}
|
||||
case Cstring: {
|
||||
luaL_Buffer b;
|
||||
luaL_buffinit(L, &b);
|
||||
stringcap(&b, cs);
|
||||
luaL_pushresult(&b);
|
||||
return 1;
|
||||
}
|
||||
case Csubst: {
|
||||
luaL_Buffer b;
|
||||
luaL_buffinit(L, &b);
|
||||
substcap(&b, cs);
|
||||
luaL_pushresult(&b);
|
||||
return 1;
|
||||
}
|
||||
case Cgroup: {
|
||||
if (cs->cap->idx == 0) /* anonymous group? */
|
||||
return pushnestedvalues(cs, 0); /* add all nested values */
|
||||
else { /* named group: add no values */
|
||||
nextcap(cs); /* skip capture */
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
case Cbackref: return backrefcap(cs);
|
||||
case Ctable: return tablecap(cs);
|
||||
case Cfunction: return functioncap(cs);
|
||||
case Cnum: return numcap(cs);
|
||||
case Cquery: return querycap(cs);
|
||||
case Cfold: return foldcap(cs);
|
||||
default: assert(0); return 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Prepare a CapState structure and traverse the entire list of
|
||||
** captures in the stack pushing its results. 's' is the subject
|
||||
** string, 'r' is the final position of the match, and 'ptop'
|
||||
** the index in the stack where some useful values were pushed.
|
||||
** Returns the number of results pushed. (If the list produces no
|
||||
** results, push the final position of the match.)
|
||||
*/
|
||||
int getcaptures (lua_State *L, const char *s, const char *r, int ptop) {
|
||||
Capture *capture = (Capture *)lua_touserdata(L, caplistidx(ptop));
|
||||
int n = 0;
|
||||
if (!isclosecap(capture)) { /* is there any capture? */
|
||||
CapState cs;
|
||||
cs.ocap = cs.cap = capture; cs.L = L;
|
||||
cs.s = s; cs.valuecached = 0; cs.ptop = ptop;
|
||||
do { /* collect their values */
|
||||
n += pushcapture(&cs);
|
||||
} while (!isclosecap(cs.cap));
|
||||
}
|
||||
if (n == 0) { /* no capture values? */
|
||||
lua_pushinteger(L, r - s + 1); /* return only end position */
|
||||
n = 1;
|
||||
}
|
||||
return n;
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,56 @@
|
|||
/*
|
||||
** $Id: lpcap.h,v 1.3 2016/09/13 17:45:58 roberto Exp $
|
||||
*/
|
||||
|
||||
#if !defined(lpcap_h)
|
||||
#define lpcap_h
|
||||
|
||||
|
||||
#include "lptypes.h"
|
||||
|
||||
|
||||
/* kinds of captures */
|
||||
typedef enum CapKind {
|
||||
Cclose, /* not used in trees */
|
||||
Cposition,
|
||||
Cconst, /* ktable[key] is Lua constant */
|
||||
Cbackref, /* ktable[key] is "name" of group to get capture */
|
||||
Carg, /* 'key' is arg's number */
|
||||
Csimple, /* next node is pattern */
|
||||
Ctable, /* next node is pattern */
|
||||
Cfunction, /* ktable[key] is function; next node is pattern */
|
||||
Cquery, /* ktable[key] is table; next node is pattern */
|
||||
Cstring, /* ktable[key] is string; next node is pattern */
|
||||
Cnum, /* numbered capture; 'key' is number of value to return */
|
||||
Csubst, /* substitution capture; next node is pattern */
|
||||
Cfold, /* ktable[key] is function; next node is pattern */
|
||||
Cruntime, /* not used in trees (is uses another type for tree) */
|
||||
Cgroup /* ktable[key] is group's "name" */
|
||||
} CapKind;
|
||||
|
||||
|
||||
typedef struct Capture {
|
||||
const char *s; /* subject position */
|
||||
unsigned short idx; /* extra info (group name, arg index, etc.) */
|
||||
byte kind; /* kind of capture */
|
||||
byte siz; /* size of full capture + 1 (0 = not a full capture) */
|
||||
} Capture;
|
||||
|
||||
|
||||
typedef struct CapState {
|
||||
Capture *cap; /* current capture */
|
||||
Capture *ocap; /* (original) capture list */
|
||||
lua_State *L;
|
||||
int ptop; /* index of last argument to 'match' */
|
||||
const char *s; /* original string */
|
||||
int valuecached; /* value stored in cache slot */
|
||||
} CapState;
|
||||
|
||||
|
||||
int runtimecap (CapState *cs, Capture *close, const char *s, int *rem);
|
||||
int getcaptures (lua_State *L, const char *s, const char *r, int ptop);
|
||||
int finddyncap (Capture *cap, Capture *last);
|
||||
|
||||
#endif
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,40 @@
|
|||
/*
|
||||
** $Id: lpcode.h,v 1.8 2016/09/15 17:46:13 roberto Exp $
|
||||
*/
|
||||
|
||||
#if !defined(lpcode_h)
|
||||
#define lpcode_h
|
||||
|
||||
#include "lua.h"
|
||||
|
||||
#include "lptypes.h"
|
||||
#include "lptree.h"
|
||||
#include "lpvm.h"
|
||||
|
||||
int tocharset (TTree *tree, Charset *cs);
|
||||
int checkaux (TTree *tree, int pred);
|
||||
int fixedlen (TTree *tree);
|
||||
int hascaptures (TTree *tree);
|
||||
int lp_gc (lua_State *L);
|
||||
Instruction *compile (lua_State *L, Pattern *p);
|
||||
void realloccode (lua_State *L, Pattern *p, int nsize);
|
||||
int sizei (const Instruction *i);
|
||||
|
||||
|
||||
#define PEnullable 0
|
||||
#define PEnofail 1
|
||||
|
||||
/*
|
||||
** nofail(t) implies that 't' cannot fail with any input
|
||||
*/
|
||||
#define nofail(t) checkaux(t, PEnofail)
|
||||
|
||||
/*
|
||||
** (not nullable(t)) implies 't' cannot match without consuming
|
||||
** something
|
||||
*/
|
||||
#define nullable(t) checkaux(t, PEnullable)
|
||||
|
||||
|
||||
|
||||
#endif
|
Binary file not shown.
After Width: | Height: | Size: 4.8 KiB |
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,244 @@
|
|||
/*
|
||||
** $Id: lpprint.c,v 1.10 2016/09/13 16:06:03 roberto Exp $
|
||||
** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license)
|
||||
*/
|
||||
|
||||
#include <ctype.h>
|
||||
#include <limits.h>
|
||||
#include <stdio.h>
|
||||
|
||||
|
||||
#include "lptypes.h"
|
||||
#include "lpprint.h"
|
||||
#include "lpcode.h"
|
||||
|
||||
|
||||
#if defined(LPEG_DEBUG)
|
||||
|
||||
/*
|
||||
** {======================================================
|
||||
** Printing patterns (for debugging)
|
||||
** =======================================================
|
||||
*/
|
||||
|
||||
|
||||
void printcharset (const byte *st) {
|
||||
int i;
|
||||
printf("[");
|
||||
for (i = 0; i <= UCHAR_MAX; i++) {
|
||||
int first = i;
|
||||
while (testchar(st, i) && i <= UCHAR_MAX) i++;
|
||||
if (i - 1 == first) /* unary range? */
|
||||
printf("(%02x)", first);
|
||||
else if (i - 1 > first) /* non-empty range? */
|
||||
printf("(%02x-%02x)", first, i - 1);
|
||||
}
|
||||
printf("]");
|
||||
}
|
||||
|
||||
|
||||
static const char *capkind (int kind) {
|
||||
const char *const modes[] = {
|
||||
"close", "position", "constant", "backref",
|
||||
"argument", "simple", "table", "function",
|
||||
"query", "string", "num", "substitution", "fold",
|
||||
"runtime", "group"};
|
||||
return modes[kind];
|
||||
}
|
||||
|
||||
|
||||
static void printjmp (const Instruction *op, const Instruction *p) {
|
||||
printf("-> %d", (int)(p + (p + 1)->offset - op));
|
||||
}
|
||||
|
||||
|
||||
void printinst (const Instruction *op, const Instruction *p) {
|
||||
const char *const names[] = {
|
||||
"any", "char", "set",
|
||||
"testany", "testchar", "testset",
|
||||
"span", "behind",
|
||||
"ret", "end",
|
||||
"choice", "jmp", "call", "open_call",
|
||||
"commit", "partial_commit", "back_commit", "failtwice", "fail", "giveup",
|
||||
"fullcapture", "opencapture", "closecapture", "closeruntime"
|
||||
};
|
||||
printf("%02ld: %s ", (long)(p - op), names[p->i.code]);
|
||||
switch ((Opcode)p->i.code) {
|
||||
case IChar: {
|
||||
printf("'%c'", p->i.aux);
|
||||
break;
|
||||
}
|
||||
case ITestChar: {
|
||||
printf("'%c'", p->i.aux); printjmp(op, p);
|
||||
break;
|
||||
}
|
||||
case IFullCapture: {
|
||||
printf("%s (size = %d) (idx = %d)",
|
||||
capkind(getkind(p)), getoff(p), p->i.key);
|
||||
break;
|
||||
}
|
||||
case IOpenCapture: {
|
||||
printf("%s (idx = %d)", capkind(getkind(p)), p->i.key);
|
||||
break;
|
||||
}
|
||||
case ISet: {
|
||||
printcharset((p+1)->buff);
|
||||
break;
|
||||
}
|
||||
case ITestSet: {
|
||||
printcharset((p+2)->buff); printjmp(op, p);
|
||||
break;
|
||||
}
|
||||
case ISpan: {
|
||||
printcharset((p+1)->buff);
|
||||
break;
|
||||
}
|
||||
case IOpenCall: {
|
||||
printf("-> %d", (p + 1)->offset);
|
||||
break;
|
||||
}
|
||||
case IBehind: {
|
||||
printf("%d", p->i.aux);
|
||||
break;
|
||||
}
|
||||
case IJmp: case ICall: case ICommit: case IChoice:
|
||||
case IPartialCommit: case IBackCommit: case ITestAny: {
|
||||
printjmp(op, p);
|
||||
break;
|
||||
}
|
||||
default: break;
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
|
||||
void printpatt (Instruction *p, int n) {
|
||||
Instruction *op = p;
|
||||
while (p < op + n) {
|
||||
printinst(op, p);
|
||||
p += sizei(p);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#if defined(LPEG_DEBUG)
|
||||
static void printcap (Capture *cap) {
|
||||
printf("%s (idx: %d - size: %d) -> %p\n",
|
||||
capkind(cap->kind), cap->idx, cap->siz, cap->s);
|
||||
}
|
||||
|
||||
|
||||
void printcaplist (Capture *cap, Capture *limit) {
|
||||
printf(">======\n");
|
||||
for (; cap->s && (limit == NULL || cap < limit); cap++)
|
||||
printcap(cap);
|
||||
printf("=======\n");
|
||||
}
|
||||
#endif
|
||||
|
||||
/* }====================================================== */
|
||||
|
||||
|
||||
/*
|
||||
** {======================================================
|
||||
** Printing trees (for debugging)
|
||||
** =======================================================
|
||||
*/
|
||||
|
||||
static const char *tagnames[] = {
|
||||
"char", "set", "any",
|
||||
"true", "false",
|
||||
"rep",
|
||||
"seq", "choice",
|
||||
"not", "and",
|
||||
"call", "opencall", "rule", "grammar",
|
||||
"behind",
|
||||
"capture", "run-time"
|
||||
};
|
||||
|
||||
|
||||
void printtree (TTree *tree, int ident) {
|
||||
int i;
|
||||
for (i = 0; i < ident; i++) printf(" ");
|
||||
printf("%s", tagnames[tree->tag]);
|
||||
switch (tree->tag) {
|
||||
case TChar: {
|
||||
int c = tree->u.n;
|
||||
if (isprint(c))
|
||||
printf(" '%c'\n", c);
|
||||
else
|
||||
printf(" (%02X)\n", c);
|
||||
break;
|
||||
}
|
||||
case TSet: {
|
||||
printcharset(treebuffer(tree));
|
||||
printf("\n");
|
||||
break;
|
||||
}
|
||||
case TOpenCall: case TCall: {
|
||||
assert(sib2(tree)->tag == TRule);
|
||||
printf(" key: %d (rule: %d)\n", tree->key, sib2(tree)->cap);
|
||||
break;
|
||||
}
|
||||
case TBehind: {
|
||||
printf(" %d\n", tree->u.n);
|
||||
printtree(sib1(tree), ident + 2);
|
||||
break;
|
||||
}
|
||||
case TCapture: {
|
||||
printf(" kind: '%s' key: %d\n", capkind(tree->cap), tree->key);
|
||||
printtree(sib1(tree), ident + 2);
|
||||
break;
|
||||
}
|
||||
case TRule: {
|
||||
printf(" n: %d key: %d\n", tree->cap, tree->key);
|
||||
printtree(sib1(tree), ident + 2);
|
||||
break; /* do not print next rule as a sibling */
|
||||
}
|
||||
case TGrammar: {
|
||||
TTree *rule = sib1(tree);
|
||||
printf(" %d\n", tree->u.n); /* number of rules */
|
||||
for (i = 0; i < tree->u.n; i++) {
|
||||
printtree(rule, ident + 2);
|
||||
rule = sib2(rule);
|
||||
}
|
||||
assert(rule->tag == TTrue); /* sentinel */
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
int sibs = numsiblings[tree->tag];
|
||||
printf("\n");
|
||||
if (sibs >= 1) {
|
||||
printtree(sib1(tree), ident + 2);
|
||||
if (sibs >= 2)
|
||||
printtree(sib2(tree), ident + 2);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void printktable (lua_State *L, int idx) {
|
||||
int n, i;
|
||||
lua_getuservalue(L, idx);
|
||||
if (lua_isnil(L, -1)) /* no ktable? */
|
||||
return;
|
||||
n = lua_rawlen(L, -1);
|
||||
printf("[");
|
||||
for (i = 1; i <= n; i++) {
|
||||
printf("%d = ", i);
|
||||
lua_rawgeti(L, -1, i);
|
||||
if (lua_isstring(L, -1))
|
||||
printf("%s ", lua_tostring(L, -1));
|
||||
else
|
||||
printf("%s ", lua_typename(L, lua_type(L, -1)));
|
||||
lua_pop(L, 1);
|
||||
}
|
||||
printf("]\n");
|
||||
/* leave ktable at the stack */
|
||||
}
|
||||
|
||||
/* }====================================================== */
|
||||
|
||||
#endif
|
|
@ -0,0 +1,36 @@
|
|||
/*
|
||||
** $Id: lpprint.h,v 1.2 2015/06/12 18:18:08 roberto Exp $
|
||||
*/
|
||||
|
||||
|
||||
#if !defined(lpprint_h)
|
||||
#define lpprint_h
|
||||
|
||||
|
||||
#include "lptree.h"
|
||||
#include "lpvm.h"
|
||||
|
||||
|
||||
#if defined(LPEG_DEBUG)
|
||||
|
||||
void printpatt (Instruction *p, int n);
|
||||
void printtree (TTree *tree, int ident);
|
||||
void printktable (lua_State *L, int idx);
|
||||
void printcharset (const byte *st);
|
||||
void printcaplist (Capture *cap, Capture *limit);
|
||||
void printinst (const Instruction *op, const Instruction *p);
|
||||
|
||||
#else
|
||||
|
||||
#define printktable(L,idx) \
|
||||
luaL_error(L, "function only implemented in debug mode")
|
||||
#define printtree(tree,i) \
|
||||
luaL_error(L, "function only implemented in debug mode")
|
||||
#define printpatt(p,n) \
|
||||
luaL_error(L, "function only implemented in debug mode")
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#endif
|
||||
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,82 @@
|
|||
/*
|
||||
** $Id: lptree.h,v 1.3 2016/09/13 18:07:51 roberto Exp $
|
||||
*/
|
||||
|
||||
#if !defined(lptree_h)
|
||||
#define lptree_h
|
||||
|
||||
|
||||
#include "lptypes.h"
|
||||
|
||||
|
||||
/*
|
||||
** types of trees
|
||||
*/
|
||||
typedef enum TTag {
|
||||
TChar = 0, /* 'n' = char */
|
||||
TSet, /* the set is stored in next CHARSETSIZE bytes */
|
||||
TAny,
|
||||
TTrue,
|
||||
TFalse,
|
||||
TRep, /* 'sib1'* */
|
||||
TSeq, /* 'sib1' 'sib2' */
|
||||
TChoice, /* 'sib1' / 'sib2' */
|
||||
TNot, /* !'sib1' */
|
||||
TAnd, /* &'sib1' */
|
||||
TCall, /* ktable[key] is rule's key; 'sib2' is rule being called */
|
||||
TOpenCall, /* ktable[key] is rule's key */
|
||||
TRule, /* ktable[key] is rule's key (but key == 0 for unused rules);
|
||||
'sib1' is rule's pattern;
|
||||
'sib2' is next rule; 'cap' is rule's sequential number */
|
||||
TGrammar, /* 'sib1' is initial (and first) rule */
|
||||
TBehind, /* 'sib1' is pattern, 'n' is how much to go back */
|
||||
TCapture, /* captures: 'cap' is kind of capture (enum 'CapKind');
|
||||
ktable[key] is Lua value associated with capture;
|
||||
'sib1' is capture body */
|
||||
TRunTime /* run-time capture: 'key' is Lua function;
|
||||
'sib1' is capture body */
|
||||
} TTag;
|
||||
|
||||
|
||||
/*
|
||||
** Tree trees
|
||||
** The first child of a tree (if there is one) is immediately after
|
||||
** the tree. A reference to a second child (ps) is its position
|
||||
** relative to the position of the tree itself.
|
||||
*/
|
||||
typedef struct TTree {
|
||||
byte tag;
|
||||
byte cap; /* kind of capture (if it is a capture) */
|
||||
unsigned short key; /* key in ktable for Lua data (0 if no key) */
|
||||
union {
|
||||
int ps; /* occasional second child */
|
||||
int n; /* occasional counter */
|
||||
} u;
|
||||
} TTree;
|
||||
|
||||
|
||||
/*
|
||||
** A complete pattern has its tree plus, if already compiled,
|
||||
** its corresponding code
|
||||
*/
|
||||
typedef struct Pattern {
|
||||
union Instruction *code;
|
||||
int codesize;
|
||||
TTree tree[1];
|
||||
} Pattern;
|
||||
|
||||
|
||||
/* number of children for each tree */
|
||||
extern const byte numsiblings[];
|
||||
|
||||
/* access to children */
|
||||
#define sib1(t) ((t) + 1)
|
||||
#define sib2(t) ((t) + (t)->u.ps)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,149 @@
|
|||
/*
|
||||
** $Id: lptypes.h,v 1.16 2017/01/13 13:33:17 roberto Exp $
|
||||
** LPeg - PEG pattern matching for Lua
|
||||
** Copyright 2007-2017, Lua.org & PUC-Rio (see 'lpeg.html' for license)
|
||||
** written by Roberto Ierusalimschy
|
||||
*/
|
||||
|
||||
#if !defined(lptypes_h)
|
||||
#define lptypes_h
|
||||
|
||||
|
||||
#if !defined(LPEG_DEBUG)
|
||||
#define NDEBUG
|
||||
#endif
|
||||
|
||||
#include <assert.h>
|
||||
#include <limits.h>
|
||||
|
||||
#include "lua.h"
|
||||
|
||||
|
||||
#define VERSION "1.0.1"
|
||||
|
||||
|
||||
#define PATTERN_T "lpeg-pattern"
|
||||
#define MAXSTACKIDX "lpeg-maxstack"
|
||||
|
||||
|
||||
/*
|
||||
** compatibility with Lua 5.1
|
||||
*/
|
||||
#if (LUA_VERSION_NUM == 501)
|
||||
|
||||
#define lp_equal lua_equal
|
||||
|
||||
#define lua_getuservalue lua_getfenv
|
||||
#define lua_setuservalue lua_setfenv
|
||||
|
||||
#define lua_rawlen lua_objlen
|
||||
|
||||
#define luaL_setfuncs(L,f,n) luaL_register(L,NULL,f)
|
||||
#define luaL_newlib(L,f) luaL_register(L,"lpeg",f)
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#if !defined(lp_equal)
|
||||
#define lp_equal(L,idx1,idx2) lua_compare(L,(idx1),(idx2),LUA_OPEQ)
|
||||
#endif
|
||||
|
||||
|
||||
/* default maximum size for call/backtrack stack */
|
||||
#if !defined(MAXBACK)
|
||||
#define MAXBACK 400
|
||||
#endif
|
||||
|
||||
|
||||
/* maximum number of rules in a grammar (limited by 'unsigned char') */
|
||||
#if !defined(MAXRULES)
|
||||
#define MAXRULES 250
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
/* initial size for capture's list */
|
||||
#define INITCAPSIZE 32
|
||||
|
||||
|
||||
/* index, on Lua stack, for subject */
|
||||
#define SUBJIDX 2
|
||||
|
||||
/* number of fixed arguments to 'match' (before capture arguments) */
|
||||
#define FIXEDARGS 3
|
||||
|
||||
/* index, on Lua stack, for capture list */
|
||||
#define caplistidx(ptop) ((ptop) + 2)
|
||||
|
||||
/* index, on Lua stack, for pattern's ktable */
|
||||
#define ktableidx(ptop) ((ptop) + 3)
|
||||
|
||||
/* index, on Lua stack, for backtracking stack */
|
||||
#define stackidx(ptop) ((ptop) + 4)
|
||||
|
||||
|
||||
|
||||
typedef unsigned char byte;
|
||||
|
||||
|
||||
#define BITSPERCHAR 8
|
||||
|
||||
#define CHARSETSIZE ((UCHAR_MAX/BITSPERCHAR) + 1)
|
||||
|
||||
|
||||
|
||||
typedef struct Charset {
|
||||
byte cs[CHARSETSIZE];
|
||||
} Charset;
|
||||
|
||||
|
||||
|
||||
#define loopset(v,b) { int v; for (v = 0; v < CHARSETSIZE; v++) {b;} }
|
||||
|
||||
/* access to charset */
|
||||
#define treebuffer(t) ((byte *)((t) + 1))
|
||||
|
||||
/* number of slots needed for 'n' bytes */
|
||||
#define bytes2slots(n) (((n) - 1) / sizeof(TTree) + 1)
|
||||
|
||||
/* set 'b' bit in charset 'cs' */
|
||||
#define setchar(cs,b) ((cs)[(b) >> 3] |= (1 << ((b) & 7)))
|
||||
|
||||
|
||||
/*
|
||||
** in capture instructions, 'kind' of capture and its offset are
|
||||
** packed in field 'aux', 4 bits for each
|
||||
*/
|
||||
#define getkind(op) ((op)->i.aux & 0xF)
|
||||
#define getoff(op) (((op)->i.aux >> 4) & 0xF)
|
||||
#define joinkindoff(k,o) ((k) | ((o) << 4))
|
||||
|
||||
#define MAXOFF 0xF
|
||||
#define MAXAUX 0xFF
|
||||
|
||||
|
||||
/* maximum number of bytes to look behind */
|
||||
#define MAXBEHIND MAXAUX
|
||||
|
||||
|
||||
/* maximum size (in elements) for a pattern */
|
||||
#define MAXPATTSIZE (SHRT_MAX - 10)
|
||||
|
||||
|
||||
/* size (in elements) for an instruction plus extra l bytes */
|
||||
#define instsize(l) (((l) + sizeof(Instruction) - 1)/sizeof(Instruction) + 1)
|
||||
|
||||
|
||||
/* size (in elements) for a ISet instruction */
|
||||
#define CHARSETINSTSIZE instsize(CHARSETSIZE)
|
||||
|
||||
/* size (in elements) for a IFunc instruction */
|
||||
#define funcinstsize(p) ((p)->i.aux + 2)
|
||||
|
||||
|
||||
|
||||
#define testchar(st,c) (((int)(st)[((c) >> 3)] & (1 << ((c) & 7))))
|
||||
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,364 @@
|
|||
/*
|
||||
** $Id: lpvm.c,v 1.9 2016/06/03 20:11:18 roberto Exp $
|
||||
** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license)
|
||||
*/
|
||||
|
||||
#include <limits.h>
|
||||
#include <string.h>
|
||||
|
||||
|
||||
#include "lua.h"
|
||||
#include "lauxlib.h"
|
||||
|
||||
#include "lpcap.h"
|
||||
#include "lptypes.h"
|
||||
#include "lpvm.h"
|
||||
#include "lpprint.h"
|
||||
|
||||
|
||||
/* initial size for call/backtrack stack */
|
||||
#if !defined(INITBACK)
|
||||
#define INITBACK MAXBACK
|
||||
#endif
|
||||
|
||||
|
||||
#define getoffset(p) (((p) + 1)->offset)
|
||||
|
||||
static const Instruction giveup = {{IGiveup, 0, 0}};
|
||||
|
||||
|
||||
/*
|
||||
** {======================================================
|
||||
** Virtual Machine
|
||||
** =======================================================
|
||||
*/
|
||||
|
||||
|
||||
typedef struct Stack {
|
||||
const char *s; /* saved position (or NULL for calls) */
|
||||
const Instruction *p; /* next instruction */
|
||||
int caplevel;
|
||||
} Stack;
|
||||
|
||||
|
||||
#define getstackbase(L, ptop) ((Stack *)lua_touserdata(L, stackidx(ptop)))
|
||||
|
||||
|
||||
/*
|
||||
** Make the size of the array of captures 'cap' twice as large as needed
|
||||
** (which is 'captop'). ('n' is the number of new elements.)
|
||||
*/
|
||||
static Capture *doublecap (lua_State *L, Capture *cap, int captop,
|
||||
int n, int ptop) {
|
||||
Capture *newc;
|
||||
if (captop >= INT_MAX/((int)sizeof(Capture) * 2))
|
||||
luaL_error(L, "too many captures");
|
||||
newc = (Capture *)lua_newuserdata(L, captop * 2 * sizeof(Capture));
|
||||
memcpy(newc, cap, (captop - n) * sizeof(Capture));
|
||||
lua_replace(L, caplistidx(ptop));
|
||||
return newc;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Double the size of the stack
|
||||
*/
|
||||
static Stack *doublestack (lua_State *L, Stack **stacklimit, int ptop) {
|
||||
Stack *stack = getstackbase(L, ptop);
|
||||
Stack *newstack;
|
||||
int n = *stacklimit - stack; /* current stack size */
|
||||
int max, newn;
|
||||
lua_getfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX);
|
||||
max = lua_tointeger(L, -1); /* maximum allowed size */
|
||||
lua_pop(L, 1);
|
||||
if (n >= max) /* already at maximum size? */
|
||||
luaL_error(L, "backtrack stack overflow (current limit is %d)", max);
|
||||
newn = 2 * n; /* new size */
|
||||
if (newn > max) newn = max;
|
||||
newstack = (Stack *)lua_newuserdata(L, newn * sizeof(Stack));
|
||||
memcpy(newstack, stack, n * sizeof(Stack));
|
||||
lua_replace(L, stackidx(ptop));
|
||||
*stacklimit = newstack + newn;
|
||||
return newstack + n; /* return next position */
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Interpret the result of a dynamic capture: false -> fail;
|
||||
** true -> keep current position; number -> next position.
|
||||
** Return new subject position. 'fr' is stack index where
|
||||
** is the result; 'curr' is current subject position; 'limit'
|
||||
** is subject's size.
|
||||
*/
|
||||
static int resdyncaptures (lua_State *L, int fr, int curr, int limit) {
|
||||
lua_Integer res;
|
||||
if (!lua_toboolean(L, fr)) { /* false value? */
|
||||
lua_settop(L, fr - 1); /* remove results */
|
||||
return -1; /* and fail */
|
||||
}
|
||||
else if (lua_isboolean(L, fr)) /* true? */
|
||||
res = curr; /* keep current position */
|
||||
else {
|
||||
res = lua_tointeger(L, fr) - 1; /* new position */
|
||||
if (res < curr || res > limit)
|
||||
luaL_error(L, "invalid position returned by match-time capture");
|
||||
}
|
||||
lua_remove(L, fr); /* remove first result (offset) */
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Add capture values returned by a dynamic capture to the capture list
|
||||
** 'base', nested inside a group capture. 'fd' indexes the first capture
|
||||
** value, 'n' is the number of values (at least 1).
|
||||
*/
|
||||
static void adddyncaptures (const char *s, Capture *base, int n, int fd) {
|
||||
int i;
|
||||
base[0].kind = Cgroup; /* create group capture */
|
||||
base[0].siz = 0;
|
||||
base[0].idx = 0; /* make it an anonymous group */
|
||||
for (i = 1; i <= n; i++) { /* add runtime captures */
|
||||
base[i].kind = Cruntime;
|
||||
base[i].siz = 1; /* mark it as closed */
|
||||
base[i].idx = fd + i - 1; /* stack index of capture value */
|
||||
base[i].s = s;
|
||||
}
|
||||
base[i].kind = Cclose; /* close group */
|
||||
base[i].siz = 1;
|
||||
base[i].s = s;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Remove dynamic captures from the Lua stack (called in case of failure)
|
||||
*/
|
||||
static int removedyncap (lua_State *L, Capture *capture,
|
||||
int level, int last) {
|
||||
int id = finddyncap(capture + level, capture + last); /* index of 1st cap. */
|
||||
int top = lua_gettop(L);
|
||||
if (id == 0) return 0; /* no dynamic captures? */
|
||||
lua_settop(L, id - 1); /* remove captures */
|
||||
return top - id + 1; /* number of values removed */
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Opcode interpreter
|
||||
*/
|
||||
const char *match (lua_State *L, const char *o, const char *s, const char *e,
|
||||
Instruction *op, Capture *capture, int ptop) {
|
||||
Stack stackbase[INITBACK];
|
||||
Stack *stacklimit = stackbase + INITBACK;
|
||||
Stack *stack = stackbase; /* point to first empty slot in stack */
|
||||
int capsize = INITCAPSIZE;
|
||||
int captop = 0; /* point to first empty slot in captures */
|
||||
int ndyncap = 0; /* number of dynamic captures (in Lua stack) */
|
||||
const Instruction *p = op; /* current instruction */
|
||||
stack->p = &giveup; stack->s = s; stack->caplevel = 0; stack++;
|
||||
lua_pushlightuserdata(L, stackbase);
|
||||
for (;;) {
|
||||
#if defined(DEBUG)
|
||||
printf("-------------------------------------\n");
|
||||
printcaplist(capture, capture + captop);
|
||||
printf("s: |%s| stck:%d, dyncaps:%d, caps:%d ",
|
||||
s, (int)(stack - getstackbase(L, ptop)), ndyncap, captop);
|
||||
printinst(op, p);
|
||||
#endif
|
||||
assert(stackidx(ptop) + ndyncap == lua_gettop(L) && ndyncap <= captop);
|
||||
switch ((Opcode)p->i.code) {
|
||||
case IEnd: {
|
||||
assert(stack == getstackbase(L, ptop) + 1);
|
||||
capture[captop].kind = Cclose;
|
||||
capture[captop].s = NULL;
|
||||
return s;
|
||||
}
|
||||
case IGiveup: {
|
||||
assert(stack == getstackbase(L, ptop));
|
||||
return NULL;
|
||||
}
|
||||
case IRet: {
|
||||
assert(stack > getstackbase(L, ptop) && (stack - 1)->s == NULL);
|
||||
p = (--stack)->p;
|
||||
continue;
|
||||
}
|
||||
case IAny: {
|
||||
if (s < e) { p++; s++; }
|
||||
else goto fail;
|
||||
continue;
|
||||
}
|
||||
case ITestAny: {
|
||||
if (s < e) p += 2;
|
||||
else p += getoffset(p);
|
||||
continue;
|
||||
}
|
||||
case IChar: {
|
||||
if ((byte)*s == p->i.aux && s < e) { p++; s++; }
|
||||
else goto fail;
|
||||
continue;
|
||||
}
|
||||
case ITestChar: {
|
||||
if ((byte)*s == p->i.aux && s < e) p += 2;
|
||||
else p += getoffset(p);
|
||||
continue;
|
||||
}
|
||||
case ISet: {
|
||||
int c = (byte)*s;
|
||||
if (testchar((p+1)->buff, c) && s < e)
|
||||
{ p += CHARSETINSTSIZE; s++; }
|
||||
else goto fail;
|
||||
continue;
|
||||
}
|
||||
case ITestSet: {
|
||||
int c = (byte)*s;
|
||||
if (testchar((p + 2)->buff, c) && s < e)
|
||||
p += 1 + CHARSETINSTSIZE;
|
||||
else p += getoffset(p);
|
||||
continue;
|
||||
}
|
||||
case IBehind: {
|
||||
int n = p->i.aux;
|
||||
if (n > s - o) goto fail;
|
||||
s -= n; p++;
|
||||
continue;
|
||||
}
|
||||
case ISpan: {
|
||||
for (; s < e; s++) {
|
||||
int c = (byte)*s;
|
||||
if (!testchar((p+1)->buff, c)) break;
|
||||
}
|
||||
p += CHARSETINSTSIZE;
|
||||
continue;
|
||||
}
|
||||
case IJmp: {
|
||||
p += getoffset(p);
|
||||
continue;
|
||||
}
|
||||
case IChoice: {
|
||||
if (stack == stacklimit)
|
||||
stack = doublestack(L, &stacklimit, ptop);
|
||||
stack->p = p + getoffset(p);
|
||||
stack->s = s;
|
||||
stack->caplevel = captop;
|
||||
stack++;
|
||||
p += 2;
|
||||
continue;
|
||||
}
|
||||
case ICall: {
|
||||
if (stack == stacklimit)
|
||||
stack = doublestack(L, &stacklimit, ptop);
|
||||
stack->s = NULL;
|
||||
stack->p = p + 2; /* save return address */
|
||||
stack++;
|
||||
p += getoffset(p);
|
||||
continue;
|
||||
}
|
||||
case ICommit: {
|
||||
assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL);
|
||||
stack--;
|
||||
p += getoffset(p);
|
||||
continue;
|
||||
}
|
||||
case IPartialCommit: {
|
||||
assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL);
|
||||
(stack - 1)->s = s;
|
||||
(stack - 1)->caplevel = captop;
|
||||
p += getoffset(p);
|
||||
continue;
|
||||
}
|
||||
case IBackCommit: {
|
||||
assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL);
|
||||
s = (--stack)->s;
|
||||
captop = stack->caplevel;
|
||||
p += getoffset(p);
|
||||
continue;
|
||||
}
|
||||
case IFailTwice:
|
||||
assert(stack > getstackbase(L, ptop));
|
||||
stack--;
|
||||
/* go through */
|
||||
case IFail:
|
||||
fail: { /* pattern failed: try to backtrack */
|
||||
do { /* remove pending calls */
|
||||
assert(stack > getstackbase(L, ptop));
|
||||
s = (--stack)->s;
|
||||
} while (s == NULL);
|
||||
if (ndyncap > 0) /* is there matchtime captures? */
|
||||
ndyncap -= removedyncap(L, capture, stack->caplevel, captop);
|
||||
captop = stack->caplevel;
|
||||
p = stack->p;
|
||||
#if defined(DEBUG)
|
||||
printf("**FAIL**\n");
|
||||
#endif
|
||||
continue;
|
||||
}
|
||||
case ICloseRunTime: {
|
||||
CapState cs;
|
||||
int rem, res, n;
|
||||
int fr = lua_gettop(L) + 1; /* stack index of first result */
|
||||
cs.s = o; cs.L = L; cs.ocap = capture; cs.ptop = ptop;
|
||||
n = runtimecap(&cs, capture + captop, s, &rem); /* call function */
|
||||
captop -= n; /* remove nested captures */
|
||||
ndyncap -= rem; /* update number of dynamic captures */
|
||||
fr -= rem; /* 'rem' items were popped from Lua stack */
|
||||
res = resdyncaptures(L, fr, s - o, e - o); /* get result */
|
||||
if (res == -1) /* fail? */
|
||||
goto fail;
|
||||
s = o + res; /* else update current position */
|
||||
n = lua_gettop(L) - fr + 1; /* number of new captures */
|
||||
ndyncap += n; /* update number of dynamic captures */
|
||||
if (n > 0) { /* any new capture? */
|
||||
if (fr + n >= SHRT_MAX)
|
||||
luaL_error(L, "too many results in match-time capture");
|
||||
if ((captop += n + 2) >= capsize) {
|
||||
capture = doublecap(L, capture, captop, n + 2, ptop);
|
||||
capsize = 2 * captop;
|
||||
}
|
||||
/* add new captures to 'capture' list */
|
||||
adddyncaptures(s, capture + captop - n - 2, n, fr);
|
||||
}
|
||||
p++;
|
||||
continue;
|
||||
}
|
||||
case ICloseCapture: {
|
||||
const char *s1 = s;
|
||||
assert(captop > 0);
|
||||
/* if possible, turn capture into a full capture */
|
||||
if (capture[captop - 1].siz == 0 &&
|
||||
s1 - capture[captop - 1].s < UCHAR_MAX) {
|
||||
capture[captop - 1].siz = s1 - capture[captop - 1].s + 1;
|
||||
p++;
|
||||
continue;
|
||||
}
|
||||
else {
|
||||
capture[captop].siz = 1; /* mark entry as closed */
|
||||
capture[captop].s = s;
|
||||
goto pushcapture;
|
||||
}
|
||||
}
|
||||
case IOpenCapture:
|
||||
capture[captop].siz = 0; /* mark entry as open */
|
||||
capture[captop].s = s;
|
||||
goto pushcapture;
|
||||
case IFullCapture:
|
||||
capture[captop].siz = getoff(p) + 1; /* save capture size */
|
||||
capture[captop].s = s - getoff(p);
|
||||
/* goto pushcapture; */
|
||||
pushcapture: {
|
||||
capture[captop].idx = p->i.key;
|
||||
capture[captop].kind = getkind(p);
|
||||
if (++captop >= capsize) {
|
||||
capture = doublecap(L, capture, captop, 0, ptop);
|
||||
capsize = 2 * captop;
|
||||
}
|
||||
p++;
|
||||
continue;
|
||||
}
|
||||
default: assert(0); return NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* }====================================================== */
|
||||
|
||||
|
|
@ -0,0 +1,58 @@
|
|||
/*
|
||||
** $Id: lpvm.h,v 1.3 2014/02/21 13:06:41 roberto Exp $
|
||||
*/
|
||||
|
||||
#if !defined(lpvm_h)
|
||||
#define lpvm_h
|
||||
|
||||
#include "lpcap.h"
|
||||
|
||||
|
||||
/* Virtual Machine's instructions */
|
||||
typedef enum Opcode {
|
||||
IAny, /* if no char, fail */
|
||||
IChar, /* if char != aux, fail */
|
||||
ISet, /* if char not in buff, fail */
|
||||
ITestAny, /* in no char, jump to 'offset' */
|
||||
ITestChar, /* if char != aux, jump to 'offset' */
|
||||
ITestSet, /* if char not in buff, jump to 'offset' */
|
||||
ISpan, /* read a span of chars in buff */
|
||||
IBehind, /* walk back 'aux' characters (fail if not possible) */
|
||||
IRet, /* return from a rule */
|
||||
IEnd, /* end of pattern */
|
||||
IChoice, /* stack a choice; next fail will jump to 'offset' */
|
||||
IJmp, /* jump to 'offset' */
|
||||
ICall, /* call rule at 'offset' */
|
||||
IOpenCall, /* call rule number 'key' (must be closed to a ICall) */
|
||||
ICommit, /* pop choice and jump to 'offset' */
|
||||
IPartialCommit, /* update top choice to current position and jump */
|
||||
IBackCommit, /* "fails" but jump to its own 'offset' */
|
||||
IFailTwice, /* pop one choice and then fail */
|
||||
IFail, /* go back to saved state on choice and jump to saved offset */
|
||||
IGiveup, /* internal use */
|
||||
IFullCapture, /* complete capture of last 'off' chars */
|
||||
IOpenCapture, /* start a capture */
|
||||
ICloseCapture,
|
||||
ICloseRunTime
|
||||
} Opcode;
|
||||
|
||||
|
||||
|
||||
typedef union Instruction {
|
||||
struct Inst {
|
||||
byte code;
|
||||
byte aux;
|
||||
short key;
|
||||
} i;
|
||||
int offset;
|
||||
byte buff[1];
|
||||
} Instruction;
|
||||
|
||||
|
||||
void printpatt (Instruction *p, int n);
|
||||
const char *match (lua_State *L, const char *o, const char *s, const char *e,
|
||||
Instruction *op, Capture *capture, int ptop);
|
||||
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,55 @@
|
|||
LIBNAME = lpeg
|
||||
LUADIR = ../lua/
|
||||
|
||||
COPT = -O2
|
||||
# COPT = -DLPEG_DEBUG -g
|
||||
|
||||
CWARNS = -Wall -Wextra -pedantic \
|
||||
-Waggregate-return \
|
||||
-Wcast-align \
|
||||
-Wcast-qual \
|
||||
-Wdisabled-optimization \
|
||||
-Wpointer-arith \
|
||||
-Wshadow \
|
||||
-Wsign-compare \
|
||||
-Wundef \
|
||||
-Wwrite-strings \
|
||||
-Wbad-function-cast \
|
||||
-Wdeclaration-after-statement \
|
||||
-Wmissing-prototypes \
|
||||
-Wnested-externs \
|
||||
-Wstrict-prototypes \
|
||||
# -Wunreachable-code \
|
||||
|
||||
|
||||
CFLAGS = $(CWARNS) $(COPT) -std=c99 -I$(LUADIR) -fPIC
|
||||
CC = gcc
|
||||
|
||||
FILES = lpvm.o lpcap.o lptree.o lpcode.o lpprint.o
|
||||
|
||||
# For Linux
|
||||
linux:
|
||||
make lpeg.so "DLLFLAGS = -shared -fPIC"
|
||||
|
||||
# For Mac OS
|
||||
macosx:
|
||||
make lpeg.so "DLLFLAGS = -bundle -undefined dynamic_lookup"
|
||||
|
||||
lpeg.so: $(FILES)
|
||||
env $(CC) $(DLLFLAGS) $(FILES) -o lpeg.so
|
||||
|
||||
$(FILES): makefile
|
||||
|
||||
test: test.lua re.lua lpeg.so
|
||||
./test.lua
|
||||
|
||||
clean:
|
||||
rm -f $(FILES) lpeg.so
|
||||
|
||||
|
||||
lpcap.o: lpcap.c lpcap.h lptypes.h
|
||||
lpcode.o: lpcode.c lptypes.h lpcode.h lptree.h lpvm.h lpcap.h
|
||||
lpprint.o: lpprint.c lptypes.h lpprint.h lptree.h lpvm.h lpcap.h
|
||||
lptree.o: lptree.c lptypes.h lpcap.h lpcode.h lptree.h lpvm.h lpprint.h
|
||||
lpvm.o: lpvm.c lpcap.h lptypes.h lpvm.h lpprint.h lptree.h
|
||||
|
|
@ -0,0 +1,498 @@
|
|||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<html>
|
||||
<head>
|
||||
<title>LPeg.re - Regex syntax for LPEG</title>
|
||||
<link rel="stylesheet"
|
||||
href="http://www.inf.puc-rio.br/~roberto/lpeg/doc.css"
|
||||
type="text/css"/>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<!-- $Id: re.html,v 1.24 2016/09/20 17:41:27 roberto Exp $ -->
|
||||
|
||||
<div id="container">
|
||||
|
||||
<div id="product">
|
||||
<div id="product_logo">
|
||||
<a href="http://www.inf.puc-rio.br/~roberto/lpeg/">
|
||||
<img alt="LPeg logo" src="lpeg-128.gif"/>
|
||||
</a>
|
||||
</div>
|
||||
<div id="product_name"><big><strong>LPeg.re</strong></big></div>
|
||||
<div id="product_description">
|
||||
Regex syntax for LPEG
|
||||
</div>
|
||||
</div> <!-- id="product" -->
|
||||
|
||||
<div id="main">
|
||||
|
||||
<div id="navigation">
|
||||
<h1>re</h1>
|
||||
|
||||
<ul>
|
||||
<li><a href="#basic">Basic Constructions</a></li>
|
||||
<li><a href="#func">Functions</a></li>
|
||||
<li><a href="#ex">Some Examples</a></li>
|
||||
<li><a href="#license">License</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
</div> <!-- id="navigation" -->
|
||||
|
||||
<div id="content">
|
||||
|
||||
<h2><a name="basic"></a>The <code>re</code> Module</h2>
|
||||
|
||||
<p>
|
||||
The <code>re</code> module
|
||||
(provided by file <code>re.lua</code> in the distribution)
|
||||
supports a somewhat conventional regex syntax
|
||||
for pattern usage within <a href="lpeg.html">LPeg</a>.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
The next table summarizes <code>re</code>'s syntax.
|
||||
A <code>p</code> represents an arbitrary pattern;
|
||||
<code>num</code> represents a number (<code>[0-9]+</code>);
|
||||
<code>name</code> represents an identifier
|
||||
(<code>[a-zA-Z][a-zA-Z0-9_]*</code>).
|
||||
Constructions are listed in order of decreasing precedence.
|
||||
<table border="1">
|
||||
<tbody><tr><td><b>Syntax</b></td><td><b>Description</b></td></tr>
|
||||
<tr><td><code>( p )</code></td> <td>grouping</td></tr>
|
||||
<tr><td><code>'string'</code></td> <td>literal string</td></tr>
|
||||
<tr><td><code>"string"</code></td> <td>literal string</td></tr>
|
||||
<tr><td><code>[class]</code></td> <td>character class</td></tr>
|
||||
<tr><td><code>.</code></td> <td>any character</td></tr>
|
||||
<tr><td><code>%name</code></td>
|
||||
<td>pattern <code>defs[name]</code> or a pre-defined pattern</td></tr>
|
||||
<tr><td><code>name</code></td><td>non terminal</td></tr>
|
||||
<tr><td><code><name></code></td><td>non terminal</td></tr>
|
||||
<tr><td><code>{}</code></td> <td>position capture</td></tr>
|
||||
<tr><td><code>{ p }</code></td> <td>simple capture</td></tr>
|
||||
<tr><td><code>{: p :}</code></td> <td>anonymous group capture</td></tr>
|
||||
<tr><td><code>{:name: p :}</code></td> <td>named group capture</td></tr>
|
||||
<tr><td><code>{~ p ~}</code></td> <td>substitution capture</td></tr>
|
||||
<tr><td><code>{| p |}</code></td> <td>table capture</td></tr>
|
||||
<tr><td><code>=name</code></td> <td>back reference
|
||||
</td></tr>
|
||||
<tr><td><code>p ?</code></td> <td>optional match</td></tr>
|
||||
<tr><td><code>p *</code></td> <td>zero or more repetitions</td></tr>
|
||||
<tr><td><code>p +</code></td> <td>one or more repetitions</td></tr>
|
||||
<tr><td><code>p^num</code></td> <td>exactly <code>n</code> repetitions</td></tr>
|
||||
<tr><td><code>p^+num</code></td>
|
||||
<td>at least <code>n</code> repetitions</td></tr>
|
||||
<tr><td><code>p^-num</code></td>
|
||||
<td>at most <code>n</code> repetitions</td></tr>
|
||||
<tr><td><code>p -> 'string'</code></td> <td>string capture</td></tr>
|
||||
<tr><td><code>p -> "string"</code></td> <td>string capture</td></tr>
|
||||
<tr><td><code>p -> num</code></td> <td>numbered capture</td></tr>
|
||||
<tr><td><code>p -> name</code></td> <td>function/query/string capture
|
||||
equivalent to <code>p / defs[name]</code></td></tr>
|
||||
<tr><td><code>p => name</code></td> <td>match-time capture
|
||||
equivalent to <code>lpeg.Cmt(p, defs[name])</code></td></tr>
|
||||
<tr><td><code>& p</code></td> <td>and predicate</td></tr>
|
||||
<tr><td><code>! p</code></td> <td>not predicate</td></tr>
|
||||
<tr><td><code>p1 p2</code></td> <td>concatenation</td></tr>
|
||||
<tr><td><code>p1 / p2</code></td> <td>ordered choice</td></tr>
|
||||
<tr><td>(<code>name <- p</code>)<sup>+</sup></td> <td>grammar</td></tr>
|
||||
</tbody></table>
|
||||
<p>
|
||||
Any space appearing in a syntax description can be
|
||||
replaced by zero or more space characters and Lua-style comments
|
||||
(<code>--</code> until end of line).
|
||||
</p>
|
||||
|
||||
<p>
|
||||
Character classes define sets of characters.
|
||||
An initial <code>^</code> complements the resulting set.
|
||||
A range <em>x</em><code>-</code><em>y</em> includes in the set
|
||||
all characters with codes between the codes of <em>x</em> and <em>y</em>.
|
||||
A pre-defined class <code>%</code><em>name</em> includes all
|
||||
characters of that class.
|
||||
A simple character includes itself in the set.
|
||||
The only special characters inside a class are <code>^</code>
|
||||
(special only if it is the first character);
|
||||
<code>]</code>
|
||||
(can be included in the set as the first character,
|
||||
after the optional <code>^</code>);
|
||||
<code>%</code> (special only if followed by a letter);
|
||||
and <code>-</code>
|
||||
(can be included in the set as the first or the last character).
|
||||
</p>
|
||||
|
||||
<p>
|
||||
Currently the pre-defined classes are similar to those from the
|
||||
Lua's string library
|
||||
(<code>%a</code> for letters,
|
||||
<code>%A</code> for non letters, etc.).
|
||||
There is also a class <code>%nl</code>
|
||||
containing only the newline character,
|
||||
which is particularly handy for grammars written inside long strings,
|
||||
as long strings do not interpret escape sequences like <code>\n</code>.
|
||||
</p>
|
||||
|
||||
|
||||
<h2><a name="func">Functions</a></h2>
|
||||
|
||||
<h3><code>re.compile (string, [, defs])</code></h3>
|
||||
<p>
|
||||
Compiles the given string and
|
||||
returns an equivalent LPeg pattern.
|
||||
The given string may define either an expression or a grammar.
|
||||
The optional <code>defs</code> table provides extra Lua values
|
||||
to be used by the pattern.
|
||||
</p>
|
||||
|
||||
<h3><code>re.find (subject, pattern [, init])</code></h3>
|
||||
<p>
|
||||
Searches the given pattern in the given subject.
|
||||
If it finds a match,
|
||||
returns the index where this occurrence starts and
|
||||
the index where it ends.
|
||||
Otherwise, returns nil.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
An optional numeric argument <code>init</code> makes the search
|
||||
starts at that position in the subject string.
|
||||
As usual in Lua libraries,
|
||||
a negative value counts from the end.
|
||||
</p>
|
||||
|
||||
<h3><code>re.gsub (subject, pattern, replacement)</code></h3>
|
||||
<p>
|
||||
Does a <em>global substitution</em>,
|
||||
replacing all occurrences of <code>pattern</code>
|
||||
in the given <code>subject</code> by <code>replacement</code>.
|
||||
|
||||
<h3><code>re.match (subject, pattern)</code></h3>
|
||||
<p>
|
||||
Matches the given pattern against the given subject,
|
||||
returning all captures.
|
||||
</p>
|
||||
|
||||
<h3><code>re.updatelocale ()</code></h3>
|
||||
<p>
|
||||
Updates the pre-defined character classes to the current locale.
|
||||
</p>
|
||||
|
||||
|
||||
<h2><a name="ex">Some Examples</a></h2>
|
||||
|
||||
<h3>A complete simple program</h3>
|
||||
<p>
|
||||
The next code shows a simple complete Lua program using
|
||||
the <code>re</code> module:
|
||||
</p>
|
||||
<pre class="example">
|
||||
local re = require"re"
|
||||
|
||||
-- find the position of the first numeral in a string
|
||||
print(re.find("the number 423 is odd", "[0-9]+")) --> 12 14
|
||||
|
||||
-- returns all words in a string
|
||||
print(re.match("the number 423 is odd", "({%a+} / .)*"))
|
||||
--> the number is odd
|
||||
|
||||
-- returns the first numeral in a string
|
||||
print(re.match("the number 423 is odd", "s <- {%d+} / . s"))
|
||||
--> 423
|
||||
|
||||
print(re.gsub("hello World", "[aeiou]", "."))
|
||||
--> h.ll. W.rld
|
||||
</pre>
|
||||
|
||||
|
||||
<h3>Balanced parentheses</h3>
|
||||
<p>
|
||||
The following call will produce the same pattern produced by the
|
||||
Lua expression in the
|
||||
<a href="lpeg.html#balanced">balanced parentheses</a> example:
|
||||
</p>
|
||||
<pre class="example">
|
||||
b = re.compile[[ balanced <- "(" ([^()] / balanced)* ")" ]]
|
||||
</pre>
|
||||
|
||||
<h3>String reversal</h3>
|
||||
<p>
|
||||
The next example reverses a string:
|
||||
</p>
|
||||
<pre class="example">
|
||||
rev = re.compile[[ R <- (!.) -> '' / ({.} R) -> '%2%1']]
|
||||
print(rev:match"0123456789") --> 9876543210
|
||||
</pre>
|
||||
|
||||
<h3>CSV decoder</h3>
|
||||
<p>
|
||||
The next example replicates the <a href="lpeg.html#CSV">CSV decoder</a>:
|
||||
</p>
|
||||
<pre class="example">
|
||||
record = re.compile[[
|
||||
record <- {| field (',' field)* |} (%nl / !.)
|
||||
field <- escaped / nonescaped
|
||||
nonescaped <- { [^,"%nl]* }
|
||||
escaped <- '"' {~ ([^"] / '""' -> '"')* ~} '"'
|
||||
]]
|
||||
</pre>
|
||||
|
||||
<h3>Lua's long strings</h3>
|
||||
<p>
|
||||
The next example matches Lua long strings:
|
||||
</p>
|
||||
<pre class="example">
|
||||
c = re.compile([[
|
||||
longstring <- ('[' {:eq: '='* :} '[' close)
|
||||
close <- ']' =eq ']' / . close
|
||||
]])
|
||||
|
||||
print(c:match'[==[]]===]]]]==]===[]') --> 17
|
||||
</pre>
|
||||
|
||||
<h3>Abstract Syntax Trees</h3>
|
||||
<p>
|
||||
This example shows a simple way to build an
|
||||
abstract syntax tree (AST) for a given grammar.
|
||||
To keep our example simple,
|
||||
let us consider the following grammar
|
||||
for lists of names:
|
||||
</p>
|
||||
<pre class="example">
|
||||
p = re.compile[[
|
||||
listname <- (name s)*
|
||||
name <- [a-z][a-z]*
|
||||
s <- %s*
|
||||
]]
|
||||
</pre>
|
||||
<p>
|
||||
Now, we will add captures to build a corresponding AST.
|
||||
As a first step, the pattern will build a table to
|
||||
represent each non terminal;
|
||||
terminals will be represented by their corresponding strings:
|
||||
</p>
|
||||
<pre class="example">
|
||||
c = re.compile[[
|
||||
listname <- {| (name s)* |}
|
||||
name <- {| {[a-z][a-z]*} |}
|
||||
s <- %s*
|
||||
]]
|
||||
</pre>
|
||||
<p>
|
||||
Now, a match against <code>"hi hello bye"</code>
|
||||
results in the table
|
||||
<code>{{"hi"}, {"hello"}, {"bye"}}</code>.
|
||||
</p>
|
||||
<p>
|
||||
For such a simple grammar,
|
||||
this AST is more than enough;
|
||||
actually, the tables around each single name
|
||||
are already overkilling.
|
||||
More complex grammars,
|
||||
however, may need some more structure.
|
||||
Specifically,
|
||||
it would be useful if each table had
|
||||
a <code>tag</code> field telling what non terminal
|
||||
that table represents.
|
||||
We can add such a tag using
|
||||
<a href="lpeg.html#cap-g">named group captures</a>:
|
||||
</p>
|
||||
<pre class="example">
|
||||
x = re.compile[[
|
||||
listname <- {| {:tag: '' -> 'list':} (name s)* |}
|
||||
name <- {| {:tag: '' -> 'id':} {[a-z][a-z]*} |}
|
||||
s <- ' '*
|
||||
]]
|
||||
</pre>
|
||||
<p>
|
||||
With these group captures,
|
||||
a match against <code>"hi hello bye"</code>
|
||||
results in the following table:
|
||||
</p>
|
||||
<pre class="example">
|
||||
{tag="list",
|
||||
{tag="id", "hi"},
|
||||
{tag="id", "hello"},
|
||||
{tag="id", "bye"}
|
||||
}
|
||||
</pre>
|
||||
|
||||
|
||||
<h3>Indented blocks</h3>
|
||||
<p>
|
||||
This example breaks indented blocks into tables,
|
||||
respecting the indentation:
|
||||
</p>
|
||||
<pre class="example">
|
||||
p = re.compile[[
|
||||
block <- {| {:ident:' '*:} line
|
||||
((=ident !' ' line) / &(=ident ' ') block)* |}
|
||||
line <- {[^%nl]*} %nl
|
||||
]]
|
||||
</pre>
|
||||
<p>
|
||||
As an example,
|
||||
consider the following text:
|
||||
</p>
|
||||
<pre class="example">
|
||||
t = p:match[[
|
||||
first line
|
||||
subline 1
|
||||
subline 2
|
||||
second line
|
||||
third line
|
||||
subline 3.1
|
||||
subline 3.1.1
|
||||
subline 3.2
|
||||
]]
|
||||
</pre>
|
||||
<p>
|
||||
The resulting table <code>t</code> will be like this:
|
||||
</p>
|
||||
<pre class="example">
|
||||
{'first line'; {'subline 1'; 'subline 2'; ident = ' '};
|
||||
'second line';
|
||||
'third line'; { 'subline 3.1'; {'subline 3.1.1'; ident = ' '};
|
||||
'subline 3.2'; ident = ' '};
|
||||
ident = ''}
|
||||
</pre>
|
||||
|
||||
<h3>Macro expander</h3>
|
||||
<p>
|
||||
This example implements a simple macro expander.
|
||||
Macros must be defined as part of the pattern,
|
||||
following some simple rules:
|
||||
</p>
|
||||
<pre class="example">
|
||||
p = re.compile[[
|
||||
text <- {~ item* ~}
|
||||
item <- macro / [^()] / '(' item* ')'
|
||||
arg <- ' '* {~ (!',' item)* ~}
|
||||
args <- '(' arg (',' arg)* ')'
|
||||
-- now we define some macros
|
||||
macro <- ('apply' args) -> '%1(%2)'
|
||||
/ ('add' args) -> '%1 + %2'
|
||||
/ ('mul' args) -> '%1 * %2'
|
||||
]]
|
||||
|
||||
print(p:match"add(mul(a,b), apply(f,x))") --> a * b + f(x)
|
||||
</pre>
|
||||
<p>
|
||||
A <code>text</code> is a sequence of items,
|
||||
wherein we apply a substitution capture to expand any macros.
|
||||
An <code>item</code> is either a macro,
|
||||
any character different from parentheses,
|
||||
or a parenthesized expression.
|
||||
A macro argument (<code>arg</code>) is a sequence
|
||||
of items different from a comma.
|
||||
(Note that a comma may appear inside an item,
|
||||
e.g., inside a parenthesized expression.)
|
||||
Again we do a substitution capture to expand any macro
|
||||
in the argument before expanding the outer macro.
|
||||
<code>args</code> is a list of arguments separated by commas.
|
||||
Finally we define the macros.
|
||||
Each macro is a string substitution;
|
||||
it replaces the macro name and its arguments by its corresponding string,
|
||||
with each <code>%</code><em>n</em> replaced by the <em>n</em>-th argument.
|
||||
</p>
|
||||
|
||||
<h3>Patterns</h3>
|
||||
<p>
|
||||
This example shows the complete syntax
|
||||
of patterns accepted by <code>re</code>.
|
||||
</p>
|
||||
<pre class="example">
|
||||
p = [=[
|
||||
|
||||
pattern <- exp !.
|
||||
exp <- S (grammar / alternative)
|
||||
|
||||
alternative <- seq ('/' S seq)*
|
||||
seq <- prefix*
|
||||
prefix <- '&' S prefix / '!' S prefix / suffix
|
||||
suffix <- primary S (([+*?]
|
||||
/ '^' [+-]? num
|
||||
/ '->' S (string / '{}' / name)
|
||||
/ '=>' S name) S)*
|
||||
|
||||
primary <- '(' exp ')' / string / class / defined
|
||||
/ '{:' (name ':')? exp ':}'
|
||||
/ '=' name
|
||||
/ '{}'
|
||||
/ '{~' exp '~}'
|
||||
/ '{' exp '}'
|
||||
/ '.'
|
||||
/ name S !arrow
|
||||
/ '<' name '>' -- old-style non terminals
|
||||
|
||||
grammar <- definition+
|
||||
definition <- name S arrow exp
|
||||
|
||||
class <- '[' '^'? item (!']' item)* ']'
|
||||
item <- defined / range / .
|
||||
range <- . '-' [^]]
|
||||
|
||||
S <- (%s / '--' [^%nl]*)* -- spaces and comments
|
||||
name <- [A-Za-z][A-Za-z0-9_]*
|
||||
arrow <- '<-'
|
||||
num <- [0-9]+
|
||||
string <- '"' [^"]* '"' / "'" [^']* "'"
|
||||
defined <- '%' name
|
||||
|
||||
]=]
|
||||
|
||||
print(re.match(p, p)) -- a self description must match itself
|
||||
</pre>
|
||||
|
||||
|
||||
|
||||
<h2><a name="license">License</a></h2>
|
||||
|
||||
<p>
|
||||
Copyright © 2008-2015 Lua.org, PUC-Rio.
|
||||
</p>
|
||||
<p>
|
||||
Permission is hereby granted, free of charge,
|
||||
to any person obtaining a copy of this software and
|
||||
associated documentation files (the "Software"),
|
||||
to deal in the Software without restriction,
|
||||
including without limitation the rights to use,
|
||||
copy, modify, merge, publish, distribute, sublicense,
|
||||
and/or sell copies of the Software,
|
||||
and to permit persons to whom the Software is
|
||||
furnished to do so,
|
||||
subject to the following conditions:
|
||||
</p>
|
||||
|
||||
<p>
|
||||
The above copyright notice and this permission notice
|
||||
shall be included in all copies or substantial portions of the Software.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED,
|
||||
INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
|
||||
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
</p>
|
||||
|
||||
</div> <!-- id="content" -->
|
||||
|
||||
</div> <!-- id="main" -->
|
||||
|
||||
<div id="about">
|
||||
<p><small>
|
||||
$Id: re.html,v 1.24 2016/09/20 17:41:27 roberto Exp $
|
||||
</small></p>
|
||||
</div> <!-- id="about" -->
|
||||
|
||||
</div> <!-- id="container" -->
|
||||
|
||||
</body>
|
||||
</html>
|
|
@ -0,0 +1,259 @@
|
|||
-- $Id: re.lua,v 1.44 2013/03/26 20:11:40 roberto Exp $
|
||||
|
||||
-- imported functions and modules
|
||||
local tonumber, type, print, error = tonumber, type, print, error
|
||||
local setmetatable = setmetatable
|
||||
local m = require"lpeg"
|
||||
|
||||
-- 'm' will be used to parse expressions, and 'mm' will be used to
|
||||
-- create expressions; that is, 're' runs on 'm', creating patterns
|
||||
-- on 'mm'
|
||||
local mm = m
|
||||
|
||||
-- pattern's metatable
|
||||
local mt = getmetatable(mm.P(0))
|
||||
|
||||
|
||||
|
||||
-- No more global accesses after this point
|
||||
local version = _VERSION
|
||||
if version == "Lua 5.2" then _ENV = nil end
|
||||
|
||||
|
||||
local any = m.P(1)
|
||||
|
||||
|
||||
-- Pre-defined names
|
||||
local Predef = { nl = m.P"\n" }
|
||||
|
||||
|
||||
local mem
|
||||
local fmem
|
||||
local gmem
|
||||
|
||||
|
||||
local function updatelocale ()
|
||||
mm.locale(Predef)
|
||||
Predef.a = Predef.alpha
|
||||
Predef.c = Predef.cntrl
|
||||
Predef.d = Predef.digit
|
||||
Predef.g = Predef.graph
|
||||
Predef.l = Predef.lower
|
||||
Predef.p = Predef.punct
|
||||
Predef.s = Predef.space
|
||||
Predef.u = Predef.upper
|
||||
Predef.w = Predef.alnum
|
||||
Predef.x = Predef.xdigit
|
||||
Predef.A = any - Predef.a
|
||||
Predef.C = any - Predef.c
|
||||
Predef.D = any - Predef.d
|
||||
Predef.G = any - Predef.g
|
||||
Predef.L = any - Predef.l
|
||||
Predef.P = any - Predef.p
|
||||
Predef.S = any - Predef.s
|
||||
Predef.U = any - Predef.u
|
||||
Predef.W = any - Predef.w
|
||||
Predef.X = any - Predef.x
|
||||
mem = {} -- restart memoization
|
||||
fmem = {}
|
||||
gmem = {}
|
||||
local mt = {__mode = "v"}
|
||||
setmetatable(mem, mt)
|
||||
setmetatable(fmem, mt)
|
||||
setmetatable(gmem, mt)
|
||||
end
|
||||
|
||||
|
||||
updatelocale()
|
||||
|
||||
|
||||
|
||||
local I = m.P(function (s,i) print(i, s:sub(1, i-1)); return i end)
|
||||
|
||||
|
||||
local function getdef (id, defs)
|
||||
local c = defs and defs[id]
|
||||
if not c then error("undefined name: " .. id) end
|
||||
return c
|
||||
end
|
||||
|
||||
|
||||
local function patt_error (s, i)
|
||||
local msg = (#s < i + 20) and s:sub(i)
|
||||
or s:sub(i,i+20) .. "..."
|
||||
msg = ("pattern error near '%s'"):format(msg)
|
||||
error(msg, 2)
|
||||
end
|
||||
|
||||
local function mult (p, n)
|
||||
local np = mm.P(true)
|
||||
while n >= 1 do
|
||||
if n%2 >= 1 then np = np * p end
|
||||
p = p * p
|
||||
n = n/2
|
||||
end
|
||||
return np
|
||||
end
|
||||
|
||||
local function equalcap (s, i, c)
|
||||
if type(c) ~= "string" then return nil end
|
||||
local e = #c + i
|
||||
if s:sub(i, e - 1) == c then return e else return nil end
|
||||
end
|
||||
|
||||
|
||||
local S = (Predef.space + "--" * (any - Predef.nl)^0)^0
|
||||
|
||||
local name = m.R("AZ", "az", "__") * m.R("AZ", "az", "__", "09")^0
|
||||
|
||||
local arrow = S * "<-"
|
||||
|
||||
local seq_follow = m.P"/" + ")" + "}" + ":}" + "~}" + "|}" + (name * arrow) + -1
|
||||
|
||||
name = m.C(name)
|
||||
|
||||
|
||||
-- a defined name only have meaning in a given environment
|
||||
local Def = name * m.Carg(1)
|
||||
|
||||
local num = m.C(m.R"09"^1) * S / tonumber
|
||||
|
||||
local String = "'" * m.C((any - "'")^0) * "'" +
|
||||
'"' * m.C((any - '"')^0) * '"'
|
||||
|
||||
|
||||
local defined = "%" * Def / function (c,Defs)
|
||||
local cat = Defs and Defs[c] or Predef[c]
|
||||
if not cat then error ("name '" .. c .. "' undefined") end
|
||||
return cat
|
||||
end
|
||||
|
||||
local Range = m.Cs(any * (m.P"-"/"") * (any - "]")) / mm.R
|
||||
|
||||
local item = defined + Range + m.C(any)
|
||||
|
||||
local Class =
|
||||
"["
|
||||
* (m.C(m.P"^"^-1)) -- optional complement symbol
|
||||
* m.Cf(item * (item - "]")^0, mt.__add) /
|
||||
function (c, p) return c == "^" and any - p or p end
|
||||
* "]"
|
||||
|
||||
local function adddef (t, k, exp)
|
||||
if t[k] then
|
||||
error("'"..k.."' already defined as a rule")
|
||||
else
|
||||
t[k] = exp
|
||||
end
|
||||
return t
|
||||
end
|
||||
|
||||
local function firstdef (n, r) return adddef({n}, n, r) end
|
||||
|
||||
|
||||
local function NT (n, b)
|
||||
if not b then
|
||||
error("rule '"..n.."' used outside a grammar")
|
||||
else return mm.V(n)
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
local exp = m.P{ "Exp",
|
||||
Exp = S * ( m.V"Grammar"
|
||||
+ m.Cf(m.V"Seq" * ("/" * S * m.V"Seq")^0, mt.__add) );
|
||||
Seq = m.Cf(m.Cc(m.P"") * m.V"Prefix"^0 , mt.__mul)
|
||||
* (#seq_follow + patt_error);
|
||||
Prefix = "&" * S * m.V"Prefix" / mt.__len
|
||||
+ "!" * S * m.V"Prefix" / mt.__unm
|
||||
+ m.V"Suffix";
|
||||
Suffix = m.Cf(m.V"Primary" * S *
|
||||
( ( m.P"+" * m.Cc(1, mt.__pow)
|
||||
+ m.P"*" * m.Cc(0, mt.__pow)
|
||||
+ m.P"?" * m.Cc(-1, mt.__pow)
|
||||
+ "^" * ( m.Cg(num * m.Cc(mult))
|
||||
+ m.Cg(m.C(m.S"+-" * m.R"09"^1) * m.Cc(mt.__pow))
|
||||
)
|
||||
+ "->" * S * ( m.Cg((String + num) * m.Cc(mt.__div))
|
||||
+ m.P"{}" * m.Cc(nil, m.Ct)
|
||||
+ m.Cg(Def / getdef * m.Cc(mt.__div))
|
||||
)
|
||||
+ "=>" * S * m.Cg(Def / getdef * m.Cc(m.Cmt))
|
||||
) * S
|
||||
)^0, function (a,b,f) return f(a,b) end );
|
||||
Primary = "(" * m.V"Exp" * ")"
|
||||
+ String / mm.P
|
||||
+ Class
|
||||
+ defined
|
||||
+ "{:" * (name * ":" + m.Cc(nil)) * m.V"Exp" * ":}" /
|
||||
function (n, p) return mm.Cg(p, n) end
|
||||
+ "=" * name / function (n) return mm.Cmt(mm.Cb(n), equalcap) end
|
||||
+ m.P"{}" / mm.Cp
|
||||
+ "{~" * m.V"Exp" * "~}" / mm.Cs
|
||||
+ "{|" * m.V"Exp" * "|}" / mm.Ct
|
||||
+ "{" * m.V"Exp" * "}" / mm.C
|
||||
+ m.P"." * m.Cc(any)
|
||||
+ (name * -arrow + "<" * name * ">") * m.Cb("G") / NT;
|
||||
Definition = name * arrow * m.V"Exp";
|
||||
Grammar = m.Cg(m.Cc(true), "G") *
|
||||
m.Cf(m.V"Definition" / firstdef * m.Cg(m.V"Definition")^0,
|
||||
adddef) / mm.P
|
||||
}
|
||||
|
||||
local pattern = S * m.Cg(m.Cc(false), "G") * exp / mm.P * (-any + patt_error)
|
||||
|
||||
|
||||
local function compile (p, defs)
|
||||
if mm.type(p) == "pattern" then return p end -- already compiled
|
||||
local cp = pattern:match(p, 1, defs)
|
||||
if not cp then error("incorrect pattern", 3) end
|
||||
return cp
|
||||
end
|
||||
|
||||
local function match (s, p, i)
|
||||
local cp = mem[p]
|
||||
if not cp then
|
||||
cp = compile(p)
|
||||
mem[p] = cp
|
||||
end
|
||||
return cp:match(s, i or 1)
|
||||
end
|
||||
|
||||
local function find (s, p, i)
|
||||
local cp = fmem[p]
|
||||
if not cp then
|
||||
cp = compile(p) / 0
|
||||
cp = mm.P{ mm.Cp() * cp * mm.Cp() + 1 * mm.V(1) }
|
||||
fmem[p] = cp
|
||||
end
|
||||
local i, e = cp:match(s, i or 1)
|
||||
if i then return i, e - 1
|
||||
else return i
|
||||
end
|
||||
end
|
||||
|
||||
local function gsub (s, p, rep)
|
||||
local g = gmem[p] or {} -- ensure gmem[p] is not collected while here
|
||||
gmem[p] = g
|
||||
local cp = g[rep]
|
||||
if not cp then
|
||||
cp = compile(p)
|
||||
cp = mm.Cs((cp / rep + 1)^0)
|
||||
g[rep] = cp
|
||||
end
|
||||
return cp:match(s)
|
||||
end
|
||||
|
||||
|
||||
-- exported names
|
||||
local re = {
|
||||
compile = compile,
|
||||
match = match,
|
||||
find = find,
|
||||
gsub = gsub,
|
||||
updatelocale = updatelocale,
|
||||
}
|
||||
|
||||
if version == "Lua 5.1" then _G.re = re end
|
||||
|
||||
return re
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue