This commit is contained in:
		
							parent
							
								
									7d52c1da99
								
							
						
					
					
						commit
						7035731624
					
				
							
								
								
									
										20
									
								
								Makefile
								
								
								
								
							
							
						
						
									
										20
									
								
								Makefile
								
								
								
								
							|  | @ -26,12 +26,15 @@ MINGW_LINKER_FLAGS= \ | ||||||
| LINUX_INCLUDES= \
 | LINUX_INCLUDES= \
 | ||||||
| 	`pkg-config --cflags gtk+-3.0` | 	`pkg-config --cflags gtk+-3.0` | ||||||
| 
 | 
 | ||||||
|  | LINUX_LIBS= \
 | ||||||
|  | 	`pkg-config --libs gtk+-3.0` | ||||||
|  | 
 | ||||||
| LINUX64_LIBS= \
 | LINUX64_LIBS= \
 | ||||||
| 	`pkg-config --libs gtk+-3.0` \
 | 	$(LINUX_LIBS) \
 | ||||||
| 	-Llib/linux | 	-Llib/linux | ||||||
| 
 | 
 | ||||||
| LINUX32_LIBS= \
 | LINUX32_LIBS= \
 | ||||||
| 	`pkg-config --libs gtk+-3.0` \
 | 	$(LINUX_LIBS) \
 | ||||||
| 	-Llib/linux32 | 	-Llib/linux32 | ||||||
| 
 | 
 | ||||||
| LINUX_ARM_LIBS= \
 | LINUX_ARM_LIBS= \
 | ||||||
|  | @ -40,7 +43,7 @@ LINUX_ARM_LIBS= \ | ||||||
| LINUX_LINKER_FLAGS= \
 | LINUX_LINKER_FLAGS= \
 | ||||||
| 	-D_GNU_SOURCE \
 | 	-D_GNU_SOURCE \
 | ||||||
| 	-lSDL2 \
 | 	-lSDL2 \
 | ||||||
| 	-llua \
 | 	-llua5.3 \
 | ||||||
| 	-ldl \
 | 	-ldl \
 | ||||||
| 	-lm \
 | 	-lm \
 | ||||||
| 	-lpthread \
 | 	-lpthread \
 | ||||||
|  | @ -114,6 +117,8 @@ SOURCES=\ | ||||||
| SOURCES_EXT= \
 | SOURCES_EXT= \
 | ||||||
| 	src/html.c | 	src/html.c | ||||||
| 
 | 
 | ||||||
|  | LPEG_SRC= src/ext/lpeg/*.c | ||||||
|  | 
 | ||||||
| DEMO_ASSETS= \
 | DEMO_ASSETS= \
 | ||||||
| 	bin/assets/fire.tic.dat \
 | 	bin/assets/fire.tic.dat \
 | ||||||
| 	bin/assets/p3d.tic.dat \
 | 	bin/assets/p3d.tic.dat \
 | ||||||
|  | @ -272,15 +277,18 @@ mingw: $(DEMO_ASSETS) $(TIC80_DLL) $(TIC_O) bin/html.o bin/res.o | ||||||
| run: mingw | run: mingw | ||||||
| 	$(MINGW_OUTPUT) | 	$(MINGW_OUTPUT) | ||||||
| 
 | 
 | ||||||
| linux64: | linux64-flto: | ||||||
| 	$(CC) $(LINUX_INCLUDES) $(SOURCES) $(TIC80_SRC) $(SOURCES_EXT) $(OPT) $(INCLUDES) $(LINUX64_LIBS) $(LINUX_LINKER_FLAGS) -flto -o bin/tic | 	$(CC) $(LINUX_INCLUDES) $(SOURCES) $(TIC80_SRC) $(SOURCES_EXT) $(OPT) $(INCLUDES) $(LINUX64_LIBS) $(LINUX_LINKER_FLAGS) -flto -o bin/tic | ||||||
| 
 | 
 | ||||||
| linux32: | linux32-flto: | ||||||
| 	$(CC) $(LINUX_INCLUDES) $(SOURCES) $(TIC80_SRC) $(SOURCES_EXT) $(OPT) $(INCLUDES) $(LINUX32_LIBS) $(LINUX_LINKER_FLAGS) -flto -o bin/tic | 	$(CC) $(LINUX_INCLUDES) $(SOURCES) $(TIC80_SRC) $(SOURCES_EXT) $(OPT) $(INCLUDES) $(LINUX32_LIBS) $(LINUX_LINKER_FLAGS) -flto -o bin/tic | ||||||
| 
 | 
 | ||||||
| arm: | arm-flto: | ||||||
| 	$(CC) $(OPT_ARM) $(SOURCES) $(TIC80_SRC) $(OPT) $(INCLUDES) $(LINUX_ARM_LIBS) $(LINUX_LINKER_FLAGS) -flto -o bin/tic | 	$(CC) $(OPT_ARM) $(SOURCES) $(TIC80_SRC) $(OPT) $(INCLUDES) $(LINUX_ARM_LIBS) $(LINUX_LINKER_FLAGS) -flto -o bin/tic | ||||||
| 
 | 
 | ||||||
|  | linux:  | ||||||
|  | 	$(CC) $(LINUX_INCLUDES) $(SOURCES) $(LPEG_SRC) $(SOURCES_EXT) $(TIC80_SRC) $(OPT) $(INCLUDES) $(LINUX_LIBS) $(LINUX_LINKER_FLAGS) -o bin/tic | ||||||
|  | 
 | ||||||
| macosx: | macosx: | ||||||
| 	$(CC) $(SOURCES) $(TIC80_SRC) $(SOURCES_EXT) src/ext/file_dialog.m $(OPT) $(MACOSX_OPT) $(INCLUDES) $(MACOSX_LIBS) -o bin/tic | 	$(CC) $(SOURCES) $(TIC80_SRC) $(SOURCES_EXT) src/ext/file_dialog.m $(OPT) $(MACOSX_OPT) $(INCLUDES) $(MACOSX_LIBS) -o bin/tic | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -31,10 +31,10 @@ made by [@matheuslessarodrigues](https://github.com/matheuslessarodrigues) | ||||||
| ## Linux | ## Linux | ||||||
| run the following commands in the Terminal | run the following commands in the Terminal | ||||||
| ``` | ``` | ||||||
| sudo apt-get install git build-essential libgtk-3-dev | sudo apt-get install git build-essential libgtk-3-dev libsdl2-dev lua5.3-dev libgif-dev lua-lpeg-dev zlib1g-dev | ||||||
| git clone https://github.com/nesbox/TIC-80 | git clone https://github.com/nesbox/TIC-80 | ||||||
| cd TIC-80 | cd TIC-80 | ||||||
| make linux32 (or linux64/arm depending on your system) | make linux | ||||||
| ``` | ``` | ||||||
| 
 | 
 | ||||||
| ## iOS / tvOS | ## iOS / tvOS | ||||||
|  |  | ||||||
|  | @ -0,0 +1,96 @@ | ||||||
|  | HISTORY for LPeg 1.0 | ||||||
|  | 
 | ||||||
|  | * Changes from version 0.12 to 1.0 | ||||||
|  |   --------------------------------- | ||||||
|  |   + group "names" can be any Lua value | ||||||
|  |   + some bugs fixed | ||||||
|  |   + other small improvements | ||||||
|  | 
 | ||||||
|  | * Changes from version 0.11 to 0.12 | ||||||
|  |   --------------------------------- | ||||||
|  |   + no "unsigned short" limit for pattern sizes | ||||||
|  |   + match-time captures considered nullable | ||||||
|  |   + some bugs fixed | ||||||
|  | 
 | ||||||
|  | * Changes from version 0.10 to 0.11 | ||||||
|  |   -------------------------------   | ||||||
|  |   + complete reimplementation of the code generator | ||||||
|  |   + new syntax for table captures | ||||||
|  |   + new functions in module 're' | ||||||
|  |   + other small improvements | ||||||
|  | 
 | ||||||
|  | * Changes from version 0.9 to 0.10 | ||||||
|  |   -------------------------------   | ||||||
|  |   + backtrack stack has configurable size | ||||||
|  |   + better error messages | ||||||
|  |   + Notation for non-terminals in 're' back to A instead of <A> | ||||||
|  |   + experimental look-behind pattern | ||||||
|  |   + support for external extensions | ||||||
|  |   + works with Lua 5.2 | ||||||
|  |   + consumes less C stack | ||||||
|  | 
 | ||||||
|  |   - "and" predicates do not keep captures | ||||||
|  | 
 | ||||||
|  | * Changes from version 0.8 to 0.9 | ||||||
|  |   -------------------------------   | ||||||
|  |   + The accumulator capture was replaced by a fold capture; | ||||||
|  |     programs that used the old 'lpeg.Ca' will need small changes. | ||||||
|  |   + Some support for character classes from old C locales. | ||||||
|  |   + A new named-group capture. | ||||||
|  | 
 | ||||||
|  | * Changes from version 0.7 to 0.8 | ||||||
|  |   -------------------------------   | ||||||
|  |   + New "match-time" capture. | ||||||
|  |   + New "argument capture" that allows passing arguments into the pattern. | ||||||
|  |   + Better documentation for 're'. | ||||||
|  |   + Several small improvements for 're'. | ||||||
|  |   + The 're' module has an incompatibility with previous versions:  | ||||||
|  |     now, any use of a non-terminal must be enclosed in angle brackets  | ||||||
|  |     (like <B>). | ||||||
|  | 
 | ||||||
|  | * Changes from version 0.6 to 0.7 | ||||||
|  |   -------------------------------   | ||||||
|  |   + Several improvements in module 're': | ||||||
|  |     - better documentation; | ||||||
|  |     - support for most captures (all but accumulator); | ||||||
|  |     - limited repetitions p{n,m}. | ||||||
|  |   + Small improvements in efficiency. | ||||||
|  |   + Several small bugs corrected (special thanks to Hans Hagen  | ||||||
|  |     and Taco Hoekwater). | ||||||
|  | 
 | ||||||
|  | * Changes from version 0.5 to 0.6 | ||||||
|  |   -------------------------------   | ||||||
|  |   + Support for non-numeric indices in grammars. | ||||||
|  |   + Some bug fixes (thanks to the luatex team). | ||||||
|  |   + Some new optimizations; (thanks to Mike Pall). | ||||||
|  |   + A new page layout (thanks to Andre Carregal). | ||||||
|  |   + Minimal documentation for module 're'. | ||||||
|  | 
 | ||||||
|  | * Changes from version 0.4 to 0.5 | ||||||
|  |   -------------------------------   | ||||||
|  |   + Several optimizations. | ||||||
|  |   + lpeg.P now accepts booleans. | ||||||
|  |   + Some new examples. | ||||||
|  |   + A proper license. | ||||||
|  |   + Several small improvements. | ||||||
|  | 
 | ||||||
|  | * Changes from version 0.3 to 0.4 | ||||||
|  |   -------------------------------   | ||||||
|  |   + Static check for loops in repetitions and grammars. | ||||||
|  |   + Removed label option in captures. | ||||||
|  |   + The implementation of captures uses less memory. | ||||||
|  | 
 | ||||||
|  | * Changes from version 0.2 to 0.3 | ||||||
|  |   -------------------------------   | ||||||
|  |   + User-defined patterns in Lua. | ||||||
|  |   + Several new captures. | ||||||
|  | 
 | ||||||
|  | * Changes from version 0.1 to 0.2 | ||||||
|  |   -------------------------------   | ||||||
|  |   + Several small corrections. | ||||||
|  |   + Handles embedded zeros like any other character. | ||||||
|  |   + Capture "name" can be any Lua value. | ||||||
|  |   + Unlimited number of captures. | ||||||
|  |   + Match gets an optional initial position. | ||||||
|  | 
 | ||||||
|  | (end of HISTORY) | ||||||
|  | @ -0,0 +1,537 @@ | ||||||
|  | /*
 | ||||||
|  | ** $Id: lpcap.c,v 1.6 2015/06/15 16:09:57 roberto Exp $ | ||||||
|  | ** Copyright 2007, Lua.org & PUC-Rio  (see 'lpeg.html' for license) | ||||||
|  | */ | ||||||
|  | 
 | ||||||
|  | #include "lua.h" | ||||||
|  | #include "lauxlib.h" | ||||||
|  | 
 | ||||||
|  | #include "lpcap.h" | ||||||
|  | #include "lptypes.h" | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | #define captype(cap)	((cap)->kind) | ||||||
|  | 
 | ||||||
|  | #define isclosecap(cap)	(captype(cap) == Cclose) | ||||||
|  | 
 | ||||||
|  | /* end address of capture 'c': its start plus (siz - 1); full captures keep match length + 1 in 'siz' (see pushnestedvalues) */ | ||||||
|  | #define closeaddr(c)	((c)->s + (c)->siz - 1) | ||||||
|  | 
 | ||||||
|  | /* true when the entry has a nonzero 'siz', i.e. it is complete on its own and is not paired with a separate close entry */ | ||||||
|  | #define isfullcap(cap)	((cap)->siz != 0) | ||||||
|  | 
 | ||||||
|  | /* push on the Lua stack entry 'v' (raw access) of the table found at stack index ktableidx(ptop) */ | ||||||
|  | #define getfromktable(cs,v)	lua_rawgeti((cs)->L, ktableidx((cs)->ptop), v) | ||||||
|  | 
 | ||||||
|  | /* push the ktable entry selected by the 'idx' field of the current capture */ | ||||||
|  | #define pushluaval(cs)		getfromktable(cs, (cs)->cap->idx) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  | ** Make sure the one-slot cache for Lua values holds entry 'v' of the | ||||||
|  | ** running pattern's ktable, and return the stack index of that slot. | ||||||
|  | ** The slot lives at stack index 'ptop + 1'; 'cs->valuecached' records | ||||||
|  | ** which ktable index is currently stored there, so the value is | ||||||
|  | ** fetched again only when a different 'v' is requested. | ||||||
|  | */ | ||||||
|  | static int updatecache (CapState *cs, int v) { | ||||||
|  |   int idx = cs->ptop + 1;  /* stack index of cache for Lua values */ | ||||||
|  |   if (v != cs->valuecached) {  /* not there? */ | ||||||
|  |     getfromktable(cs, v);  /* get value from 'ktable' */ | ||||||
|  |     lua_replace(cs->L, idx);  /* put it at reserved stack position */ | ||||||
|  |     cs->valuecached = v;  /* keep track of what is there */ | ||||||
|  |   } | ||||||
|  |   return idx; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | static int pushcapture (CapState *cs); | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  | ** Goes back in a list of captures looking for the open capture | ||||||
|  | ** corresponding to a close. 'cap' points at the closing entry; the | ||||||
|  | ** search starts at the entry just before it. Full captures are | ||||||
|  | ** ignored, and each close found on the way means one more open must | ||||||
|  | ** be skipped before the matching one is reached. | ||||||
|  | ** NOTE(review): the loop has no lower bound, so it relies on the list | ||||||
|  | ** being well formed (a matching open must exist) -- confirm at callers. | ||||||
|  | */ | ||||||
|  | static Capture *findopen (Capture *cap) { | ||||||
|  |   int n = 0;  /* number of closes waiting an open */ | ||||||
|  |   for (;;) { | ||||||
|  |     cap--; | ||||||
|  |     if (isclosecap(cap)) n++;  /* one more open to skip */ | ||||||
|  |     else if (!isfullcap(cap)) | ||||||
|  |       if (n-- == 0) return cap;  /* open entry with no pending close: found it */ | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  | ** Go to the next capture: advance 'cs->cap' past the current one. | ||||||
|  | ** A full capture occupies a single entry; an open capture is skipped | ||||||
|  | ** together with everything nested inside it, up to and including its | ||||||
|  | ** matching close entry. | ||||||
|  | */ | ||||||
|  | static void nextcap (CapState *cs) { | ||||||
|  |   Capture *cap = cs->cap; | ||||||
|  |   if (!isfullcap(cap)) {  /* not a single capture? */ | ||||||
|  |     int n = 0;  /* number of opens waiting a close */ | ||||||
|  |     for (;;) {  /* look for corresponding close */ | ||||||
|  |       cap++; | ||||||
|  |       if (isclosecap(cap)) { | ||||||
|  |         if (n-- == 0) break;  /* close of the capture we started from */ | ||||||
|  |       } | ||||||
|  |       else if (!isfullcap(cap)) n++;  /* nested open: its close must be skipped too */ | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |   cs->cap = cap + 1;  /* + 1 to skip last close (or entire single capture) */ | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  | ** Push on the Lua stack all values generated by nested captures inside | ||||||
|  | ** the current capture. Returns number of values pushed. 'addextra' | ||||||
|  | ** makes it push the entire match after all captured values. The | ||||||
|  | ** entire match is pushed also if there are no other nested values, | ||||||
|  | ** so the function never returns zero. | ||||||
|  | ** On return 'cs->cap' points just after the current capture (after | ||||||
|  | ** the single entry of a full capture, or after the close entry). | ||||||
|  | */ | ||||||
|  | static int pushnestedvalues (CapState *cs, int addextra) { | ||||||
|  |   Capture *co = cs->cap;  /* remember the capture being expanded */ | ||||||
|  |   if (isfullcap(cs->cap++)) {  /* no nested captures? */ | ||||||
|  |     lua_pushlstring(cs->L, co->s, co->siz - 1);  /* push whole match */ | ||||||
|  |     return 1;  /* that is it */ | ||||||
|  |   } | ||||||
|  |   else { | ||||||
|  |     int n = 0; | ||||||
|  |     while (!isclosecap(cs->cap))  /* repeat for all nested patterns */ | ||||||
|  |       n += pushcapture(cs); | ||||||
|  |     if (addextra || n == 0) {  /* need extra? */ | ||||||
|  |       /* match text runs from the open entry's position to the close entry's */ | ||||||
|  |       lua_pushlstring(cs->L, co->s, cs->cap->s - co->s);  /* push whole match */ | ||||||
|  |       n++; | ||||||
|  |     } | ||||||
|  |     cs->cap++;  /* skip close entry */ | ||||||
|  |     return n; | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  | ** Push only the first value generated by nested captures. All values | ||||||
|  | ** are produced by 'pushnestedvalues' (which yields at least one) and | ||||||
|  | ** the ones above the first are popped, so exactly one value is left | ||||||
|  | ** on the stack. | ||||||
|  | */ | ||||||
|  | static void pushonenestedvalue (CapState *cs) { | ||||||
|  |   int n = pushnestedvalues(cs, 0); | ||||||
|  |   if (n > 1) | ||||||
|  |     lua_pop(cs->L, n - 1);  /* pop extra values */ | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  | ** Try to find a named group capture with the name given at the top of | ||||||
|  | ** the stack; goes backward from 'cap' down to the start of the list | ||||||
|  | ** ('cs->ocap'). Captures nested inside an already closed capture are | ||||||
|  | ** jumped over; open entries without a close before 'cap' (captures | ||||||
|  | ** enclosing 'cap') are not candidates. | ||||||
|  | ** On success the name is popped and the group's entry is returned; | ||||||
|  | ** otherwise a Lua error is raised. | ||||||
|  | */ | ||||||
|  | static Capture *findback (CapState *cs, Capture *cap) { | ||||||
|  |   lua_State *L = cs->L; | ||||||
|  |   while (cap-- > cs->ocap) {  /* repeat until end of list */ | ||||||
|  |     if (isclosecap(cap)) | ||||||
|  |       cap = findopen(cap);  /* skip nested captures */ | ||||||
|  |     else if (!isfullcap(cap)) | ||||||
|  |       continue; /* opening an enclosing capture: skip and get previous */ | ||||||
|  |     if (captype(cap) == Cgroup) { | ||||||
|  |       getfromktable(cs, cap->idx);  /* get group name */ | ||||||
|  |       if (lp_equal(L, -2, -1)) {  /* right group? */ | ||||||
|  |         lua_pop(L, 2);  /* remove reference name and group name */ | ||||||
|  |         return cap; | ||||||
|  |       } | ||||||
|  |       else lua_pop(L, 1);  /* remove group name */ | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |   /* reference name is still at the top of the stack here */ | ||||||
|  |   luaL_error(L, "back reference '%s' not found", lua_tostring(L, -1)); | ||||||
|  |   return NULL;  /* to avoid warnings */ | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  | ** Back-reference capture. Return number of values pushed. | ||||||
|  | ** Looks up (backward) the group whose name is stored in the ktable | ||||||
|  | ** entry of the current capture, pushes that group's values, and then | ||||||
|  | ** resumes the traversal right after the back-reference entry. | ||||||
|  | */ | ||||||
|  | static int backrefcap (CapState *cs) { | ||||||
|  |   int n; | ||||||
|  |   Capture *curr = cs->cap;  /* the back-reference entry itself */ | ||||||
|  |   pushluaval(cs);  /* reference name */ | ||||||
|  |   cs->cap = findback(cs, curr);  /* find corresponding group */ | ||||||
|  |   n = pushnestedvalues(cs, 0);  /* push group's values */ | ||||||
|  |   cs->cap = curr + 1;  /* restore cursor: continue after the back reference */ | ||||||
|  |   return n; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  | ** Table capture: creates a new table and populates it with nested | ||||||
|  | ** captures. A named group stores its first value under the group's | ||||||
|  | ** name; values from any other nested capture are stored at | ||||||
|  | ** consecutive integer keys starting at 1. Always pushes exactly one | ||||||
|  | ** value (the table) and returns 1. | ||||||
|  | */ | ||||||
|  | static int tablecap (CapState *cs) { | ||||||
|  |   lua_State *L = cs->L; | ||||||
|  |   int n = 0;  /* number of values stored at integer keys so far */ | ||||||
|  |   lua_newtable(L); | ||||||
|  |   if (isfullcap(cs->cap++)) | ||||||
|  |     return 1;  /* table is empty */ | ||||||
|  |   while (!isclosecap(cs->cap)) { | ||||||
|  |     if (captype(cs->cap) == Cgroup && cs->cap->idx != 0) {  /* named group? */ | ||||||
|  |       pushluaval(cs);  /* push group name */ | ||||||
|  |       pushonenestedvalue(cs); | ||||||
|  |       lua_settable(L, -3);  /* table[name] = value; pops both */ | ||||||
|  |     } | ||||||
|  |     else {  /* not a named group */ | ||||||
|  |       int i; | ||||||
|  |       int k = pushcapture(cs); | ||||||
|  |       /* each lua_rawseti pops the top value, so the table moves one slot closer to the top */ | ||||||
|  |       for (i = k; i > 0; i--)  /* store all values into table */ | ||||||
|  |         lua_rawseti(L, -(i + 1), n + i); | ||||||
|  |       n += k; | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |   cs->cap++;  /* skip close entry */ | ||||||
|  |   return 1;  /* number of values pushed (only the table) */ | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  | ** Table-query capture: uses the first nested value as a key into the | ||||||
|  | ** table stored in the ktable entry of this capture. Returns 1 with the | ||||||
|  | ** looked-up value on the stack, or 0 (nothing pushed) when the lookup | ||||||
|  | ** gives nil. | ||||||
|  | */ | ||||||
|  | static int querycap (CapState *cs) { | ||||||
|  |   int idx = cs->cap->idx;  /* read before the cursor moves past this capture */ | ||||||
|  |   pushonenestedvalue(cs);  /* get nested capture */ | ||||||
|  |   lua_gettable(cs->L, updatecache(cs, idx));  /* query cap. value at table */ | ||||||
|  |   if (!lua_isnil(cs->L, -1)) | ||||||
|  |     return 1; | ||||||
|  |   else {  /* no value */ | ||||||
|  |     lua_pop(cs->L, 1);  /* remove nil */ | ||||||
|  |     return 0; | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  | ** Fold capture: the first value of the first nested capture is the | ||||||
|  | ** initial accumulator; for every following nested capture the folding | ||||||
|  | ** function (ktable entry of this capture) is called with the | ||||||
|  | ** accumulator plus that capture's values, and its single result | ||||||
|  | ** becomes the new accumulator. Returns 1 (the final accumulator). | ||||||
|  | ** Raises a Lua error when there is no initial value. | ||||||
|  | */ | ||||||
|  | static int foldcap (CapState *cs) { | ||||||
|  |   int n; | ||||||
|  |   lua_State *L = cs->L; | ||||||
|  |   int idx = cs->cap->idx;  /* ktable index of folding function */ | ||||||
|  |   if (isfullcap(cs->cap++) ||  /* no nested captures? */ | ||||||
|  |       isclosecap(cs->cap) ||  /* no nested captures (large subject)? */ | ||||||
|  |       (n = pushcapture(cs)) == 0)  /* nested captures with no values? */ | ||||||
|  |     return luaL_error(L, "no initial value for fold capture"); | ||||||
|  |   if (n > 1) | ||||||
|  |     lua_pop(L, n - 1);  /* leave only one result for accumulator */ | ||||||
|  |   while (!isclosecap(cs->cap)) { | ||||||
|  |     lua_pushvalue(L, updatecache(cs, idx));  /* get folding function */ | ||||||
|  |     lua_insert(L, -2);  /* put it before accumulator */ | ||||||
|  |     n = pushcapture(cs);  /* get next capture's values */ | ||||||
|  |     lua_call(L, n + 1, 1);  /* call folding function */ | ||||||
|  |   } | ||||||
|  |   cs->cap++;  /* skip close entry */ | ||||||
|  |   return 1;  /* only accumulator left on the stack */ | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  | ** Function capture: calls the function stored in the ktable entry of | ||||||
|  | ** this capture with all nested values as arguments. Every result of | ||||||
|  | ** the call is a capture value; returns how many there are (may be 0). | ||||||
|  | */ | ||||||
|  | static int functioncap (CapState *cs) { | ||||||
|  |   int n; | ||||||
|  |   int top = lua_gettop(cs->L);  /* stack size before the call, to count results */ | ||||||
|  |   pushluaval(cs);  /* push function */ | ||||||
|  |   n = pushnestedvalues(cs, 0);  /* push nested captures */ | ||||||
|  |   lua_call(cs->L, n, LUA_MULTRET);  /* call function */ | ||||||
|  |   return lua_gettop(cs->L) - top;  /* return function's results */ | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  | ** Select (numbered) capture: keeps only the nested value whose | ||||||
|  | ** position is given by 'idx' of this capture. An 'idx' of zero | ||||||
|  | ** produces no value at all. Raises a Lua error when there are fewer | ||||||
|  | ** than 'idx' nested values. Returns the number of values pushed | ||||||
|  | ** (0 or 1). | ||||||
|  | */ | ||||||
|  | static int numcap (CapState *cs) { | ||||||
|  |   int idx = cs->cap->idx;  /* value to select */ | ||||||
|  |   if (idx == 0) {  /* no values? */ | ||||||
|  |     nextcap(cs);  /* skip entire capture */ | ||||||
|  |     return 0;  /* no value produced */ | ||||||
|  |   } | ||||||
|  |   else { | ||||||
|  |     int n = pushnestedvalues(cs, 0); | ||||||
|  |     if (n < idx)  /* invalid index? */ | ||||||
|  |       return luaL_error(cs->L, "no capture '%d'", idx); | ||||||
|  |     else { | ||||||
|  |       /* with n values on top, the idx-th one sits at relative index -(n - idx + 1) */ | ||||||
|  |       lua_pushvalue(cs->L, -(n - idx + 1));  /* get selected capture */ | ||||||
|  |       lua_replace(cs->L, -(n + 1));  /* put it in place of 1st capture */ | ||||||
|  |       lua_pop(cs->L, n - 1);  /* remove other captures */ | ||||||
|  |       return 1; | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  | ** Return the stack index of the first runtime capture in the given | ||||||
|  | ** list of captures (or zero if no runtime captures). The list is the | ||||||
|  | ** half-open range ['cap', 'last'); for a Cruntime entry the 'idx' | ||||||
|  | ** field holds the stack position of its value. | ||||||
|  | */ | ||||||
|  | int finddyncap (Capture *cap, Capture *last) { | ||||||
|  |   for (; cap < last; cap++) { | ||||||
|  |     if (cap->kind == Cruntime) | ||||||
|  |       return cap->idx;  /* stack position of first capture */ | ||||||
|  |   } | ||||||
|  |   return 0;  /* no dynamic captures in this segment */ | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  | ** Calls a runtime capture. Returns number of captures removed by | ||||||
|  | ** the call, including the initial Cgroup. (Captures to be added are | ||||||
|  | ** on the Lua stack.) | ||||||
|  | ** 'close' is the entry that ends the group; it is turned into a | ||||||
|  | ** Cclose at subject position 's'. The function stored for the group | ||||||
|  | ** is called with the subject, the current position (1-based) and the | ||||||
|  | ** nested capture values; its results are left on the Lua stack. | ||||||
|  | ** '*rem' receives the number of stack values of older dynamic | ||||||
|  | ** captures (inside the group) that were removed from the stack. | ||||||
|  | */ | ||||||
|  | int runtimecap (CapState *cs, Capture *close, const char *s, int *rem) { | ||||||
|  |   int n, id; | ||||||
|  |   lua_State *L = cs->L; | ||||||
|  |   int otop = lua_gettop(L);  /* stack top before anything is pushed */ | ||||||
|  |   Capture *open = findopen(close); | ||||||
|  |   assert(captype(open) == Cgroup); | ||||||
|  |   id = finddyncap(open, close);  /* get first dynamic capture argument */ | ||||||
|  |   close->kind = Cclose;  /* closes the group */ | ||||||
|  |   close->s = s; | ||||||
|  |   cs->cap = open; cs->valuecached = 0;  /* prepare capture state */ | ||||||
|  |   luaL_checkstack(L, 4, "too many runtime captures"); | ||||||
|  |   pushluaval(cs);  /* push function to be called */ | ||||||
|  |   lua_pushvalue(L, SUBJIDX);  /* push original subject */ | ||||||
|  |   lua_pushinteger(L, s - cs->s + 1);  /* push current position */ | ||||||
|  |   n = pushnestedvalues(cs, 0);  /* push nested captures */ | ||||||
|  |   lua_call(L, n + 2, LUA_MULTRET);  /* call dynamic function */ | ||||||
|  |   if (id > 0) {  /* are there old dynamic captures to be removed? */ | ||||||
|  |     int i; | ||||||
|  |     /* always remove at 'id': each removal shifts the following values down */ | ||||||
|  |     for (i = id; i <= otop; i++) | ||||||
|  |       lua_remove(L, id);  /* remove old dynamic captures */ | ||||||
|  |     *rem = otop - id + 1;  /* total number of dynamic captures removed */ | ||||||
|  |   } | ||||||
|  |   else | ||||||
|  |     *rem = 0;  /* no dynamic captures removed */ | ||||||
|  |   return close - open;  /* number of captures of all kinds removed */ | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  | ** Auxiliary structure for substitution and string captures: keep | ||||||
|  | ** information about nested captures for future use, avoiding to push | ||||||
|  | ** string results into Lua. | ||||||
|  | ** 'isstring' selects which member of the union is valid: a piece of | ||||||
|  | ** the subject delimited by two pointers ('u.s'), or a pointer to the | ||||||
|  | ** capture entry to be evaluated later ('u.cp'). | ||||||
|  | */ | ||||||
|  | typedef struct StrAux { | ||||||
|  |   int isstring;  /* whether capture is a string */ | ||||||
|  |   union { | ||||||
|  |     Capture *cp;  /* if not a string, respective capture */ | ||||||
|  |     struct {  /* if it is a string... */ | ||||||
|  |       const char *s;  /* ... starts here */ | ||||||
|  |       const char *e;  /* ... ends here */ | ||||||
|  |     } s; | ||||||
|  |   } u; | ||||||
|  | } StrAux; | ||||||
|  | 
 | ||||||
|  | /* size of the 'StrAux' array used by string captures; format escapes are a single digit, so indices run from 0 to 9 */ | ||||||
|  | #define MAXSTRCAPS	10 | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  | ** Collect values from current capture into array 'cps'. Current | ||||||
|  | ** capture must be Cstring (first call) or Csimple (recursive calls). | ||||||
|  | ** (In first call, fills %0 with whole match for Cstring.) | ||||||
|  | ** Returns number of elements in the array that were filled. | ||||||
|  | ** 'n' is the first free slot in 'cps'; that slot receives the text | ||||||
|  | ** matched by the current capture itself, and the following slots | ||||||
|  | ** receive its nested captures. Nested captures that do not fit in | ||||||
|  | ** MAXSTRCAPS slots are skipped. | ||||||
|  | */ | ||||||
|  | static int getstrcaps (CapState *cs, StrAux *cps, int n) { | ||||||
|  |   int k = n++;  /* slot for this capture's own match */ | ||||||
|  |   cps[k].isstring = 1;  /* get string value */ | ||||||
|  |   cps[k].u.s.s = cs->cap->s;  /* starts here */ | ||||||
|  |   if (!isfullcap(cs->cap++)) {  /* nested captures? */ | ||||||
|  |     while (!isclosecap(cs->cap)) {  /* traverse them */ | ||||||
|  |       if (n >= MAXSTRCAPS)  /* too many captures? */ | ||||||
|  |         nextcap(cs);  /* skip extra captures (will not need them) */ | ||||||
|  |       else if (captype(cs->cap) == Csimple)  /* string? */ | ||||||
|  |         n = getstrcaps(cs, cps, n);  /* put info. into array */ | ||||||
|  |       else { | ||||||
|  |         cps[n].isstring = 0;  /* not a string */ | ||||||
|  |         cps[n].u.cp = cs->cap;  /* keep original capture */ | ||||||
|  |         nextcap(cs); | ||||||
|  |         n++; | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |     cs->cap++;  /* skip close */ | ||||||
|  |   } | ||||||
|  |   /* 'cs->cap - 1' is the last entry of this capture (its single full entry or its close) */ | ||||||
|  |   cps[k].u.s.e = closeaddr(cs->cap - 1);  /* ends here */ | ||||||
|  |   return n; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  | ** add next capture value (which should be a string) to buffer 'b' | ||||||
|  | */ | ||||||
|  | static int addonestring (luaL_Buffer *b, CapState *cs, const char *what); | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  | ** String capture: add result to buffer 'b' (instead of pushing | ||||||
|  | ** it into the stack). | ||||||
|  | ** The format string comes from the ktable entry of the capture. In | ||||||
|  | ** it, '%' followed by a digit d is replaced by the value of nested | ||||||
|  | ** capture d (0 is the whole match), and '%' followed by any other | ||||||
|  | ** character adds that character. Raises a Lua error for an index with | ||||||
|  | ** no corresponding capture or for a capture that produces no value. | ||||||
|  | ** NOTE(review): when '%' is the last character of the format, | ||||||
|  | ** 'fmt[++i]' reads the byte at position 'len' (presumably the | ||||||
|  | ** terminating zero of the Lua string) and adds it to the buffer -- | ||||||
|  | ** confirm whether that is intended. | ||||||
|  | */ | ||||||
|  | static void stringcap (luaL_Buffer *b, CapState *cs) { | ||||||
|  |   StrAux cps[MAXSTRCAPS]; | ||||||
|  |   int n;  /* highest valid capture index */ | ||||||
|  |   size_t len, i; | ||||||
|  |   const char *fmt;  /* format string */ | ||||||
|  |   fmt = lua_tolstring(cs->L, updatecache(cs, cs->cap->idx), &len); | ||||||
|  |   n = getstrcaps(cs, cps, 0) - 1;  /* collect nested captures */ | ||||||
|  |   for (i = 0; i < len; i++) {  /* traverse them */ | ||||||
|  |     if (fmt[i] != '%')  /* not an escape? */ | ||||||
|  |       luaL_addchar(b, fmt[i]);  /* add it to buffer */ | ||||||
|  |     else if (fmt[++i] < '0' || fmt[i] > '9')  /* not followed by a digit? */ | ||||||
|  |       luaL_addchar(b, fmt[i]);  /* add to buffer */ | ||||||
|  |     else { | ||||||
|  |       int l = fmt[i] - '0';  /* capture index */ | ||||||
|  |       if (l > n) | ||||||
|  |         luaL_error(cs->L, "invalid capture index (%d)", l); | ||||||
|  |       else if (cps[l].isstring) | ||||||
|  |         luaL_addlstring(b, cps[l].u.s.s, cps[l].u.s.e - cps[l].u.s.s); | ||||||
|  |       else { | ||||||
|  |         Capture *curr = cs->cap;  /* save cursor: it already points after the string capture */ | ||||||
|  |         cs->cap = cps[l].u.cp;  /* go back to evaluate that nested capture */ | ||||||
|  |         if (!addonestring(b, cs, "capture")) | ||||||
|  |           luaL_error(cs->L, "no values in capture index %d", l); | ||||||
|  |         cs->cap = curr;  /* continue from where it stopped */ | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  | ** Substitution capture: add result to buffer 'b'. | ||||||
|  | ** The result is the matched text in which the part matched by each | ||||||
|  | ** nested capture is replaced by that capture's first value; a nested | ||||||
|  | ** capture that produces no value leaves its original text in place. | ||||||
|  | ** Without nested captures the matched text is copied unchanged. | ||||||
|  | */ | ||||||
|  | static void substcap (luaL_Buffer *b, CapState *cs) { | ||||||
|  |   const char *curr = cs->cap->s;  /* start of text not yet copied to the buffer */ | ||||||
|  |   if (isfullcap(cs->cap))  /* no nested captures? */ | ||||||
|  |     luaL_addlstring(b, curr, cs->cap->siz - 1);  /* keep original text */ | ||||||
|  |   else { | ||||||
|  |     cs->cap++;  /* skip open entry */ | ||||||
|  |     while (!isclosecap(cs->cap)) {  /* traverse nested captures */ | ||||||
|  |       const char *next = cs->cap->s; | ||||||
|  |       luaL_addlstring(b, curr, next - curr);  /* add text up to capture */ | ||||||
|  |       if (addonestring(b, cs, "replacement")) | ||||||
|  |         curr = closeaddr(cs->cap - 1);  /* continue after match */ | ||||||
|  |       else  /* no capture value */ | ||||||
|  |         curr = next;  /* keep original text in final result */ | ||||||
|  |     } | ||||||
|  |     luaL_addlstring(b, curr, cs->cap->s - curr);  /* add last piece of text */ | ||||||
|  |   } | ||||||
|  |   cs->cap++;  /* go to next capture */ | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  | ** Evaluates a capture and adds its first value to buffer 'b'; returns | ||||||
|  | ** whether there was a value (nonzero) or not (zero). | ||||||
|  | ** String and substitution captures write straight into the buffer. | ||||||
|  | ** Any other capture is evaluated on the Lua stack and its first value | ||||||
|  | ** must pass 'lua_isstring'; otherwise a Lua error is raised, using | ||||||
|  | ** 'what' to describe the offending value in the message. | ||||||
|  | */ | ||||||
|  | static int addonestring (luaL_Buffer *b, CapState *cs, const char *what) { | ||||||
|  |   switch (captype(cs->cap)) { | ||||||
|  |     case Cstring: | ||||||
|  |       stringcap(b, cs);  /* add capture directly to buffer */ | ||||||
|  |       return 1; | ||||||
|  |     case Csubst: | ||||||
|  |       substcap(b, cs);  /* add capture directly to buffer */ | ||||||
|  |       return 1; | ||||||
|  |     default: { | ||||||
|  |       lua_State *L = cs->L; | ||||||
|  |       int n = pushcapture(cs); | ||||||
|  |       if (n > 0) { | ||||||
|  |         if (n > 1) lua_pop(L, n - 1);  /* only one result */ | ||||||
|  |         if (!lua_isstring(L, -1)) | ||||||
|  |           luaL_error(L, "invalid %s value (a %s)", what, luaL_typename(L, -1)); | ||||||
|  |         luaL_addvalue(b);  /* moves the value from the stack into the buffer */ | ||||||
|  |       } | ||||||
|  |       return n;  /* zero when the capture produced no value */ | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  | ** Push all values of the current capture into the stack; returns | ||||||
|  | ** number of values pushed (possibly zero). Dispatches on the kind of | ||||||
|  | ** the capture at 'cs->cap'; every branch leaves 'cs->cap' pointing | ||||||
|  | ** after the capture it handled. | ||||||
|  | */ | ||||||
|  | static int pushcapture (CapState *cs) { | ||||||
|  |   lua_State *L = cs->L; | ||||||
|  |   luaL_checkstack(L, 4, "too many captures"); | ||||||
|  |   switch (captype(cs->cap)) { | ||||||
|  |     case Cposition: {  /* value is the 1-based position in the subject */ | ||||||
|  |       lua_pushinteger(L, cs->cap->s - cs->s + 1); | ||||||
|  |       cs->cap++; | ||||||
|  |       return 1; | ||||||
|  |     } | ||||||
|  |     case Cconst: {  /* value is a constant stored in ktable */ | ||||||
|  |       pushluaval(cs); | ||||||
|  |       cs->cap++; | ||||||
|  |       return 1; | ||||||
|  |     } | ||||||
|  |     case Carg: {  /* value is one of the extra arguments, found on the stack */ | ||||||
|  |       int arg = (cs->cap++)->idx; | ||||||
|  |       if (arg + FIXEDARGS > cs->ptop) | ||||||
|  |         return luaL_error(L, "reference to absent extra argument #%d", arg); | ||||||
|  |       lua_pushvalue(L, arg + FIXEDARGS); | ||||||
|  |       return 1; | ||||||
|  |     } | ||||||
|  |     case Csimple: { | ||||||
|  |       int k = pushnestedvalues(cs, 1);  /* nested values, then whole match on top */ | ||||||
|  |       lua_insert(L, -k);  /* make whole match be first result */ | ||||||
|  |       return k; | ||||||
|  |     } | ||||||
|  |     case Cruntime: { | ||||||
|  |       lua_pushvalue(L, (cs->cap++)->idx);  /* value is in the stack */ | ||||||
|  |       return 1; | ||||||
|  |     } | ||||||
|  |     case Cstring: {  /* build the result in a buffer, then push it as one string */ | ||||||
|  |       luaL_Buffer b; | ||||||
|  |       luaL_buffinit(L, &b); | ||||||
|  |       stringcap(&b, cs); | ||||||
|  |       luaL_pushresult(&b); | ||||||
|  |       return 1; | ||||||
|  |     } | ||||||
|  |     case Csubst: {  /* build the result in a buffer, then push it as one string */ | ||||||
|  |       luaL_Buffer b; | ||||||
|  |       luaL_buffinit(L, &b); | ||||||
|  |       substcap(&b, cs); | ||||||
|  |       luaL_pushresult(&b); | ||||||
|  |       return 1; | ||||||
|  |     } | ||||||
|  |     case Cgroup: { | ||||||
|  |       if (cs->cap->idx == 0)  /* anonymous group? */ | ||||||
|  |         return pushnestedvalues(cs, 0);  /* add all nested values */ | ||||||
|  |       else {  /* named group: add no values */ | ||||||
|  |         nextcap(cs);  /* skip capture */ | ||||||
|  |         return 0; | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |     case Cbackref: return backrefcap(cs); | ||||||
|  |     case Ctable: return tablecap(cs); | ||||||
|  |     case Cfunction: return functioncap(cs); | ||||||
|  |     case Cnum: return numcap(cs); | ||||||
|  |     case Cquery: return querycap(cs); | ||||||
|  |     case Cfold: return foldcap(cs); | ||||||
|  |     default: assert(0); return 0;  /* remaining kinds (e.g. Cclose) are not expected here */ | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  | ** Prepare a CapState structure and traverse the entire list of | ||||||
|  | ** captures in the stack pushing its results. 's' is the subject | ||||||
|  | ** string, 'r' is the final position of the match, and 'ptop' | ||||||
|  | ** the index in the stack where some useful values were pushed. | ||||||
|  | ** Returns the number of results pushed. (If the list produces no | ||||||
|  | ** results, push the final position of the match.) | ||||||
|  | ** The capture list is read from the userdata at stack index | ||||||
|  | ** 'caplistidx(ptop)' and is traversed until a close entry is found | ||||||
|  | ** at the top level. | ||||||
|  | */ | ||||||
|  | int getcaptures (lua_State *L, const char *s, const char *r, int ptop) { | ||||||
|  |   Capture *capture = (Capture *)lua_touserdata(L, caplistidx(ptop)); | ||||||
|  |   int n = 0; | ||||||
|  |   if (!isclosecap(capture)) {  /* is there any capture? */ | ||||||
|  |     CapState cs; | ||||||
|  |     cs.ocap = cs.cap = capture; cs.L = L; | ||||||
|  |     cs.s = s; cs.valuecached = 0; cs.ptop = ptop; | ||||||
|  |     do {  /* collect their values */ | ||||||
|  |       n += pushcapture(&cs); | ||||||
|  |     } while (!isclosecap(cs.cap)); | ||||||
|  |   } | ||||||
|  |   if (n == 0) {  /* no capture values? */ | ||||||
|  |     lua_pushinteger(L, r - s + 1);  /* return only end position */ | ||||||
|  |     n = 1; | ||||||
|  |   } | ||||||
|  |   return n; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | @ -0,0 +1,56 @@ | ||||||
|  | /*
 | ||||||
|  | ** $Id: lpcap.h,v 1.3 2016/09/13 17:45:58 roberto Exp $ | ||||||
|  | */ | ||||||
|  | 
 | ||||||
|  | #if !defined(lpcap_h) | ||||||
|  | #define lpcap_h | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | #include "lptypes.h" | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | /* kinds of captures (capture instructions pack the kind in 4 bits; see 'getkind') */ | ||||||
|  | typedef enum CapKind { | ||||||
|  |   Cclose,  /* not used in trees */ | ||||||
|  |   Cposition, | ||||||
|  |   Cconst,  /* ktable[key] is Lua constant */ | ||||||
|  |   Cbackref,  /* ktable[key] is "name" of group to get capture */ | ||||||
|  |   Carg,  /* 'key' is arg's number */ | ||||||
|  |   Csimple,  /* next node is pattern */ | ||||||
|  |   Ctable,  /* next node is pattern */ | ||||||
|  |   Cfunction,  /* ktable[key] is function; next node is pattern */ | ||||||
|  |   Cquery,  /* ktable[key] is table; next node is pattern */ | ||||||
|  |   Cstring,  /* ktable[key] is string; next node is pattern */ | ||||||
|  |   Cnum,  /* numbered capture; 'key' is number of value to return */ | ||||||
|  |   Csubst,  /* substitution capture; next node is pattern */ | ||||||
|  |   Cfold,  /* ktable[key] is function; next node is pattern */ | ||||||
|  |   Cruntime,  /* not used in trees (it uses another type for tree) */ | ||||||
|  |   Cgroup  /* ktable[key] is group's "name" */ | ||||||
|  | } CapKind; | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | typedef struct Capture {  /* one entry of the capture list */ | ||||||
|  |   const char *s;  /* subject position */ | ||||||
|  |   unsigned short idx;  /* extra info (group name, arg index, etc.) */ | ||||||
|  |   byte kind;  /* kind of capture (a value of enum 'CapKind') */ | ||||||
|  |   byte siz;  /* size of full capture + 1 (0 = not a full capture) */ | ||||||
|  | } Capture; | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | typedef struct CapState {  /* traversal state used while pushing capture values */ | ||||||
|  |   Capture *cap;  /* current capture */ | ||||||
|  |   Capture *ocap;  /* (original) capture list */ | ||||||
|  |   lua_State *L; | ||||||
|  |   int ptop;  /* index of last argument to 'match' */ | ||||||
|  |   const char *s;  /* original string */ | ||||||
|  |   int valuecached;  /* value stored in cache slot (0 when set up by 'getcaptures') */ | ||||||
|  | } CapState; | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | int runtimecap (CapState *cs, Capture *close, const char *s, int *rem); | ||||||
|  | int getcaptures (lua_State *L, const char *s, const char *r, int ptop); | ||||||
|  | int finddyncap (Capture *cap, Capture *last); | ||||||
|  | 
 | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							|  | @ -0,0 +1,40 @@ | ||||||
|  | /*
 | ||||||
|  | ** $Id: lpcode.h,v 1.8 2016/09/15 17:46:13 roberto Exp $ | ||||||
|  | */ | ||||||
|  | 
 | ||||||
|  | #if !defined(lpcode_h) | ||||||
|  | #define lpcode_h | ||||||
|  | 
 | ||||||
|  | #include "lua.h" | ||||||
|  | 
 | ||||||
|  | #include "lptypes.h" | ||||||
|  | #include "lptree.h" | ||||||
|  | #include "lpvm.h" | ||||||
|  | 
 | ||||||
|  | int tocharset (TTree *tree, Charset *cs); | ||||||
|  | int checkaux (TTree *tree, int pred); | ||||||
|  | int fixedlen (TTree *tree); | ||||||
|  | int hascaptures (TTree *tree); | ||||||
|  | int lp_gc (lua_State *L); | ||||||
|  | Instruction *compile (lua_State *L, Pattern *p); | ||||||
|  | void realloccode (lua_State *L, Pattern *p, int nsize); | ||||||
|  | int sizei (const Instruction *i); | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | #define PEnullable      0  /* 'pred' value passed to checkaux by macro 'nullable' */ | ||||||
|  | #define PEnofail        1  /* 'pred' value passed to checkaux by macro 'nofail' */ | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  | ** nofail(t) implies that 't' cannot fail with any input | ||||||
|  | */ | ||||||
|  | #define nofail(t)	checkaux(t, PEnofail) | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  | ** (not nullable(t)) implies 't' cannot match without consuming | ||||||
|  | ** something | ||||||
|  | */ | ||||||
|  | #define nullable(t)	checkaux(t, PEnullable) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | #endif | ||||||
										
											Binary file not shown.
										
									
								
							| After Width: | Height: | Size: 4.8 KiB | 
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							|  | @ -0,0 +1,244 @@ | ||||||
|  | /*
 | ||||||
|  | ** $Id: lpprint.c,v 1.10 2016/09/13 16:06:03 roberto Exp $ | ||||||
|  | ** Copyright 2007, Lua.org & PUC-Rio  (see 'lpeg.html' for license) | ||||||
|  | */ | ||||||
|  | 
 | ||||||
|  | #include <ctype.h> | ||||||
|  | #include <limits.h> | ||||||
|  | #include <stdio.h> | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | #include "lptypes.h" | ||||||
|  | #include "lpprint.h" | ||||||
|  | #include "lpcode.h" | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | #if defined(LPEG_DEBUG) | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  | ** {====================================================== | ||||||
|  | ** Printing patterns (for debugging) | ||||||
|  | ** ======================================================= | ||||||
|  | */ | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | /* | ||||||
|  | ** Print charset 'st' as a bracketed list of hexadecimal ranges, | ||||||
|  | ** "(xx)" for a single byte value and "(xx-yy)" for a run of | ||||||
|  | ** consecutive values. 'st' is read through 'testchar' for the values | ||||||
|  | ** 0..UCHAR_MAX only. | ||||||
|  | */ | ||||||
|  | void printcharset (const byte *st) { | ||||||
|  |   int i; | ||||||
|  |   printf("["); | ||||||
|  |   for (i = 0; i <= UCHAR_MAX; i++) { | ||||||
|  |     int first = i; | ||||||
|  |     /* check the bound BEFORE testing the bit: testing bit UCHAR_MAX + 1 | ||||||
|  |        would read one byte past the end of the set */ | ||||||
|  |     while (i <= UCHAR_MAX && testchar(st, i)) i++; | ||||||
|  |     if (i - 1 == first)  /* unary range? */ | ||||||
|  |       printf("(%02x)", first); | ||||||
|  |     else if (i - 1 > first)  /* non-empty range? */ | ||||||
|  |       printf("(%02x-%02x)", first, i - 1); | ||||||
|  |   } | ||||||
|  |   printf("]"); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | /* | ||||||
|  | ** Map a capture kind to a printable name. The table lists the names | ||||||
|  | ** in the same order as the members of enum 'CapKind'; 'kind' is used | ||||||
|  | ** directly as the index, with no range check. | ||||||
|  | */ | ||||||
|  | static const char *capkind (int kind) { | ||||||
|  |   const char *const kindnames[] = { | ||||||
|  |     "close", | ||||||
|  |     "position", | ||||||
|  |     "constant", | ||||||
|  |     "backref", | ||||||
|  |     "argument", | ||||||
|  |     "simple", | ||||||
|  |     "table", | ||||||
|  |     "function", | ||||||
|  |     "query", | ||||||
|  |     "string", | ||||||
|  |     "num", | ||||||
|  |     "substitution", | ||||||
|  |     "fold", | ||||||
|  |     "runtime", | ||||||
|  |     "group" | ||||||
|  |   }; | ||||||
|  |   return kindnames[kind]; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | /* | ||||||
|  | ** Print the destination of the jump in instruction 'p' as an index | ||||||
|  | ** relative to 'op'. The jump offset is read from the slot right | ||||||
|  | ** after 'p' and is relative to 'p' itself. | ||||||
|  | */ | ||||||
|  | static void printjmp (const Instruction *op, const Instruction *p) { | ||||||
|  |   const Instruction *target = p + (p + 1)->offset; | ||||||
|  |   printf("-> %d", (int)(target - op)); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | void printinst (const Instruction *op, const Instruction *p) {  /* print instruction 'p'; 'op' is the base used for indices */ | ||||||
|  |   const char *const names[] = {  /* mnemonics, indexed by p->i.code */ | ||||||
|  |     "any", "char", "set", | ||||||
|  |     "testany", "testchar", "testset", | ||||||
|  |     "span", "behind", | ||||||
|  |     "ret", "end", | ||||||
|  |     "choice", "jmp", "call", "open_call", | ||||||
|  |     "commit", "partial_commit", "back_commit", "failtwice", "fail", "giveup", | ||||||
|  |      "fullcapture", "opencapture", "closecapture", "closeruntime" | ||||||
|  |   }; | ||||||
|  |   printf("%02ld: %s ", (long)(p - op), names[p->i.code]);  /* index of 'p' relative to 'op', then mnemonic */ | ||||||
|  |   switch ((Opcode)p->i.code) {  /* then the operands, which depend on the opcode */ | ||||||
|  |     case IChar: { | ||||||
|  |       printf("'%c'", p->i.aux);  /* the character is kept in field 'aux' */ | ||||||
|  |       break; | ||||||
|  |     } | ||||||
|  |     case ITestChar: { | ||||||
|  |       printf("'%c'", p->i.aux); printjmp(op, p); | ||||||
|  |       break; | ||||||
|  |     } | ||||||
|  |     case IFullCapture: { | ||||||
|  |       printf("%s (size = %d)  (idx = %d)", | ||||||
|  |              capkind(getkind(p)), getoff(p), p->i.key);  /* kind and offset are unpacked from 'aux' */ | ||||||
|  |       break; | ||||||
|  |     } | ||||||
|  |     case IOpenCapture: { | ||||||
|  |       printf("%s (idx = %d)", capkind(getkind(p)), p->i.key); | ||||||
|  |       break; | ||||||
|  |     } | ||||||
|  |     case ISet: { | ||||||
|  |       printcharset((p+1)->buff);  /* the set is read from the slot right after 'p' */ | ||||||
|  |       break; | ||||||
|  |     } | ||||||
|  |     case ITestSet: { | ||||||
|  |       printcharset((p+2)->buff); printjmp(op, p);  /* set is read from p+2; printjmp reads the offset at p+1 */ | ||||||
|  |       break; | ||||||
|  |     } | ||||||
|  |     case ISpan: { | ||||||
|  |       printcharset((p+1)->buff); | ||||||
|  |       break; | ||||||
|  |     } | ||||||
|  |     case IOpenCall: { | ||||||
|  |       printf("-> %d", (p + 1)->offset);  /* prints the raw value of the offset slot */ | ||||||
|  |       break; | ||||||
|  |     } | ||||||
|  |     case IBehind: { | ||||||
|  |       printf("%d", p->i.aux); | ||||||
|  |       break; | ||||||
|  |     } | ||||||
|  |     case IJmp: case ICall: case ICommit: case IChoice: | ||||||
|  |     case IPartialCommit: case IBackCommit: case ITestAny: { | ||||||
|  |       printjmp(op, p);  /* jump target only */ | ||||||
|  |       break; | ||||||
|  |     } | ||||||
|  |     default: break;  /* other opcodes: nothing besides the mnemonic */ | ||||||
|  |   } | ||||||
|  |   printf("\n"); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | /* | ||||||
|  | ** Print the 'n' instruction slots starting at 'p', one instruction | ||||||
|  | ** per line, advancing by the size 'sizei' reports for each one. | ||||||
|  | */ | ||||||
|  | void printpatt (Instruction *p, int n) { | ||||||
|  |   Instruction *start = p; | ||||||
|  |   const Instruction *end = p + n; | ||||||
|  |   for (; p < end; p += sizei(p)) | ||||||
|  |     printinst(start, p); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | #if defined(LPEG_DEBUG) | ||||||
|  | /* | ||||||
|  | ** Print one capture entry: kind name, 'idx', 'siz' and the subject | ||||||
|  | ** position it points to. | ||||||
|  | */ | ||||||
|  | static void printcap (Capture *cap) { | ||||||
|  |   /* "%p" requires a pointer to void, so convert explicitly */ | ||||||
|  |   printf("%s (idx: %d - size: %d) -> %p\n", | ||||||
|  |          capkind(cap->kind), cap->idx, cap->siz, (const void *)cap->s); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | /* | ||||||
|  | ** Print the entries of a capture list starting at 'cap'. Printing | ||||||
|  | ** stops at the first entry whose 's' is NULL or, when 'limit' is not | ||||||
|  | ** NULL, on reaching 'limit'. | ||||||
|  | */ | ||||||
|  | void printcaplist (Capture *cap, Capture *limit) { | ||||||
|  |   printf(">======\n"); | ||||||
|  |   while (cap->s != NULL && (limit == NULL || cap < limit)) { | ||||||
|  |     printcap(cap); | ||||||
|  |     cap++; | ||||||
|  |   } | ||||||
|  |   printf("=======\n"); | ||||||
|  | } | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  | /* }====================================================== */ | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  | ** {====================================================== | ||||||
|  | ** Printing trees (for debugging) | ||||||
|  | ** ======================================================= | ||||||
|  | */ | ||||||
|  | 
 | ||||||
|  | static const char *tagnames[] = {  /* printable names, indexed by tree->tag (same order as enum 'TTag') */ | ||||||
|  |   "char", "set", "any", | ||||||
|  |   "true", "false", | ||||||
|  |   "rep", | ||||||
|  |   "seq", "choice", | ||||||
|  |   "not", "and", | ||||||
|  |   "call", "opencall", "rule", "grammar", | ||||||
|  |   "behind", | ||||||
|  |   "capture", "run-time" | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | void printtree (TTree *tree, int ident) {  /* print 'tree' and its subtrees; 'ident' is the number of leading spaces */ | ||||||
|  |   int i; | ||||||
|  |   for (i = 0; i < ident; i++) printf(" "); | ||||||
|  |   printf("%s", tagnames[tree->tag]); | ||||||
|  |   switch (tree->tag) {  /* extra details and children depend on the tag */ | ||||||
|  |     case TChar: { | ||||||
|  |       int c = tree->u.n; | ||||||
|  |       if (isprint(c)) | ||||||
|  |         printf(" '%c'\n", c); | ||||||
|  |       else | ||||||
|  |         printf(" (%02X)\n", c);  /* non-printable: show its code in hexadecimal */ | ||||||
|  |       break; | ||||||
|  |     } | ||||||
|  |     case TSet: { | ||||||
|  |       printcharset(treebuffer(tree));  /* set bytes are read from right after the node */ | ||||||
|  |       printf("\n"); | ||||||
|  |       break; | ||||||
|  |     } | ||||||
|  |     case TOpenCall: case TCall: { | ||||||
|  |       assert(sib2(tree)->tag == TRule); | ||||||
|  |       printf(" key: %d  (rule: %d)\n", tree->key, sib2(tree)->cap);  /* 'cap' of a rule is its sequential number */ | ||||||
|  |       break; | ||||||
|  |     } | ||||||
|  |     case TBehind: { | ||||||
|  |       printf(" %d\n", tree->u.n); | ||||||
|  |         printtree(sib1(tree), ident + 2); | ||||||
|  |       break; | ||||||
|  |     } | ||||||
|  |     case TCapture: { | ||||||
|  |       printf(" kind: '%s'  key: %d\n", capkind(tree->cap), tree->key); | ||||||
|  |       printtree(sib1(tree), ident + 2); | ||||||
|  |       break; | ||||||
|  |     } | ||||||
|  |     case TRule: { | ||||||
|  |       printf(" n: %d  key: %d\n", tree->cap, tree->key); | ||||||
|  |       printtree(sib1(tree), ident + 2); | ||||||
|  |       break;  /* do not print next rule as a sibling */ | ||||||
|  |     } | ||||||
|  |     case TGrammar: { | ||||||
|  |       TTree *rule = sib1(tree); | ||||||
|  |       printf(" %d\n", tree->u.n);  /* number of rules */ | ||||||
|  |       for (i = 0; i < tree->u.n; i++) { | ||||||
|  |         printtree(rule, ident + 2); | ||||||
|  |         rule = sib2(rule);  /* step to the next rule */ | ||||||
|  |       } | ||||||
|  |       assert(rule->tag == TTrue);  /* sentinel */ | ||||||
|  |       break; | ||||||
|  |     } | ||||||
|  |     default: { | ||||||
|  |       int sibs = numsiblings[tree->tag];  /* number of children for this tag */ | ||||||
|  |       printf("\n"); | ||||||
|  |       if (sibs >= 1) { | ||||||
|  |         printtree(sib1(tree), ident + 2); | ||||||
|  |         if (sibs >= 2) | ||||||
|  |           printtree(sib2(tree), ident + 2); | ||||||
|  |       } | ||||||
|  |       break; | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | /* | ||||||
|  | ** Print the ktable (the user value) of the object at stack index | ||||||
|  | ** 'idx' as "[1 = v1  2 = v2  ...]". Entries that are strings or | ||||||
|  | ** numbers are shown by value, anything else by type name. The user | ||||||
|  | ** value (ktable, or nil when there is none) is left on the stack. | ||||||
|  | */ | ||||||
|  | void printktable (lua_State *L, int idx) { | ||||||
|  |   int size, k; | ||||||
|  |   lua_getuservalue(L, idx); | ||||||
|  |   if (!lua_isnil(L, -1)) {  /* is there a ktable? */ | ||||||
|  |     size = lua_rawlen(L, -1); | ||||||
|  |     printf("["); | ||||||
|  |     for (k = 1; k <= size; k++) { | ||||||
|  |       printf("%d = ", k); | ||||||
|  |       lua_rawgeti(L, -1, k); | ||||||
|  |       printf("%s  ", lua_isstring(L, -1) | ||||||
|  |                        ? lua_tostring(L, -1) | ||||||
|  |                        : lua_typename(L, lua_type(L, -1))); | ||||||
|  |       lua_pop(L, 1);  /* remove the entry just printed */ | ||||||
|  |     } | ||||||
|  |     printf("]\n"); | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /* }====================================================== */ | ||||||
|  | 
 | ||||||
|  | #endif | ||||||
|  | @ -0,0 +1,36 @@ | ||||||
|  | /*
 | ||||||
|  | ** $Id: lpprint.h,v 1.2 2015/06/12 18:18:08 roberto Exp $ | ||||||
|  | */ | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | #if !defined(lpprint_h) | ||||||
|  | #define lpprint_h | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | #include "lptree.h" | ||||||
|  | #include "lpvm.h" | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | #if defined(LPEG_DEBUG) | ||||||
|  | 
 | ||||||
|  | void printpatt (Instruction *p, int n); | ||||||
|  | void printtree (TTree *tree, int ident); | ||||||
|  | void printktable (lua_State *L, int idx); | ||||||
|  | void printcharset (const byte *st); | ||||||
|  | void printcaplist (Capture *cap, Capture *limit); | ||||||
|  | void printinst (const Instruction *op, const Instruction *p); | ||||||
|  | 
 | ||||||
|  | #else | ||||||
|  | 
 | ||||||
|  | #define printktable(L,idx)  \ | ||||||
|  | 	luaL_error(L, "function only implemented in debug mode") | ||||||
|  | #define printtree(tree,i)  \ | ||||||
|  | 	luaL_error(L, "function only implemented in debug mode") | ||||||
|  | #define printpatt(p,n)  \ | ||||||
|  | 	luaL_error(L, "function only implemented in debug mode") | ||||||
|  | 
 | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							|  | @ -0,0 +1,82 @@ | ||||||
|  | /*  
 | ||||||
|  | ** $Id: lptree.h,v 1.3 2016/09/13 18:07:51 roberto Exp $ | ||||||
|  | */ | ||||||
|  | 
 | ||||||
|  | #if !defined(lptree_h) | ||||||
|  | #define lptree_h | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | #include "lptypes.h"  | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  | ** types of trees | ||||||
|  | */ | ||||||
|  | typedef enum TTag { | ||||||
|  |   TChar = 0,  /* 'n' = char */ | ||||||
|  |   TSet,  /* the set is stored in next CHARSETSIZE bytes */ | ||||||
|  |   TAny, | ||||||
|  |   TTrue,  /* also found as the sentinel after the last rule of a grammar */ | ||||||
|  |   TFalse, | ||||||
|  |   TRep,  /* 'sib1'* */ | ||||||
|  |   TSeq,  /* 'sib1' 'sib2' */ | ||||||
|  |   TChoice,  /* 'sib1' / 'sib2' */ | ||||||
|  |   TNot,  /* !'sib1' */ | ||||||
|  |   TAnd,  /* &'sib1' */ | ||||||
|  |   TCall,  /* ktable[key] is rule's key; 'sib2' is rule being called */ | ||||||
|  |   TOpenCall,  /* ktable[key] is rule's key */ | ||||||
|  |   TRule,  /* ktable[key] is rule's key (but key == 0 for unused rules);
 | ||||||
|  |              'sib1' is rule's pattern; | ||||||
|  |              'sib2' is next rule; 'cap' is rule's sequential number */ | ||||||
|  |   TGrammar,  /* 'sib1' is initial (and first) rule; 'n' is the number of rules */ | ||||||
|  |   TBehind,  /* 'sib1' is pattern, 'n' is how much to go back */ | ||||||
|  |   TCapture,  /* captures: 'cap' is kind of capture (enum 'CapKind');
 | ||||||
|  |                 ktable[key] is Lua value associated with capture; | ||||||
|  |                 'sib1' is capture body */ | ||||||
|  |   TRunTime  /* run-time capture: 'key' is Lua function;
 | ||||||
|  |                'sib1' is capture body */ | ||||||
|  | } TTag; | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  | ** Pattern trees | ||||||
|  | ** The first child of a tree (if there is one) is immediately after | ||||||
|  | ** the tree.  A reference to a second child (field 'ps') is its position | ||||||
|  | ** relative to the position of the tree itself (see macros 'sib1' and | ||||||
|  | ** 'sib2'). | ||||||
|  | */ | ||||||
|  | typedef struct TTree { | ||||||
|  |   byte tag;  /* kind of node (a value of enum 'TTag') */ | ||||||
|  |   byte cap;  /* kind of capture (if it is a capture) */ | ||||||
|  |   unsigned short key;  /* key in ktable for Lua data (0 if no key) */ | ||||||
|  |   union { | ||||||
|  |     int ps;  /* occasional second child */ | ||||||
|  |     int n;  /* occasional counter */ | ||||||
|  |   } u; | ||||||
|  | } TTree; | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  | ** A complete pattern has its tree plus, if already compiled, | ||||||
|  | ** its corresponding code | ||||||
|  | */ | ||||||
|  | typedef struct Pattern { | ||||||
|  |   union Instruction *code;  /* corresponding code, if already compiled */ | ||||||
|  |   int codesize;  /* presumably the number of Instruction slots in 'code' -- confirm */ | ||||||
|  |   TTree tree[1];  /* the pattern's tree (declared with a single slot) */ | ||||||
|  | } Pattern; | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | /* number of children for each tree */ | ||||||
|  | extern const byte numsiblings[]; | ||||||
|  | 
 | ||||||
|  | /* access to children */ | ||||||
|  | #define sib1(t)         ((t) + 1) | ||||||
|  | #define sib2(t)         ((t) + (t)->u.ps) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  | @ -0,0 +1,149 @@ | ||||||
|  | /*
 | ||||||
|  | ** $Id: lptypes.h,v 1.16 2017/01/13 13:33:17 roberto Exp $ | ||||||
|  | ** LPeg - PEG pattern matching for Lua | ||||||
|  | ** Copyright 2007-2017, Lua.org & PUC-Rio  (see 'lpeg.html' for license) | ||||||
|  | ** written by Roberto Ierusalimschy | ||||||
|  | */ | ||||||
|  | 
 | ||||||
|  | #if !defined(lptypes_h) | ||||||
|  | #define lptypes_h | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | #if !defined(LPEG_DEBUG) | ||||||
|  | #define NDEBUG | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  | #include <assert.h> | ||||||
|  | #include <limits.h> | ||||||
|  | 
 | ||||||
|  | #include "lua.h" | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | #define VERSION         "1.0.1" | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | #define PATTERN_T	"lpeg-pattern" | ||||||
|  | #define MAXSTACKIDX	"lpeg-maxstack" | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  | ** compatibility with Lua 5.1 | ||||||
|  | */ | ||||||
|  | #if (LUA_VERSION_NUM == 501) | ||||||
|  | 
 | ||||||
|  | #define lp_equal	lua_equal | ||||||
|  | 
 | ||||||
|  | #define lua_getuservalue	lua_getfenv | ||||||
|  | #define lua_setuservalue	lua_setfenv | ||||||
|  | 
 | ||||||
|  | #define lua_rawlen		lua_objlen | ||||||
|  | 
 | ||||||
|  | #define luaL_setfuncs(L,f,n)	luaL_register(L,NULL,f) | ||||||
|  | #define luaL_newlib(L,f)	luaL_register(L,"lpeg",f) | ||||||
|  | 
 | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | #if !defined(lp_equal) | ||||||
|  | #define lp_equal(L,idx1,idx2)  lua_compare(L,(idx1),(idx2),LUA_OPEQ) | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | /* default maximum size for call/backtrack stack */ | ||||||
|  | #if !defined(MAXBACK) | ||||||
|  | #define MAXBACK         400 | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | /* maximum number of rules in a grammar (limited by 'unsigned char') */ | ||||||
|  | #if !defined(MAXRULES) | ||||||
|  | #define MAXRULES        250 | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | /* initial size for capture's list */ | ||||||
|  | #define INITCAPSIZE	32 | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | /* index, on Lua stack, for subject */ | ||||||
|  | #define SUBJIDX		2 | ||||||
|  | 
 | ||||||
|  | /* number of fixed arguments to 'match' (before capture arguments) */ | ||||||
|  | #define FIXEDARGS	3 | ||||||
|  | 
 | ||||||
|  | /* index, on Lua stack, for capture list */ | ||||||
|  | #define caplistidx(ptop)	((ptop) + 2) | ||||||
|  | 
 | ||||||
|  | /* index, on Lua stack, for pattern's ktable */ | ||||||
|  | #define ktableidx(ptop)		((ptop) + 3) | ||||||
|  | 
 | ||||||
|  | /* index, on Lua stack, for backtracking stack */ | ||||||
|  | #define stackidx(ptop)	((ptop) + 4) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | typedef unsigned char byte; | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | #define BITSPERCHAR		8 | ||||||
|  | 
 | ||||||
|  | #define CHARSETSIZE		((UCHAR_MAX/BITSPERCHAR) + 1) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | typedef struct Charset { | ||||||
|  |   byte cs[CHARSETSIZE]; | ||||||
|  | } Charset; | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | #define loopset(v,b)    { int v; for (v = 0; v < CHARSETSIZE; v++) {b;} } | ||||||
|  | 
 | ||||||
|  | /* access to charset */ | ||||||
|  | #define treebuffer(t)      ((byte *)((t) + 1)) | ||||||
|  | 
 | ||||||
|  | /* number of slots needed for 'n' bytes */ | ||||||
|  | #define bytes2slots(n)  (((n) - 1) / sizeof(TTree) + 1) | ||||||
|  | 
 | ||||||
|  | /* set 'b' bit in charset 'cs' */ | ||||||
|  | #define setchar(cs,b)   ((cs)[(b) >> 3] |= (1 << ((b) & 7))) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  | ** in capture instructions, 'kind' of capture and its offset are | ||||||
|  | ** packed in field 'aux', 4 bits for each | ||||||
|  | */ | ||||||
|  | #define getkind(op)		((op)->i.aux & 0xF) | ||||||
|  | #define getoff(op)		(((op)->i.aux >> 4) & 0xF) | ||||||
|  | #define joinkindoff(k,o)	((k) | ((o) << 4)) | ||||||
|  | 
 | ||||||
|  | #define MAXOFF		0xF | ||||||
|  | #define MAXAUX		0xFF | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | /* maximum number of bytes to look behind */ | ||||||
|  | #define MAXBEHIND	MAXAUX | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | /* maximum size (in elements) for a pattern */ | ||||||
|  | #define MAXPATTSIZE	(SHRT_MAX - 10) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | /* size (in elements) for an instruction plus extra l bytes */ | ||||||
|  | #define instsize(l)  (((l) + sizeof(Instruction) - 1)/sizeof(Instruction) + 1) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | /* size (in elements) for a ISet instruction */ | ||||||
|  | #define CHARSETINSTSIZE		instsize(CHARSETSIZE) | ||||||
|  | 
 | ||||||
|  | /* size (in elements) for a IFunc instruction */ | ||||||
|  | #define funcinstsize(p)		((p)->i.aux + 2) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | #define testchar(st,c)	(((int)(st)[((c) >> 3)] & (1 << ((c) & 7))))  /* nonzero iff bit 'c' is set in charset 'st' */ | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  | @ -0,0 +1,364 @@ | ||||||
|  | /*
 | ||||||
|  | ** $Id: lpvm.c,v 1.9 2016/06/03 20:11:18 roberto Exp $ | ||||||
|  | ** Copyright 2007, Lua.org & PUC-Rio  (see 'lpeg.html' for license) | ||||||
|  | */ | ||||||
|  | 
 | ||||||
|  | #include <limits.h> | ||||||
|  | #include <string.h> | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | #include "lua.h" | ||||||
|  | #include "lauxlib.h" | ||||||
|  | 
 | ||||||
|  | #include "lpcap.h" | ||||||
|  | #include "lptypes.h" | ||||||
|  | #include "lpvm.h" | ||||||
|  | #include "lpprint.h" | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | /* initial size for call/backtrack stack */ | ||||||
|  | #if !defined(INITBACK) | ||||||
|  | #define INITBACK	MAXBACK | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | #define getoffset(p)	(((p) + 1)->offset) | ||||||
|  | 
 | ||||||
|  | static const Instruction giveup = {{IGiveup, 0, 0}}; | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  | ** {====================================================== | ||||||
|  | ** Virtual Machine | ||||||
|  | ** ======================================================= | ||||||
|  | */ | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | typedef struct Stack {  /* one entry of the call/backtrack stack */ | ||||||
|  |   const char *s;  /* saved position (or NULL for calls) */ | ||||||
|  |   const Instruction *p;  /* next instruction */ | ||||||
|  |   int caplevel;  /* presumably the capture-list level saved with this entry -- confirm */ | ||||||
|  | } Stack; | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | #define getstackbase(L, ptop)	((Stack *)lua_touserdata(L, stackidx(ptop))) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | /* | ||||||
|  | ** Replace the capture array 'cap' by a new userdata with room for | ||||||
|  | ** 2 * 'captop' entries. The first 'captop - n' entries are copied | ||||||
|  | ** over ('n' is the number of new elements) and the new array takes | ||||||
|  | ** the capture-list slot in the Lua stack. Raises a Lua error when the | ||||||
|  | ** doubled size would be too large. | ||||||
|  | */ | ||||||
|  | static Capture *doublecap (lua_State *L, Capture *cap, int captop, | ||||||
|  |                                          int n, int ptop) { | ||||||
|  |   size_t newbytes; | ||||||
|  |   Capture *grown; | ||||||
|  |   if (captop >= INT_MAX/((int)sizeof(Capture) * 2)) | ||||||
|  |     luaL_error(L, "too many captures"); | ||||||
|  |   newbytes = captop * 2 * sizeof(Capture); | ||||||
|  |   grown = (Capture *)lua_newuserdata(L, newbytes); | ||||||
|  |   memcpy(grown, cap, (captop - n) * sizeof(Capture)); | ||||||
|  |   lua_replace(L, caplistidx(ptop));  /* new userdata becomes the capture list */ | ||||||
|  |   return grown; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | /* | ||||||
|  | ** Grow the backtrack stack to twice its current size, capped at the | ||||||
|  | ** maximum kept in the registry under MAXSTACKIDX. Raises a Lua error | ||||||
|  | ** if the stack is already at that maximum. Updates '*stacklimit' to | ||||||
|  | ** the end of the new stack and returns its first free slot. | ||||||
|  | */ | ||||||
|  | static Stack *doublestack (lua_State *L, Stack **stacklimit, int ptop) { | ||||||
|  |   Stack *oldbase = getstackbase(L, ptop); | ||||||
|  |   int used = *stacklimit - oldbase;  /* entries in the current stack */ | ||||||
|  |   int limit, newsize; | ||||||
|  |   Stack *newbase; | ||||||
|  |   lua_getfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX); | ||||||
|  |   limit = lua_tointeger(L, -1);  /* largest size allowed */ | ||||||
|  |   lua_pop(L, 1); | ||||||
|  |   if (used >= limit)  /* no room left to grow? */ | ||||||
|  |     luaL_error(L, "backtrack stack overflow (current limit is %d)", limit); | ||||||
|  |   newsize = (2 * used > limit) ? limit : 2 * used; | ||||||
|  |   newbase = (Stack *)lua_newuserdata(L, newsize * sizeof(Stack)); | ||||||
|  |   memcpy(newbase, oldbase, used * sizeof(Stack)); | ||||||
|  |   lua_replace(L, stackidx(ptop));  /* new userdata takes the stack slot */ | ||||||
|  |   *stacklimit = newbase + newsize; | ||||||
|  |   return newbase + used;  /* first free slot */ | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | /* | ||||||
|  | ** Decode the first result of a dynamic capture, found at stack index | ||||||
|  | ** 'fr'. A false value means failure: all results are removed and -1 | ||||||
|  | ** is returned. 'true' keeps the current position 'curr'. Any other | ||||||
|  | ** value is read as an integer position (minus 1), which must lie | ||||||
|  | ** between 'curr' and 'limit' (the subject's size) or a Lua error is | ||||||
|  | ** raised. On success the first result is removed from the stack and | ||||||
|  | ** the new subject position is returned. | ||||||
|  | */ | ||||||
|  | static int resdyncaptures (lua_State *L, int fr, int curr, int limit) { | ||||||
|  |   lua_Integer pos = curr;  /* default: stay where we are */ | ||||||
|  |   if (!lua_toboolean(L, fr)) {  /* capture rejected the match? */ | ||||||
|  |     lua_settop(L, fr - 1);  /* drop every result */ | ||||||
|  |     return -1; | ||||||
|  |   } | ||||||
|  |   if (!lua_isboolean(L, fr)) {  /* explicit position given? */ | ||||||
|  |     pos = lua_tointeger(L, fr) - 1; | ||||||
|  |     if (pos < curr || pos > limit) | ||||||
|  |       luaL_error(L, "invalid position returned by match-time capture"); | ||||||
|  |   } | ||||||
|  |   lua_remove(L, fr);  /* the position itself is not a capture value */ | ||||||
|  |   return pos; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | /* | ||||||
|  | ** Write into 'base' the entries for the values returned by a dynamic | ||||||
|  | ** capture: an anonymous group at base[0], then 'n' closed run-time | ||||||
|  | ** captures whose 'idx' are the stack indices fd, fd + 1, ..., and | ||||||
|  | ** finally a closing entry. The run-time and closing entries get | ||||||
|  | ** subject position 's'; the 's' field of base[0] is not written here. | ||||||
|  | */ | ||||||
|  | static void adddyncaptures (const char *s, Capture *base, int n, int fd) { | ||||||
|  |   Capture *c = base; | ||||||
|  |   int k; | ||||||
|  |   c->kind = Cgroup;  /* group that holds the values */ | ||||||
|  |   c->siz = 0; | ||||||
|  |   c->idx = 0;  /* no name: anonymous group */ | ||||||
|  |   c++; | ||||||
|  |   for (k = 0; k < n; k++, c++) {  /* one run-time capture per value */ | ||||||
|  |     c->kind = Cruntime; | ||||||
|  |     c->siz = 1;  /* closed capture */ | ||||||
|  |     c->idx = fd + k;  /* where the value lives in the Lua stack */ | ||||||
|  |     c->s = s; | ||||||
|  |   } | ||||||
|  |   c->kind = Cclose;  /* end of the group */ | ||||||
|  |   c->siz = 1; | ||||||
|  |   c->s = s; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | /* | ||||||
|  | ** Discard from the Lua stack the values of the dynamic captures found | ||||||
|  | ** (by 'finddyncap') between capture[level] and capture[last]; used | ||||||
|  | ** when the match fails. Returns how many stack values were removed | ||||||
|  | ** (0 when there is no dynamic capture in that range). | ||||||
|  | */ | ||||||
|  | static int removedyncap (lua_State *L, Capture *capture, | ||||||
|  |                          int level, int last) { | ||||||
|  |   int oldtop; | ||||||
|  |   int first = finddyncap(capture + level, capture + last); | ||||||
|  |   if (first == 0)  /* nothing to remove? */ | ||||||
|  |     return 0; | ||||||
|  |   oldtop = lua_gettop(L); | ||||||
|  |   lua_settop(L, first - 1);  /* cut the stack just below the first value */ | ||||||
|  |   return oldtop - first + 1; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  | ** Opcode interpreter | ||||||
|  | */ | ||||||
|  | const char *match (lua_State *L, const char *o, const char *s, const char *e,  /* VM loop: runs pattern program 'op' on the subject from 's' up to 'e'; 'o' is the subject origin */ | ||||||
|  |                    Instruction *op, Capture *capture, int ptop) {  /* returns the subject position reached at IEnd, or NULL when IGiveup is executed */ | ||||||
|  |   Stack stackbase[INITBACK];  /* initial backtrack stack is a local array */ | ||||||
|  |   Stack *stacklimit = stackbase + INITBACK;  /* one past the last slot of the current stack */ | ||||||
|  |   Stack *stack = stackbase;  /* point to first empty slot in stack */ | ||||||
|  |   int capsize = INITCAPSIZE;  /* presumably the capacity of 'capture' as allocated by the caller -- confirm */ | ||||||
|  |   int captop = 0;  /* point to first empty slot in captures */ | ||||||
|  |   int ndyncap = 0;  /* number of dynamic captures (in Lua stack) */ | ||||||
|  |   const Instruction *p = op;  /* current instruction */ | ||||||
|  |   stack->p = &giveup; stack->s = s; stack->caplevel = 0; stack++;  /* bottom entry: when every alternative fails, 'fail' resumes at 'giveup' (presumably an IGiveup instruction) */ | ||||||
|  |   lua_pushlightuserdata(L, stackbase);  /* keep the stack base on the Lua stack; presumably what getstackbase() reads back */ | ||||||
|  |   for (;;) { | ||||||
|  | #if defined(DEBUG) | ||||||
|  |       printf("-------------------------------------\n"); | ||||||
|  |       printcaplist(capture, capture + captop); | ||||||
|  |       printf("s: |%s| stck:%d, dyncaps:%d, caps:%d  ", | ||||||
|  |              s, (int)(stack - getstackbase(L, ptop)), ndyncap, captop); | ||||||
|  |       printinst(op, p); | ||||||
|  | #endif | ||||||
|  |     assert(stackidx(ptop) + ndyncap == lua_gettop(L) && ndyncap <= captop);  /* invariant: Lua stack top is stackidx(ptop) plus one slot per dynamic capture */ | ||||||
|  |     switch ((Opcode)p->i.code) { | ||||||
|  |       case IEnd: {  /* successful end of program */ | ||||||
|  |         assert(stack == getstackbase(L, ptop) + 1);  /* only the bottom 'giveup' entry remains */ | ||||||
|  |         capture[captop].kind = Cclose;  /* append a closing entry (kind Cclose, s NULL) after the last capture */ | ||||||
|  |         capture[captop].s = NULL; | ||||||
|  |         return s; | ||||||
|  |       } | ||||||
|  |       case IGiveup: {  /* no alternative left: the match fails */ | ||||||
|  |         assert(stack == getstackbase(L, ptop)); | ||||||
|  |         return NULL; | ||||||
|  |       } | ||||||
|  |       case IRet: {  /* pop a call frame (marked by s == NULL) and resume at its saved address */ | ||||||
|  |         assert(stack > getstackbase(L, ptop) && (stack - 1)->s == NULL); | ||||||
|  |         p = (--stack)->p; | ||||||
|  |         continue; | ||||||
|  |       } | ||||||
|  |       case IAny: {  /* consume one character, or fail at end of subject */ | ||||||
|  |         if (s < e) { p++; s++; } | ||||||
|  |         else goto fail; | ||||||
|  |         continue; | ||||||
|  |       } | ||||||
|  |       case ITestAny: {  /* test only: nothing is consumed; jumps instead of failing */ | ||||||
|  |         if (s < e) p += 2; | ||||||
|  |         else p += getoffset(p); | ||||||
|  |         continue; | ||||||
|  |       } | ||||||
|  |       case IChar: {  /* NOTE(review): *s is read before the s < e test here and in ITestChar/ISet/ITestSet; presumably safe because the subject is NUL-terminated at 'e' -- confirm */ | ||||||
|  |         if ((byte)*s == p->i.aux && s < e) { p++; s++; } | ||||||
|  |         else goto fail; | ||||||
|  |         continue; | ||||||
|  |       } | ||||||
|  |       case ITestChar: {  /* test only: nothing is consumed; jumps instead of failing */ | ||||||
|  |         if ((byte)*s == p->i.aux && s < e) p += 2; | ||||||
|  |         else p += getoffset(p); | ||||||
|  |         continue; | ||||||
|  |       } | ||||||
|  |       case ISet: {  /* match one character against the charset stored right after the instruction */ | ||||||
|  |         int c = (byte)*s; | ||||||
|  |         if (testchar((p+1)->buff, c) && s < e) | ||||||
|  |           { p += CHARSETINSTSIZE; s++; } | ||||||
|  |         else goto fail; | ||||||
|  |         continue; | ||||||
|  |       } | ||||||
|  |       case ITestSet: {  /* test only; here the charset starts at p + 2 */ | ||||||
|  |         int c = (byte)*s; | ||||||
|  |         if (testchar((p + 2)->buff, c) && s < e) | ||||||
|  |           p += 1 + CHARSETINSTSIZE; | ||||||
|  |         else p += getoffset(p); | ||||||
|  |         continue; | ||||||
|  |       } | ||||||
|  |       case IBehind: {  /* step back 'aux' characters; fail if that would pass the subject origin */ | ||||||
|  |         int n = p->i.aux; | ||||||
|  |         if (n > s - o) goto fail; | ||||||
|  |         s -= n; p++; | ||||||
|  |         continue; | ||||||
|  |       } | ||||||
|  |       case ISpan: {  /* consume characters while they belong to the charset; never fails */ | ||||||
|  |         for (; s < e; s++) { | ||||||
|  |           int c = (byte)*s; | ||||||
|  |           if (!testchar((p+1)->buff, c)) break; | ||||||
|  |         } | ||||||
|  |         p += CHARSETINSTSIZE; | ||||||
|  |         continue; | ||||||
|  |       } | ||||||
|  |       case IJmp: {  /* unconditional jump */ | ||||||
|  |         p += getoffset(p); | ||||||
|  |         continue; | ||||||
|  |       } | ||||||
|  |       case IChoice: {  /* push a backtrack entry: alternative address, subject position, capture level */ | ||||||
|  |         if (stack == stacklimit) | ||||||
|  |           stack = doublestack(L, &stacklimit, ptop);  /* stack full: presumably reallocates it larger and updates stacklimit -- confirm */ | ||||||
|  |         stack->p = p + getoffset(p); | ||||||
|  |         stack->s = s; | ||||||
|  |         stack->caplevel = captop; | ||||||
|  |         stack++; | ||||||
|  |         p += 2; | ||||||
|  |         continue; | ||||||
|  |       } | ||||||
|  |       case ICall: {  /* push a return frame; s == NULL distinguishes it from a choice entry */ | ||||||
|  |         if (stack == stacklimit) | ||||||
|  |           stack = doublestack(L, &stacklimit, ptop); | ||||||
|  |         stack->s = NULL; | ||||||
|  |         stack->p = p + 2;  /* save return address */ | ||||||
|  |         stack++; | ||||||
|  |         p += getoffset(p); | ||||||
|  |         continue; | ||||||
|  |       } | ||||||
|  |       case ICommit: {  /* discard the top choice entry and jump */ | ||||||
|  |         assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL); | ||||||
|  |         stack--; | ||||||
|  |         p += getoffset(p); | ||||||
|  |         continue; | ||||||
|  |       } | ||||||
|  |       case IPartialCommit: {  /* keep the top choice entry but refresh its position and capture level */ | ||||||
|  |         assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL); | ||||||
|  |         (stack - 1)->s = s; | ||||||
|  |         (stack - 1)->caplevel = captop; | ||||||
|  |         p += getoffset(p); | ||||||
|  |         continue; | ||||||
|  |       } | ||||||
|  |       case IBackCommit: {  /* restore position and capture level from the top choice, but jump to this instruction's own offset */ | ||||||
|  |         assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL); | ||||||
|  |         s = (--stack)->s; | ||||||
|  |         captop = stack->caplevel; | ||||||
|  |         p += getoffset(p); | ||||||
|  |         continue; | ||||||
|  |       } | ||||||
|  |       case IFailTwice:  /* drop one entry, then take the normal failure path */ | ||||||
|  |         assert(stack > getstackbase(L, ptop)); | ||||||
|  |         stack--; | ||||||
|  |         /* fall through into IFail */ | ||||||
|  |       case IFail: | ||||||
|  |       fail: { /* pattern failed: try to backtrack */ | ||||||
|  |         do {  /* remove pending calls (entries whose s is NULL) until a choice entry is popped */ | ||||||
|  |           assert(stack > getstackbase(L, ptop)); | ||||||
|  |           s = (--stack)->s; | ||||||
|  |         } while (s == NULL); | ||||||
|  |         if (ndyncap > 0)  /* is there matchtime captures? */ | ||||||
|  |           ndyncap -= removedyncap(L, capture, stack->caplevel, captop);  /* presumably drops the dynamic captures made since the choice point and returns how many -- confirm */ | ||||||
|  |         captop = stack->caplevel;  /* forget captures recorded after the choice point */ | ||||||
|  |         p = stack->p;  /* resume at the saved alternative */ | ||||||
|  | #if defined(DEBUG) | ||||||
|  |         printf("**FAIL**\n"); | ||||||
|  | #endif | ||||||
|  |         continue; | ||||||
|  |       } | ||||||
|  |       case ICloseRunTime: {  /* match-time capture: call the function now and turn its results into captures */ | ||||||
|  |         CapState cs; | ||||||
|  |         int rem, res, n; | ||||||
|  |         int fr = lua_gettop(L) + 1;  /* stack index of first result */ | ||||||
|  |         cs.s = o; cs.L = L; cs.ocap = capture; cs.ptop = ptop; | ||||||
|  |         n = runtimecap(&cs, capture + captop, s, &rem);  /* call function */ | ||||||
|  |         captop -= n;  /* remove nested captures */ | ||||||
|  |         ndyncap -= rem;  /* update number of dynamic captures */ | ||||||
|  |         fr -= rem;  /* 'rem' items were popped from Lua stack */ | ||||||
|  |         res = resdyncaptures(L, fr, s - o, e - o);  /* get result */ | ||||||
|  |         if (res == -1)  /* fail? */ | ||||||
|  |           goto fail; | ||||||
|  |         s = o + res;  /* else update current position */ | ||||||
|  |         n = lua_gettop(L) - fr + 1;  /* number of new captures */ | ||||||
|  |         ndyncap += n;  /* update number of dynamic captures */ | ||||||
|  |         if (n > 0) {  /* any new capture? */ | ||||||
|  |           if (fr + n >= SHRT_MAX)  /* NOTE(review): presumably because Lua stack indices end up in a 'short' capture field -- confirm */ | ||||||
|  |             luaL_error(L, "too many results in match-time capture"); | ||||||
|  |           if ((captop += n + 2) >= capsize) {  /* reserve n + 2 entries; the extra 2 are presumably open/close entries written by adddyncaptures -- confirm */ | ||||||
|  |             capture = doublecap(L, capture, captop, n + 2, ptop); | ||||||
|  |             capsize = 2 * captop; | ||||||
|  |           } | ||||||
|  |           /* add new captures to 'capture' list */ | ||||||
|  |           adddyncaptures(s, capture + captop - n - 2, n, fr);  | ||||||
|  |         } | ||||||
|  |         p++; | ||||||
|  |         continue; | ||||||
|  |       } | ||||||
|  |       case ICloseCapture: {  /* close the capture opened last, merging into one entry when short enough */ | ||||||
|  |         const char *s1 = s; | ||||||
|  |         assert(captop > 0); | ||||||
|  |         /* if possible, turn capture into a full capture */ | ||||||
|  |         if (capture[captop - 1].siz == 0 && | ||||||
|  |             s1 - capture[captop - 1].s < UCHAR_MAX) { | ||||||
|  |           capture[captop - 1].siz = s1 - capture[captop - 1].s + 1;  /* siz holds length + 1; 0 stays reserved for "open" */ | ||||||
|  |           p++; | ||||||
|  |           continue; | ||||||
|  |         } | ||||||
|  |         else { | ||||||
|  |           capture[captop].siz = 1;  /* mark entry as closed */ | ||||||
|  |           capture[captop].s = s; | ||||||
|  |           goto pushcapture; | ||||||
|  |         } | ||||||
|  |       } | ||||||
|  |       case IOpenCapture: | ||||||
|  |         capture[captop].siz = 0;  /* mark entry as open */ | ||||||
|  |         capture[captop].s = s; | ||||||
|  |         goto pushcapture; | ||||||
|  |       case IFullCapture: | ||||||
|  |         capture[captop].siz = getoff(p) + 1;  /* save capture size */ | ||||||
|  |         capture[captop].s = s - getoff(p);  /* capture covers the last getoff(p) characters */ | ||||||
|  |         /* fall through to pushcapture */ | ||||||
|  |       pushcapture: {  /* shared tail: fill idx/kind, advance captop, grow the list when it reaches capsize */ | ||||||
|  |         capture[captop].idx = p->i.key; | ||||||
|  |         capture[captop].kind = getkind(p); | ||||||
|  |         if (++captop >= capsize) { | ||||||
|  |           capture = doublecap(L, capture, captop, 0, ptop); | ||||||
|  |           capsize = 2 * captop; | ||||||
|  |         } | ||||||
|  |         p++; | ||||||
|  |         continue; | ||||||
|  |       } | ||||||
|  |       default: assert(0); return NULL;  /* opcode with no case above (e.g. IOpenCall) */ | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /* }====================================================== */ | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | @ -0,0 +1,58 @@ | ||||||
|  | /*
 | ||||||
|  | ** $Id: lpvm.h,v 1.3 2014/02/21 13:06:41 roberto Exp $ | ||||||
|  | */ | ||||||
|  | 
 | ||||||
|  | #if !defined(lpvm_h) | ||||||
|  | #define lpvm_h | ||||||
|  | 
 | ||||||
|  | #include "lpcap.h" | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | /* Virtual Machine's instructions */ | ||||||
|  | typedef enum Opcode {  /* values are stored in Instruction.i.code, so the order of the enumerators is part of the encoding */ | ||||||
|  |   IAny, /* if no char, fail */ | ||||||
|  |   IChar,  /* if char != aux, fail */ | ||||||
|  |   ISet,  /* if char not in buff, fail */ | ||||||
|  |   ITestAny,  /* if no char, jump to 'offset' */ | ||||||
|  |   ITestChar,  /* if char != aux, jump to 'offset' */ | ||||||
|  |   ITestSet,  /* if char not in buff, jump to 'offset' */ | ||||||
|  |   ISpan,  /* read a span of chars in buff */ | ||||||
|  |   IBehind,  /* walk back 'aux' characters (fail if not possible) */ | ||||||
|  |   IRet,  /* return from a rule */ | ||||||
|  |   IEnd,  /* end of pattern */ | ||||||
|  |   IChoice,  /* stack a choice; next fail will jump to 'offset' */ | ||||||
|  |   IJmp,  /* jump to 'offset' */ | ||||||
|  |   ICall,  /* call rule at 'offset' */ | ||||||
|  |   IOpenCall,  /* call rule number 'key' (must be resolved into an ICall; match() has no case for it) */ | ||||||
|  |   ICommit,  /* pop choice and jump to 'offset' */ | ||||||
|  |   IPartialCommit,  /* update top choice to current position and jump */ | ||||||
|  |   IBackCommit,  /* "fails" but jump to its own 'offset' */ | ||||||
|  |   IFailTwice,  /* pop one choice and then fail */ | ||||||
|  |   IFail,  /* go back to saved state on choice and jump to saved offset */ | ||||||
|  |   IGiveup,  /* internal use */ | ||||||
|  |   IFullCapture,  /* complete capture of last 'off' chars */ | ||||||
|  |   IOpenCapture,  /* start a capture */ | ||||||
|  |   ICloseCapture,  /* close the capture opened last */ | ||||||
|  |   ICloseRunTime  /* close a match-time capture: its function is called during the match */ | ||||||
|  | } Opcode; | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | typedef union Instruction {  /* one cell of a pattern program: an opcode header, an offset, or the start of charset bytes */ | ||||||
|  |   struct Inst { | ||||||
|  |     byte code;  /* opcode; match() casts it to Opcode */ | ||||||
|  |     byte aux;  /* small operand: the character for IChar/ITestChar, the distance for IBehind */ | ||||||
|  |     short key;  /* copied into a capture's 'idx' by the capture instructions */ | ||||||
|  |   } i; | ||||||
|  |   int offset;  /* presumably the relative jump read through getoffset() -- confirm */ | ||||||
|  |   byte buff[1];  /* first byte of charset data; match() passes (p+1)->buff or (p+2)->buff to testchar */ | ||||||
|  | } Instruction; | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | void printpatt (Instruction *p, int n); | ||||||
|  | const char *match (lua_State *L, const char *o, const char *s, const char *e, | ||||||
|  |                    Instruction *op, Capture *capture, int ptop); | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  | @ -0,0 +1,55 @@ | ||||||
|  | LIBNAME = lpeg | ||||||
|  | LUADIR = ../lua/ | ||||||
|  | 
 | ||||||
|  | COPT = -O2 | ||||||
|  | # COPT = -DLPEG_DEBUG -g
 | ||||||
|  | 
 | ||||||
|  | CWARNS = -Wall -Wextra -pedantic \
 | ||||||
|  | 	-Waggregate-return \
 | ||||||
|  | 	-Wcast-align \
 | ||||||
|  | 	-Wcast-qual \
 | ||||||
|  | 	-Wdisabled-optimization \
 | ||||||
|  | 	-Wpointer-arith \
 | ||||||
|  | 	-Wshadow \
 | ||||||
|  | 	-Wsign-compare \
 | ||||||
|  | 	-Wundef \
 | ||||||
|  | 	-Wwrite-strings \
 | ||||||
|  | 	-Wbad-function-cast \
 | ||||||
|  | 	-Wdeclaration-after-statement \
 | ||||||
|  | 	-Wmissing-prototypes \
 | ||||||
|  | 	-Wnested-externs \
 | ||||||
|  | 	-Wstrict-prototypes \
 | ||||||
|  | # -Wunreachable-code \
 | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | CFLAGS = $(CWARNS) $(COPT) -std=c99 -I$(LUADIR) -fPIC | ||||||
|  | CC = gcc | ||||||
|  | 
 | ||||||
|  | FILES = lpvm.o lpcap.o lptree.o lpcode.o lpprint.o | ||||||
|  | 
 | ||||||
|  | # For Linux: build lpeg.so as a shared object through a sub-make; $(MAKE) reuses the invoking make program and its -n/-j settings
 | ||||||
|  | linux: | ||||||
|  | 	$(MAKE) lpeg.so "DLLFLAGS = -shared -fPIC" | ||||||
|  | 
 | ||||||
|  | # For Mac OS: build lpeg.so as a bundle through a sub-make; $(MAKE) reuses the invoking make program and its -n/-j settings
 | ||||||
|  | macosx: | ||||||
|  | 	$(MAKE) lpeg.so "DLLFLAGS = -bundle -undefined dynamic_lookup" | ||||||
|  | 
 | ||||||
|  | lpeg.so: $(FILES) | ||||||
|  | 	env $(CC) $(DLLFLAGS) $(FILES) -o lpeg.so | ||||||
|  | 
 | ||||||
|  | $(FILES): makefile | ||||||
|  | 
 | ||||||
|  | test: test.lua re.lua lpeg.so | ||||||
|  | 	./test.lua | ||||||
|  | 
 | ||||||
|  | clean: | ||||||
|  | 	rm -f $(FILES) lpeg.so | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | lpcap.o: lpcap.c lpcap.h lptypes.h | ||||||
|  | lpcode.o: lpcode.c lptypes.h lpcode.h lptree.h lpvm.h lpcap.h | ||||||
|  | lpprint.o: lpprint.c lptypes.h lpprint.h lptree.h lpvm.h lpcap.h | ||||||
|  | lptree.o: lptree.c lptypes.h lpcap.h lpcode.h lptree.h lpvm.h lpprint.h | ||||||
|  | lpvm.o: lpvm.c lpcap.h lptypes.h lpvm.h lpprint.h lptree.h | ||||||
|  | 
 | ||||||
|  | @ -0,0 +1,498 @@ | ||||||
|  | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | ||||||
|  |    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | ||||||
|  | <html> | ||||||
|  | <head> | ||||||
|  |     <title>LPeg.re - Regex syntax for LPEG</title> | ||||||
|  |     <link rel="stylesheet" | ||||||
|  |           href="http://www.inf.puc-rio.br/~roberto/lpeg/doc.css" | ||||||
|  |           type="text/css"/> | ||||||
|  | 	<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/> | ||||||
|  | </head> | ||||||
|  | <body> | ||||||
|  | 
 | ||||||
|  | <!-- $Id: re.html,v 1.24 2016/09/20 17:41:27 roberto Exp $ --> | ||||||
|  | 
 | ||||||
|  | <div id="container"> | ||||||
|  | 	 | ||||||
|  | <div id="product"> | ||||||
|  |   <div id="product_logo"> | ||||||
|  |     <a href="http://www.inf.puc-rio.br/~roberto/lpeg/"> | ||||||
|  |     <img alt="LPeg logo" src="lpeg-128.gif"/> | ||||||
|  |     </a> | ||||||
|  |   </div> | ||||||
|  |   <div id="product_name"><big><strong>LPeg.re</strong></big></div> | ||||||
|  |   <div id="product_description"> | ||||||
|  |      Regex syntax for LPEG | ||||||
|  |   </div> | ||||||
|  | </div> <!-- id="product" --> | ||||||
|  | 
 | ||||||
|  | <div id="main"> | ||||||
|  | 	 | ||||||
|  | <div id="navigation"> | ||||||
|  | <h1>re</h1> | ||||||
|  | 
 | ||||||
|  | <ul> | ||||||
|  |   <li><a href="#basic">Basic Constructions</a></li> | ||||||
|  |   <li><a href="#func">Functions</a></li> | ||||||
|  |   <li><a href="#ex">Some Examples</a></li> | ||||||
|  |   <li><a href="#license">License</a></li> | ||||||
|  | </ul> | ||||||
|  | </div> <!-- id="navigation" --> | ||||||
|  | 
 | ||||||
|  | <div id="content"> | ||||||
|  | 
 | ||||||
|  | <h2><a name="basic"></a>The <code>re</code> Module</h2> | ||||||
|  | 
 | ||||||
|  | <p> | ||||||
|  | The <code>re</code> module | ||||||
|  | (provided by file <code>re.lua</code> in the distribution) | ||||||
|  | supports a somewhat conventional regex syntax | ||||||
|  | for pattern usage within <a href="lpeg.html">LPeg</a>. | ||||||
|  | </p> | ||||||
|  | 
 | ||||||
|  | <p> | ||||||
|  | The next table summarizes <code>re</code>'s syntax. | ||||||
|  | A <code>p</code> represents an arbitrary pattern; | ||||||
|  | <code>num</code> represents a number (<code>[0-9]+</code>); | ||||||
|  | <code>name</code> represents an identifier | ||||||
|  | (<code>[a-zA-Z][a-zA-Z0-9_]*</code>). | ||||||
|  | Constructions are listed in order of decreasing precedence. | ||||||
|  | </p> | ||||||
|  | <table border="1"> | ||||||
|  | <tbody><tr><td><b>Syntax</b></td><td><b>Description</b></td></tr> | ||||||
|  | <tr><td><code>( p )</code></td> <td>grouping</td></tr> | ||||||
|  | <tr><td><code>'string'</code></td> <td>literal string</td></tr> | ||||||
|  | <tr><td><code>"string"</code></td> <td>literal string</td></tr> | ||||||
|  | <tr><td><code>[class]</code></td> <td>character class</td></tr> | ||||||
|  | <tr><td><code>.</code></td> <td>any character</td></tr> | ||||||
|  | <tr><td><code>%name</code></td> | ||||||
|  |   <td>pattern <code>defs[name]</code> or a pre-defined pattern</td></tr> | ||||||
|  | <tr><td><code>name</code></td><td>non terminal</td></tr> | ||||||
|  | <tr><td><code>&lt;name&gt;</code></td><td>non terminal</td></tr> | ||||||
|  | <tr><td><code>{}</code></td> <td>position capture</td></tr> | ||||||
|  | <tr><td><code>{ p }</code></td> <td>simple capture</td></tr> | ||||||
|  | <tr><td><code>{: p :}</code></td> <td>anonymous group capture</td></tr> | ||||||
|  | <tr><td><code>{:name: p :}</code></td> <td>named group capture</td></tr> | ||||||
|  | <tr><td><code>{~ p ~}</code></td> <td>substitution capture</td></tr> | ||||||
|  | <tr><td><code>{| p |}</code></td> <td>table capture</td></tr> | ||||||
|  | <tr><td><code>=name</code></td> <td>back reference | ||||||
|  | </td></tr> | ||||||
|  | <tr><td><code>p ?</code></td> <td>optional match</td></tr> | ||||||
|  | <tr><td><code>p *</code></td> <td>zero or more repetitions</td></tr> | ||||||
|  | <tr><td><code>p +</code></td> <td>one or more repetitions</td></tr> | ||||||
|  | <tr><td><code>p^num</code></td> <td>exactly <code>num</code> repetitions</td></tr> | ||||||
|  | <tr><td><code>p^+num</code></td> | ||||||
|  |       <td>at least <code>num</code> repetitions</td></tr> | ||||||
|  | <tr><td><code>p^-num</code></td> | ||||||
|  |       <td>at most <code>num</code> repetitions</td></tr> | ||||||
|  | <tr><td><code>p -> 'string'</code></td> <td>string capture</td></tr> | ||||||
|  | <tr><td><code>p -> "string"</code></td> <td>string capture</td></tr> | ||||||
|  | <tr><td><code>p -> num</code></td> <td>numbered capture</td></tr> | ||||||
|  | <tr><td><code>p -> name</code></td> <td>function/query/string capture, | ||||||
|  | equivalent to <code>p / defs[name]</code></td></tr> | ||||||
|  | <tr><td><code>p => name</code></td> <td>match-time capture, | ||||||
|  | equivalent to <code>lpeg.Cmt(p, defs[name])</code></td></tr> | ||||||
|  | <tr><td><code>& p</code></td> <td>and predicate</td></tr> | ||||||
|  | <tr><td><code>! p</code></td> <td>not predicate</td></tr> | ||||||
|  | <tr><td><code>p1 p2</code></td> <td>concatenation</td></tr> | ||||||
|  | <tr><td><code>p1 / p2</code></td> <td>ordered choice</td></tr> | ||||||
|  | <tr><td>(<code>name <- p</code>)<sup>+</sup></td> <td>grammar</td></tr> | ||||||
|  | </tbody></table> | ||||||
|  | <p> | ||||||
|  | Any space appearing in a syntax description can be | ||||||
|  | replaced by zero or more space characters and Lua-style comments | ||||||
|  | (<code>--</code> until end of line). | ||||||
|  | </p> | ||||||
|  | 
 | ||||||
|  | <p> | ||||||
|  | Character classes define sets of characters. | ||||||
|  | An initial <code>^</code> complements the resulting set. | ||||||
|  | A range <em>x</em><code>-</code><em>y</em> includes in the set | ||||||
|  | all characters with codes between the codes of <em>x</em> and <em>y</em>. | ||||||
|  | A pre-defined class <code>%</code><em>name</em> includes all | ||||||
|  | characters of that class. | ||||||
|  | A simple character includes itself in the set. | ||||||
|  | The only special characters inside a class are <code>^</code> | ||||||
|  | (special only if it is the first character); | ||||||
|  | <code>]</code> | ||||||
|  | (can be included in the set as the first character, | ||||||
|  | after the optional <code>^</code>); | ||||||
|  | <code>%</code> (special only if followed by a letter); | ||||||
|  | and <code>-</code> | ||||||
|  | (can be included in the set as the first or the last character). | ||||||
|  | </p> | ||||||
|  | 
 | ||||||
|  | <p> | ||||||
|  | Currently the pre-defined classes are similar to those from | ||||||
|  | Lua's string library | ||||||
|  | (<code>%a</code> for letters, | ||||||
|  | <code>%A</code> for non letters, etc.). | ||||||
|  | There is also a class <code>%nl</code> | ||||||
|  | containing only the newline character, | ||||||
|  | which is particularly handy for grammars written inside long strings, | ||||||
|  | as long strings do not interpret escape sequences like <code>\n</code>. | ||||||
|  | </p> | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | <h2><a name="func">Functions</a></h2> | ||||||
|  | 
 | ||||||
|  | <h3><code>re.compile (string [, defs])</code></h3> | ||||||
|  | <p> | ||||||
|  | Compiles the given string and | ||||||
|  | returns an equivalent LPeg pattern. | ||||||
|  | The given string may define either an expression or a grammar. | ||||||
|  | The optional <code>defs</code> table provides extra Lua values | ||||||
|  | to be used by the pattern. | ||||||
|  | </p> | ||||||
|  | 
 | ||||||
|  | <h3><code>re.find (subject, pattern [, init])</code></h3> | ||||||
|  | <p> | ||||||
|  | Searches for the given pattern in the given subject. | ||||||
|  | If it finds a match, | ||||||
|  | returns the index where this occurrence starts and | ||||||
|  | the index where it ends. | ||||||
|  | Otherwise, returns nil. | ||||||
|  | </p> | ||||||
|  | 
 | ||||||
|  | <p> | ||||||
|  | An optional numeric argument <code>init</code> makes the search | ||||||
|  | start at that position in the subject string. | ||||||
|  | As usual in Lua libraries, | ||||||
|  | a negative value counts from the end. | ||||||
|  | </p> | ||||||
|  | 
 | ||||||
|  | <h3><code>re.gsub (subject, pattern, replacement)</code></h3> | ||||||
|  | <p> | ||||||
|  | Does a <em>global substitution</em>, | ||||||
|  | replacing all occurrences of <code>pattern</code> | ||||||
|  | in the given <code>subject</code> by <code>replacement</code>. | ||||||
|  | </p> | ||||||
|  | 
 | ||||||
|  | <h3><code>re.match (subject, pattern)</code></h3> | ||||||
|  | <p> | ||||||
|  | Matches the given pattern against the given subject, | ||||||
|  | returning all captures. | ||||||
|  | </p> | ||||||
|  | 
 | ||||||
|  | <h3><code>re.updatelocale ()</code></h3> | ||||||
|  | <p> | ||||||
|  | Updates the pre-defined character classes to the current locale. | ||||||
|  | </p> | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | <h2><a name="ex">Some Examples</a></h2> | ||||||
|  | 
 | ||||||
|  | <h3>A complete simple program</h3> | ||||||
|  | <p> | ||||||
|  | The next code shows a simple complete Lua program using | ||||||
|  | the <code>re</code> module: | ||||||
|  | </p> | ||||||
|  | <pre class="example"> | ||||||
|  | local re = require"re" | ||||||
|  | 
 | ||||||
|  | -- find the position of the first numeral in a string | ||||||
|  | print(re.find("the number 423 is odd", "[0-9]+"))  --> 12    14 | ||||||
|  | 
 | ||||||
|  | -- returns all words in a string | ||||||
|  | print(re.match("the number 423 is odd", "({%a+} / .)*")) | ||||||
|  | --> the    number    is    odd | ||||||
|  | 
 | ||||||
|  | -- returns the first numeral in a string | ||||||
|  | print(re.match("the number 423 is odd", "s <- {%d+} / . s")) | ||||||
|  | --> 423 | ||||||
|  | 
 | ||||||
|  | print(re.gsub("hello World", "[aeiou]", ".")) | ||||||
|  | --> h.ll. W.rld | ||||||
|  | </pre> | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | <h3>Balanced parentheses</h3> | ||||||
|  | <p> | ||||||
|  | The following call will produce the same pattern produced by the | ||||||
|  | Lua expression in the | ||||||
|  | <a href="lpeg.html#balanced">balanced parentheses</a> example: | ||||||
|  | </p> | ||||||
|  | <pre class="example"> | ||||||
|  | b = re.compile[[  balanced <- "(" ([^()] / balanced)* ")"  ]] | ||||||
|  | </pre> | ||||||
|  | 
 | ||||||
|  | <h3>String reversal</h3> | ||||||
|  | <p> | ||||||
|  | The next example reverses a string: | ||||||
|  | </p> | ||||||
|  | <pre class="example"> | ||||||
|  | rev = re.compile[[ R <- (!.) -> '' / ({.} R) -> '%2%1']] | ||||||
|  | print(rev:match"0123456789")   --> 9876543210 | ||||||
|  | </pre> | ||||||
|  | 
 | ||||||
|  | <h3>CSV decoder</h3> | ||||||
|  | <p> | ||||||
|  | The next example replicates the <a href="lpeg.html#CSV">CSV decoder</a>: | ||||||
|  | </p> | ||||||
|  | <pre class="example"> | ||||||
|  | record = re.compile[[ | ||||||
|  |   record <- {| field (',' field)* |} (%nl / !.) | ||||||
|  |   field <- escaped / nonescaped | ||||||
|  |   nonescaped <- { [^,"%nl]* } | ||||||
|  |   escaped <- '"' {~ ([^"] / '""' -> '"')* ~} '"' | ||||||
|  | ]] | ||||||
|  | </pre> | ||||||
|  | 
 | ||||||
|  | <h3>Lua's long strings</h3> | ||||||
|  | <p> | ||||||
|  | The next example matches Lua long strings: | ||||||
|  | </p> | ||||||
|  | <pre class="example"> | ||||||
|  | c = re.compile([[ | ||||||
|  |   longstring <- ('[' {:eq: '='* :} '[' close) | ||||||
|  |   close <- ']' =eq ']' / . close | ||||||
|  | ]]) | ||||||
|  | 
 | ||||||
|  | print(c:match'[==[]]===]]]]==]===[]')   --> 17 | ||||||
|  | </pre> | ||||||
|  | 
 | ||||||
|  | <h3>Abstract Syntax Trees</h3> | ||||||
|  | <p> | ||||||
|  | This example shows a simple way to build an | ||||||
|  | abstract syntax tree (AST) for a given grammar. | ||||||
|  | To keep our example simple, | ||||||
|  | let us consider the following grammar | ||||||
|  | for lists of names: | ||||||
|  | </p> | ||||||
|  | <pre class="example"> | ||||||
|  | p = re.compile[[ | ||||||
|  |       listname <- (name s)* | ||||||
|  |       name <- [a-z][a-z]* | ||||||
|  |       s <- %s* | ||||||
|  | ]] | ||||||
|  | </pre> | ||||||
|  | <p> | ||||||
|  | Now, we will add captures to build a corresponding AST. | ||||||
|  | As a first step, the pattern will build a table to | ||||||
|  | represent each non terminal; | ||||||
|  | terminals will be represented by their corresponding strings: | ||||||
|  | </p> | ||||||
|  | <pre class="example"> | ||||||
|  | c = re.compile[[ | ||||||
|  |       listname <- {| (name s)* |} | ||||||
|  |       name <- {| {[a-z][a-z]*} |} | ||||||
|  |       s <- %s* | ||||||
|  | ]] | ||||||
|  | </pre> | ||||||
|  | <p> | ||||||
|  | Now, a match against <code>"hi hello bye"</code> | ||||||
|  | results in the table | ||||||
|  | <code>{{"hi"}, {"hello"}, {"bye"}}</code>. | ||||||
|  | </p> | ||||||
|  | <p> | ||||||
|  | For such a simple grammar, | ||||||
|  | this AST is more than enough; | ||||||
|  | actually, the tables around each single name | ||||||
|  | are already overkill. | ||||||
|  | More complex grammars, | ||||||
|  | however, may need some more structure. | ||||||
|  | Specifically, | ||||||
|  | it would be useful if each table had | ||||||
|  | a <code>tag</code> field telling what non terminal | ||||||
|  | that table represents. | ||||||
|  | We can add such a tag using | ||||||
|  | <a href="lpeg.html#cap-g">named group captures</a>: | ||||||
|  | </p> | ||||||
|  | <pre class="example"> | ||||||
|  | x = re.compile[[ | ||||||
|  |       listname <- {| {:tag: '' -> 'list':} (name s)* |} | ||||||
|  |       name <- {| {:tag: '' -> 'id':} {[a-z][a-z]*} |} | ||||||
|  |       s <- ' '* | ||||||
|  | ]] | ||||||
|  | </pre> | ||||||
|  | <p> | ||||||
|  | With these group captures, | ||||||
|  | a match against <code>"hi hello bye"</code> | ||||||
|  | results in the following table: | ||||||
|  | </p> | ||||||
|  | <pre class="example"> | ||||||
|  | {tag="list", | ||||||
|  |   {tag="id", "hi"}, | ||||||
|  |   {tag="id", "hello"}, | ||||||
|  |   {tag="id", "bye"} | ||||||
|  | } | ||||||
|  | </pre> | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | <h3>Indented blocks</h3> | ||||||
|  | <p> | ||||||
|  | This example breaks indented blocks into tables, | ||||||
|  | respecting the indentation: | ||||||
|  | </p> | ||||||
|  | <pre class="example"> | ||||||
|  | p = re.compile[[ | ||||||
|  |   block <- {| {:ident:' '*:} line | ||||||
|  |            ((=ident !' ' line) / &(=ident ' ') block)* |} | ||||||
|  |   line <- {[^%nl]*} %nl | ||||||
|  | ]] | ||||||
|  | </pre> | ||||||
|  | <p> | ||||||
|  | As an example, | ||||||
|  | consider the following text: | ||||||
|  | </p> | ||||||
|  | <pre class="example"> | ||||||
|  | t = p:match[[ | ||||||
|  | first line | ||||||
|  |   subline 1 | ||||||
|  |   subline 2 | ||||||
|  | second line | ||||||
|  | third line | ||||||
|  |   subline 3.1 | ||||||
|  |     subline 3.1.1 | ||||||
|  |   subline 3.2 | ||||||
|  | ]] | ||||||
|  | </pre> | ||||||
|  | <p> | ||||||
|  | The resulting table <code>t</code> will be like this: | ||||||
|  | </p> | ||||||
|  | <pre class="example"> | ||||||
|  |    {'first line'; {'subline 1'; 'subline 2'; ident = '  '}; | ||||||
|  |     'second line'; | ||||||
|  |     'third line'; { 'subline 3.1'; {'subline 3.1.1'; ident = '    '}; | ||||||
|  |                     'subline 3.2'; ident = '  '}; | ||||||
|  |     ident = ''} | ||||||
|  | </pre> | ||||||
|  | 
 | ||||||
|  | <h3>Macro expander</h3> | ||||||
|  | <p> | ||||||
|  | This example implements a simple macro expander. | ||||||
|  | Macros must be defined as part of the pattern, | ||||||
|  | following some simple rules: | ||||||
|  | </p> | ||||||
|  | <pre class="example"> | ||||||
|  | p = re.compile[[ | ||||||
|  |       text <- {~ item* ~} | ||||||
|  |       item <- macro / [^()] / '(' item* ')' | ||||||
|  |       arg <- ' '* {~ (!',' item)* ~} | ||||||
|  |       args <- '(' arg (',' arg)* ')' | ||||||
|  |       -- now we define some macros | ||||||
|  |       macro <- ('apply' args) -> '%1(%2)' | ||||||
|  |              / ('add' args) -> '%1 + %2' | ||||||
|  |              / ('mul' args) -> '%1 * %2' | ||||||
|  | ]] | ||||||
|  | 
 | ||||||
|  | print(p:match"add(mul(a,b), apply(f,x))")   --> a * b + f(x) | ||||||
|  | </pre> | ||||||
|  | <p> | ||||||
|  | A <code>text</code> is a sequence of items, | ||||||
|  | wherein we apply a substitution capture to expand any macros. | ||||||
|  | An <code>item</code> is either a macro, | ||||||
|  | any character different from parentheses, | ||||||
|  | or a parenthesized expression. | ||||||
|  | A macro argument (<code>arg</code>) is a sequence | ||||||
|  | of items different from a comma. | ||||||
|  | (Note that a comma may appear inside an item, | ||||||
|  | e.g., inside a parenthesized expression.) | ||||||
|  | Again we do a substitution capture to expand any macro | ||||||
|  | in the argument before expanding the outer macro. | ||||||
|  | <code>args</code> is a list of arguments separated by commas. | ||||||
|  | Finally we define the macros. | ||||||
|  | Each macro is a string substitution; | ||||||
|  | it replaces the macro name and its arguments by its corresponding string, | ||||||
|  | with each <code>%</code><em>n</em> replaced by the <em>n</em>-th argument. | ||||||
|  | </p> | ||||||
|  | 
 | ||||||
|  | <h3>Patterns</h3> | ||||||
|  | <p> | ||||||
|  | This example shows the complete syntax | ||||||
|  | of patterns accepted by <code>re</code>. | ||||||
|  | </p> | ||||||
|  | <pre class="example"> | ||||||
|  | p = [=[ | ||||||
|  | 
 | ||||||
|  | pattern         <- exp !. | ||||||
|  | exp             <- S (grammar / alternative) | ||||||
|  | 
 | ||||||
|  | alternative     <- seq ('/' S seq)* | ||||||
|  | seq             <- prefix* | ||||||
|  | prefix          <- '&' S prefix / '!' S prefix / suffix | ||||||
|  | suffix          <- primary S (([+*?] | ||||||
|  |                             / '^' [+-]? num | ||||||
|  |                             / '->' S (string / '{}' / name) | ||||||
|  |                             / '=>' S name) S)* | ||||||
|  | 
 | ||||||
|  | primary         <- '(' exp ')' / string / class / defined | ||||||
|  |                  / '{:' (name ':')? exp ':}' | ||||||
|  |                  / '=' name | ||||||
|  |                  / '{}' | ||||||
|  |                  / '{~' exp '~}' | ||||||
|  |                  / '{' exp '}' | ||||||
|  |                  / '.' | ||||||
|  |                  / name S !arrow | ||||||
|  |                  / '<' name '>'          -- old-style non terminals | ||||||
|  | 
 | ||||||
|  | grammar         <- definition+ | ||||||
|  | definition      <- name S arrow exp | ||||||
|  | 
 | ||||||
|  | class           <- '[' '^'? item (!']' item)* ']' | ||||||
|  | item            <- defined / range / . | ||||||
|  | range           <- . '-' [^]] | ||||||
|  | 
 | ||||||
|  | S               <- (%s / '--' [^%nl]*)*   -- spaces and comments | ||||||
|  | name            <- [A-Za-z][A-Za-z0-9_]* | ||||||
|  | arrow           <- '<-' | ||||||
|  | num             <- [0-9]+ | ||||||
|  | string          <- '"' [^"]* '"' / "'" [^']* "'" | ||||||
|  | defined         <- '%' name | ||||||
|  | 
 | ||||||
|  | ]=] | ||||||
|  | 
 | ||||||
|  | print(re.match(p, p))   -- a self description must match itself | ||||||
|  | </pre> | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | <h2><a name="license">License</a></h2> | ||||||
|  | 
 | ||||||
|  | <p> | ||||||
|  | Copyright © 2008-2015 Lua.org, PUC-Rio. | ||||||
|  | </p> | ||||||
|  | <p> | ||||||
|  | Permission is hereby granted, free of charge, | ||||||
|  | to any person obtaining a copy of this software and | ||||||
|  | associated documentation files (the "Software"), | ||||||
|  | to deal in the Software without restriction, | ||||||
|  | including without limitation the rights to use, | ||||||
|  | copy, modify, merge, publish, distribute, sublicense, | ||||||
|  | and/or sell copies of the Software, | ||||||
|  | and to permit persons to whom the Software is | ||||||
|  | furnished to do so, | ||||||
|  | subject to the following conditions: | ||||||
|  | </p> | ||||||
|  | 
 | ||||||
|  | <p> | ||||||
|  | The above copyright notice and this permission notice | ||||||
|  | shall be included in all copies or substantial portions of the Software. | ||||||
|  | </p> | ||||||
|  | 
 | ||||||
|  | <p> | ||||||
|  | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||||||
|  | EXPRESS OR IMPLIED, | ||||||
|  | INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||||||
|  | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | ||||||
|  | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, | ||||||
|  | DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||||||
|  | TORT OR OTHERWISE, ARISING FROM, | ||||||
|  | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | ||||||
|  | THE SOFTWARE. | ||||||
|  | </p> | ||||||
|  | 
 | ||||||
|  | </div> <!-- id="content" --> | ||||||
|  | 
 | ||||||
|  | </div> <!-- id="main" --> | ||||||
|  | 
 | ||||||
|  | <div id="about"> | ||||||
|  | <p><small> | ||||||
|  | $Id: re.html,v 1.24 2016/09/20 17:41:27 roberto Exp $ | ||||||
|  | </small></p> | ||||||
|  | </div> <!-- id="about" --> | ||||||
|  | 
 | ||||||
|  | </div> <!-- id="container" --> | ||||||
|  | 
 | ||||||
|  | </body> | ||||||
|  | </html>  | ||||||
|  | @ -0,0 +1,259 @@ | ||||||
|  | -- $Id: re.lua,v 1.44 2013/03/26 20:11:40 roberto Exp $ | ||||||
|  | 
 | ||||||
|  | -- imported functions and modules | ||||||
|  | local tonumber, type, print, error = tonumber, type, print, error | ||||||
|  | local setmetatable = setmetatable | ||||||
|  | local m = require"lpeg" | ||||||
|  | 
 | ||||||
|  | -- 'm' will be used to parse expressions, and 'mm' will be used to | ||||||
|  | -- create expressions; that is, 're' runs on 'm', creating patterns | ||||||
|  | -- on 'mm' | ||||||
|  | local mm = m | ||||||
|  | 
 | ||||||
|  | -- pattern's metatable | ||||||
|  | local mt = getmetatable(mm.P(0)) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | -- No more global accesses after this point. | ||||||
|  | -- On Lua 5.2 the environment upvalue is cleared, so any stray global | ||||||
|  | -- reference further down raises an error; on every other version the | ||||||
|  | -- guard is skipped and globals stay reachable. | ||||||
|  | local version = _VERSION | ||||||
|  | if version == "Lua 5.2" then _ENV = nil end | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | -- pattern that matches exactly one character, whatever it is | ||||||
|  | local any = m.P(1) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | -- Pre-defined names (looked up by '%name'); starts with only 'nl', | ||||||
|  | -- the remaining entries are filled in by updatelocale() | ||||||
|  | local Predef = { nl = m.P"\n" } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | -- memoization tables, (re)created by updatelocale(): | ||||||
|  | -- 'mem' is used by match, 'fmem' by find and 'gmem' by gsub | ||||||
|  | local mem | ||||||
|  | local fmem | ||||||
|  | local gmem | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | -- Reloads the locale-dependent character classes into Predef, derives | ||||||
|  | -- the one-letter aliases and their complements, and restarts memoization. | ||||||
|  | local function updatelocale () | ||||||
|  |   mm.locale(Predef) | ||||||
|  |   -- each entry: { short alias, complement alias, class name in Predef } | ||||||
|  |   local aliases = { | ||||||
|  |     { "a", "A", "alpha"  }, | ||||||
|  |     { "c", "C", "cntrl"  }, | ||||||
|  |     { "d", "D", "digit"  }, | ||||||
|  |     { "g", "G", "graph"  }, | ||||||
|  |     { "l", "L", "lower"  }, | ||||||
|  |     { "p", "P", "punct"  }, | ||||||
|  |     { "s", "S", "space"  }, | ||||||
|  |     { "u", "U", "upper"  }, | ||||||
|  |     { "w", "W", "alnum"  }, | ||||||
|  |     { "x", "X", "xdigit" }, | ||||||
|  |   } | ||||||
|  |   -- numeric loop on purpose: no global iterator is available here | ||||||
|  |   for n = 1, #aliases do | ||||||
|  |     local entry = aliases[n] | ||||||
|  |     local short, negated, class = entry[1], entry[2], entry[3] | ||||||
|  |     Predef[short] = Predef[class] | ||||||
|  |     Predef[negated] = any - Predef[short] | ||||||
|  |   end | ||||||
|  |   -- restart memoization; the three caches share one weak-valued metatable | ||||||
|  |   local weak = {__mode = "v"} | ||||||
|  |   mem = setmetatable({}, weak) | ||||||
|  |   fmem = setmetatable({}, weak) | ||||||
|  |   gmem = setmetatable({}, weak) | ||||||
|  | end | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | -- populate Predef and create the memoization tables at load time | ||||||
|  | updatelocale() | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | -- Debugging aid: a pattern that prints the current position and the part | ||||||
|  | -- of the subject before it, then returns the same position (so it matches | ||||||
|  | -- without consuming input). Not referenced by the rest of this module. | ||||||
|  | local I = m.P(function (s,i) print(i, s:sub(1, i-1)); return i end) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | -- Returns the entry 'id' of the user-supplied definitions table 'defs'. | ||||||
|  | -- Raises "undefined name: <id>" when 'defs' is missing or has no | ||||||
|  | -- (truthy) entry for 'id'. | ||||||
|  | local function getdef (id, defs) | ||||||
|  |   local value | ||||||
|  |   if defs then value = defs[id] end | ||||||
|  |   if value then return value end | ||||||
|  |   error("undefined name: " .. id) | ||||||
|  | end | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | -- Raises a "pattern error near '...'" error quoting the subject 's' from | ||||||
|  | -- position 'i'; the excerpt is cut after 21 characters (with a trailing | ||||||
|  | -- "...") unless the rest of the subject is short enough to show whole. | ||||||
|  | -- The error is reported at level 2, i.e. attributed to the caller. | ||||||
|  | local function patt_error (s, i) | ||||||
|  |   local excerpt | ||||||
|  |   if #s < i + 20 then | ||||||
|  |     excerpt = s:sub(i) | ||||||
|  |   else | ||||||
|  |     excerpt = s:sub(i,i+20) .. "..." | ||||||
|  |   end | ||||||
|  |   error(("pattern error near '%s'"):format(excerpt), 2) | ||||||
|  | end | ||||||
|  | 
 | ||||||
|  | -- Builds the pattern 'p' repeated exactly 'n' times by binary | ||||||
|  | -- exponentiation: the running power is squared at every step and | ||||||
|  | -- multiplied into the result whenever the current bit of 'n' is set. | ||||||
|  | -- 'n' is halved with '/', so the tests use '>= 1' rather than '== 1' | ||||||
|  | -- to stay correct when the halves become fractional. | ||||||
|  | local function mult (p, n) | ||||||
|  |   local result = mm.P(true)   -- empty pattern: neutral element of '*' | ||||||
|  |   local power, left = p, n | ||||||
|  |   while left >= 1 do | ||||||
|  |     if left%2 >= 1 then | ||||||
|  |       result = result * power | ||||||
|  |     end | ||||||
|  |     power = power * power | ||||||
|  |     left = left/2 | ||||||
|  |   end | ||||||
|  |   return result | ||||||
|  | end | ||||||
|  | 
 | ||||||
|  | -- Match-time check used by '=name' back references: succeeds when the | ||||||
|  | -- subject 's' contains the string 'c' starting at position 'i', returning | ||||||
|  | -- the position right after it; returns nil when 'c' is not a string or | ||||||
|  | -- the text differs. | ||||||
|  | local function equalcap (s, i, c) | ||||||
|  |   if type(c) == "string" then | ||||||
|  |     local stop = i + #c | ||||||
|  |     if s:sub(i, stop - 1) == c then return stop end | ||||||
|  |   end | ||||||
|  |   return nil | ||||||
|  | end | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | -- optional blanks: any run of spaces and '--' comments (up to end of line) | ||||||
|  | local S = (Predef.space + "--" * (any - Predef.nl)^0)^0 | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | -- identifier: a letter or underscore followed by letters, digits or | ||||||
|  | -- underscores (no capture yet; see the reassignment below) | ||||||
|  | local name = m.R("AZ", "az", "__") * m.R("AZ", "az", "__", "09")^0 | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | -- the '<-' of a rule definition, with optional blanks before it | ||||||
|  | local arrow = S * "<-" | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | -- what may come right after a sequence: a choice bar, a closing | ||||||
|  | -- delimiter, the start of the next rule definition, or end of input; | ||||||
|  | -- built with the non-capturing 'name' on purpose | ||||||
|  | local seq_follow = m.P"/" + ")" + "}" + ":}" + "~}" + "|}" + (name * arrow) + -1 | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | -- from here on 'name' captures the identifier it matches | ||||||
|  | name = m.C(name) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | -- a defined name only has meaning in a given environment: the name is | ||||||
|  | -- captured together with the first extra argument given to match | ||||||
|  | -- (the 'defs' table passed along by compile) | ||||||
|  | local Def = name * m.Carg(1) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | -- decimal number followed by optional blanks, captured through tonumber | ||||||
|  | local num = m.C(m.R"09"^1) * S / tonumber | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | -- single- or double-quoted literal; captures the text between the quotes | ||||||
|  | -- (there is no escape mechanism for the quote character itself) | ||||||
|  | local String = "'" * m.C((any - "'")^0) * "'" + | ||||||
|  |                '"' * m.C((any - '"')^0) * '"' | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | -- '%name': resolved first in the user definitions (when given), then in | ||||||
|  | -- the predefined classes; raises an error when neither knows the name | ||||||
|  | local defined = "%" * Def / function (c,Defs) | ||||||
|  |   local found = (Defs and Defs[c]) or Predef[c] | ||||||
|  |   if found then return found end | ||||||
|  |   error ("name '" .. c .. "' undefined") | ||||||
|  | end | ||||||
|  | 
 | ||||||
|  | -- 'x-y' inside a class: the substitution capture drops the '-' so that | ||||||
|  | -- the two-character string "xy" is handed to mm.R | ||||||
|  | local Range = m.Cs(any * (m.P"-"/"") * (any - "]")) / mm.R | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | -- one class member: a '%name', a range, or a single literal character | ||||||
|  | local item = defined + Range + m.C(any) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | -- '[...]' character class. The first item is taken unconditionally (so it | ||||||
|  | -- may be ']'); the items are folded together with mt.__add, and the result | ||||||
|  | -- is complemented when the class starts with '^'. | ||||||
|  | local Class = | ||||||
|  |     "[" | ||||||
|  |   * (m.C(m.P"^"^-1))    -- optional complement symbol | ||||||
|  |   * m.Cf(item * (item - "]")^0, mt.__add) / | ||||||
|  |                           function (c, p) return c == "^" and any - p or p end | ||||||
|  |   * "]" | ||||||
|  | 
 | ||||||
|  | -- Stores rule 'k' with body 'exp' in the grammar table 't' and returns | ||||||
|  | -- 't'; raises an error when 't' already holds an entry for 'k'. | ||||||
|  | local function adddef (t, k, exp) | ||||||
|  |   if t[k] then error("'"..k.."' already defined as a rule") end | ||||||
|  |   t[k] = exp | ||||||
|  |   return t | ||||||
|  | end | ||||||
|  | 
 | ||||||
|  | -- Starts a grammar table from its first rule: the rule name goes in | ||||||
|  | -- slot 1 and the rule body is stored under that name. | ||||||
|  | local function firstdef (n, r) | ||||||
|  |   local grammar = {n} | ||||||
|  |   return adddef(grammar, n, r) | ||||||
|  | end | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | -- Turns the rule name 'n' into a grammar variable reference. 'b' is the | ||||||
|  | -- value of the "G" back capture; when it is false the name appears | ||||||
|  | -- outside any grammar and an error is raised. | ||||||
|  | local function NT (n, b) | ||||||
|  |   if b then return mm.V(n) end | ||||||
|  |   error("rule '"..n.."' used outside a grammar") | ||||||
|  | end | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | -- Grammar that parses 're' syntax; the capture produced by each rule is | ||||||
|  | -- the pattern (built through 'mm' and the pattern metamethods in 'mt') | ||||||
|  | -- corresponding to the text it matched. | ||||||
|  | local exp = m.P{ "Exp", | ||||||
|  |   -- a whole grammar, or an ordered choice of sequences folded with '+' | ||||||
|  |   Exp = S * ( m.V"Grammar" | ||||||
|  |             + m.Cf(m.V"Seq" * ("/" * S * m.V"Seq")^0, mt.__add) ); | ||||||
|  |   -- concatenation of prefixes folded with '*', seeded with the empty | ||||||
|  |   -- pattern; what follows must be in seq_follow or patt_error is raised | ||||||
|  |   Seq = m.Cf(m.Cc(m.P"") * m.V"Prefix"^0 , mt.__mul) | ||||||
|  |         * (#seq_follow + patt_error); | ||||||
|  |   -- '&p' and '!p' predicates, applied through the '#' and unary '-' | ||||||
|  |   -- metamethods | ||||||
|  |   Prefix = "&" * S * m.V"Prefix" / mt.__len | ||||||
|  |          + "!" * S * m.V"Prefix" / mt.__unm | ||||||
|  |          + m.V"Suffix"; | ||||||
|  |   -- a primary followed by any number of suffix operators; every operator | ||||||
|  |   -- yields an (argument, function) pair and the fold calls f(a, b) | ||||||
|  |   Suffix = m.Cf(m.V"Primary" * S * | ||||||
|  |           ( ( m.P"+" * m.Cc(1, mt.__pow) | ||||||
|  |             + m.P"*" * m.Cc(0, mt.__pow) | ||||||
|  |             + m.P"?" * m.Cc(-1, mt.__pow) | ||||||
|  |             + "^" * ( m.Cg(num * m.Cc(mult)) | ||||||
|  |                     + m.Cg(m.C(m.S"+-" * m.R"09"^1) * m.Cc(mt.__pow)) | ||||||
|  |                     ) | ||||||
|  |             + "->" * S * ( m.Cg((String + num) * m.Cc(mt.__div)) | ||||||
|  |                          + m.P"{}" * m.Cc(nil, m.Ct) | ||||||
|  |                          + m.Cg(Def / getdef * m.Cc(mt.__div)) | ||||||
|  |                          ) | ||||||
|  |             + "=>" * S * m.Cg(Def / getdef * m.Cc(m.Cmt)) | ||||||
|  |             ) * S | ||||||
|  |           )^0, function (a,b,f) return f(a,b) end ); | ||||||
|  |   -- atoms: parenthesized expression, literal, class, '%name', the various | ||||||
|  |   -- capture brackets, '=name' back reference, '.', and rule references | ||||||
|  |   -- (which NT accepts only when the "G" back capture is true) | ||||||
|  |   Primary = "(" * m.V"Exp" * ")" | ||||||
|  |             + String / mm.P | ||||||
|  |             + Class | ||||||
|  |             + defined | ||||||
|  |             + "{:" * (name * ":" + m.Cc(nil)) * m.V"Exp" * ":}" / | ||||||
|  |                      function (n, p) return mm.Cg(p, n) end | ||||||
|  |             + "=" * name / function (n) return mm.Cmt(mm.Cb(n), equalcap) end | ||||||
|  |             + m.P"{}" / mm.Cp | ||||||
|  |             + "{~" * m.V"Exp" * "~}" / mm.Cs | ||||||
|  |             + "{|" * m.V"Exp" * "|}" / mm.Ct | ||||||
|  |             + "{" * m.V"Exp" * "}" / mm.C | ||||||
|  |             + m.P"." * m.Cc(any) | ||||||
|  |             + (name * -arrow + "<" * name * ">") * m.Cb("G") / NT; | ||||||
|  |   -- one rule: captures the rule name and the pattern of its body | ||||||
|  |   Definition = name * arrow * m.V"Exp"; | ||||||
|  |   -- sets the "G" group to true, then folds the definitions into a table | ||||||
|  |   -- (firstdef for the first rule, adddef for the rest) handed to mm.P | ||||||
|  |   Grammar = m.Cg(m.Cc(true), "G") * | ||||||
|  |             m.Cf(m.V"Definition" / firstdef * m.Cg(m.V"Definition")^0, | ||||||
|  |               adddef) / mm.P | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | -- Entry point of the parser: skips leading blanks, starts with the "G" | ||||||
|  | -- group set to false (not inside a grammar), converts the parsed | ||||||
|  | -- expression with mm.P, and raises patt_error unless the whole subject | ||||||
|  | -- was consumed. | ||||||
|  | local pattern = S * m.Cg(m.Cc(false), "G") * exp / mm.P * (-any + patt_error) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | -- Compiles the 're' string 'p' into an LPeg pattern, resolving external | ||||||
|  | -- names in the optional table 'defs'. A value that is already a pattern | ||||||
|  | -- is returned untouched. Raises "incorrect pattern" (at level 3) when the | ||||||
|  | -- parser produces no result. | ||||||
|  | local function compile (p, defs) | ||||||
|  |   if mm.type(p) ~= "pattern" then | ||||||
|  |     local compiled = pattern:match(p, 1, defs) | ||||||
|  |     if not compiled then error("incorrect pattern", 3) end | ||||||
|  |     return compiled | ||||||
|  |   end | ||||||
|  |   return p   -- already compiled | ||||||
|  | end | ||||||
|  | 
 | ||||||
|  | -- Matches subject 's' against 'p' starting at position 'i' (default 1) | ||||||
|  | -- and returns whatever the compiled pattern's match returns. Compiled | ||||||
|  | -- patterns are memoized in 'mem', keyed by 'p'. | ||||||
|  | local function match (s, p, i) | ||||||
|  |   local compiled = mem[p] | ||||||
|  |   if not compiled then | ||||||
|  |     compiled = compile(p) | ||||||
|  |     mem[p] = compiled | ||||||
|  |   end | ||||||
|  |   local start = i or 1 | ||||||
|  |   return compiled:match(s, start) | ||||||
|  | end | ||||||
|  | 
 | ||||||
|  | -- Searches 's' for the first occurrence of 'p' at or after position 'i' | ||||||
|  | -- (default 1). Returns the first and last positions of the occurrence, | ||||||
|  | -- or the failing match result alone when nothing is found. The search | ||||||
|  | -- pattern is memoized in 'fmem', keyed by 'p'. | ||||||
|  | local function find (s, p, i) | ||||||
|  |   local searcher = fmem[p] | ||||||
|  |   if not searcher then | ||||||
|  |     -- '/ 0' discards the captures of 'p'; only the positions are kept | ||||||
|  |     local body = compile(p) / 0 | ||||||
|  |     -- try 'p' here, otherwise skip one character and try again | ||||||
|  |     searcher = mm.P{ mm.Cp() * body * mm.Cp() + 1 * mm.V(1) } | ||||||
|  |     fmem[p] = searcher | ||||||
|  |   end | ||||||
|  |   local first, after = searcher:match(s, i or 1) | ||||||
|  |   if not first then return first end | ||||||
|  |   return first, after - 1 | ||||||
|  | end | ||||||
|  | 
 | ||||||
|  | -- Returns 's' with every occurrence of 'p' replaced according to 'rep', | ||||||
|  | -- using a substitution capture over the whole subject. Substitution | ||||||
|  | -- patterns are memoized per pattern and per replacement in 'gmem'. | ||||||
|  | local function gsub (s, p, rep) | ||||||
|  |   local by_rep = gmem[p] | ||||||
|  |   if not by_rep then by_rep = {} end | ||||||
|  |   -- stored back unconditionally, while 'by_rep' is held in a local, so | ||||||
|  |   -- the weak table cannot lose the entry while this call is running | ||||||
|  |   gmem[p] = by_rep | ||||||
|  |   local substitute = by_rep[rep] | ||||||
|  |   if not substitute then | ||||||
|  |     local compiled = compile(p) | ||||||
|  |     substitute = mm.Cs((compiled / rep + 1)^0) | ||||||
|  |     by_rep[rep] = substitute | ||||||
|  |   end | ||||||
|  |   return substitute:match(s) | ||||||
|  | end | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | -- exported names | ||||||
|  | -- public interface of the module | ||||||
|  | local re = { | ||||||
|  |   compile = compile, | ||||||
|  |   match = match, | ||||||
|  |   find = find, | ||||||
|  |   gsub = gsub, | ||||||
|  |   updatelocale = updatelocale, | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | -- on Lua 5.1 only, the module is additionally published as the global 're' | ||||||
|  | if version == "Lua 5.1" then _G.re = re end | ||||||
|  | 
 | ||||||
|  | return re | ||||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
		Loading…
	
		Reference in New Issue
	
	 tic
						tic