This commit is contained in:
		
							
								
								
									
										96
									
								
								src/ext/lpeg/HISTORY
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										96
									
								
								src/ext/lpeg/HISTORY
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,96 @@
 | 
			
		||||
HISTORY for LPeg 1.0
 | 
			
		||||
 | 
			
		||||
* Changes from version 0.12 to 1.0
 | 
			
		||||
  ---------------------------------
 | 
			
		||||
  + group "names" can be any Lua value
 | 
			
		||||
  + some bugs fixed
 | 
			
		||||
  + other small improvements
 | 
			
		||||
 | 
			
		||||
* Changes from version 0.11 to 0.12
 | 
			
		||||
  ---------------------------------
 | 
			
		||||
  + no "unsigned short" limit for pattern sizes
 | 
			
		||||
  + match-time captures considered nullable
 | 
			
		||||
  + some bugs fixed
 | 
			
		||||
 | 
			
		||||
* Changes from version 0.10 to 0.11
 | 
			
		||||
  -------------------------------  
 | 
			
		||||
  + complete reimplementation of the code generator
 | 
			
		||||
  + new syntax for table captures
 | 
			
		||||
  + new functions in module 're'
 | 
			
		||||
  + other small improvements
 | 
			
		||||
 | 
			
		||||
* Changes from version 0.9 to 0.10
 | 
			
		||||
  -------------------------------  
 | 
			
		||||
  + backtrack stack has configurable size
 | 
			
		||||
  + better error messages
 | 
			
		||||
  + Notation for non-terminals in 're' back to A instead of <A>
 | 
			
		||||
  + experimental look-behind pattern
 | 
			
		||||
  + support for external extensions
 | 
			
		||||
  + works with Lua 5.2
 | 
			
		||||
  + consumes less C stack
 | 
			
		||||
 | 
			
		||||
  - "and" predicates do not keep captures
 | 
			
		||||
 | 
			
		||||
* Changes from version 0.8 to 0.9
 | 
			
		||||
  -------------------------------  
 | 
			
		||||
  + The accumulator capture was replaced by a fold capture;
 | 
			
		||||
    programs that used the old 'lpeg.Ca' will need small changes.
 | 
			
		||||
  + Some support for character classes from old C locales.
 | 
			
		||||
  + A new named-group capture.
 | 
			
		||||
 | 
			
		||||
* Changes from version 0.7 to 0.8
 | 
			
		||||
  -------------------------------  
 | 
			
		||||
  + New "match-time" capture.
 | 
			
		||||
  + New "argument capture" that allows passing arguments into the pattern.
 | 
			
		||||
  + Better documentation for 're'.
 | 
			
		||||
  + Several small improvements for 're'.
 | 
			
		||||
  + The 're' module has an incompatibility with previous versions: 
 | 
			
		||||
    now, any use of a non-terminal must be enclosed in angle brackets 
 | 
			
		||||
    (like <B>).
 | 
			
		||||
 | 
			
		||||
* Changes from version 0.6 to 0.7
 | 
			
		||||
  -------------------------------  
 | 
			
		||||
  + Several improvements in module 're':
 | 
			
		||||
    - better documentation;
 | 
			
		||||
    - support for most captures (all but accumulator);
 | 
			
		||||
    - limited repetitions p{n,m}.
 | 
			
		||||
  + Small improvements in efficiency.
 | 
			
		||||
  + Several small bugs corrected (special thanks to Hans Hagen 
 | 
			
		||||
    and Taco Hoekwater).
 | 
			
		||||
 | 
			
		||||
* Changes from version 0.5 to 0.6
 | 
			
		||||
  -------------------------------  
 | 
			
		||||
  + Support for non-numeric indices in grammars.
 | 
			
		||||
  + Some bug fixes (thanks to the luatex team).
 | 
			
		||||
  + Some new optimizations; (thanks to Mike Pall).
 | 
			
		||||
  + A new page layout (thanks to Andre Carregal).
 | 
			
		||||
  + Minimal documentation for module 're'.
 | 
			
		||||
 | 
			
		||||
* Changes from version 0.4 to 0.5
 | 
			
		||||
  -------------------------------  
 | 
			
		||||
  + Several optimizations.
 | 
			
		||||
  + lpeg.P now accepts booleans.
 | 
			
		||||
  + Some new examples.
 | 
			
		||||
  + A proper license.
 | 
			
		||||
  + Several small improvements.
 | 
			
		||||
 | 
			
		||||
* Changes from version 0.3 to 0.4
 | 
			
		||||
  -------------------------------  
 | 
			
		||||
  + Static check for loops in repetitions and grammars.
 | 
			
		||||
  + Removed label option in captures.
 | 
			
		||||
  + The implementation of captures uses less memory.
 | 
			
		||||
 | 
			
		||||
* Changes from version 0.2 to 0.3
 | 
			
		||||
  -------------------------------  
 | 
			
		||||
  + User-defined patterns in Lua.
 | 
			
		||||
  + Several new captures.
 | 
			
		||||
 | 
			
		||||
* Changes from version 0.1 to 0.2
 | 
			
		||||
  -------------------------------  
 | 
			
		||||
  + Several small corrections.
 | 
			
		||||
  + Handles embedded zeros like any other character.
 | 
			
		||||
  + Capture "name" can be any Lua value.
 | 
			
		||||
  + Unlimited number of captures.
 | 
			
		||||
  + Match gets an optional initial position.
 | 
			
		||||
 | 
			
		||||
(end of HISTORY)
 | 
			
		||||
							
								
								
									
										537
									
								
								src/ext/lpeg/lpcap.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										537
									
								
								src/ext/lpeg/lpcap.c
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,537 @@
 | 
			
		||||
/*
 | 
			
		||||
** $Id: lpcap.c,v 1.6 2015/06/15 16:09:57 roberto Exp $
 | 
			
		||||
** Copyright 2007, Lua.org & PUC-Rio  (see 'lpeg.html' for license)
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
#include "lua.h"
 | 
			
		||||
#include "lauxlib.h"
 | 
			
		||||
 | 
			
		||||
#include "lpcap.h"
 | 
			
		||||
#include "lptypes.h"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/* kind tag stored in a capture entry */
#define captype(cap)	((cap)->kind)
 | 
			
		||||
 | 
			
		||||
/* true when the entry is a 'Cclose' marker */
#define isclosecap(cap)	(captype(cap) == Cclose)
 | 
			
		||||
 | 
			
		||||
/* 's + siz - 1'; as 'siz' is "size + 1" for a full capture, this is the
   address where that capture's match ends */
#define closeaddr(c)	((c)->s + (c)->siz - 1)
 | 
			
		||||
 | 
			
		||||
/* true when the entry has a non-zero 'siz' (a full, single-entry capture) */
#define isfullcap(cap)	((cap)->siz != 0)
 | 
			
		||||
 | 
			
		||||
/* push on the Lua stack entry 'v' (raw access) of the table found at
   stack index 'ktableidx(ptop)' */
#define getfromktable(cs,v)	lua_rawgeti((cs)->L, ktableidx((cs)->ptop), v)
 | 
			
		||||
 | 
			
		||||
/* push the ktable value selected by the 'idx' field of the current capture */
#define pushluaval(cs)		getfromktable(cs, (cs)->cap->idx)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
** Put at the cache for Lua values the value indexed by 'v' in ktable
 | 
			
		||||
** of the running pattern (if it is not there yet); returns its index.
 | 
			
		||||
*/
 | 
			
		||||
static int updatecache (CapState *cs, int v) {
 | 
			
		||||
  int idx = cs->ptop + 1;  /* stack index of cache for Lua values */
 | 
			
		||||
  if (v != cs->valuecached) {  /* not there? */
 | 
			
		||||
    getfromktable(cs, v);  /* get value from 'ktable' */
 | 
			
		||||
    lua_replace(cs->L, idx);  /* put it at reserved stack position */
 | 
			
		||||
    cs->valuecached = v;  /* keep track of what is there */
 | 
			
		||||
  }
 | 
			
		||||
  return idx;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
static int pushcapture (CapState *cs);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
** Goes back in a list of captures looking for an open capture
 | 
			
		||||
** corresponding to a close
 | 
			
		||||
*/
 | 
			
		||||
static Capture *findopen (Capture *cap) {
 | 
			
		||||
  int n = 0;  /* number of closes waiting an open */
 | 
			
		||||
  for (;;) {
 | 
			
		||||
    cap--;
 | 
			
		||||
    if (isclosecap(cap)) n++;  /* one more open to skip */
 | 
			
		||||
    else if (!isfullcap(cap))
 | 
			
		||||
      if (n-- == 0) return cap;
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
** Go to the next capture
 | 
			
		||||
*/
 | 
			
		||||
static void nextcap (CapState *cs) {
 | 
			
		||||
  Capture *cap = cs->cap;
 | 
			
		||||
  if (!isfullcap(cap)) {  /* not a single capture? */
 | 
			
		||||
    int n = 0;  /* number of opens waiting a close */
 | 
			
		||||
    for (;;) {  /* look for corresponding close */
 | 
			
		||||
      cap++;
 | 
			
		||||
      if (isclosecap(cap)) {
 | 
			
		||||
        if (n-- == 0) break;
 | 
			
		||||
      }
 | 
			
		||||
      else if (!isfullcap(cap)) n++;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  cs->cap = cap + 1;  /* + 1 to skip last close (or entire single capture) */
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
** Push on the Lua stack all values generated by nested captures inside
 | 
			
		||||
** the current capture. Returns number of values pushed. 'addextra'
 | 
			
		||||
** makes it push the entire match after all captured values. The
 | 
			
		||||
** entire match is pushed also if there are no other nested values,
 | 
			
		||||
** so the function never returns zero.
 | 
			
		||||
*/
 | 
			
		||||
static int pushnestedvalues (CapState *cs, int addextra) {
 | 
			
		||||
  Capture *co = cs->cap;
 | 
			
		||||
  if (isfullcap(cs->cap++)) {  /* no nested captures? */
 | 
			
		||||
    lua_pushlstring(cs->L, co->s, co->siz - 1);  /* push whole match */
 | 
			
		||||
    return 1;  /* that is it */
 | 
			
		||||
  }
 | 
			
		||||
  else {
 | 
			
		||||
    int n = 0;
 | 
			
		||||
    while (!isclosecap(cs->cap))  /* repeat for all nested patterns */
 | 
			
		||||
      n += pushcapture(cs);
 | 
			
		||||
    if (addextra || n == 0) {  /* need extra? */
 | 
			
		||||
      lua_pushlstring(cs->L, co->s, cs->cap->s - co->s);  /* push whole match */
 | 
			
		||||
      n++;
 | 
			
		||||
    }
 | 
			
		||||
    cs->cap++;  /* skip close entry */
 | 
			
		||||
    return n;
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
** Push only the first value generated by nested captures
 | 
			
		||||
*/
 | 
			
		||||
static void pushonenestedvalue (CapState *cs) {
 | 
			
		||||
  int n = pushnestedvalues(cs, 0);
 | 
			
		||||
  if (n > 1)
 | 
			
		||||
    lua_pop(cs->L, n - 1);  /* pop extra values */
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
** Try to find a named group capture with the name given at the top of
 | 
			
		||||
** the stack; goes backward from 'cap'.
 | 
			
		||||
*/
 | 
			
		||||
static Capture *findback (CapState *cs, Capture *cap) {
 | 
			
		||||
  lua_State *L = cs->L;
 | 
			
		||||
  while (cap-- > cs->ocap) {  /* repeat until end of list */
 | 
			
		||||
    if (isclosecap(cap))
 | 
			
		||||
      cap = findopen(cap);  /* skip nested captures */
 | 
			
		||||
    else if (!isfullcap(cap))
 | 
			
		||||
      continue; /* opening an enclosing capture: skip and get previous */
 | 
			
		||||
    if (captype(cap) == Cgroup) {
 | 
			
		||||
      getfromktable(cs, cap->idx);  /* get group name */
 | 
			
		||||
      if (lp_equal(L, -2, -1)) {  /* right group? */
 | 
			
		||||
        lua_pop(L, 2);  /* remove reference name and group name */
 | 
			
		||||
        return cap;
 | 
			
		||||
      }
 | 
			
		||||
      else lua_pop(L, 1);  /* remove group name */
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  luaL_error(L, "back reference '%s' not found", lua_tostring(L, -1));
 | 
			
		||||
  return NULL;  /* to avoid warnings */
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
** Back-reference capture. Return number of values pushed.
 | 
			
		||||
*/
 | 
			
		||||
static int backrefcap (CapState *cs) {
 | 
			
		||||
  int n;
 | 
			
		||||
  Capture *curr = cs->cap;
 | 
			
		||||
  pushluaval(cs);  /* reference name */
 | 
			
		||||
  cs->cap = findback(cs, curr);  /* find corresponding group */
 | 
			
		||||
  n = pushnestedvalues(cs, 0);  /* push group's values */
 | 
			
		||||
  cs->cap = curr + 1;
 | 
			
		||||
  return n;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
** Table capture: creates a new table and populates it with nested
 | 
			
		||||
** captures.
 | 
			
		||||
*/
 | 
			
		||||
static int tablecap (CapState *cs) {
 | 
			
		||||
  lua_State *L = cs->L;
 | 
			
		||||
  int n = 0;
 | 
			
		||||
  lua_newtable(L);
 | 
			
		||||
  if (isfullcap(cs->cap++))
 | 
			
		||||
    return 1;  /* table is empty */
 | 
			
		||||
  while (!isclosecap(cs->cap)) {
 | 
			
		||||
    if (captype(cs->cap) == Cgroup && cs->cap->idx != 0) {  /* named group? */
 | 
			
		||||
      pushluaval(cs);  /* push group name */
 | 
			
		||||
      pushonenestedvalue(cs);
 | 
			
		||||
      lua_settable(L, -3);
 | 
			
		||||
    }
 | 
			
		||||
    else {  /* not a named group */
 | 
			
		||||
      int i;
 | 
			
		||||
      int k = pushcapture(cs);
 | 
			
		||||
      for (i = k; i > 0; i--)  /* store all values into table */
 | 
			
		||||
        lua_rawseti(L, -(i + 1), n + i);
 | 
			
		||||
      n += k;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  cs->cap++;  /* skip close entry */
 | 
			
		||||
  return 1;  /* number of values pushed (only the table) */
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
** Table-query capture
 | 
			
		||||
*/
 | 
			
		||||
static int querycap (CapState *cs) {
 | 
			
		||||
  int idx = cs->cap->idx;
 | 
			
		||||
  pushonenestedvalue(cs);  /* get nested capture */
 | 
			
		||||
  lua_gettable(cs->L, updatecache(cs, idx));  /* query cap. value at table */
 | 
			
		||||
  if (!lua_isnil(cs->L, -1))
 | 
			
		||||
    return 1;
 | 
			
		||||
  else {  /* no value */
 | 
			
		||||
    lua_pop(cs->L, 1);  /* remove nil */
 | 
			
		||||
    return 0;
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
** Fold capture
 | 
			
		||||
*/
 | 
			
		||||
static int foldcap (CapState *cs) {
 | 
			
		||||
  int n;
 | 
			
		||||
  lua_State *L = cs->L;
 | 
			
		||||
  int idx = cs->cap->idx;
 | 
			
		||||
  if (isfullcap(cs->cap++) ||  /* no nested captures? */
 | 
			
		||||
      isclosecap(cs->cap) ||  /* no nested captures (large subject)? */
 | 
			
		||||
      (n = pushcapture(cs)) == 0)  /* nested captures with no values? */
 | 
			
		||||
    return luaL_error(L, "no initial value for fold capture");
 | 
			
		||||
  if (n > 1)
 | 
			
		||||
    lua_pop(L, n - 1);  /* leave only one result for accumulator */
 | 
			
		||||
  while (!isclosecap(cs->cap)) {
 | 
			
		||||
    lua_pushvalue(L, updatecache(cs, idx));  /* get folding function */
 | 
			
		||||
    lua_insert(L, -2);  /* put it before accumulator */
 | 
			
		||||
    n = pushcapture(cs);  /* get next capture's values */
 | 
			
		||||
    lua_call(L, n + 1, 1);  /* call folding function */
 | 
			
		||||
  }
 | 
			
		||||
  cs->cap++;  /* skip close entry */
 | 
			
		||||
  return 1;  /* only accumulator left on the stack */
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
** Function capture
 | 
			
		||||
*/
 | 
			
		||||
static int functioncap (CapState *cs) {
 | 
			
		||||
  int n;
 | 
			
		||||
  int top = lua_gettop(cs->L);
 | 
			
		||||
  pushluaval(cs);  /* push function */
 | 
			
		||||
  n = pushnestedvalues(cs, 0);  /* push nested captures */
 | 
			
		||||
  lua_call(cs->L, n, LUA_MULTRET);  /* call function */
 | 
			
		||||
  return lua_gettop(cs->L) - top;  /* return function's results */
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
** Select capture
 | 
			
		||||
*/
 | 
			
		||||
static int numcap (CapState *cs) {
 | 
			
		||||
  int idx = cs->cap->idx;  /* value to select */
 | 
			
		||||
  if (idx == 0) {  /* no values? */
 | 
			
		||||
    nextcap(cs);  /* skip entire capture */
 | 
			
		||||
    return 0;  /* no value produced */
 | 
			
		||||
  }
 | 
			
		||||
  else {
 | 
			
		||||
    int n = pushnestedvalues(cs, 0);
 | 
			
		||||
    if (n < idx)  /* invalid index? */
 | 
			
		||||
      return luaL_error(cs->L, "no capture '%d'", idx);
 | 
			
		||||
    else {
 | 
			
		||||
      lua_pushvalue(cs->L, -(n - idx + 1));  /* get selected capture */
 | 
			
		||||
      lua_replace(cs->L, -(n + 1));  /* put it in place of 1st capture */
 | 
			
		||||
      lua_pop(cs->L, n - 1);  /* remove other captures */
 | 
			
		||||
      return 1;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
** Return the stack index of the first runtime capture in the given
 | 
			
		||||
** list of captures (or zero if no runtime captures)
 | 
			
		||||
*/
 | 
			
		||||
int finddyncap (Capture *cap, Capture *last) {
 | 
			
		||||
  for (; cap < last; cap++) {
 | 
			
		||||
    if (cap->kind == Cruntime)
 | 
			
		||||
      return cap->idx;  /* stack position of first capture */
 | 
			
		||||
  }
 | 
			
		||||
  return 0;  /* no dynamic captures in this segment */
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
** Calls a runtime capture. Returns number of captures removed by
 | 
			
		||||
** the call, including the initial Cgroup. (Captures to be added are
 | 
			
		||||
** on the Lua stack.)
 | 
			
		||||
*/
 | 
			
		||||
int runtimecap (CapState *cs, Capture *close, const char *s, int *rem) {
 | 
			
		||||
  int n, id;
 | 
			
		||||
  lua_State *L = cs->L;
 | 
			
		||||
  int otop = lua_gettop(L);
 | 
			
		||||
  Capture *open = findopen(close);
 | 
			
		||||
  assert(captype(open) == Cgroup);
 | 
			
		||||
  id = finddyncap(open, close);  /* get first dynamic capture argument */
 | 
			
		||||
  close->kind = Cclose;  /* closes the group */
 | 
			
		||||
  close->s = s;
 | 
			
		||||
  cs->cap = open; cs->valuecached = 0;  /* prepare capture state */
 | 
			
		||||
  luaL_checkstack(L, 4, "too many runtime captures");
 | 
			
		||||
  pushluaval(cs);  /* push function to be called */
 | 
			
		||||
  lua_pushvalue(L, SUBJIDX);  /* push original subject */
 | 
			
		||||
  lua_pushinteger(L, s - cs->s + 1);  /* push current position */
 | 
			
		||||
  n = pushnestedvalues(cs, 0);  /* push nested captures */
 | 
			
		||||
  lua_call(L, n + 2, LUA_MULTRET);  /* call dynamic function */
 | 
			
		||||
  if (id > 0) {  /* are there old dynamic captures to be removed? */
 | 
			
		||||
    int i;
 | 
			
		||||
    for (i = id; i <= otop; i++)
 | 
			
		||||
      lua_remove(L, id);  /* remove old dynamic captures */
 | 
			
		||||
    *rem = otop - id + 1;  /* total number of dynamic captures removed */
 | 
			
		||||
  }
 | 
			
		||||
  else
 | 
			
		||||
    *rem = 0;  /* no dynamic captures removed */
 | 
			
		||||
  return close - open;  /* number of captures of all kinds removed */
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
** Auxiliary structure for substitution and string captures: keep
 | 
			
		||||
** information about nested captures for future use, avoiding to push
 | 
			
		||||
** string results into Lua
 | 
			
		||||
** ('isstring' selects which member of the union 'u' is valid.)
*/
 | 
			
		||||
typedef struct StrAux {
 | 
			
		||||
  int isstring;  /* whether capture is a string */
 | 
			
		||||
  union {
 | 
			
		||||
    Capture *cp;  /* if not a string, respective capture */
 | 
			
		||||
    struct {  /* if it is a string... */
 | 
			
		||||
      const char *s;  /* ... starts here */
 | 
			
		||||
      const char *e;  /* ... ends here */
 | 
			
		||||
    } s;
 | 
			
		||||
  } u;
 | 
			
		||||
} StrAux;
 | 
			
		||||
 | 
			
		||||
/* size of the 'StrAux' array used by 'stringcap'; indices in a format
   string are a single decimal digit ('%0' to '%9') */
#define MAXSTRCAPS	10
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
** Collect values from current capture into array 'cps'. Current
 | 
			
		||||
** capture must be Cstring (first call) or Csimple (recursive calls).
 | 
			
		||||
** (In first call, fills %0 with whole match for Cstring.)
 | 
			
		||||
** Returns number of elements in the array that were filled.
 | 
			
		||||
*/
 | 
			
		||||
static int getstrcaps (CapState *cs, StrAux *cps, int n) {
 | 
			
		||||
  int k = n++;
 | 
			
		||||
  cps[k].isstring = 1;  /* get string value */
 | 
			
		||||
  cps[k].u.s.s = cs->cap->s;  /* starts here */
 | 
			
		||||
  if (!isfullcap(cs->cap++)) {  /* nested captures? */
 | 
			
		||||
    while (!isclosecap(cs->cap)) {  /* traverse them */
 | 
			
		||||
      if (n >= MAXSTRCAPS)  /* too many captures? */
 | 
			
		||||
        nextcap(cs);  /* skip extra captures (will not need them) */
 | 
			
		||||
      else if (captype(cs->cap) == Csimple)  /* string? */
 | 
			
		||||
        n = getstrcaps(cs, cps, n);  /* put info. into array */
 | 
			
		||||
      else {
 | 
			
		||||
        cps[n].isstring = 0;  /* not a string */
 | 
			
		||||
        cps[n].u.cp = cs->cap;  /* keep original capture */
 | 
			
		||||
        nextcap(cs);
 | 
			
		||||
        n++;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    cs->cap++;  /* skip close */
 | 
			
		||||
  }
 | 
			
		||||
  cps[k].u.s.e = closeaddr(cs->cap - 1);  /* ends here */
 | 
			
		||||
  return n;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
** add next capture value (which should be a string) to buffer 'b'
 | 
			
		||||
*/
 | 
			
		||||
static int addonestring (luaL_Buffer *b, CapState *cs, const char *what);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
** String capture: add result to buffer 'b' (instead of pushing
 | 
			
		||||
** it into the stack)
 | 
			
		||||
*/
 | 
			
		||||
static void stringcap (luaL_Buffer *b, CapState *cs) {
 | 
			
		||||
  StrAux cps[MAXSTRCAPS];
 | 
			
		||||
  int n;
 | 
			
		||||
  size_t len, i;
 | 
			
		||||
  const char *fmt;  /* format string */
 | 
			
		||||
  fmt = lua_tolstring(cs->L, updatecache(cs, cs->cap->idx), &len);
 | 
			
		||||
  n = getstrcaps(cs, cps, 0) - 1;  /* collect nested captures */
 | 
			
		||||
  for (i = 0; i < len; i++) {  /* traverse them */
 | 
			
		||||
    if (fmt[i] != '%')  /* not an escape? */
 | 
			
		||||
      luaL_addchar(b, fmt[i]);  /* add it to buffer */
 | 
			
		||||
    else if (fmt[++i] < '0' || fmt[i] > '9')  /* not followed by a digit? */
 | 
			
		||||
      luaL_addchar(b, fmt[i]);  /* add to buffer */
 | 
			
		||||
    else {
 | 
			
		||||
      int l = fmt[i] - '0';  /* capture index */
 | 
			
		||||
      if (l > n)
 | 
			
		||||
        luaL_error(cs->L, "invalid capture index (%d)", l);
 | 
			
		||||
      else if (cps[l].isstring)
 | 
			
		||||
        luaL_addlstring(b, cps[l].u.s.s, cps[l].u.s.e - cps[l].u.s.s);
 | 
			
		||||
      else {
 | 
			
		||||
        Capture *curr = cs->cap;
 | 
			
		||||
        cs->cap = cps[l].u.cp;  /* go back to evaluate that nested capture */
 | 
			
		||||
        if (!addonestring(b, cs, "capture"))
 | 
			
		||||
          luaL_error(cs->L, "no values in capture index %d", l);
 | 
			
		||||
        cs->cap = curr;  /* continue from where it stopped */
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
** Substitution capture: add result to buffer 'b'
 | 
			
		||||
*/
 | 
			
		||||
static void substcap (luaL_Buffer *b, CapState *cs) {
 | 
			
		||||
  const char *curr = cs->cap->s;
 | 
			
		||||
  if (isfullcap(cs->cap))  /* no nested captures? */
 | 
			
		||||
    luaL_addlstring(b, curr, cs->cap->siz - 1);  /* keep original text */
 | 
			
		||||
  else {
 | 
			
		||||
    cs->cap++;  /* skip open entry */
 | 
			
		||||
    while (!isclosecap(cs->cap)) {  /* traverse nested captures */
 | 
			
		||||
      const char *next = cs->cap->s;
 | 
			
		||||
      luaL_addlstring(b, curr, next - curr);  /* add text up to capture */
 | 
			
		||||
      if (addonestring(b, cs, "replacement"))
 | 
			
		||||
        curr = closeaddr(cs->cap - 1);  /* continue after match */
 | 
			
		||||
      else  /* no capture value */
 | 
			
		||||
        curr = next;  /* keep original text in final result */
 | 
			
		||||
    }
 | 
			
		||||
    luaL_addlstring(b, curr, cs->cap->s - curr);  /* add last piece of text */
 | 
			
		||||
  }
 | 
			
		||||
  cs->cap++;  /* go to next capture */
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
** Evaluates a capture and adds its first value to buffer 'b'; returns
 | 
			
		||||
** whether there was a value
 | 
			
		||||
*/
 | 
			
		||||
static int addonestring (luaL_Buffer *b, CapState *cs, const char *what) {
 | 
			
		||||
  switch (captype(cs->cap)) {
 | 
			
		||||
    case Cstring:
 | 
			
		||||
      stringcap(b, cs);  /* add capture directly to buffer */
 | 
			
		||||
      return 1;
 | 
			
		||||
    case Csubst:
 | 
			
		||||
      substcap(b, cs);  /* add capture directly to buffer */
 | 
			
		||||
      return 1;
 | 
			
		||||
    default: {
 | 
			
		||||
      lua_State *L = cs->L;
 | 
			
		||||
      int n = pushcapture(cs);
 | 
			
		||||
      if (n > 0) {
 | 
			
		||||
        if (n > 1) lua_pop(L, n - 1);  /* only one result */
 | 
			
		||||
        if (!lua_isstring(L, -1))
 | 
			
		||||
          luaL_error(L, "invalid %s value (a %s)", what, luaL_typename(L, -1));
 | 
			
		||||
        luaL_addvalue(b);
 | 
			
		||||
      }
 | 
			
		||||
      return n;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
** Push all values of the current capture into the stack; returns
 | 
			
		||||
** number of values pushed
 | 
			
		||||
*/
 | 
			
		||||
static int pushcapture (CapState *cs) {
 | 
			
		||||
  lua_State *L = cs->L;
 | 
			
		||||
  luaL_checkstack(L, 4, "too many captures");
 | 
			
		||||
  switch (captype(cs->cap)) {
 | 
			
		||||
    case Cposition: {
 | 
			
		||||
      lua_pushinteger(L, cs->cap->s - cs->s + 1);
 | 
			
		||||
      cs->cap++;
 | 
			
		||||
      return 1;
 | 
			
		||||
    }
 | 
			
		||||
    case Cconst: {
 | 
			
		||||
      pushluaval(cs);
 | 
			
		||||
      cs->cap++;
 | 
			
		||||
      return 1;
 | 
			
		||||
    }
 | 
			
		||||
    case Carg: {
 | 
			
		||||
      int arg = (cs->cap++)->idx;
 | 
			
		||||
      if (arg + FIXEDARGS > cs->ptop)
 | 
			
		||||
        return luaL_error(L, "reference to absent extra argument #%d", arg);
 | 
			
		||||
      lua_pushvalue(L, arg + FIXEDARGS);
 | 
			
		||||
      return 1;
 | 
			
		||||
    }
 | 
			
		||||
    case Csimple: {
 | 
			
		||||
      int k = pushnestedvalues(cs, 1);
 | 
			
		||||
      lua_insert(L, -k);  /* make whole match be first result */
 | 
			
		||||
      return k;
 | 
			
		||||
    }
 | 
			
		||||
    case Cruntime: {
 | 
			
		||||
      lua_pushvalue(L, (cs->cap++)->idx);  /* value is in the stack */
 | 
			
		||||
      return 1;
 | 
			
		||||
    }
 | 
			
		||||
    case Cstring: {
 | 
			
		||||
      luaL_Buffer b;
 | 
			
		||||
      luaL_buffinit(L, &b);
 | 
			
		||||
      stringcap(&b, cs);
 | 
			
		||||
      luaL_pushresult(&b);
 | 
			
		||||
      return 1;
 | 
			
		||||
    }
 | 
			
		||||
    case Csubst: {
 | 
			
		||||
      luaL_Buffer b;
 | 
			
		||||
      luaL_buffinit(L, &b);
 | 
			
		||||
      substcap(&b, cs);
 | 
			
		||||
      luaL_pushresult(&b);
 | 
			
		||||
      return 1;
 | 
			
		||||
    }
 | 
			
		||||
    case Cgroup: {
 | 
			
		||||
      if (cs->cap->idx == 0)  /* anonymous group? */
 | 
			
		||||
        return pushnestedvalues(cs, 0);  /* add all nested values */
 | 
			
		||||
      else {  /* named group: add no values */
 | 
			
		||||
        nextcap(cs);  /* skip capture */
 | 
			
		||||
        return 0;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    case Cbackref: return backrefcap(cs);
 | 
			
		||||
    case Ctable: return tablecap(cs);
 | 
			
		||||
    case Cfunction: return functioncap(cs);
 | 
			
		||||
    case Cnum: return numcap(cs);
 | 
			
		||||
    case Cquery: return querycap(cs);
 | 
			
		||||
    case Cfold: return foldcap(cs);
 | 
			
		||||
    default: assert(0); return 0;
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
** Prepare a CapState structure and traverse the entire list of
 | 
			
		||||
** captures in the stack pushing its results. 's' is the subject
 | 
			
		||||
** string, 'r' is the final position of the match, and 'ptop' 
 | 
			
		||||
** the index in the stack where some useful values were pushed.
 | 
			
		||||
** Returns the number of results pushed. (If the list produces no
 | 
			
		||||
** results, push the final position of the match.)
 | 
			
		||||
*/
 | 
			
		||||
int getcaptures (lua_State *L, const char *s, const char *r, int ptop) {
 | 
			
		||||
  Capture *capture = (Capture *)lua_touserdata(L, caplistidx(ptop));
 | 
			
		||||
  int n = 0;
 | 
			
		||||
  if (!isclosecap(capture)) {  /* is there any capture? */
 | 
			
		||||
    CapState cs;
 | 
			
		||||
    cs.ocap = cs.cap = capture; cs.L = L;
 | 
			
		||||
    cs.s = s; cs.valuecached = 0; cs.ptop = ptop;
 | 
			
		||||
    do {  /* collect their values */
 | 
			
		||||
      n += pushcapture(&cs);
 | 
			
		||||
    } while (!isclosecap(cs.cap));
 | 
			
		||||
  }
 | 
			
		||||
  if (n == 0) {  /* no capture values? */
 | 
			
		||||
    lua_pushinteger(L, r - s + 1);  /* return only end position */
 | 
			
		||||
    n = 1;
 | 
			
		||||
  }
 | 
			
		||||
  return n;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										56
									
								
								src/ext/lpeg/lpcap.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										56
									
								
								src/ext/lpeg/lpcap.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,56 @@
 | 
			
		||||
/*
 | 
			
		||||
** $Id: lpcap.h,v 1.3 2016/09/13 17:45:58 roberto Exp $
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
#if !defined(lpcap_h)
 | 
			
		||||
#define lpcap_h
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#include "lptypes.h"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/* kinds of captures (value kept in the 'kind' field of a 'Capture') */
 | 
			
		||||
typedef enum CapKind {
 | 
			
		||||
  Cclose,  /* not used in trees */
 | 
			
		||||
  Cposition,  /* value is the capture's position in the subject */
 | 
			
		||||
  Cconst,  /* ktable[key] is Lua constant */
 | 
			
		||||
  Cbackref,  /* ktable[key] is "name" of group to get capture */
 | 
			
		||||
  Carg,  /* 'key' is arg's number */
 | 
			
		||||
  Csimple,  /* next node is pattern */
 | 
			
		||||
  Ctable,  /* next node is pattern */
 | 
			
		||||
  Cfunction,  /* ktable[key] is function; next node is pattern */
 | 
			
		||||
  Cquery,  /* ktable[key] is table; next node is pattern */
 | 
			
		||||
  Cstring,  /* ktable[key] is string; next node is pattern */
 | 
			
		||||
  Cnum,  /* numbered capture; 'key' is number of value to return */
 | 
			
		||||
  Csubst,  /* substitution capture; next node is pattern */
 | 
			
		||||
  Cfold,  /* ktable[key] is function; next node is pattern */
 | 
			
		||||
  Cruntime,  /* not used in trees (it uses another type for tree) */
 | 
			
		||||
  Cgroup  /* ktable[key] is group's "name" */
 | 
			
		||||
} CapKind;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/* one entry in the list of captures recorded during a match */
typedef struct Capture {
 | 
			
		||||
  const char *s;  /* subject position */
 | 
			
		||||
  unsigned short idx;  /* extra info (group name, arg index, etc.) */
 | 
			
		||||
  byte kind;  /* kind of capture (a 'CapKind' value) */
 | 
			
		||||
  byte siz;  /* size of full capture + 1 (0 = not a full capture) */
 | 
			
		||||
} Capture;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/* state shared by the functions that evaluate a list of captures */
typedef struct CapState {
 | 
			
		||||
  Capture *cap;  /* current capture */
 | 
			
		||||
  Capture *ocap;  /* (original) capture list */
 | 
			
		||||
  lua_State *L;  /* Lua state where capture values are pushed */
 | 
			
		||||
  int ptop;  /* index of last argument to 'match' */
 | 
			
		||||
  const char *s;  /* original string */
 | 
			
		||||
  int valuecached;  /* value stored in cache slot (its ktable index) */
 | 
			
		||||
} CapState;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
int runtimecap (CapState *cs, Capture *close, const char *s, int *rem);
 | 
			
		||||
int getcaptures (lua_State *L, const char *s, const char *r, int ptop);
 | 
			
		||||
int finddyncap (Capture *cap, Capture *last);
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										1014
									
								
								src/ext/lpeg/lpcode.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1014
									
								
								src/ext/lpeg/lpcode.c
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										40
									
								
								src/ext/lpeg/lpcode.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										40
									
								
								src/ext/lpeg/lpcode.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,40 @@
 | 
			
		||||
/*
 | 
			
		||||
** $Id: lpcode.h,v 1.8 2016/09/15 17:46:13 roberto Exp $
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
#if !defined(lpcode_h)
 | 
			
		||||
#define lpcode_h
 | 
			
		||||
 | 
			
		||||
#include "lua.h"
 | 
			
		||||
 | 
			
		||||
#include "lptypes.h"
 | 
			
		||||
#include "lptree.h"
 | 
			
		||||
#include "lpvm.h"
 | 
			
		||||
 | 
			
		||||
int tocharset (TTree *tree, Charset *cs);
 | 
			
		||||
int checkaux (TTree *tree, int pred);
 | 
			
		||||
int fixedlen (TTree *tree);
 | 
			
		||||
int hascaptures (TTree *tree);
 | 
			
		||||
int lp_gc (lua_State *L);
 | 
			
		||||
Instruction *compile (lua_State *L, Pattern *p);
 | 
			
		||||
void realloccode (lua_State *L, Pattern *p, int nsize);
 | 
			
		||||
int sizei (const Instruction *i);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#define PEnullable      0
 | 
			
		||||
#define PEnofail        1
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
** nofail(t) implies that 't' cannot fail with any input
 | 
			
		||||
*/
 | 
			
		||||
#define nofail(t)	checkaux(t, PEnofail)
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
** (not nullable(t)) implies 't' cannot match without consuming
 | 
			
		||||
** something
 | 
			
		||||
*/
 | 
			
		||||
#define nullable(t)	checkaux(t, PEnullable)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
							
								
								
									
										
											BIN
										
									
								
								src/ext/lpeg/lpeg-128.gif
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								src/ext/lpeg/lpeg-128.gif
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							| 
		 After Width: | Height: | Size: 4.8 KiB  | 
							
								
								
									
										1445
									
								
								src/ext/lpeg/lpeg.html
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1445
									
								
								src/ext/lpeg/lpeg.html
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										244
									
								
								src/ext/lpeg/lpprint.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										244
									
								
								src/ext/lpeg/lpprint.c
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,244 @@
 | 
			
		||||
/*
 | 
			
		||||
** $Id: lpprint.c,v 1.10 2016/09/13 16:06:03 roberto Exp $
 | 
			
		||||
** Copyright 2007, Lua.org & PUC-Rio  (see 'lpeg.html' for license)
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
#include <ctype.h>
 | 
			
		||||
#include <limits.h>
 | 
			
		||||
#include <stdio.h>
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#include "lptypes.h"
 | 
			
		||||
#include "lpprint.h"
 | 
			
		||||
#include "lpcode.h"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#if defined(LPEG_DEBUG)
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
** {======================================================
 | 
			
		||||
** Printing patterns (for debugging)
 | 
			
		||||
** =======================================================
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
** Print the contents of the character-set bitmap 'st' between
** brackets. Each maximal run of consecutive member characters is
** shown in hexadecimal: "(xx)" for a single character, "(xx-yy)"
** for a longer run.
*/
void printcharset (const byte *st) {
  int i;
  printf("[");
  for (i = 0; i <= UCHAR_MAX; i++) {
    int first = i;
    /* test the bound BEFORE the bit: when a run reaches UCHAR_MAX,
       'i' becomes UCHAR_MAX + 1 and testchar would otherwise index
       one byte past the end of the bitmap */
    while (i <= UCHAR_MAX && testchar(st, i)) i++;
    if (i - 1 == first)  /* unary range? */
      printf("(%02x)", first);
    else if (i - 1 > first)  /* non-empty range? */
      printf("(%02x-%02x)", first, i - 1);
  }
  printf("]");
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
** Return a printable name for capture kind 'kind'. The table follows
** the order of the 'CapKind' enumeration. A value outside the table
** (the 4-bit kind field of an instruction can hold one more value
** than there are kinds) yields "unknown" instead of reading past the
** end of the array.
*/
static const char *capkind (int kind) {
  static const char *const modes[] = {
    "close", "position", "constant", "backref",
    "argument", "simple", "table", "function",
    "query", "string", "num", "substitution", "fold",
    "runtime", "group"};
  if (kind < 0 || kind >= (int)(sizeof(modes) / sizeof(modes[0])))
    return "unknown";  /* not a valid capture kind */
  return modes[kind];
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
static void printjmp (const Instruction *op, const Instruction *p) {
 | 
			
		||||
  printf("-> %d", (int)(p + (p + 1)->offset - op));
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
void printinst (const Instruction *op, const Instruction *p) {
 | 
			
		||||
  const char *const names[] = {
 | 
			
		||||
    "any", "char", "set",
 | 
			
		||||
    "testany", "testchar", "testset",
 | 
			
		||||
    "span", "behind",
 | 
			
		||||
    "ret", "end",
 | 
			
		||||
    "choice", "jmp", "call", "open_call",
 | 
			
		||||
    "commit", "partial_commit", "back_commit", "failtwice", "fail", "giveup",
 | 
			
		||||
     "fullcapture", "opencapture", "closecapture", "closeruntime"
 | 
			
		||||
  };
 | 
			
		||||
  printf("%02ld: %s ", (long)(p - op), names[p->i.code]);
 | 
			
		||||
  switch ((Opcode)p->i.code) {
 | 
			
		||||
    case IChar: {
 | 
			
		||||
      printf("'%c'", p->i.aux);
 | 
			
		||||
      break;
 | 
			
		||||
    }
 | 
			
		||||
    case ITestChar: {
 | 
			
		||||
      printf("'%c'", p->i.aux); printjmp(op, p);
 | 
			
		||||
      break;
 | 
			
		||||
    }
 | 
			
		||||
    case IFullCapture: {
 | 
			
		||||
      printf("%s (size = %d)  (idx = %d)",
 | 
			
		||||
             capkind(getkind(p)), getoff(p), p->i.key);
 | 
			
		||||
      break;
 | 
			
		||||
    }
 | 
			
		||||
    case IOpenCapture: {
 | 
			
		||||
      printf("%s (idx = %d)", capkind(getkind(p)), p->i.key);
 | 
			
		||||
      break;
 | 
			
		||||
    }
 | 
			
		||||
    case ISet: {
 | 
			
		||||
      printcharset((p+1)->buff);
 | 
			
		||||
      break;
 | 
			
		||||
    }
 | 
			
		||||
    case ITestSet: {
 | 
			
		||||
      printcharset((p+2)->buff); printjmp(op, p);
 | 
			
		||||
      break;
 | 
			
		||||
    }
 | 
			
		||||
    case ISpan: {
 | 
			
		||||
      printcharset((p+1)->buff);
 | 
			
		||||
      break;
 | 
			
		||||
    }
 | 
			
		||||
    case IOpenCall: {
 | 
			
		||||
      printf("-> %d", (p + 1)->offset);
 | 
			
		||||
      break;
 | 
			
		||||
    }
 | 
			
		||||
    case IBehind: {
 | 
			
		||||
      printf("%d", p->i.aux);
 | 
			
		||||
      break;
 | 
			
		||||
    }
 | 
			
		||||
    case IJmp: case ICall: case ICommit: case IChoice:
 | 
			
		||||
    case IPartialCommit: case IBackCommit: case ITestAny: {
 | 
			
		||||
      printjmp(op, p);
 | 
			
		||||
      break;
 | 
			
		||||
    }
 | 
			
		||||
    default: break;
 | 
			
		||||
  }
 | 
			
		||||
  printf("\n");
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
** Print the 'n' code slots starting at 'p', one instruction per
** line. The step between instructions is given by 'sizei'.
*/
void printpatt (Instruction *p, int n) {
  Instruction *cur;
  for (cur = p; cur < p + n; cur += sizei(cur))
    printinst(p, cur);
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#if defined(LPEG_DEBUG)
 | 
			
		||||
/*
** Print one capture entry: kind name, 'idx', 'siz' and the subject
** pointer it refers to.
*/
static void printcap (Capture *cap) {
  const char *kindname = capkind(cap->kind);
  printf("%s (idx: %d - size: %d) -> %p\n",
         kindname, cap->idx, cap->siz, cap->s);
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
** Print the capture entries starting at 'cap', between two separator
** lines. Printing stops at the first entry whose 's' is NULL or, when
** 'limit' is not NULL, on reaching 'limit'.
*/
void printcaplist (Capture *cap, Capture *limit) {
  printf(">======\n");
  while (cap->s != NULL && (limit == NULL || cap < limit)) {
    printcap(cap);
    cap++;
  }
  printf("=======\n");
}
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
/* }====================================================== */
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
** {======================================================
 | 
			
		||||
** Printing trees (for debugging)
 | 
			
		||||
** =======================================================
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
/* printable names for tree tags, indexed by tag (see enum 'TTag') */
static const char *tagnames[] = {
  "char",      /* TChar */
  "set",       /* TSet */
  "any",       /* TAny */
  "true",      /* TTrue */
  "false",     /* TFalse */
  "rep",       /* TRep */
  "seq",       /* TSeq */
  "choice",    /* TChoice */
  "not",       /* TNot */
  "and",       /* TAnd */
  "call",      /* TCall */
  "opencall",  /* TOpenCall */
  "rule",      /* TRule */
  "grammar",   /* TGrammar */
  "behind",    /* TBehind */
  "capture",   /* TCapture */
  "run-time"   /* TRunTime */
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
** Print 'tree' (and, recursively, its children) with 'ident' leading
** spaces; children are printed two columns deeper. Each node shows
** its tag name followed by tag-specific data.
*/
void printtree (TTree *tree, int ident) {
  int pad;
  for (pad = ident; pad > 0; pad--)
    printf(" ");
  printf("%s", tagnames[tree->tag]);
  switch (tree->tag) {
    case TChar: {
      int ch = tree->u.n;
      if (!isprint(ch))
        printf(" (%02X)\n", ch);  /* show code of non-printable char */
      else
        printf(" '%c'\n", ch);
      break;
    }
    case TSet: {
      printcharset(treebuffer(tree));
      printf("\n");
      break;
    }
    case TOpenCall: case TCall: {
      assert(sib2(tree)->tag == TRule);
      printf(" key: %d  (rule: %d)\n", tree->key, sib2(tree)->cap);
      break;
    }
    case TBehind: {
      printf(" %d\n", tree->u.n);
      printtree(sib1(tree), ident + 2);
      break;
    }
    case TCapture: {
      printf(" kind: '%s'  key: %d\n", capkind(tree->cap), tree->key);
      printtree(sib1(tree), ident + 2);
      break;
    }
    case TRule: {
      printf(" n: %d  key: %d\n", tree->cap, tree->key);
      printtree(sib1(tree), ident + 2);
      break;  /* do not print next rule as a sibling */
    }
    case TGrammar: {
      TTree *rule = sib1(tree);
      int r;
      printf(" %d\n", tree->u.n);  /* number of rules */
      for (r = 0; r < tree->u.n; r++) {
        printtree(rule, ident + 2);
        rule = sib2(rule);  /* rules are chained through 'sib2' */
      }
      assert(rule->tag == TTrue);  /* sentinel */
      break;
    }
    default: {  /* generic node: just print its children, if any */
      int nsibs = numsiblings[tree->tag];
      printf("\n");
      if (nsibs < 1)
        break;
      printtree(sib1(tree), ident + 2);
      if (nsibs >= 2)
        printtree(sib2(tree), ident + 2);
      break;
    }
  }
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
** Print the ktable (the user value) of the object at stack index
** 'idx' as "[1 = v1  2 = v2  ...]". Strings and numbers are shown by
** value, anything else by its type name. The user value (ktable or
** nil) is left on top of the stack.
*/
void printktable (lua_State *L, int idx) {
  int size, k;
  lua_getuservalue(L, idx);
  if (!lua_isnil(L, -1)) {  /* is there a ktable? */
    size = lua_rawlen(L, -1);
    printf("[");
    for (k = 1; k <= size; k++) {
      const char *text;
      printf("%d = ", k);
      lua_rawgeti(L, -1, k);
      text = lua_isstring(L, -1) ? lua_tostring(L, -1)
                                 : lua_typename(L, lua_type(L, -1));
      printf("%s  ", text);
      lua_pop(L, 1);  /* remove entry; ktable is on top again */
    }
    printf("]\n");
  }
  /* leave ktable at the stack */
}
 | 
			
		||||
 | 
			
		||||
/* }====================================================== */
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
							
								
								
									
										36
									
								
								src/ext/lpeg/lpprint.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										36
									
								
								src/ext/lpeg/lpprint.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,36 @@
 | 
			
		||||
/*
 | 
			
		||||
** $Id: lpprint.h,v 1.2 2015/06/12 18:18:08 roberto Exp $
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#if !defined(lpprint_h)
 | 
			
		||||
#define lpprint_h
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#include "lptree.h"
 | 
			
		||||
#include "lpvm.h"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#if defined(LPEG_DEBUG)
 | 
			
		||||
 | 
			
		||||
void printpatt (Instruction *p, int n);
 | 
			
		||||
void printtree (TTree *tree, int ident);
 | 
			
		||||
void printktable (lua_State *L, int idx);
 | 
			
		||||
void printcharset (const byte *st);
 | 
			
		||||
void printcaplist (Capture *cap, Capture *limit);
 | 
			
		||||
void printinst (const Instruction *op, const Instruction *p);
 | 
			
		||||
 | 
			
		||||
#else
 | 
			
		||||
 | 
			
		||||
#define printktable(L,idx)  \
 | 
			
		||||
	luaL_error(L, "function only implemented in debug mode")
 | 
			
		||||
#define printtree(tree,i)  \
 | 
			
		||||
	luaL_error(L, "function only implemented in debug mode")
 | 
			
		||||
#define printpatt(p,n)  \
 | 
			
		||||
	luaL_error(L, "function only implemented in debug mode")
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										1303
									
								
								src/ext/lpeg/lptree.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1303
									
								
								src/ext/lpeg/lptree.c
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										82
									
								
								src/ext/lpeg/lptree.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										82
									
								
								src/ext/lpeg/lptree.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,82 @@
 | 
			
		||||
/*  
 | 
			
		||||
** $Id: lptree.h,v 1.3 2016/09/13 18:07:51 roberto Exp $
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
#if !defined(lptree_h)
 | 
			
		||||
#define lptree_h
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#include "lptypes.h" 
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
** types of trees
 | 
			
		||||
*/
 | 
			
		||||
typedef enum TTag {
 | 
			
		||||
  TChar = 0,  /* 'n' = char */
 | 
			
		||||
  TSet,  /* the set is stored in next CHARSETSIZE bytes */
 | 
			
		||||
  TAny,
 | 
			
		||||
  TTrue,
 | 
			
		||||
  TFalse,
 | 
			
		||||
  TRep,  /* 'sib1'* */
 | 
			
		||||
  TSeq,  /* 'sib1' 'sib2' */
 | 
			
		||||
  TChoice,  /* 'sib1' / 'sib2' */
 | 
			
		||||
  TNot,  /* !'sib1' */
 | 
			
		||||
  TAnd,  /* &'sib1' */
 | 
			
		||||
  TCall,  /* ktable[key] is rule's key; 'sib2' is rule being called */
 | 
			
		||||
  TOpenCall,  /* ktable[key] is rule's key */
 | 
			
		||||
  TRule,  /* ktable[key] is rule's key (but key == 0 for unused rules);
 | 
			
		||||
             'sib1' is rule's pattern;
 | 
			
		||||
             'sib2' is next rule; 'cap' is rule's sequential number */
 | 
			
		||||
  TGrammar,  /* 'sib1' is initial (and first) rule */
 | 
			
		||||
  TBehind,  /* 'sib1' is pattern, 'n' is how much to go back */
 | 
			
		||||
  TCapture,  /* captures: 'cap' is kind of capture (enum 'CapKind');
 | 
			
		||||
                ktable[key] is Lua value associated with capture;
 | 
			
		||||
                'sib1' is capture body */
 | 
			
		||||
  TRunTime  /* run-time capture: 'key' is Lua function;
 | 
			
		||||
               'sib1' is capture body */
 | 
			
		||||
} TTag;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
** Tree trees
 | 
			
		||||
** The first child of a tree (if there is one) is immediately after
 | 
			
		||||
** the tree.  A reference to a second child (ps) is its position
 | 
			
		||||
** relative to the position of the tree itself.
 | 
			
		||||
*/
 | 
			
		||||
typedef struct TTree {
 | 
			
		||||
  byte tag;
 | 
			
		||||
  byte cap;  /* kind of capture (if it is a capture) */
 | 
			
		||||
  unsigned short key;  /* key in ktable for Lua data (0 if no key) */
 | 
			
		||||
  union {
 | 
			
		||||
    int ps;  /* occasional second child */
 | 
			
		||||
    int n;  /* occasional counter */
 | 
			
		||||
  } u;
 | 
			
		||||
} TTree;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
** A complete pattern has its tree plus, if already compiled,
 | 
			
		||||
** its corresponding code
 | 
			
		||||
*/
 | 
			
		||||
typedef struct Pattern {
 | 
			
		||||
  union Instruction *code;
 | 
			
		||||
  int codesize;
 | 
			
		||||
  TTree tree[1];
 | 
			
		||||
} Pattern;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/* number of children for each tree */
 | 
			
		||||
extern const byte numsiblings[];
 | 
			
		||||
 | 
			
		||||
/* access to children */
 | 
			
		||||
#define sib1(t)         ((t) + 1)
 | 
			
		||||
#define sib2(t)         ((t) + (t)->u.ps)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										149
									
								
								src/ext/lpeg/lptypes.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										149
									
								
								src/ext/lpeg/lptypes.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,149 @@
 | 
			
		||||
/*
 | 
			
		||||
** $Id: lptypes.h,v 1.16 2017/01/13 13:33:17 roberto Exp $
 | 
			
		||||
** LPeg - PEG pattern matching for Lua
 | 
			
		||||
** Copyright 2007-2017, Lua.org & PUC-Rio  (see 'lpeg.html' for license)
 | 
			
		||||
** written by Roberto Ierusalimschy
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
#if !defined(lptypes_h)
 | 
			
		||||
#define lptypes_h
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#if !defined(LPEG_DEBUG)
 | 
			
		||||
#define NDEBUG
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#include <assert.h>
 | 
			
		||||
#include <limits.h>
 | 
			
		||||
 | 
			
		||||
#include "lua.h"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#define VERSION         "1.0.1"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#define PATTERN_T	"lpeg-pattern"
 | 
			
		||||
#define MAXSTACKIDX	"lpeg-maxstack"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
** compatibility with Lua 5.1
 | 
			
		||||
*/
 | 
			
		||||
#if (LUA_VERSION_NUM == 501)
 | 
			
		||||
 | 
			
		||||
#define lp_equal	lua_equal
 | 
			
		||||
 | 
			
		||||
#define lua_getuservalue	lua_getfenv
 | 
			
		||||
#define lua_setuservalue	lua_setfenv
 | 
			
		||||
 | 
			
		||||
#define lua_rawlen		lua_objlen
 | 
			
		||||
 | 
			
		||||
#define luaL_setfuncs(L,f,n)	luaL_register(L,NULL,f)
 | 
			
		||||
#define luaL_newlib(L,f)	luaL_register(L,"lpeg",f)
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#if !defined(lp_equal)
 | 
			
		||||
#define lp_equal(L,idx1,idx2)  lua_compare(L,(idx1),(idx2),LUA_OPEQ)
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/* default maximum size for call/backtrack stack */
 | 
			
		||||
#if !defined(MAXBACK)
 | 
			
		||||
#define MAXBACK         400
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/* maximum number of rules in a grammar (limited by 'unsigned char') */
 | 
			
		||||
#if !defined(MAXRULES)
 | 
			
		||||
#define MAXRULES        250
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/* initial size for capture's list */
 | 
			
		||||
#define INITCAPSIZE	32
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/* index, on Lua stack, for subject */
 | 
			
		||||
#define SUBJIDX		2
 | 
			
		||||
 | 
			
		||||
/* number of fixed arguments to 'match' (before capture arguments) */
 | 
			
		||||
#define FIXEDARGS	3
 | 
			
		||||
 | 
			
		||||
/* index, on Lua stack, for capture list */
 | 
			
		||||
#define caplistidx(ptop)	((ptop) + 2)
 | 
			
		||||
 | 
			
		||||
/* index, on Lua stack, for pattern's ktable */
 | 
			
		||||
#define ktableidx(ptop)		((ptop) + 3)
 | 
			
		||||
 | 
			
		||||
/* index, on Lua stack, for backtracking stack */
 | 
			
		||||
#define stackidx(ptop)	((ptop) + 4)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
typedef unsigned char byte;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#define BITSPERCHAR		8
 | 
			
		||||
 | 
			
		||||
#define CHARSETSIZE		((UCHAR_MAX/BITSPERCHAR) + 1)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
typedef struct Charset {
 | 
			
		||||
  byte cs[CHARSETSIZE];
 | 
			
		||||
} Charset;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#define loopset(v,b)    { int v; for (v = 0; v < CHARSETSIZE; v++) {b;} }
 | 
			
		||||
 | 
			
		||||
/* access to charset */
 | 
			
		||||
#define treebuffer(t)      ((byte *)((t) + 1))
 | 
			
		||||
 | 
			
		||||
/* number of slots needed for 'n' bytes */
 | 
			
		||||
#define bytes2slots(n)  (((n) - 1) / sizeof(TTree) + 1)
 | 
			
		||||
 | 
			
		||||
/* set 'b' bit in charset 'cs' */
 | 
			
		||||
#define setchar(cs,b)   ((cs)[(b) >> 3] |= (1 << ((b) & 7)))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
** in capture instructions, 'kind' of capture and its offset are
 | 
			
		||||
** packed in field 'aux', 4 bits for each
 | 
			
		||||
*/
 | 
			
		||||
#define getkind(op)		((op)->i.aux & 0xF)
 | 
			
		||||
#define getoff(op)		(((op)->i.aux >> 4) & 0xF)
 | 
			
		||||
#define joinkindoff(k,o)	((k) | ((o) << 4))
 | 
			
		||||
 | 
			
		||||
#define MAXOFF		0xF
 | 
			
		||||
#define MAXAUX		0xFF
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/* maximum number of bytes to look behind */
 | 
			
		||||
#define MAXBEHIND	MAXAUX
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/* maximum size (in elements) for a pattern */
 | 
			
		||||
#define MAXPATTSIZE	(SHRT_MAX - 10)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/* size (in elements) for an instruction plus extra l bytes */
 | 
			
		||||
#define instsize(l)  (((l) + sizeof(Instruction) - 1)/sizeof(Instruction) + 1)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/* size (in elements) for a ISet instruction */
 | 
			
		||||
#define CHARSETINSTSIZE		instsize(CHARSETSIZE)
 | 
			
		||||
 | 
			
		||||
/* size (in elements) for a IFunc instruction */
 | 
			
		||||
#define funcinstsize(p)		((p)->i.aux + 2)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#define testchar(st,c)	(((int)(st)[((c) >> 3)] & (1 << ((c) & 7))))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										364
									
								
								src/ext/lpeg/lpvm.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										364
									
								
								src/ext/lpeg/lpvm.c
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,364 @@
 | 
			
		||||
/*
 | 
			
		||||
** $Id: lpvm.c,v 1.9 2016/06/03 20:11:18 roberto Exp $
 | 
			
		||||
** Copyright 2007, Lua.org & PUC-Rio  (see 'lpeg.html' for license)
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
#include <limits.h>
 | 
			
		||||
#include <string.h>
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#include "lua.h"
 | 
			
		||||
#include "lauxlib.h"
 | 
			
		||||
 | 
			
		||||
#include "lpcap.h"
 | 
			
		||||
#include "lptypes.h"
 | 
			
		||||
#include "lpvm.h"
 | 
			
		||||
#include "lpprint.h"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/* initial size for call/backtrack stack */
 | 
			
		||||
#if !defined(INITBACK)
 | 
			
		||||
#define INITBACK	MAXBACK
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#define getoffset(p)	(((p) + 1)->offset)
 | 
			
		||||
 | 
			
		||||
static const Instruction giveup = {{IGiveup, 0, 0}};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
** {======================================================
 | 
			
		||||
** Virtual Machine
 | 
			
		||||
** =======================================================
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
typedef struct Stack {
 | 
			
		||||
  const char *s;  /* saved position (or NULL for calls) */
 | 
			
		||||
  const Instruction *p;  /* next instruction */
 | 
			
		||||
  int caplevel;
 | 
			
		||||
} Stack;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#define getstackbase(L, ptop)	((Stack *)lua_touserdata(L, stackidx(ptop)))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
** Make the size of the array of captures 'cap' twice as large as needed
 | 
			
		||||
** (which is 'captop'). ('n' is the number of new elements.)
 | 
			
		||||
*/
 | 
			
		||||
static Capture *doublecap (lua_State *L, Capture *cap, int captop,
 | 
			
		||||
                                         int n, int ptop) {
 | 
			
		||||
  Capture *newc;
 | 
			
		||||
  if (captop >= INT_MAX/((int)sizeof(Capture) * 2))
 | 
			
		||||
    luaL_error(L, "too many captures");
 | 
			
		||||
  newc = (Capture *)lua_newuserdata(L, captop * 2 * sizeof(Capture));
 | 
			
		||||
  memcpy(newc, cap, (captop - n) * sizeof(Capture));
 | 
			
		||||
  lua_replace(L, caplistidx(ptop));
 | 
			
		||||
  return newc;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
** Double the size of the stack
 | 
			
		||||
*/
 | 
			
		||||
static Stack *doublestack (lua_State *L, Stack **stacklimit, int ptop) {
 | 
			
		||||
  Stack *stack = getstackbase(L, ptop);
 | 
			
		||||
  Stack *newstack;
 | 
			
		||||
  int n = *stacklimit - stack;  /* current stack size */
 | 
			
		||||
  int max, newn;
 | 
			
		||||
  lua_getfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX);
 | 
			
		||||
  max = lua_tointeger(L, -1);  /* maximum allowed size */
 | 
			
		||||
  lua_pop(L, 1);
 | 
			
		||||
  if (n >= max)  /* already at maximum size? */
 | 
			
		||||
    luaL_error(L, "backtrack stack overflow (current limit is %d)", max);
 | 
			
		||||
  newn = 2 * n;  /* new size */
 | 
			
		||||
  if (newn > max) newn = max;
 | 
			
		||||
  newstack = (Stack *)lua_newuserdata(L, newn * sizeof(Stack));
 | 
			
		||||
  memcpy(newstack, stack, n * sizeof(Stack));
 | 
			
		||||
  lua_replace(L, stackidx(ptop));
 | 
			
		||||
  *stacklimit = newstack + newn;
 | 
			
		||||
  return newstack + n;  /* return next position */
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
** Interpret the result of a dynamic capture: false -> fail;
 | 
			
		||||
** true -> keep current position; number -> next position.
 | 
			
		||||
** Return new subject position. 'fr' is stack index where
 | 
			
		||||
** is the result; 'curr' is current subject position; 'limit'
 | 
			
		||||
** is subject's size.
 | 
			
		||||
*/
 | 
			
		||||
static int resdyncaptures (lua_State *L, int fr, int curr, int limit) {
 | 
			
		||||
  lua_Integer res;
 | 
			
		||||
  if (!lua_toboolean(L, fr)) {  /* false value? */
 | 
			
		||||
    lua_settop(L, fr - 1);  /* remove results */
 | 
			
		||||
    return -1;  /* and fail */
 | 
			
		||||
  }
 | 
			
		||||
  else if (lua_isboolean(L, fr))  /* true? */
 | 
			
		||||
    res = curr;  /* keep current position */
 | 
			
		||||
  else {
 | 
			
		||||
    res = lua_tointeger(L, fr) - 1;  /* new position */
 | 
			
		||||
    if (res < curr || res > limit)
 | 
			
		||||
      luaL_error(L, "invalid position returned by match-time capture");
 | 
			
		||||
  }
 | 
			
		||||
  lua_remove(L, fr);  /* remove first result (offset) */
 | 
			
		||||
  return res;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
** Add capture values returned by a dynamic capture to the capture list
 | 
			
		||||
** 'base', nested inside a group capture. 'fd' indexes the first capture
 | 
			
		||||
** value, 'n' is the number of values (at least 1).
 | 
			
		||||
*/
 | 
			
		||||
static void adddyncaptures (const char *s, Capture *base, int n, int fd) {
 | 
			
		||||
  int i;
 | 
			
		||||
  base[0].kind = Cgroup;  /* create group capture */
 | 
			
		||||
  base[0].siz = 0;
 | 
			
		||||
  base[0].idx = 0;  /* make it an anonymous group */
 | 
			
		||||
  for (i = 1; i <= n; i++) {  /* add runtime captures */
 | 
			
		||||
    base[i].kind = Cruntime;
 | 
			
		||||
    base[i].siz = 1;  /* mark it as closed */
 | 
			
		||||
    base[i].idx = fd + i - 1;  /* stack index of capture value */
 | 
			
		||||
    base[i].s = s;
 | 
			
		||||
  }
 | 
			
		||||
  base[i].kind = Cclose;  /* close group */
 | 
			
		||||
  base[i].siz = 1;
 | 
			
		||||
  base[i].s = s;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
** Remove dynamic captures from the Lua stack (called in case of failure)
 | 
			
		||||
*/
 | 
			
		||||
static int removedyncap (lua_State *L, Capture *capture,
 | 
			
		||||
                         int level, int last) {
 | 
			
		||||
  int id = finddyncap(capture + level, capture + last);  /* index of 1st cap. */
 | 
			
		||||
  int top = lua_gettop(L);
 | 
			
		||||
  if (id == 0) return 0;  /* no dynamic captures? */
 | 
			
		||||
  lua_settop(L, id - 1);  /* remove captures */
 | 
			
		||||
  return top - id + 1;  /* number of values removed */
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
** Opcode interpreter
 | 
			
		||||
*/
 | 
			
		||||
const char *match (lua_State *L, const char *o, const char *s, const char *e,
 | 
			
		||||
                   Instruction *op, Capture *capture, int ptop) {
 | 
			
		||||
  Stack stackbase[INITBACK];
 | 
			
		||||
  Stack *stacklimit = stackbase + INITBACK;
 | 
			
		||||
  Stack *stack = stackbase;  /* point to first empty slot in stack */
 | 
			
		||||
  int capsize = INITCAPSIZE;
 | 
			
		||||
  int captop = 0;  /* point to first empty slot in captures */
 | 
			
		||||
  int ndyncap = 0;  /* number of dynamic captures (in Lua stack) */
 | 
			
		||||
  const Instruction *p = op;  /* current instruction */
 | 
			
		||||
  stack->p = &giveup; stack->s = s; stack->caplevel = 0; stack++;
 | 
			
		||||
  lua_pushlightuserdata(L, stackbase);
 | 
			
		||||
  for (;;) {
 | 
			
		||||
#if defined(DEBUG)
 | 
			
		||||
      printf("-------------------------------------\n");
 | 
			
		||||
      printcaplist(capture, capture + captop);
 | 
			
		||||
      printf("s: |%s| stck:%d, dyncaps:%d, caps:%d  ",
 | 
			
		||||
             s, (int)(stack - getstackbase(L, ptop)), ndyncap, captop);
 | 
			
		||||
      printinst(op, p);
 | 
			
		||||
#endif
 | 
			
		||||
    assert(stackidx(ptop) + ndyncap == lua_gettop(L) && ndyncap <= captop);
 | 
			
		||||
    switch ((Opcode)p->i.code) {
 | 
			
		||||
      case IEnd: {
 | 
			
		||||
        assert(stack == getstackbase(L, ptop) + 1);
 | 
			
		||||
        capture[captop].kind = Cclose;
 | 
			
		||||
        capture[captop].s = NULL;
 | 
			
		||||
        return s;
 | 
			
		||||
      }
 | 
			
		||||
      case IGiveup: {
 | 
			
		||||
        assert(stack == getstackbase(L, ptop));
 | 
			
		||||
        return NULL;
 | 
			
		||||
      }
 | 
			
		||||
      case IRet: {
 | 
			
		||||
        assert(stack > getstackbase(L, ptop) && (stack - 1)->s == NULL);
 | 
			
		||||
        p = (--stack)->p;
 | 
			
		||||
        continue;
 | 
			
		||||
      }
 | 
			
		||||
      case IAny: {
 | 
			
		||||
        if (s < e) { p++; s++; }
 | 
			
		||||
        else goto fail;
 | 
			
		||||
        continue;
 | 
			
		||||
      }
 | 
			
		||||
      case ITestAny: {
 | 
			
		||||
        if (s < e) p += 2;
 | 
			
		||||
        else p += getoffset(p);
 | 
			
		||||
        continue;
 | 
			
		||||
      }
 | 
			
		||||
      case IChar: {
 | 
			
		||||
        if ((byte)*s == p->i.aux && s < e) { p++; s++; }
 | 
			
		||||
        else goto fail;
 | 
			
		||||
        continue;
 | 
			
		||||
      }
 | 
			
		||||
      case ITestChar: {
 | 
			
		||||
        if ((byte)*s == p->i.aux && s < e) p += 2;
 | 
			
		||||
        else p += getoffset(p);
 | 
			
		||||
        continue;
 | 
			
		||||
      }
 | 
			
		||||
      case ISet: {
 | 
			
		||||
        int c = (byte)*s;
 | 
			
		||||
        if (testchar((p+1)->buff, c) && s < e)
 | 
			
		||||
          { p += CHARSETINSTSIZE; s++; }
 | 
			
		||||
        else goto fail;
 | 
			
		||||
        continue;
 | 
			
		||||
      }
 | 
			
		||||
      case ITestSet: {
 | 
			
		||||
        int c = (byte)*s;
 | 
			
		||||
        if (testchar((p + 2)->buff, c) && s < e)
 | 
			
		||||
          p += 1 + CHARSETINSTSIZE;
 | 
			
		||||
        else p += getoffset(p);
 | 
			
		||||
        continue;
 | 
			
		||||
      }
 | 
			
		||||
      case IBehind: {
 | 
			
		||||
        int n = p->i.aux;
 | 
			
		||||
        if (n > s - o) goto fail;
 | 
			
		||||
        s -= n; p++;
 | 
			
		||||
        continue;
 | 
			
		||||
      }
 | 
			
		||||
      case ISpan: {
 | 
			
		||||
        for (; s < e; s++) {
 | 
			
		||||
          int c = (byte)*s;
 | 
			
		||||
          if (!testchar((p+1)->buff, c)) break;
 | 
			
		||||
        }
 | 
			
		||||
        p += CHARSETINSTSIZE;
 | 
			
		||||
        continue;
 | 
			
		||||
      }
 | 
			
		||||
      case IJmp: {
 | 
			
		||||
        p += getoffset(p);
 | 
			
		||||
        continue;
 | 
			
		||||
      }
 | 
			
		||||
      case IChoice: {
 | 
			
		||||
        if (stack == stacklimit)
 | 
			
		||||
          stack = doublestack(L, &stacklimit, ptop);
 | 
			
		||||
        stack->p = p + getoffset(p);
 | 
			
		||||
        stack->s = s;
 | 
			
		||||
        stack->caplevel = captop;
 | 
			
		||||
        stack++;
 | 
			
		||||
        p += 2;
 | 
			
		||||
        continue;
 | 
			
		||||
      }
 | 
			
		||||
      case ICall: {
 | 
			
		||||
        if (stack == stacklimit)
 | 
			
		||||
          stack = doublestack(L, &stacklimit, ptop);
 | 
			
		||||
        stack->s = NULL;
 | 
			
		||||
        stack->p = p + 2;  /* save return address */
 | 
			
		||||
        stack++;
 | 
			
		||||
        p += getoffset(p);
 | 
			
		||||
        continue;
 | 
			
		||||
      }
 | 
			
		||||
      case ICommit: {
 | 
			
		||||
        assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL);
 | 
			
		||||
        stack--;
 | 
			
		||||
        p += getoffset(p);
 | 
			
		||||
        continue;
 | 
			
		||||
      }
 | 
			
		||||
      case IPartialCommit: {
 | 
			
		||||
        assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL);
 | 
			
		||||
        (stack - 1)->s = s;
 | 
			
		||||
        (stack - 1)->caplevel = captop;
 | 
			
		||||
        p += getoffset(p);
 | 
			
		||||
        continue;
 | 
			
		||||
      }
 | 
			
		||||
      case IBackCommit: {
 | 
			
		||||
        assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL);
 | 
			
		||||
        s = (--stack)->s;
 | 
			
		||||
        captop = stack->caplevel;
 | 
			
		||||
        p += getoffset(p);
 | 
			
		||||
        continue;
 | 
			
		||||
      }
 | 
			
		||||
      case IFailTwice:
 | 
			
		||||
        assert(stack > getstackbase(L, ptop));
 | 
			
		||||
        stack--;
 | 
			
		||||
        /* go through */
 | 
			
		||||
      case IFail:
 | 
			
		||||
      fail: { /* pattern failed: try to backtrack */
 | 
			
		||||
        do {  /* remove pending calls */
 | 
			
		||||
          assert(stack > getstackbase(L, ptop));
 | 
			
		||||
          s = (--stack)->s;
 | 
			
		||||
        } while (s == NULL);
 | 
			
		||||
        if (ndyncap > 0)  /* is there matchtime captures? */
 | 
			
		||||
          ndyncap -= removedyncap(L, capture, stack->caplevel, captop);
 | 
			
		||||
        captop = stack->caplevel;
 | 
			
		||||
        p = stack->p;
 | 
			
		||||
#if defined(DEBUG)
 | 
			
		||||
        printf("**FAIL**\n");
 | 
			
		||||
#endif
 | 
			
		||||
        continue;
 | 
			
		||||
      }
 | 
			
		||||
      case ICloseRunTime: {
 | 
			
		||||
        CapState cs;
 | 
			
		||||
        int rem, res, n;
 | 
			
		||||
        int fr = lua_gettop(L) + 1;  /* stack index of first result */
 | 
			
		||||
        cs.s = o; cs.L = L; cs.ocap = capture; cs.ptop = ptop;
 | 
			
		||||
        n = runtimecap(&cs, capture + captop, s, &rem);  /* call function */
 | 
			
		||||
        captop -= n;  /* remove nested captures */
 | 
			
		||||
        ndyncap -= rem;  /* update number of dynamic captures */
 | 
			
		||||
        fr -= rem;  /* 'rem' items were popped from Lua stack */
 | 
			
		||||
        res = resdyncaptures(L, fr, s - o, e - o);  /* get result */
 | 
			
		||||
        if (res == -1)  /* fail? */
 | 
			
		||||
          goto fail;
 | 
			
		||||
        s = o + res;  /* else update current position */
 | 
			
		||||
        n = lua_gettop(L) - fr + 1;  /* number of new captures */
 | 
			
		||||
        ndyncap += n;  /* update number of dynamic captures */
 | 
			
		||||
        if (n > 0) {  /* any new capture? */
 | 
			
		||||
          if (fr + n >= SHRT_MAX)
 | 
			
		||||
            luaL_error(L, "too many results in match-time capture");
 | 
			
		||||
          if ((captop += n + 2) >= capsize) {
 | 
			
		||||
            capture = doublecap(L, capture, captop, n + 2, ptop);
 | 
			
		||||
            capsize = 2 * captop;
 | 
			
		||||
          }
 | 
			
		||||
          /* add new captures to 'capture' list */
 | 
			
		||||
          adddyncaptures(s, capture + captop - n - 2, n, fr); 
 | 
			
		||||
        }
 | 
			
		||||
        p++;
 | 
			
		||||
        continue;
 | 
			
		||||
      }
 | 
			
		||||
      case ICloseCapture: {
 | 
			
		||||
        const char *s1 = s;
 | 
			
		||||
        assert(captop > 0);
 | 
			
		||||
        /* if possible, turn capture into a full capture */
 | 
			
		||||
        if (capture[captop - 1].siz == 0 &&
 | 
			
		||||
            s1 - capture[captop - 1].s < UCHAR_MAX) {
 | 
			
		||||
          capture[captop - 1].siz = s1 - capture[captop - 1].s + 1;
 | 
			
		||||
          p++;
 | 
			
		||||
          continue;
 | 
			
		||||
        }
 | 
			
		||||
        else {
 | 
			
		||||
          capture[captop].siz = 1;  /* mark entry as closed */
 | 
			
		||||
          capture[captop].s = s;
 | 
			
		||||
          goto pushcapture;
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
      case IOpenCapture:
 | 
			
		||||
        capture[captop].siz = 0;  /* mark entry as open */
 | 
			
		||||
        capture[captop].s = s;
 | 
			
		||||
        goto pushcapture;
 | 
			
		||||
      case IFullCapture:
 | 
			
		||||
        capture[captop].siz = getoff(p) + 1;  /* save capture size */
 | 
			
		||||
        capture[captop].s = s - getoff(p);
 | 
			
		||||
        /* goto pushcapture; */
 | 
			
		||||
      pushcapture: {
 | 
			
		||||
        capture[captop].idx = p->i.key;
 | 
			
		||||
        capture[captop].kind = getkind(p);
 | 
			
		||||
        if (++captop >= capsize) {
 | 
			
		||||
          capture = doublecap(L, capture, captop, 0, ptop);
 | 
			
		||||
          capsize = 2 * captop;
 | 
			
		||||
        }
 | 
			
		||||
        p++;
 | 
			
		||||
        continue;
 | 
			
		||||
      }
 | 
			
		||||
      default: assert(0); return NULL;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* }====================================================== */
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										58
									
								
								src/ext/lpeg/lpvm.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										58
									
								
								src/ext/lpeg/lpvm.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,58 @@
 | 
			
		||||
/*
 | 
			
		||||
** $Id: lpvm.h,v 1.3 2014/02/21 13:06:41 roberto Exp $
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
#if !defined(lpvm_h)
 | 
			
		||||
#define lpvm_h
 | 
			
		||||
 | 
			
		||||
#include "lpcap.h"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/* Virtual Machine's instructions */
 | 
			
		||||
typedef enum Opcode {
 | 
			
		||||
  IAny, /* if no char, fail */
 | 
			
		||||
  IChar,  /* if char != aux, fail */
 | 
			
		||||
  ISet,  /* if char not in buff, fail */
 | 
			
		||||
  ITestAny,  /* if no char, jump to 'offset' */
 | 
			
		||||
  ITestChar,  /* if char != aux, jump to 'offset' */
 | 
			
		||||
  ITestSet,  /* if char not in buff, jump to 'offset' */
 | 
			
		||||
  ISpan,  /* read a span of chars in buff */
 | 
			
		||||
  IBehind,  /* walk back 'aux' characters (fail if not possible) */
 | 
			
		||||
  IRet,  /* return from a rule */
 | 
			
		||||
  IEnd,  /* end of pattern */
 | 
			
		||||
  IChoice,  /* stack a choice; next fail will jump to 'offset' */
 | 
			
		||||
  IJmp,  /* jump to 'offset' */
 | 
			
		||||
  ICall,  /* call rule at 'offset' */
 | 
			
		||||
  IOpenCall,  /* call rule number 'key' (must be closed to a ICall) */
 | 
			
		||||
  ICommit,  /* pop choice and jump to 'offset' */
 | 
			
		||||
  IPartialCommit,  /* update top choice to current position and jump */
 | 
			
		||||
  IBackCommit,  /* "fails" but jump to its own 'offset' */
 | 
			
		||||
  IFailTwice,  /* pop one choice and then fail */
 | 
			
		||||
  IFail,  /* go back to saved state on choice and jump to saved offset */
 | 
			
		||||
  IGiveup,  /* internal use */
 | 
			
		||||
  IFullCapture,  /* complete capture of last 'off' chars */
 | 
			
		||||
  IOpenCapture,  /* start a capture */
 | 
			
		||||
  ICloseCapture,  /* close a capture (folded into a full capture when the
                     previous entry is still open and short enough) */
 | 
			
		||||
  ICloseRunTime  /* close a match-time capture: call its function and
                    process the values it returns */
 | 
			
		||||
} Opcode;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
** One slot of a compiled pattern. A slot is read as an instruction
** header ('i'), as a jump offset, or as raw charset bytes, depending on
** its position relative to the opcode that owns it.
*/
typedef union Instruction {
 | 
			
		||||
  struct Inst {
 | 
			
		||||
    byte code;  /* opcode; the interpreter casts it to 'Opcode' */
 | 
			
		||||
    byte aux;  /* small operand: char to compare, or count for IBehind */
 | 
			
		||||
    short key;  /* key of a capture (copied to the capture's 'idx') */
 | 
			
		||||
  } i;
 | 
			
		||||
  int offset;  /* jump offset -- presumably what 'getoffset' reads; confirm */
 | 
			
		||||
  byte buff[1];  /* first byte of a charset stored in following slot(s) */
 | 
			
		||||
} Instruction;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
void printpatt (Instruction *p, int n);
 | 
			
		||||
const char *match (lua_State *L, const char *o, const char *s, const char *e,
 | 
			
		||||
                   Instruction *op, Capture *capture, int ptop);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										55
									
								
								src/ext/lpeg/makefile
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										55
									
								
								src/ext/lpeg/makefile
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,55 @@
 | 
			
		||||
LIBNAME = lpeg
 | 
			
		||||
LUADIR = ../lua/
 | 
			
		||||
 | 
			
		||||
COPT = -O2
 | 
			
		||||
# COPT = -DLPEG_DEBUG -g
 | 
			
		||||
 | 
			
		||||
CWARNS = -Wall -Wextra -pedantic \
 | 
			
		||||
	-Waggregate-return \
 | 
			
		||||
	-Wcast-align \
 | 
			
		||||
	-Wcast-qual \
 | 
			
		||||
	-Wdisabled-optimization \
 | 
			
		||||
	-Wpointer-arith \
 | 
			
		||||
	-Wshadow \
 | 
			
		||||
	-Wsign-compare \
 | 
			
		||||
	-Wundef \
 | 
			
		||||
	-Wwrite-strings \
 | 
			
		||||
	-Wbad-function-cast \
 | 
			
		||||
	-Wdeclaration-after-statement \
 | 
			
		||||
	-Wmissing-prototypes \
 | 
			
		||||
	-Wnested-externs \
 | 
			
		||||
	-Wstrict-prototypes \
 | 
			
		||||
# -Wunreachable-code \
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
CFLAGS = $(CWARNS) $(COPT) -std=c99 -I$(LUADIR) -fPIC
 | 
			
		||||
CC = gcc
 | 
			
		||||
 | 
			
		||||
FILES = lpvm.o lpcap.o lptree.o lpcode.o lpprint.o
 | 
			
		||||
 | 
			
		||||
# For Linux
 | 
			
		||||
# The platform targets only choose the linker flags and then delegate to
# the lpeg.so rule. They go through $(MAKE) rather than a literal 'make'
# so the recursive build uses the same make program and inherits its flags.
linux:
	$(MAKE) lpeg.so "DLLFLAGS = -shared -fPIC"

# For Mac OS
macosx:
	$(MAKE) lpeg.so "DLLFLAGS = -bundle -undefined dynamic_lookup"
 | 
			
		||||
 | 
			
		||||
lpeg.so: $(FILES)
 | 
			
		||||
	env $(CC) $(DLLFLAGS) $(FILES) -o lpeg.so
 | 
			
		||||
 | 
			
		||||
$(FILES): makefile
 | 
			
		||||
 | 
			
		||||
test: test.lua re.lua lpeg.so
 | 
			
		||||
	./test.lua
 | 
			
		||||
 | 
			
		||||
clean:
 | 
			
		||||
	rm -f $(FILES) lpeg.so
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
lpcap.o: lpcap.c lpcap.h lptypes.h
 | 
			
		||||
lpcode.o: lpcode.c lptypes.h lpcode.h lptree.h lpvm.h lpcap.h
 | 
			
		||||
lpprint.o: lpprint.c lptypes.h lpprint.h lptree.h lpvm.h lpcap.h
 | 
			
		||||
lptree.o: lptree.c lptypes.h lpcap.h lpcode.h lptree.h lpvm.h lpprint.h
 | 
			
		||||
lpvm.o: lpvm.c lpcap.h lptypes.h lpvm.h lpprint.h lptree.h
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										498
									
								
								src/ext/lpeg/re.html
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										498
									
								
								src/ext/lpeg/re.html
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,498 @@
 | 
			
		||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
 | 
			
		||||
   "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
 | 
			
		||||
<html>
 | 
			
		||||
<head>
 | 
			
		||||
    <title>LPeg.re - Regex syntax for LPEG</title>
 | 
			
		||||
    <link rel="stylesheet"
 | 
			
		||||
          href="http://www.inf.puc-rio.br/~roberto/lpeg/doc.css"
 | 
			
		||||
          type="text/css"/>
 | 
			
		||||
	<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
 | 
			
		||||
</head>
 | 
			
		||||
<body>
 | 
			
		||||
 | 
			
		||||
<!-- $Id: re.html,v 1.24 2016/09/20 17:41:27 roberto Exp $ -->
 | 
			
		||||
 | 
			
		||||
<div id="container">
 | 
			
		||||
	
 | 
			
		||||
<div id="product">
 | 
			
		||||
  <div id="product_logo">
 | 
			
		||||
    <a href="http://www.inf.puc-rio.br/~roberto/lpeg/">
 | 
			
		||||
    <img alt="LPeg logo" src="lpeg-128.gif"/>
 | 
			
		||||
    </a>
 | 
			
		||||
  </div>
 | 
			
		||||
  <div id="product_name"><big><strong>LPeg.re</strong></big></div>
 | 
			
		||||
  <div id="product_description">
 | 
			
		||||
     Regex syntax for LPEG
 | 
			
		||||
  </div>
 | 
			
		||||
</div> <!-- id="product" -->
 | 
			
		||||
 | 
			
		||||
<div id="main">
 | 
			
		||||
	
 | 
			
		||||
<div id="navigation">
 | 
			
		||||
<h1>re</h1>
 | 
			
		||||
 | 
			
		||||
<ul>
 | 
			
		||||
  <li><a href="#basic">Basic Constructions</a></li>
 | 
			
		||||
  <li><a href="#func">Functions</a></li>
 | 
			
		||||
  <li><a href="#ex">Some Examples</a></li>
 | 
			
		||||
  <li><a href="#license">License</a></li>
 | 
			
		||||
</ul>
 | 
			
		||||
</div> <!-- id="navigation" -->
 | 
			
		||||
 | 
			
		||||
<div id="content">
 | 
			
		||||
 | 
			
		||||
<h2><a name="basic"></a>The <code>re</code> Module</h2>
 | 
			
		||||
 | 
			
		||||
<p>
 | 
			
		||||
The <code>re</code> module
 | 
			
		||||
(provided by file <code>re.lua</code> in the distribution)
 | 
			
		||||
supports a somewhat conventional regex syntax
 | 
			
		||||
for pattern usage within <a href="lpeg.html">LPeg</a>.
 | 
			
		||||
</p>
 | 
			
		||||
 | 
			
		||||
<p>
 | 
			
		||||
The next table summarizes <code>re</code>'s syntax.
 | 
			
		||||
A <code>p</code> represents an arbitrary pattern;
 | 
			
		||||
<code>num</code> represents a number (<code>[0-9]+</code>);
 | 
			
		||||
<code>name</code> represents an identifier
 | 
			
		||||
(<code>[a-zA-Z][a-zA-Z0-9_]*</code>).
 | 
			
		||||
Constructions are listed in order of decreasing precedence.
</p>
 | 
			
		||||
<table border="1">
 | 
			
		||||
<tbody><tr><td><b>Syntax</b></td><td><b>Description</b></td></tr>
 | 
			
		||||
<tr><td><code>( p )</code></td> <td>grouping</td></tr>
 | 
			
		||||
<tr><td><code>'string'</code></td> <td>literal string</td></tr>
 | 
			
		||||
<tr><td><code>"string"</code></td> <td>literal string</td></tr>
 | 
			
		||||
<tr><td><code>[class]</code></td> <td>character class</td></tr>
 | 
			
		||||
<tr><td><code>.</code></td> <td>any character</td></tr>
 | 
			
		||||
<tr><td><code>%name</code></td>
 | 
			
		||||
  <td>pattern <code>defs[name]</code> or a pre-defined pattern</td></tr>
 | 
			
		||||
<tr><td><code>name</code></td><td>non terminal</td></tr>
 | 
			
		||||
<tr><td><code>&lt;name&gt;</code></td><td>non terminal</td></tr>
 | 
			
		||||
<tr><td><code>{}</code></td> <td>position capture</td></tr>
 | 
			
		||||
<tr><td><code>{ p }</code></td> <td>simple capture</td></tr>
 | 
			
		||||
<tr><td><code>{: p :}</code></td> <td>anonymous group capture</td></tr>
 | 
			
		||||
<tr><td><code>{:name: p :}</code></td> <td>named group capture</td></tr>
 | 
			
		||||
<tr><td><code>{~ p ~}</code></td> <td>substitution capture</td></tr>
 | 
			
		||||
<tr><td><code>{| p |}</code></td> <td>table capture</td></tr>
 | 
			
		||||
<tr><td><code>=name</code></td> <td>back reference
 | 
			
		||||
</td></tr>
 | 
			
		||||
<tr><td><code>p ?</code></td> <td>optional match</td></tr>
 | 
			
		||||
<tr><td><code>p *</code></td> <td>zero or more repetitions</td></tr>
 | 
			
		||||
<tr><td><code>p +</code></td> <td>one or more repetitions</td></tr>
 | 
			
		||||
<tr><td><code>p^num</code></td> <td>exactly <code>num</code> repetitions</td></tr>
 | 
			
		||||
<tr><td><code>p^+num</code></td>
 | 
			
		||||
      <td>at least <code>num</code> repetitions</td></tr>
 | 
			
		||||
<tr><td><code>p^-num</code></td>
 | 
			
		||||
      <td>at most <code>num</code> repetitions</td></tr>
 | 
			
		||||
<tr><td><code>p -> 'string'</code></td> <td>string capture</td></tr>
 | 
			
		||||
<tr><td><code>p -> "string"</code></td> <td>string capture</td></tr>
 | 
			
		||||
<tr><td><code>p -> num</code></td> <td>numbered capture</td></tr>
 | 
			
		||||
<tr><td><code>p -> name</code></td> <td>function/query/string capture
 | 
			
		||||
equivalent to <code>p / defs[name]</code></td></tr>
 | 
			
		||||
<tr><td><code>p => name</code></td> <td>match-time capture
 | 
			
		||||
equivalent to <code>lpeg.Cmt(p, defs[name])</code></td></tr>
 | 
			
		||||
<tr><td><code>& p</code></td> <td>and predicate</td></tr>
 | 
			
		||||
<tr><td><code>! p</code></td> <td>not predicate</td></tr>
 | 
			
		||||
<tr><td><code>p1 p2</code></td> <td>concatenation</td></tr>
 | 
			
		||||
<tr><td><code>p1 / p2</code></td> <td>ordered choice</td></tr>
 | 
			
		||||
<tr><td>(<code>name <- p</code>)<sup>+</sup></td> <td>grammar</td></tr>
 | 
			
		||||
</tbody></table>
 | 
			
		||||
<p>
 | 
			
		||||
Any space appearing in a syntax description can be
 | 
			
		||||
replaced by zero or more space characters and Lua-style comments
 | 
			
		||||
(<code>--</code> until end of line).
 | 
			
		||||
</p>
 | 
			
		||||
 | 
			
		||||
<p>
 | 
			
		||||
Character classes define sets of characters.
 | 
			
		||||
An initial <code>^</code> complements the resulting set.
 | 
			
		||||
A range <em>x</em><code>-</code><em>y</em> includes in the set
 | 
			
		||||
all characters with codes between the codes of <em>x</em> and <em>y</em>.
 | 
			
		||||
A pre-defined class <code>%</code><em>name</em> includes all
 | 
			
		||||
characters of that class.
 | 
			
		||||
A simple character includes itself in the set.
 | 
			
		||||
The only special characters inside a class are <code>^</code>
 | 
			
		||||
(special only if it is the first character);
 | 
			
		||||
<code>]</code>
 | 
			
		||||
(can be included in the set as the first character,
 | 
			
		||||
after the optional <code>^</code>);
 | 
			
		||||
<code>%</code> (special only if followed by a letter);
 | 
			
		||||
and <code>-</code>
 | 
			
		||||
(can be included in the set as the first or the last character).
 | 
			
		||||
</p>
 | 
			
		||||
 | 
			
		||||
<p>
 | 
			
		||||
Currently the pre-defined classes are similar to those from
 | 
			
		||||
Lua's string library
 | 
			
		||||
(<code>%a</code> for letters,
 | 
			
		||||
<code>%A</code> for non letters, etc.).
 | 
			
		||||
There is also a class <code>%nl</code>
 | 
			
		||||
containing only the newline character,
 | 
			
		||||
which is particularly handy for grammars written inside long strings,
 | 
			
		||||
as long strings do not interpret escape sequences like <code>\n</code>.
 | 
			
		||||
</p>
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
<h2><a name="func">Functions</a></h2>
 | 
			
		||||
 | 
			
		||||
<h3><code>re.compile (string [, defs])</code></h3>
 | 
			
		||||
<p>
 | 
			
		||||
Compiles the given string and
 | 
			
		||||
returns an equivalent LPeg pattern.
 | 
			
		||||
The given string may define either an expression or a grammar.
 | 
			
		||||
The optional <code>defs</code> table provides extra Lua values
 | 
			
		||||
to be used by the pattern.
 | 
			
		||||
</p>
 | 
			
		||||
 | 
			
		||||
<h3><code>re.find (subject, pattern [, init])</code></h3>
 | 
			
		||||
<p>
 | 
			
		||||
Searches the given pattern in the given subject.
 | 
			
		||||
If it finds a match,
 | 
			
		||||
returns the index where this occurrence starts and
 | 
			
		||||
the index where it ends.
 | 
			
		||||
Otherwise, returns nil.
 | 
			
		||||
</p>
 | 
			
		||||
 | 
			
		||||
<p>
 | 
			
		||||
An optional numeric argument <code>init</code> makes the search
 | 
			
		||||
start at that position in the subject string.
 | 
			
		||||
As usual in Lua libraries,
 | 
			
		||||
a negative value counts from the end.
 | 
			
		||||
</p>
 | 
			
		||||
 | 
			
		||||
<h3><code>re.gsub (subject, pattern, replacement)</code></h3>
 | 
			
		||||
<p>
 | 
			
		||||
Does a <em>global substitution</em>,
 | 
			
		||||
replacing all occurrences of <code>pattern</code>
 | 
			
		||||
in the given <code>subject</code> by <code>replacement</code>.
</p>
 | 
			
		||||
 | 
			
		||||
<h3><code>re.match (subject, pattern)</code></h3>
 | 
			
		||||
<p>
 | 
			
		||||
Matches the given pattern against the given subject,
 | 
			
		||||
returning all captures.
 | 
			
		||||
</p>
 | 
			
		||||
 | 
			
		||||
<h3><code>re.updatelocale ()</code></h3>
 | 
			
		||||
<p>
 | 
			
		||||
Updates the pre-defined character classes to the current locale.
 | 
			
		||||
</p>
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
<h2><a name="ex">Some Examples</a></h2>
 | 
			
		||||
 | 
			
		||||
<h3>A complete simple program</h3>
 | 
			
		||||
<p>
 | 
			
		||||
The next code shows a simple complete Lua program using
 | 
			
		||||
the <code>re</code> module:
 | 
			
		||||
</p>
 | 
			
		||||
<pre class="example">
 | 
			
		||||
local re = require"re"
 | 
			
		||||
 | 
			
		||||
-- find the position of the first numeral in a string
 | 
			
		||||
print(re.find("the number 423 is odd", "[0-9]+"))  --> 12    14
 | 
			
		||||
 | 
			
		||||
-- returns all words in a string
 | 
			
		||||
print(re.match("the number 423 is odd", "({%a+} / .)*"))
 | 
			
		||||
--> the    number    is    odd
 | 
			
		||||
 | 
			
		||||
-- returns the first numeral in a string
 | 
			
		||||
print(re.match("the number 423 is odd", "s <- {%d+} / . s"))
 | 
			
		||||
--> 423
 | 
			
		||||
 | 
			
		||||
print(re.gsub("hello World", "[aeiou]", "."))
 | 
			
		||||
--> h.ll. W.rld
 | 
			
		||||
</pre>
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
<h3>Balanced parentheses</h3>
 | 
			
		||||
<p>
 | 
			
		||||
The following call will produce the same pattern produced by the
 | 
			
		||||
Lua expression in the
 | 
			
		||||
<a href="lpeg.html#balanced">balanced parentheses</a> example:
 | 
			
		||||
</p>
 | 
			
		||||
<pre class="example">
 | 
			
		||||
b = re.compile[[  balanced <- "(" ([^()] / balanced)* ")"  ]]
 | 
			
		||||
</pre>
 | 
			
		||||
 | 
			
		||||
<h3>String reversal</h3>
 | 
			
		||||
<p>
 | 
			
		||||
The next example reverses a string:
 | 
			
		||||
</p>
 | 
			
		||||
<pre class="example">
 | 
			
		||||
rev = re.compile[[ R <- (!.) -> '' / ({.} R) -> '%2%1']]
 | 
			
		||||
print(rev:match"0123456789")   --> 9876543210
 | 
			
		||||
</pre>
 | 
			
		||||
 | 
			
		||||
<h3>CSV decoder</h3>
 | 
			
		||||
<p>
 | 
			
		||||
The next example replicates the <a href="lpeg.html#CSV">CSV decoder</a>:
 | 
			
		||||
</p>
 | 
			
		||||
<pre class="example">
 | 
			
		||||
record = re.compile[[
 | 
			
		||||
  record <- {| field (',' field)* |} (%nl / !.)
 | 
			
		||||
  field <- escaped / nonescaped
 | 
			
		||||
  nonescaped <- { [^,"%nl]* }
 | 
			
		||||
  escaped <- '"' {~ ([^"] / '""' -> '"')* ~} '"'
 | 
			
		||||
]]
 | 
			
		||||
</pre>
 | 
			
		||||
 | 
			
		||||
<h3>Lua's long strings</h3>
 | 
			
		||||
<p>
 | 
			
		||||
The next example matches Lua long strings:
 | 
			
		||||
</p>
 | 
			
		||||
<pre class="example">
 | 
			
		||||
c = re.compile([[
 | 
			
		||||
  longstring <- ('[' {:eq: '='* :} '[' close)
 | 
			
		||||
  close <- ']' =eq ']' / . close
 | 
			
		||||
]])
 | 
			
		||||
 | 
			
		||||
print(c:match'[==[]]===]]]]==]===[]')   --> 17
 | 
			
		||||
</pre>
 | 
			
		||||
 | 
			
		||||
<h3>Abstract Syntax Trees</h3>
 | 
			
		||||
<p>
 | 
			
		||||
This example shows a simple way to build an
 | 
			
		||||
abstract syntax tree (AST) for a given grammar.
 | 
			
		||||
To keep our example simple,
 | 
			
		||||
let us consider the following grammar
 | 
			
		||||
for lists of names:
 | 
			
		||||
</p>
 | 
			
		||||
<pre class="example">
 | 
			
		||||
p = re.compile[[
 | 
			
		||||
      listname <- (name s)*
 | 
			
		||||
      name <- [a-z][a-z]*
 | 
			
		||||
      s <- %s*
 | 
			
		||||
]]
 | 
			
		||||
</pre>
 | 
			
		||||
<p>
 | 
			
		||||
Now, we will add captures to build a corresponding AST.
 | 
			
		||||
As a first step, the pattern will build a table to
 | 
			
		||||
represent each non terminal;
 | 
			
		||||
terminals will be represented by their corresponding strings:
 | 
			
		||||
</p>
 | 
			
		||||
<pre class="example">
 | 
			
		||||
c = re.compile[[
 | 
			
		||||
      listname <- {| (name s)* |}
 | 
			
		||||
      name <- {| {[a-z][a-z]*} |}
 | 
			
		||||
      s <- %s*
 | 
			
		||||
]]
 | 
			
		||||
</pre>
 | 
			
		||||
<p>
 | 
			
		||||
Now, a match against <code>"hi hello bye"</code>
 | 
			
		||||
results in the table
 | 
			
		||||
<code>{{"hi"}, {"hello"}, {"bye"}}</code>.
 | 
			
		||||
</p>
 | 
			
		||||
<p>
 | 
			
		||||
For such a simple grammar,
 | 
			
		||||
this AST is more than enough;
 | 
			
		||||
actually, the tables around each single name
 | 
			
		||||
are already overkill.
 | 
			
		||||
More complex grammars,
 | 
			
		||||
however, may need some more structure.
 | 
			
		||||
Specifically,
 | 
			
		||||
it would be useful if each table had
 | 
			
		||||
a <code>tag</code> field telling what non terminal
 | 
			
		||||
that table represents.
 | 
			
		||||
We can add such a tag using
 | 
			
		||||
<a href="lpeg.html#cap-g">named group captures</a>:
 | 
			
		||||
</p>
 | 
			
		||||
<pre class="example">
 | 
			
		||||
x = re.compile[[
 | 
			
		||||
      listname <- {| {:tag: '' -> 'list':} (name s)* |}
 | 
			
		||||
      name <- {| {:tag: '' -> 'id':} {[a-z][a-z]*} |}
 | 
			
		||||
      s <- ' '*
 | 
			
		||||
]]
 | 
			
		||||
</pre>
 | 
			
		||||
<p>
 | 
			
		||||
With these group captures,
 | 
			
		||||
a match against <code>"hi hello bye"</code>
 | 
			
		||||
results in the following table:
 | 
			
		||||
</p>
 | 
			
		||||
<pre class="example">
 | 
			
		||||
{tag="list",
 | 
			
		||||
  {tag="id", "hi"},
 | 
			
		||||
  {tag="id", "hello"},
 | 
			
		||||
  {tag="id", "bye"}
 | 
			
		||||
}
 | 
			
		||||
</pre>
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
<h3>Indented blocks</h3>
 | 
			
		||||
<p>
 | 
			
		||||
This example breaks indented blocks into tables,
 | 
			
		||||
respecting the indentation:
 | 
			
		||||
</p>
 | 
			
		||||
<pre class="example">
 | 
			
		||||
p = re.compile[[
 | 
			
		||||
  block <- {| {:ident:' '*:} line
 | 
			
		||||
           ((=ident !' ' line) / &(=ident ' ') block)* |}
 | 
			
		||||
  line <- {[^%nl]*} %nl
 | 
			
		||||
]]
 | 
			
		||||
</pre>
 | 
			
		||||
<p>
 | 
			
		||||
As an example,
 | 
			
		||||
consider the following text:
 | 
			
		||||
</p>
 | 
			
		||||
<pre class="example">
 | 
			
		||||
t = p:match[[
 | 
			
		||||
first line
 | 
			
		||||
  subline 1
 | 
			
		||||
  subline 2
 | 
			
		||||
second line
 | 
			
		||||
third line
 | 
			
		||||
  subline 3.1
 | 
			
		||||
    subline 3.1.1
 | 
			
		||||
  subline 3.2
 | 
			
		||||
]]
 | 
			
		||||
</pre>
 | 
			
		||||
<p>
 | 
			
		||||
The resulting table <code>t</code> will be like this:
 | 
			
		||||
</p>
 | 
			
		||||
<pre class="example">
 | 
			
		||||
   {'first line'; {'subline 1'; 'subline 2'; ident = '  '};
 | 
			
		||||
    'second line';
 | 
			
		||||
    'third line'; { 'subline 3.1'; {'subline 3.1.1'; ident = '    '};
 | 
			
		||||
                    'subline 3.2'; ident = '  '};
 | 
			
		||||
    ident = ''}
 | 
			
		||||
</pre>
 | 
			
		||||
 | 
			
		||||
<h3>Macro expander</h3>
 | 
			
		||||
<p>
 | 
			
		||||
This example implements a simple macro expander.
 | 
			
		||||
Macros must be defined as part of the pattern,
 | 
			
		||||
following some simple rules:
 | 
			
		||||
</p>
 | 
			
		||||
<pre class="example">
 | 
			
		||||
p = re.compile[[
 | 
			
		||||
      text <- {~ item* ~}
 | 
			
		||||
      item <- macro / [^()] / '(' item* ')'
 | 
			
		||||
      arg <- ' '* {~ (!',' item)* ~}
 | 
			
		||||
      args <- '(' arg (',' arg)* ')'
 | 
			
		||||
      -- now we define some macros
 | 
			
		||||
      macro <- ('apply' args) -> '%1(%2)'
 | 
			
		||||
             / ('add' args) -> '%1 + %2'
 | 
			
		||||
             / ('mul' args) -> '%1 * %2'
 | 
			
		||||
]]
 | 
			
		||||
 | 
			
		||||
print(p:match"add(mul(a,b), apply(f,x))")   --> a * b + f(x)
 | 
			
		||||
</pre>
 | 
			
		||||
<p>
 | 
			
		||||
A <code>text</code> is a sequence of items,
 | 
			
		||||
wherein we apply a substitution capture to expand any macros.
 | 
			
		||||
An <code>item</code> is either a macro,
 | 
			
		||||
any character different from parentheses,
 | 
			
		||||
or a parenthesized expression.
 | 
			
		||||
A macro argument (<code>arg</code>) is a sequence
 | 
			
		||||
of items different from a comma.
 | 
			
		||||
(Note that a comma may appear inside an item,
 | 
			
		||||
e.g., inside a parenthesized expression.)
 | 
			
		||||
Again we do a substitution capture to expand any macro
 | 
			
		||||
in the argument before expanding the outer macro.
 | 
			
		||||
<code>args</code> is a list of arguments separated by commas.
 | 
			
		||||
Finally we define the macros.
 | 
			
		||||
Each macro is a string substitution;
 | 
			
		||||
it replaces the macro name and its arguments by its corresponding string,
 | 
			
		||||
with each <code>%</code><em>n</em> replaced by the <em>n</em>-th argument.
 | 
			
		||||
</p>
 | 
			
		||||
 | 
			
		||||
<h3>Patterns</h3>
 | 
			
		||||
<p>
 | 
			
		||||
This example shows the complete syntax
 | 
			
		||||
of patterns accepted by <code>re</code>.
 | 
			
		||||
</p>
 | 
			
		||||
<pre class="example">
 | 
			
		||||
p = [=[
 | 
			
		||||
 | 
			
		||||
pattern         <- exp !.
 | 
			
		||||
exp             <- S (grammar / alternative)
 | 
			
		||||
 | 
			
		||||
alternative     <- seq ('/' S seq)*
 | 
			
		||||
seq             <- prefix*
 | 
			
		||||
prefix          <- '&' S prefix / '!' S prefix / suffix
 | 
			
		||||
suffix          <- primary S (([+*?]
 | 
			
		||||
                            / '^' [+-]? num
 | 
			
		||||
                            / '->' S (string / '{}' / name)
 | 
			
		||||
                            / '=>' S name) S)*
 | 
			
		||||
 | 
			
		||||
primary         <- '(' exp ')' / string / class / defined
 | 
			
		||||
                 / '{:' (name ':')? exp ':}'
 | 
			
		||||
                 / '=' name
 | 
			
		||||
                 / '{}'
 | 
			
		||||
                 / '{~' exp '~}'
 | 
			
		||||
                 / '{' exp '}'
 | 
			
		||||
                 / '.'
 | 
			
		||||
                 / name S !arrow
 | 
			
		||||
                 / '<' name '>'          -- old-style non terminals
 | 
			
		||||
 | 
			
		||||
grammar         <- definition+
 | 
			
		||||
definition      <- name S arrow exp
 | 
			
		||||
 | 
			
		||||
class           <- '[' '^'? item (!']' item)* ']'
 | 
			
		||||
item            <- defined / range / .
 | 
			
		||||
range           <- . '-' [^]]
 | 
			
		||||
 | 
			
		||||
S               <- (%s / '--' [^%nl]*)*   -- spaces and comments
 | 
			
		||||
name            <- [A-Za-z_][A-Za-z0-9_]*
 | 
			
		||||
arrow           <- '<-'
 | 
			
		||||
num             <- [0-9]+
 | 
			
		||||
string          <- '"' [^"]* '"' / "'" [^']* "'"
 | 
			
		||||
defined         <- '%' name
 | 
			
		||||
 | 
			
		||||
]=]
 | 
			
		||||
 | 
			
		||||
print(re.match(p, p))   -- a self description must match itself
 | 
			
		||||
</pre>
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
<h2><a name="license">License</a></h2>
 | 
			
		||||
 | 
			
		||||
<p>
 | 
			
		||||
Copyright © 2008-2015 Lua.org, PUC-Rio.
 | 
			
		||||
</p>
 | 
			
		||||
<p>
 | 
			
		||||
Permission is hereby granted, free of charge,
 | 
			
		||||
to any person obtaining a copy of this software and
 | 
			
		||||
associated documentation files (the "Software"),
 | 
			
		||||
to deal in the Software without restriction,
 | 
			
		||||
including without limitation the rights to use,
 | 
			
		||||
copy, modify, merge, publish, distribute, sublicense,
 | 
			
		||||
and/or sell copies of the Software,
 | 
			
		||||
and to permit persons to whom the Software is
 | 
			
		||||
furnished to do so,
 | 
			
		||||
subject to the following conditions:
 | 
			
		||||
</p>
 | 
			
		||||
 | 
			
		||||
<p>
 | 
			
		||||
The above copyright notice and this permission notice
 | 
			
		||||
shall be included in all copies or substantial portions of the Software.
 | 
			
		||||
</p>
 | 
			
		||||
 | 
			
		||||
<p>
 | 
			
		||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 | 
			
		||||
EXPRESS OR IMPLIED,
 | 
			
		||||
INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 | 
			
		||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 | 
			
		||||
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
 | 
			
		||||
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 | 
			
		||||
TORT OR OTHERWISE, ARISING FROM,
 | 
			
		||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 | 
			
		||||
THE SOFTWARE.
 | 
			
		||||
</p>
 | 
			
		||||
 | 
			
		||||
</div> <!-- id="content" -->
 | 
			
		||||
 | 
			
		||||
</div> <!-- id="main" -->
 | 
			
		||||
 | 
			
		||||
<div id="about">
 | 
			
		||||
<p><small>
 | 
			
		||||
$Id: re.html,v 1.24 2016/09/20 17:41:27 roberto Exp $
 | 
			
		||||
</small></p>
 | 
			
		||||
</div> <!-- id="about" -->
 | 
			
		||||
 | 
			
		||||
</div> <!-- id="container" -->
 | 
			
		||||
 | 
			
		||||
</body>
 | 
			
		||||
</html> 
 | 
			
		||||
							
								
								
									
										259
									
								
								src/ext/lpeg/re.lua
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										259
									
								
								src/ext/lpeg/re.lua
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,259 @@
 | 
			
		||||
-- $Id: re.lua,v 1.44 2013/03/26 20:11:40 roberto Exp $
 | 
			
		||||
 | 
			
		||||
-- imported functions and modules
 | 
			
		||||
local tonumber, type, print, error = tonumber, type, print, error
 | 
			
		||||
local setmetatable = setmetatable
 | 
			
		||||
local m = require"lpeg"
 | 
			
		||||
 | 
			
		||||
-- 'm' will be used to parse expressions, and 'mm' will be used to
 | 
			
		||||
-- create expressions; that is, 're' runs on 'm', creating patterns
 | 
			
		||||
-- on 'mm'
 | 
			
		||||
local mm = m
 | 
			
		||||
 | 
			
		||||
-- pattern's metatable
 | 
			
		||||
local mt = getmetatable(mm.P(0))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
-- No more global accesses after this point
 | 
			
		||||
local version = _VERSION
 | 
			
		||||
if version == "Lua 5.2" then _ENV = nil end
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
local any = m.P(1)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
-- Pre-defined names
 | 
			
		||||
local Predef = { nl = m.P"\n" }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
local mem
 | 
			
		||||
local fmem
 | 
			
		||||
local gmem
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
-- (Re)builds the predefined character classes and resets the caches.
-- 'mm.locale' fills 'Predef' with the long class names; each class then
-- gets a one-letter alias, and the upper-case letter names its complement.
-- The memoization tables used by 'match', 'find' and 'gsub' are replaced
-- by fresh tables with weak values, so patterns compiled against the
-- previous classes are dropped.
local function updatelocale ()
  mm.locale(Predef)
  -- { alias, complement alias, long name }
  local aliases = {
    {"a", "A", "alpha"},
    {"c", "C", "cntrl"},
    {"d", "D", "digit"},
    {"g", "G", "graph"},
    {"l", "L", "lower"},
    {"p", "P", "punct"},
    {"s", "S", "space"},
    {"u", "U", "upper"},
    {"w", "W", "alnum"},
    {"x", "X", "xdigit"},
  }
  for k = 1, #aliases do
    local entry = aliases[k]
    local class = Predef[entry[3]]
    Predef[entry[1]] = class
    Predef[entry[2]] = any - class
  end
  -- restart memoization
  local weak = {__mode = "v"}
  mem = setmetatable({}, weak)
  fmem = setmetatable({}, weak)
  gmem = setmetatable({}, weak)
end
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
updatelocale()
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
-- debugging aid: a function pattern that prints the current position and
-- the part of the subject before it, then returns the same position
-- (so it consumes no input); not referenced by the grammar below
local I = m.P(function (s,i) print(i, s:sub(1, i-1)); return i end)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
-- Looks up the user-supplied definition 'id' in the table 'defs'.
-- Returns the stored value; raises "undefined name: <id>" when there is
-- no table or the entry is missing (or false).
local function getdef (id, defs)
  local value = nil
  if defs then value = defs[id] end
  if value then return value end
  error("undefined name: " .. id)
end
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
-- Raises a "pattern error near '...'" error quoting the subject from
-- position 'i' on.  It takes the (subject, position) arguments of a
-- function pattern and is used that way as the last alternative in the
-- grammar.
--   s: the pattern source being parsed
--   i: position where parsing got stuck
-- At most 21 characters are quoted; "..." is appended only when text was
-- actually cut off (the original test also appended it when exactly 21
-- characters remained).  Level 2 attributes the error to the caller.
local function patt_error (s, i)
  local near = s:sub(i, i + 20)
  if #s > i + 20 then near = near .. "..." end
  error(("pattern error near '%s'"):format(near), 2)
end
 | 
			
		||||
 | 
			
		||||
-- Builds the concatenation of 'n' copies of pattern 'p' (used for the
-- '^num' suffix) by binary exponentiation.
--   p: an lpeg pattern
--   n: non-negative repetition count
-- 'n' is halved with plain division, so it may become fractional; the
-- '>= 1' comparisons ignore the fractional part.  'p' is squared only
-- while another round is still needed, which avoids building a final,
-- unused pattern up to twice as large as the result.
local function mult (p, n)
  local np = mm.P(true)
  while n >= 1 do
    if n%2 >= 1 then np = np * p end
    n = n/2
    if n >= 1 then p = p * p end
  end
  return np
end
 | 
			
		||||
 | 
			
		||||
-- Match-time check used by back references ('=name'): succeeds when the
-- subject 's' contains the string 'c' starting at position 'i'.
-- Returns the position just after the matched text, or nil when 'c' is
-- not a string or the text differs.
local function equalcap (s, i, c)
  if type(c) == "string" then
    local stop = i + #c
    if s:sub(i, stop - 1) == c then return stop end
  end
  return nil
end
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
-- skips any run of space characters and '--' comments (a comment extends
-- up to, but not including, the next newline)
local S = (Predef.space + "--" * (any - Predef.nl)^0)^0
 | 
			
		||||
 | 
			
		||||
-- identifier: a letter or underscore followed by letters, underscores or
-- digits (no capture yet; it is wrapped in m.C further down)
local name = m.R("AZ", "az", "__") * m.R("AZ", "az", "__", "09")^0
 | 
			
		||||
 | 
			
		||||
local arrow = S * "<-"
 | 
			
		||||
 | 
			
		||||
-- what may come right after a sequence: '/', a closing delimiter, the
-- start of a new rule ('name <-'), or end of input (-1); 'Seq' reports a
-- pattern error when none of these follows
local seq_follow = m.P"/" + ")" + "}" + ":}" + "~}" + "|}" + (name * arrow) + -1
 | 
			
		||||
 | 
			
		||||
name = m.C(name)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
-- a defined name only has meaning in a given environment
 | 
			
		||||
local Def = name * m.Carg(1)
 | 
			
		||||
 | 
			
		||||
local num = m.C(m.R"09"^1) * S / tonumber
 | 
			
		||||
 | 
			
		||||
-- string literal in either quote style; captures the text between the
-- quotes (there are no escape sequences)
local squoted = "'" * m.C((any - "'")^0) * "'"
local dquoted = '"' * m.C((any - '"')^0) * '"'
local String = squoted + dquoted
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
-- Resolves the name 'c' of a '%name' reference: the user table 'Defs'
-- (when given) takes precedence over the predefined classes.  Raises an
-- error when the name is found in neither.
local function lookupdefined (c, Defs)
  local cat = Defs and Defs[c]
  if not cat then cat = Predef[c] end
  if not cat then error ("name '" .. c .. "' undefined") end
  return cat
end

-- '%name': replaced by whatever the name resolves to
local defined = "%" * Def / lookupdefined
 | 
			
		||||
 | 
			
		||||
-- character range 'x-y' inside a class: the substitution capture removes
-- the '-', so mm.R receives the two-character string "xy"; the upper
-- bound cannot be ']'
local Range = m.Cs(any * (m.P"-"/"") * (any - "]")) / mm.R
 | 
			
		||||
 | 
			
		||||
local item = defined + Range + m.C(any)
 | 
			
		||||
 | 
			
		||||
-- Finishes a character class: 'c' is the text captured for the optional
-- complement symbol ("^" or ""), 'p' the union of the class items.
local function makeclass (c, p)
  if c == "^" then return any - p end
  return p
end

-- '[' '^'? item+ ']': the items are folded together with '+'
local Class =
    "["
  * m.C(m.P"^"^-1)
  * m.Cf(item * (item - "]")^0, mt.__add) / makeclass
  * "]"
 | 
			
		||||
 | 
			
		||||
-- Stores rule 'k' with body 'exp' in the grammar table 't' and returns
-- 't'.  Raises an error when a rule with that name already exists.
local function adddef (t, k, exp)
  if t[k] then error("'"..k.."' already defined as a rule") end
  t[k] = exp
  return t
end
 | 
			
		||||
 | 
			
		||||
-- Starts a grammar table from its first rule: the rule name is stored at
-- index 1 and the rule body 'r' is stored under the name 'n'.
local function firstdef (n, r)
  local grammar = { n }
  return adddef(grammar, n, r)
end
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
-- Turns the rule name 'n' into a grammar variable (mm.V).  'b' is the
-- value of the "G" group, which tells whether we are inside a grammar;
-- a rule name used anywhere else is an error.
local function NT (n, b)
  if b then return mm.V(n) end
  error("rule '"..n.."' used outside a grammar")
end
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
-- fold step for suffixes: 'a' is the pattern built so far, 'b' the
-- argument of the suffix and 'f' the operation that combines them
local function applysuffix (a, b, f) return f(a, b) end

-- '{: name: p :}': group capture (anonymous when 'n' is nil)
local function groupcap (n, p) return mm.Cg(p, n) end

-- '=name': matches the same text as the previous group capture 'n'
local function backref (n) return mm.Cmt(mm.Cb(n), equalcap) end

-- grammar of 're' expressions; matching it against a pattern string
-- produces (as a capture) the corresponding lpeg pattern
local exp = m.P{ "Exp",

  -- a whole grammar, or alternatives separated by '/'
  Exp = S * ( m.V"Grammar"
            + m.Cf(m.V"Seq" * ("/" * S * m.V"Seq")^0, mt.__add) );

  -- concatenation of prefixes; must be followed by something that can
  -- legally end a sequence
  Seq = m.Cf(m.Cc(m.P"") * m.V"Prefix"^0 , mt.__mul)
        * (#seq_follow + patt_error);

  -- '&p' and '!p' predicates
  Prefix = "&" * S * m.V"Prefix" / mt.__len
         + "!" * S * m.V"Prefix" / mt.__unm
         + m.V"Suffix";

  -- a primary followed by any number of suffixes; each suffix yields an
  -- argument plus the operation to apply to it
  Suffix = m.Cf(m.V"Primary" * S *
          ( ( m.P"+" * m.Cc(1, mt.__pow)
            + m.P"*" * m.Cc(0, mt.__pow)
            + m.P"?" * m.Cc(-1, mt.__pow)
            + "^" * ( m.Cg(num * m.Cc(mult))
                    + m.Cg(m.C(m.S"+-" * m.R"09"^1) * m.Cc(mt.__pow))
                    )
            + "->" * S * ( m.Cg((String + num) * m.Cc(mt.__div))
                         + m.P"{}" * m.Cc(nil, m.Ct)
                         + m.Cg(Def / getdef * m.Cc(mt.__div))
                         )
            + "=>" * S * m.Cg(Def / getdef * m.Cc(m.Cmt))
            ) * S
          )^0, applysuffix );

  Primary = "(" * m.V"Exp" * ")"
            + String / mm.P
            + Class
            + defined
            + "{:" * (name * ":" + m.Cc(nil)) * m.V"Exp" * ":}" / groupcap
            + "=" * name / backref
            + m.P"{}" / mm.Cp
            + "{~" * m.V"Exp" * "~}" / mm.Cs
            + "{|" * m.V"Exp" * "|}" / mm.Ct
            + "{" * m.V"Exp" * "}" / mm.C
            + m.P"." * m.Cc(any)
            + (name * -arrow + "<" * name * ">") * m.Cb("G") / NT;

  Definition = name * arrow * m.V"Exp";

  -- inside a grammar the group "G" is true, which allows rule references
  Grammar = m.Cg(m.Cc(true), "G") *
            m.Cf(m.V"Definition" / firstdef * m.Cg(m.V"Definition")^0,
              adddef) / mm.P
}
 | 
			
		||||
 | 
			
		||||
-- top-level parser: skips leading space, sets the group "G" to false (so
-- NT rejects rule references outside a grammar), parses one expression,
-- converts the result with mm.P, and requires the end of the input
-- (raising a pattern error otherwise)
local pattern = S * m.Cg(m.Cc(false), "G") * exp / mm.P * (-any + patt_error)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
-- Compiles the 're' pattern string 'p' into an lpeg pattern.
--   p:    pattern source, or an lpeg pattern (returned unchanged)
--   defs: optional table, passed to the parser as its extra argument,
--         where '%name', '-> name' and '=> name' are looked up
-- Raises "incorrect pattern" (error level 3) when the parser yields
-- nothing.
local function compile (p, defs)
  if mm.type(p) == "pattern" then return p end   -- already compiled
  local compiled = pattern:match(p, 1, defs)
  if compiled then return compiled end
  error("incorrect pattern", 3)
end
 | 
			
		||||
 | 
			
		||||
-- Matches subject 's' against pattern 'p', starting at position 'i'
-- (default 1), and returns whatever the lpeg match returns.  Compiled
-- patterns are cached in 'mem', keyed by 'p'.
local function match (s, p, i)
  local compiled = mem[p]
  if not compiled then
    compiled = compile(p)
    mem[p] = compiled
  end
  if not i then i = 1 end
  return compiled:match(s, i)
end
 | 
			
		||||
 | 
			
		||||
-- Searches subject 's' for the first occurrence of pattern 'p', starting
-- at position 'i' (default 1).  Returns the first and last positions of
-- the match, or the failed match result (a false value) when there is
-- none.  The search patterns are cached in 'fmem', keyed by 'p'.
local function find (s, p, i)
  local finder = fmem[p]
  if not finder then
    -- '/ 0' drops the captures of 'p'; only the two positions remain
    local body = compile(p) / 0
    -- try at the current position, otherwise skip one character and retry
    finder = mm.P{ mm.Cp() * body * mm.Cp() + 1 * mm.V(1) }
    fmem[p] = finder
  end
  local first, after = finder:match(s, i or 1)
  if not first then return first end
  return first, after - 1
end
 | 
			
		||||
 | 
			
		||||
-- Returns a copy of 's' in which every match of pattern 'p' is replaced
-- according to 'rep' (applied to the pattern with '/').  The
-- substitution patterns are cached per pattern and per replacement.
local function gsub (s, p, rep)
  -- holding the inner table in a local keeps it from being collected
  -- out of the weak table 'gmem' while we are here
  local byrep = gmem[p]
  if not byrep then byrep = {} end
  gmem[p] = byrep
  local subst = byrep[rep]
  if not subst then
    subst = mm.Cs((compile(p) / rep + 1)^0)
    byrep[rep] = subst
  end
  return subst:match(s)
end
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
-- exported names
 | 
			
		||||
local re = {
 | 
			
		||||
  compile = compile,
 | 
			
		||||
  match = match,
 | 
			
		||||
  find = find,
 | 
			
		||||
  gsub = gsub,
 | 
			
		||||
  updatelocale = updatelocale,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
if version == "Lua 5.1" then _G.re = re end
 | 
			
		||||
 | 
			
		||||
return re
 | 
			
		||||
							
								
								
									
										1503
									
								
								src/ext/lpeg/test.lua
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										1503
									
								
								src/ext/lpeg/test.lua
									
									
									
									
									
										Executable file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
		Reference in New Issue
	
	Block a user