jscript: Cleanup regular expressions engine API.

This commit is contained in:
Piotr Caban 2013-02-19 10:12:12 +01:00 committed by Alexandre Julliard
parent 4c6a1d1d92
commit 67c7cc5517
3 changed files with 128 additions and 109 deletions

View file

@ -28,7 +28,7 @@ WINE_DEFAULT_DEBUG_CHANNEL(jscript);
typedef struct {
jsdisp_t dispex;
JSRegExp *jsregexp;
regexp_t *jsregexp;
jsstr_t *str;
INT last_index;
jsval_t last_index_val;
@ -77,12 +77,15 @@ static HRESULT do_regexp_match_next(script_ctx_t *ctx, RegExpInstance *regexp, D
jsstr_t *str, const WCHAR **cp, match_result_t **parens, DWORD *parens_size,
DWORD *parens_cnt, match_result_t *ret)
{
REMatchState *result;
DWORD matchlen;
match_state_t *result;
HRESULT hres;
hres = MatchRegExpNext(regexp->jsregexp, str->str, jsstr_length(str),
cp, &ctx->tmp_heap, &result, &matchlen);
result = alloc_match_state(regexp->jsregexp, &ctx->tmp_heap, *cp);
if(!result)
return E_OUTOFMEMORY;
hres = regexp_execute(regexp->jsregexp, ctx, &ctx->tmp_heap,
str->str, jsstr_length(str), result);
if(FAILED(hres))
return hres;
if(hres == S_FALSE) {
@ -90,19 +93,20 @@ static HRESULT do_regexp_match_next(script_ctx_t *ctx, RegExpInstance *regexp, D
set_last_index(regexp, 0);
return S_FALSE;
}
*cp = result->cp;
if(parens) {
if(regexp->jsregexp->parenCount > *parens_size) {
if(result->paren_count > *parens_size) {
match_result_t *new_parens;
if(*parens)
new_parens = heap_realloc(*parens, sizeof(match_result_t)*regexp->jsregexp->parenCount);
new_parens = heap_realloc(*parens, sizeof(match_result_t)*result->paren_count);
else
new_parens = heap_alloc(sizeof(match_result_t)*regexp->jsregexp->parenCount);
new_parens = heap_alloc(sizeof(match_result_t)*result->paren_count);
if(!new_parens)
return E_OUTOFMEMORY;
*parens_size = regexp->jsregexp->parenCount;
*parens_size = result->paren_count;
*parens = new_parens;
}
}
@ -115,9 +119,9 @@ static HRESULT do_regexp_match_next(script_ctx_t *ctx, RegExpInstance *regexp, D
if(parens) {
DWORD i;
*parens_cnt = regexp->jsregexp->parenCount;
*parens_cnt = result->paren_count;
for(i=0; i < regexp->jsregexp->parenCount; i++) {
for(i=0; i < result->paren_count; i++) {
if(result->parens[i].index == -1) {
(*parens)[i].str = NULL;
(*parens)[i].len = 0;
@ -129,7 +133,7 @@ static HRESULT do_regexp_match_next(script_ctx_t *ctx, RegExpInstance *regexp, D
}
if(!(rem_flags & REM_NO_CTX_UPDATE)) {
DWORD i, n = min(sizeof(ctx->match_parens)/sizeof(ctx->match_parens[0]), regexp->jsregexp->parenCount);
DWORD i, n = min(sizeof(ctx->match_parens)/sizeof(ctx->match_parens[0]), result->paren_count);
for(i=0; i < n; i++) {
if(result->parens[i].index == -1) {
@ -145,13 +149,13 @@ static HRESULT do_regexp_match_next(script_ctx_t *ctx, RegExpInstance *regexp, D
memset(ctx->match_parens+n, 0, sizeof(ctx->match_parens) - n*sizeof(ctx->match_parens[0]));
}
ret->str = result->cp-matchlen;
ret->len = matchlen;
ret->str = result->cp - result->match_len;
ret->len = result->match_len;
set_last_index(regexp, result->cp-str->str);
if(!(rem_flags & REM_NO_CTX_UPDATE)) {
ctx->last_match_index = ret->str-str->str;
ctx->last_match_length = matchlen;
ctx->last_match_length = result->match_len;
}
return S_OK;
@ -518,7 +522,7 @@ static void RegExp_destructor(jsdisp_t *dispex)
RegExpInstance *This = (RegExpInstance*)dispex;
if(This->jsregexp)
js_DestroyRegExp(This->jsregexp);
regexp_destroy(This->jsregexp);
jsval_release(This->last_index_val);
jsstr_release(This->str);
heap_free(This);
@ -598,10 +602,10 @@ HRESULT create_regexp(script_ctx_t *ctx, jsstr_t *src, DWORD flags, jsdisp_t **r
regexp->str = jsstr_addref(src);
regexp->last_index_val = jsval_number(0);
regexp->jsregexp = js_NewRegExp(ctx, &ctx->tmp_heap, regexp->str->str,
regexp->jsregexp = regexp_new(ctx, &ctx->tmp_heap, regexp->str->str,
jsstr_length(regexp->str), flags, FALSE);
if(!regexp->jsregexp) {
WARN("js_NewRegExp failed\n");
if(FAILED(hres)) {
WARN("regexp_new failed\n");
jsdisp_release(&regexp->dispex);
return E_FAIL;
}

View file

@ -49,6 +49,30 @@ WINE_DEFAULT_DEBUG_CHANNEL(jscript);
#define JS_ReportOutOfMemory(a)
#define JS_COUNT_OPERATION(a,b)
typedef BYTE JSPackedBool;
/*
 * This struct holds a bitmap representation of a class from a regexp.
 * There's a list of these referenced by the classList field in the regexp_t
 * struct. The initial state has startIndex set to the offset in the
 * original regexp source of the beginning of the class contents. The first
 * use of the class converts the source representation into a bitmap.
 */
typedef struct RECharSet {
/* Clear while the class is still in its source form; InitMatch calls
 * ProcessCharSet on every classList entry where this is clear. */
JSPackedBool converted;
/* NOTE(review): presumably tells a plain class from a negated one - confirm. */
JSPackedBool sense;
/* NOTE(review): presumably the extent covered by the bitmap - confirm units. */
WORD length;
union {
/* Bitmap form of the class. */
BYTE *bits;
/* Source form: startIndex is the offset of the class contents in the
 * regexp source. NOTE(review): length is presumably the size of that
 * span - confirm. */
struct {
size_t startIndex;
size_t length;
} src;
} u;
} RECharSet;
#define JSMSG_MIN_TOO_BIG 47
#define JSMSG_MAX_TOO_BIG 48
#define JSMSG_OUT_OF_ORDER 49
@ -209,7 +233,7 @@ typedef struct REBackTrackData {
typedef struct REGlobalData {
void *cx;
JSRegExp *regexp; /* the RE in execution */
regexp_t *regexp; /* the RE in execution */
BOOL ok; /* runtime error (out_of_memory only?) */
size_t start; /* offset to start at */
ptrdiff_t skipped; /* chars skipped anchoring this r.e. */
@ -285,7 +309,7 @@ typedef struct CompilerState {
} classCache[CLASS_CACHE_SIZE];
WORD flags;
heap_pool_t *pool; /* It's faster to use one malloc'd pool
heap_pool_t *pool; /* It's faster to use one malloc'd pool
than to malloc/free */
} CompilerState;
@ -448,7 +472,7 @@ SetForwardJumpOffset(jsbytecode *jump, jsbytecode *target)
* of recursion.
*/
static jsbytecode *
EmitREBytecode(CompilerState *state, JSRegExp *re, size_t treeDepth,
EmitREBytecode(CompilerState *state, regexp_t *re, size_t treeDepth,
jsbytecode *pc, RENode *t)
{
EmitStateStackEntry *emitStateSP, *emitStateStack;
@ -1896,7 +1920,7 @@ out:
*/
static REBackTrackData *
PushBackTrackState(REGlobalData *gData, REOp op,
jsbytecode *target, REMatchState *x, const WCHAR *cp,
jsbytecode *target, match_state_t *x, const WCHAR *cp,
size_t parenIndex, size_t parenCount)
{
size_t i;
@ -1955,8 +1979,8 @@ PushBackTrackState(REGlobalData *gData, REOp op,
return result;
}
static inline REMatchState *
FlatNIMatcher(REGlobalData *gData, REMatchState *x, const WCHAR *matchChars,
static inline match_state_t *
FlatNIMatcher(REGlobalData *gData, match_state_t *x, const WCHAR *matchChars,
size_t length)
{
size_t i;
@ -1994,8 +2018,8 @@ FlatNIMatcher(REGlobalData *gData, REMatchState *x, const WCHAR *matchChars,
* 9. Let y be the State (f, cap).
* 10. Call c(y) and return its result.
*/
static REMatchState *
BackrefMatcher(REGlobalData *gData, REMatchState *x, size_t parenIndex)
static match_state_t *
BackrefMatcher(REGlobalData *gData, match_state_t *x, size_t parenIndex)
{
size_t len, i;
const WCHAR *parenContent;
@ -2300,11 +2324,11 @@ ReallocStateStack(REGlobalData *gData)
* true, then update the current state's cp. Always update startpc to the next
* op.
*/
static inline REMatchState *
SimpleMatch(REGlobalData *gData, REMatchState *x, REOp op,
static inline match_state_t *
SimpleMatch(REGlobalData *gData, match_state_t *x, REOp op,
jsbytecode **startpc, BOOL updatecp)
{
REMatchState *result = NULL;
match_state_t *result = NULL;
WCHAR matchCh;
size_t parenIndex;
size_t offset, length, index;
@ -2508,10 +2532,10 @@ SimpleMatch(REGlobalData *gData, REMatchState *x, REOp op,
return NULL;
}
static inline REMatchState *
ExecuteREBytecode(REGlobalData *gData, REMatchState *x)
static inline match_state_t *
ExecuteREBytecode(REGlobalData *gData, match_state_t *x)
{
REMatchState *result = NULL;
match_state_t *result = NULL;
REBackTrackData *backTrackData;
jsbytecode *nextpc, *testpc;
REOp nextop;
@ -2919,7 +2943,7 @@ ExecuteREBytecode(REGlobalData *gData, REMatchState *x)
TRACE("{%d,%d}\n", curState->u.quantifier.min, curState->u.quantifier.max);
#define PREPARE_REPEAT() \
do { \
curState->index = x->cp - gData->cpbegin; \
curState->index = x->cp - gData->cpbegin; \
curState->continue_op = REOP_MINIMALREPEAT; \
curState->continue_pc = pc; \
pc += ARG_LEN; \
@ -3046,9 +3070,9 @@ good:
return x;
}
static REMatchState *MatchRegExp(REGlobalData *gData, REMatchState *x)
static match_state_t *MatchRegExp(REGlobalData *gData, match_state_t *x)
{
REMatchState *result;
match_state_t *result;
const WCHAR *cp = x->cp;
const WCHAR *cp2;
UINT j;
@ -3073,11 +3097,8 @@ static REMatchState *MatchRegExp(REGlobalData *gData, REMatchState *x)
return NULL;
}
#define MIN_BACKTRACK_LIMIT 400000
static REMatchState *InitMatch(script_ctx_t *cx, REGlobalData *gData, JSRegExp *re, size_t length)
static HRESULT InitMatch(regexp_t *re, void *cx, heap_pool_t *pool, REGlobalData *gData)
{
REMatchState *result;
UINT i;
gData->backTrackStackSize = INITIAL_BACKTRACK;
@ -3097,65 +3118,67 @@ static REMatchState *InitMatch(script_ctx_t *cx, REGlobalData *gData, JSRegExp *
gData->stateStackTop = 0;
gData->cx = cx;
gData->pool = pool;
gData->regexp = re;
gData->ok = TRUE;
result = heap_pool_alloc(gData->pool, offsetof(REMatchState, parens) + re->parenCount * sizeof(RECapture));
if (!result)
goto bad;
for (i = 0; i < re->classCount; i++) {
if (!re->classList[i].converted &&
!ProcessCharSet(gData, &re->classList[i])) {
return NULL;
!ProcessCharSet(gData, &re->classList[i])) {
return E_FAIL;
}
}
return result;
return S_OK;
bad:
js_ReportOutOfScriptQuota(cx);
gData->ok = FALSE;
return NULL;
return E_OUTOFMEMORY;
}
HRESULT MatchRegExpNext(JSRegExp *jsregexp, const WCHAR *str, DWORD str_len,
const WCHAR **cp, heap_pool_t *pool, REMatchState **result, DWORD *matchlen)
HRESULT regexp_execute(regexp_t *regexp, void *cx, heap_pool_t *pool,
const WCHAR *str, DWORD str_len, match_state_t *result)
{
REMatchState *x, *res;
match_state_t *res;
REGlobalData gData;
heap_pool_t *mark = heap_pool_mark(pool);
const WCHAR *str_beg = result->cp;
HRESULT hres;
assert(result->cp != NULL);
gData.cpbegin = str;
gData.cpend = str+str_len;
gData.start = *cp-str;
gData.start = result->cp-str;
gData.skipped = 0;
gData.pool = pool;
x = InitMatch(NULL, &gData, jsregexp, gData.cpend - gData.cpbegin);
if(!x) {
hres = InitMatch(regexp, cx, pool, &gData);
if(FAILED(hres)) {
WARN("InitMatch failed\n");
return E_FAIL;
heap_pool_clear(mark);
return hres;
}
x->cp = *cp;
res = MatchRegExp(&gData, x);
res = MatchRegExp(&gData, result);
heap_pool_clear(mark);
if(!gData.ok) {
WARN("MatchRegExp failed\n");
return E_FAIL;
}
*result = res;
if(!res) {
*matchlen = 0;
result->match_len = 0;
return S_FALSE;
}
*matchlen = (res->cp-*cp) - gData.skipped;
*cp = res->cp;
result->match_len = (result->cp-str_beg) - gData.skipped;
result->paren_count = regexp->parenCount;
return S_OK;
}
void js_DestroyRegExp(JSRegExp *re)
void regexp_destroy(regexp_t *re)
{
if (re->classList) {
UINT i;
@ -3169,9 +3192,10 @@ void js_DestroyRegExp(JSRegExp *re)
heap_free(re);
}
JSRegExp* js_NewRegExp(void *cx, heap_pool_t *pool, const WCHAR *str, DWORD str_len, UINT flags, BOOL flat)
regexp_t* regexp_new(void *cx, heap_pool_t *pool, const WCHAR *str,
DWORD str_len, WORD flags, BOOL flat)
{
JSRegExp *re;
regexp_t *re;
heap_pool_t *mark;
CompilerState state;
size_t resize;
@ -3213,7 +3237,7 @@ JSRegExp* js_NewRegExp(void *cx, heap_pool_t *pool, const WCHAR *str, DWORD str_
if (!ParseRegExp(&state))
goto out;
}
resize = offsetof(JSRegExp, program) + state.progLength + 1;
resize = offsetof(regexp_t, program) + state.progLength + 1;
re = heap_alloc(resize);
if (!re)
goto out;
@ -3223,7 +3247,7 @@ JSRegExp* js_NewRegExp(void *cx, heap_pool_t *pool, const WCHAR *str, DWORD str_
if (re->classCount) {
re->classList = heap_alloc(re->classCount * sizeof(RECharSet));
if (!re->classList) {
js_DestroyRegExp(re);
regexp_destroy(re);
re = NULL;
goto out;
}
@ -3234,7 +3258,7 @@ JSRegExp* js_NewRegExp(void *cx, heap_pool_t *pool, const WCHAR *str, DWORD str_
}
endPC = EmitREBytecode(&state, re, state.treeDepth, re->program, state.result);
if (!endPC) {
js_DestroyRegExp(re);
regexp_destroy(re);
re = NULL;
goto out;
}
@ -3245,9 +3269,9 @@ JSRegExp* js_NewRegExp(void *cx, heap_pool_t *pool, const WCHAR *str, DWORD str_
* besides re exist here.
*/
if ((size_t)(endPC - re->program) != state.progLength + 1) {
JSRegExp *tmp;
regexp_t *tmp;
assert((size_t)(endPC - re->program) < state.progLength + 1);
resize = offsetof(JSRegExp, program) + (endPC - re->program);
resize = offsetof(regexp_t, program) + (endPC - re->program);
tmp = heap_realloc(re, resize);
if (tmp)
re = tmp;

View file

@ -41,50 +41,41 @@ typedef struct RECapture {
size_t length; /* length of capture */
} RECapture;
typedef struct REMatchState {
typedef struct match_state_t {
const WCHAR *cp;
RECapture parens[1]; /* first of 're->parenCount' captures,
allocated at end of this struct */
} REMatchState;
DWORD match_len;
DWORD paren_count;
RECapture parens[1];
} match_state_t;
typedef BYTE JSPackedBool;
typedef BYTE jsbytecode;
/*
* This struct holds a bitmap representation of a class from a regexp.
* There's a list of these referenced by the classList field in the JSRegExp
* struct below. The initial state has startIndex set to the offset in the
* original regexp source of the beginning of the class contents. The first
* use of the class converts the source representation into a bitmap.
*
*/
typedef struct RECharSet {
JSPackedBool converted;
JSPackedBool sense;
WORD length;
union {
BYTE *bits;
struct {
size_t startIndex;
size_t length;
} src;
} u;
} RECharSet;
/*
 * A compiled regular expression, as returned by regexp_new() and released
 * with regexp_destroy().
 *
 * The bytecode is stored inline at the end of the object: program[] is
 * declared with a single element and the allocation is sized as
 * offsetof(regexp_t, program) plus the program length.
 */
typedef struct regexp_t {
WORD flags; /* flags, see jsapi.h's REG_* defines */
size_t parenCount; /* number of parenthesized submatches */
size_t classCount; /* count [...] bitmaps */
struct RECharSet *classList; /* list of [...] bitmaps */
const WCHAR *source; /* locked source string, sans // */
/* NOTE(review): presumably the length of source in WCHARs - confirm. */
DWORD source_len;
jsbytecode program[1]; /* regular expression bytecode */
} regexp_t;
typedef struct JSRegExp {
WORD flags; /* flags, see jsapi.h's REG_* defines */
size_t parenCount; /* number of parenthesized submatches */
size_t classCount; /* count [...] bitmaps */
RECharSet *classList; /* list of [...] bitmaps */
const WCHAR *source; /* locked source string, sans // */
DWORD source_len;
jsbytecode program[1]; /* regular expression bytecode */
} JSRegExp;
/* Compile a regular expression from the given source string and flags.
 * Returns the new regexp_t, or NULL on failure. */
regexp_t* regexp_new(void*, heap_pool_t*, const WCHAR*, DWORD, WORD, BOOL) DECLSPEC_HIDDEN;
/* Release a regexp_t, including its classList. */
void regexp_destroy(regexp_t*) DECLSPEC_HIDDEN;
/* Run the regexp against a string (pointer + length), starting at the position
 * stored in the match state's cp, which must not be NULL.
 * Returns S_OK on a match, with match_len and paren_count filled in;
 * S_FALSE when nothing matched, with match_len set to 0; or a failure
 * HRESULT. The pool is marked on entry and cleared again before returning. */
HRESULT regexp_execute(regexp_t*, void*, heap_pool_t*, const WCHAR*,
DWORD, match_state_t*) DECLSPEC_HIDDEN;
JSRegExp* js_NewRegExp(void *cx, heap_pool_t *pool, const WCHAR *str,
DWORD str_len, UINT flags, BOOL flat) DECLSPEC_HIDDEN;
void js_DestroyRegExp(JSRegExp *re) DECLSPEC_HIDDEN;
HRESULT MatchRegExpNext(JSRegExp *jsregexp, const WCHAR *str,
DWORD str_len, const WCHAR **cp, heap_pool_t *pool,
REMatchState **result, DWORD *matchlen) DECLSPEC_HIDDEN;
/*
 * Allocate a match_state_t with room for one RECapture per capture group of
 * 'regexp' and record 'pos' as its current position.
 *
 * The memory comes from 'pool' when one is supplied and from heap_alloc()
 * otherwise. Only cp is set here; the remaining fields are not initialized.
 *
 * Returns the new state, or NULL when the allocation fails.
 */
static inline match_state_t* alloc_match_state(regexp_t *regexp,
        heap_pool_t *pool, const WCHAR *pos)
{
    match_state_t *state;
    size_t alloc_size;

    /* Fixed part up to parens[], followed by parenCount capture slots. */
    alloc_size = offsetof(match_state_t, parens) + regexp->parenCount*sizeof(RECapture);

    if(pool)
        state = heap_pool_alloc(pool, alloc_size);
    else
        state = heap_alloc(alloc_size);

    if(state)
        state->cp = pos;
    return state;
}