slre: drop wrong "anchored" optimization
The regex '^a|b' means "does the string start with a, or does it have
a b anywhere", not "does the string start with a or b" (the latter
should be spelled '^[ab]' or '^(a|b)'). It should match exactly the
same strings as 'b|^a'. But the current implementation hard-codes an
assumption that when the regex starts with a ^, the whole regex must
match from the beginning, i.e. it only attempts at offset 0.
It really should be completely symmetrical to 'b|c$' ("does it have a
b anywhere or end with c?"), which is treated correctly.
Another quirk is that currently the regex 'x*$', which should match
all strings (because it just means "does the string end
with 0 or more x'es"), fails to match any string that does not end
in x (including the empty string), because in the unanchored case we
never attempt to match at ofs==len. In the anchored case, '^x*$', this
works correctly and matches exactly those strings (including the empty
string) consisting entirely of x'es.
Fix both of these issues by dropping all use of the slre->anchored
member and always testing at all possible offsets. If the regex does have
a ^ somewhere (including after a | branch character), that is
correctly handled by the match engine by only matching when *ofs is 0.
Reviewed-by: Simon Glass <sjg@chromium.org>
Signed-off-by: Rasmus Villemoes <ravi@prevas.dk>
This commit is contained in:
committed by
Tom Rini
parent
ced883d92c
commit
19b3e24083
@@ -63,7 +63,6 @@ struct slre {
|
||||
int code_size;
|
||||
int data_size;
|
||||
int num_caps; /* Number of bracket pairs */
|
||||
int anchored; /* Must match from string start */
|
||||
const char *err_str; /* Error string */
|
||||
};
|
||||
|
||||
|
||||
11
lib/slre.c
11
lib/slre.c
@@ -413,10 +413,7 @@ int
|
||||
slre_compile(struct slre *r, const char *re)
|
||||
{
|
||||
r->err_str = NULL;
|
||||
r->code_size = r->data_size = r->num_caps = r->anchored = 0;
|
||||
|
||||
if (*re == '^')
|
||||
r->anchored++;
|
||||
r->code_size = r->data_size = r->num_caps = 0;
|
||||
|
||||
emit(r, OPEN); /* This will capture what matches full RE */
|
||||
emit(r, 0);
|
||||
@@ -650,14 +647,10 @@ slre_match(const struct slre *r, const char *buf, int len,
|
||||
{
|
||||
int i, ofs = 0, res = 0;
|
||||
|
||||
if (r->anchored) {
|
||||
res = match(r, 0, buf, len, &ofs, caps);
|
||||
} else {
|
||||
for (i = 0; i < len && res == 0; i++) {
|
||||
for (i = 0; i <= len && res == 0; i++) {
|
||||
ofs = i;
|
||||
res = match(r, 0, buf, len, &ofs, caps);
|
||||
}
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user