mirror of
https://github.com/python/cpython
synced 2024-09-15 22:58:09 +00:00
Initial revision
This commit is contained in:
parent
564f5507c3
commit
6c6b78d6bd
41
Lib/regex_syntax.py
Normal file
41
Lib/regex_syntax.py
Normal file
|
@ -0,0 +1,41 @@
|
|||
# These bits are passed to regex.set_syntax() to choose among
# alternative regexp syntaxes.  Each flag occupies a distinct bit so that
# flags can be combined with the | operator.

# 1 means plain parentheses serve as grouping, and backslash
# parentheses are needed for literal searching.
# 0 means backslash-parentheses are grouping, and plain parentheses
# are for literal searching.
RE_NO_BK_PARENS = 1

# 1 means plain | serves as the "or"-operator, and \| is a literal.
# 0 means \| serves as the "or"-operator, and | is a literal.
RE_NO_BK_VBAR = 2

# 0 means plain + or ? serves as an operator, and \+, \? are literals.
# 1 means \+, \? are operators and plain +, ? are literals.
RE_BK_PLUS_QM = 4

# 1 means | binds tighter than ^ or $.
# 0 means the contrary.
RE_TIGHT_VBAR = 8

# 1 means treat \n as an "or"-operator.
# 0 means treat it as a normal character.
RE_NEWLINE_OR = 16

# 0 means that special characters (such as *, ^, and $) always have
# their special meaning regardless of the surrounding context.
# 1 means that special characters may act as normal characters in some
# contexts.  Specifically, this applies to:
#     ^ - only special at the beginning, or after ( or |
#     $ - only special at the end, or before ) or |
#     *, +, ? - only special when not after the beginning, (, or |
RE_CONTEXT_INDEP_OPS = 32

# Now define combinations of bits for the standard possibilities.
RE_SYNTAX_AWK = (RE_NO_BK_PARENS | RE_NO_BK_VBAR | RE_CONTEXT_INDEP_OPS)
RE_SYNTAX_EGREP = (RE_SYNTAX_AWK | RE_NEWLINE_OR)
RE_SYNTAX_GREP = (RE_BK_PLUS_QM | RE_NEWLINE_OR)
RE_SYNTAX_EMACS = 0

# (Python's obsolete "regexp" module used a syntax similar to awk.)
|
37
Lib/regexp.py
Normal file
37
Lib/regexp.py
Normal file
|
@ -0,0 +1,37 @@
|
|||
# Provide backward compatibility for module "regexp" using "regex".
|
||||
|
||||
import regex
|
||||
from regex_syntax import *
|
||||
|
||||
class Prog:
    """Compiled pattern giving the old "regexp" interface on top of "regex".

    An instance is created empty and populated by init(), which returns the
    instance itself so the two steps can be chained: Prog().init(pat).
    """

    def init(self, pat):
        """Compile *pat* using the awk syntax and return self.

        regex.set_syntax() is called with RE_SYNTAX_AWK before compiling,
        and the value it returned is passed back to regex.set_syntax()
        afterwards -- also when regex.compile() raises -- so the caller's
        syntax setting is left as it was found.
        """
        save_syntax = regex.set_syntax(RE_SYNTAX_AWK)
        try:
            self.prog = regex.compile(pat)
        finally:
            regex.set_syntax(save_syntax)
        return self

    def match(self, args):
        """Search for the compiled pattern and return its register spans.

        *args* is either the subject string, or a (string, offset) tuple
        giving the position at which the search starts (default 0).

        Returns the empty tuple when the search result is negative.
        Otherwise returns the registers reported by the compiled pattern
        with all trailing (-1, -1) entries removed.
        """
        if isinstance(args, tuple):
            text, offset = args
        else:
            text, offset = args, 0
        if self.prog.search(text, offset) < 0:
            return ()
        regs = self.prog.regs()
        # Drop the unused registers at the end; (-1, -1) marks an unused one.
        end = len(regs)
        while end > 0 and regs[end - 1] == (-1, -1):
            end -= 1
        return regs[:end]
|
||||
|
||||
def compile(pat):
    """Return a new Prog initialised from the pattern string *pat*."""
    program = Prog()
    return program.init(pat)
|
||||
|
||||
# One-entry cache used by match(): the pattern most recently compiled
# and the object compile() returned for it.
cache_pat = None
cache_prog = None


def match(pat, str):
    """Match *str* against the pattern *pat* and return the result.

    The compiled form of the most recently used pattern is kept in the
    module-level cache, so repeated calls with the same *pat* compile it
    only once.  *str* is handed unchanged to the cached object's match()
    method and that method's result is returned.

    The parameter name "str" shadows the builtin; it is kept because it
    is part of the public signature.
    """
    global cache_pat, cache_prog
    # Also compile when nothing has been cached yet, so that a first call
    # with pat=None cannot fall through to an unset cache_prog.
    if cache_prog is None or pat != cache_pat:
        cache_pat, cache_prog = pat, compile(pat)
    return cache_prog.match(str)
|
Loading…
Reference in a new issue