# YPL parser 1.6
# adapted for Python 3.x

# written by VB.

import codecs
import re
import sys


class keyword(str):
    """Grammar terminal that matches only when the next whole word equals it."""


class code(str):
    """Marker subclass of str; defined here but never inspected by this module."""


class ignore(object):
    """Grammar terminal: a regex whose match is consumed but not added to the AST."""

    def __init__(self, regex_text, *args):
        # Extra positional arguments are forwarded to re.compile() (e.g. flags).
        self.regex = re.compile(regex_text, *args)


class _and(object):
    """Positive lookahead: ``obj`` must match, but no input is consumed."""

    def __init__(self, something):
        self.obj = something


class _not(_and):
    """Negative lookahead: succeeds, consuming nothing, only if ``obj`` fails."""


class Name(str):
    """Rule name carrying the source position at which the rule matched."""

    def __init__(self, *args):
        self.line = 0
        self.file = ""


class Symbol(list):
    """AST node: a two-element list ``[name, what]`` with attribute access."""

    def __init__(self, name, what):
        self.__name__ = name
        self.append(name)
        self.what = what
        self.append(what)

    def __call__(self):
        return self.what

    def __str__(self):
        return 'Symbol(' + repr(self.__name__) + ', ' + repr(self.what) + ')'

    def __repr__(self):
        return str(self)


word_regex = re.compile(r"\w+")
rest_regex = re.compile(r".*")

# Set to True to write "testing with"/"match" trace lines to stderr.
print_trace = False


def u(text):
    """Return *text* as a str.

    Exceptions are reduced to their first argument (or "" when they carry
    none), bytes are decoded with the stdin encoding (UTF-8 as fallback),
    and anything else goes through str().
    """
    if isinstance(text, BaseException):
        # A bare SyntaxError() has empty args; do not crash on it.
        text = text.args[0] if text.args else ""
    if isinstance(text, bytes):
        # sys.stdin may be None (no console) or have no usable encoding.
        encoding = getattr(sys.stdin, "encoding", None) or "utf-8"
        return codecs.decode(text, encoding)
    # str() returns an exact str unchanged and flattens str subclasses.
    return str(text)


def _trace(label, pattern, detail=""):
    """Write a trace line for named, non-"comment" patterns if tracing is on."""
    if not (__debug__ and print_trace):
        return
    try:
        rule = pattern.__name__
        if rule != "comment":
            sys.stderr.write(label + rule + detail + "\n")
    except Exception:
        # Tracing is best effort (e.g. pattern has no __name__) and must
        # never influence parsing.
        pass


def skip(skipper, text, skipWS, skipComments):
    """Return *text* with leading whitespace and/or comments removed.

    skipper:      parser object used to match comments
    text:         text to strip
    skipWS:       if true, leading whitespace is stripped
    skipComments: pyPEG pattern describing comments, or a false value
    """
    t = text.lstrip() if skipWS else text
    if skipComments:
        try:
            while True:
                _, rest = skipper.parseLine(t, skipComments, [], skipWS, None)
                if skipWS:
                    rest = rest.lstrip()
                if len(rest) == len(t):
                    # The comment pattern matched without consuming
                    # anything; repeating it would never terminate.
                    break
                t = rest
        except Exception:
            # The loop ends when no further comment can be parsed.
            pass
    return t


class parser(object):
    """PEG parser holding the per-parse state.

    restlen:  smallest length of unparsed rest seen so far (-1: no match yet)
    skipper:  parser used for skipping comments (itself for a skipper)
    lines:    list of (offset, filename, line) tuples, or None
    textlen:  total length of the text being parsed
    memory:   memoization table used when packrat is true
    packrat:  flag enabling memoization
    """

    def __init__(self, another = False, p = False):
        self.restlen = -1
        if not another:
            self.skipper = parser(True, p)
        else:
            self.skipper = self
        self.lines = None
        self.textlen = 0
        self.memory = {}
        self.packrat = p

    def parseLine(self, textline, pattern, resultSoFar = None, skipWS = True, skipComments = None):
        """Parse the start of *textline* according to *pattern*.

        textline:     text to parse
        pattern:      pyPEG language description
        resultSoFar:  parsing result so far; it is extended in place
                      (default: a fresh empty list)
        skipWS:       flag if whitespace should be skipped (default: True)
        skipComments: pyPEG pattern for matching comments

        returns: (pyAST, textrest)

        raises:  SyntaxError if textline is detected not being in the
                 language described by pattern, or if pattern is an
                 illegal language description
        """
        # A fresh list per call: a shared default list would leak results
        # from one parse into the next.
        if resultSoFar is None:
            resultSoFar = []

        name = None
        _textline = textline
        _pattern = pattern

        def R(result, text):
            """Record a successful match and return (resultSoFar, text)."""
            _trace("match: ", _pattern)

            if self.restlen == -1:
                self.restlen = len(text)
            else:
                self.restlen = min(self.restlen, len(text))
            res = resultSoFar
            if name:
                # Named rules become Symbol nodes, even for empty matches.
                name.line = self.lineNo()
                res.append(Symbol(name, result if result else []))
            elif result:
                # Exact type check: a Symbol is a list subclass but must be
                # appended as one node, not spliced in.
                if type(result) is list:
                    res.extend(result)
                else:
                    res.append(result)
            if self.packrat:
                self.memory[(len(_textline), id(_pattern))] = (res, text)
            return res, text

        def syntaxError():
            """Memoize the failure (packrat only) and raise SyntaxError."""
            if self.packrat:
                self.memory[(len(_textline), id(_pattern))] = False
            raise SyntaxError()

        if self.packrat:
            # NOTE(review): only memoized successes short-circuit. Memoized
            # failures (False) fall through and are parsed again, as before.
            # The cached value is the result list of the call that stored it.
            cached = self.memory.get((len(textline), id(pattern)))
            if cached:
                return cached

        if callable(pattern):
            _trace("testing with ", pattern, ": " + textline[:40])

            # Rules whose name starts with "_" do not create a Symbol node.
            if pattern.__name__[0] != "_":
                name = Name(pattern.__name__)

            pattern = pattern()
            if callable(pattern):
                pattern = (pattern,)

        text = skip(self.skipper, textline, skipWS, skipComments)

        # Dispatch on the exact type: keyword is a str subclass, _not is an
        # _and subclass and Symbol is a list subclass, so isinstance() would
        # select the wrong branch.
        pattern_type = type(pattern)

        if pattern_type is str:
            if text[:len(pattern)] == pattern:
                text = skip(self.skipper, text[len(pattern):], skipWS, skipComments)
                return R(None, text)
            syntaxError()

        elif pattern_type is keyword:
            m = word_regex.match(text)
            if m and m.group(0) == pattern:
                text = skip(self.skipper, text[len(pattern):], skipWS, skipComments)
                return R(None, text)
            syntaxError()

        elif pattern_type is _not:
            try:
                self.parseLine(text, pattern.obj, [], skipWS, skipComments)
            except Exception:
                # The sub-pattern does not match: the lookahead succeeds.
                return resultSoFar, textline
            syntaxError()

        elif pattern_type is _and:
            # Raises SyntaxError if the sub-pattern does not match.
            self.parseLine(text, pattern.obj, [], skipWS, skipComments)
            return resultSoFar, textline

        elif pattern_type is type(word_regex) or pattern_type is ignore:
            dropped = pattern_type is ignore
            regex = pattern.regex if dropped else pattern
            m = regex.match(text)
            if not m:
                syntaxError()
            text = skip(self.skipper, text[len(m.group(0)):], skipWS, skipComments)
            return R(None if dropped else m.group(0), text)

        elif pattern_type is tuple:
            # Sequence. An int element sets the cardinality of the next
            # element only: n > 0 exactly n times, 0 optional, negative
            # repeated, where -2 demands at least one match.
            result = []
            n = 1
            for p in pattern:
                if type(p) is int:
                    n = p
                    continue
                if n > 0:
                    for _ in range(n):
                        result, text = self.parseLine(text, p, result, skipWS, skipComments)
                elif n == 0:
                    if text != "":
                        try:
                            result, text = self.parseLine(text, p, result, skipWS, skipComments)
                        except SyntaxError:
                            pass
                else:
                    found = False
                    while True:
                        try:
                            newResult, newText = self.parseLine(text, p, result, skipWS, skipComments)
                        except SyntaxError:
                            break
                        progressed = len(newText) != len(text)
                        result, text, found = newResult, newText, True
                        if not progressed:
                            # An empty match would repeat forever.
                            break
                    if n == -2 and not found:
                        syntaxError()
                n = 1
            return R(result, text)

        elif pattern_type is list:
            # Ordered choice: the first alternative that matches wins.
            result = []
            for p in pattern:
                try:
                    result, text = self.parseLine(text, p, result, skipWS, skipComments)
                except SyntaxError:
                    continue
                return R(result, text)
            syntaxError()

        else:
            raise SyntaxError("illegal type in grammar: " + u(pattern_type))

    def _location(self, index):
        """Return "file:line" for self.lines[index], or "" if unavailable."""
        try:
            entry = self.lines[index]
            return u(entry[1]) + ":" + u(entry[2])
        except Exception:
            return ""

    def lineNo(self):
        """Return "file:line" for the current parse position, or "".

        Binary search over self.lines for the entry whose offset range
        contains the number of characters parsed so far.
        """
        if not self.lines:
            return ""
        if self.restlen == -1:
            return ""
        parsed = self.textlen - self.restlen

        left, right = 0, len(self.lines)

        while True:
            mid = (right + left) // 2
            if self.lines[mid][0] <= parsed:
                try:
                    hit = self.lines[mid + 1][0] >= parsed
                except Exception:
                    # No following entry: fall through to the lookup, which
                    # then yields "".
                    hit = True
                if hit:
                    return self._location(mid + 1)
                left = mid + 1
            else:
                right = mid - 1
            if left > right:
                return ""


# plain module API

def parseLine(textline, pattern, resultSoFar = None, skipWS = True, skipComments = None, packrat = False):
    """Parse *textline* with a fresh parser; see parser.parseLine().

    packrat: use memoization

    returns: (pyAST, textrest)
    raises:  SyntaxError, as parser.parseLine() does
    """
    if resultSoFar is None:
        resultSoFar = []
    p = parser(p=packrat)
    text = skip(p.skipper, textline, skipWS, skipComments)
    ast, text = p.parseLine(text, pattern, resultSoFar, skipWS, skipComments)
    return ast, text


def parse(language, lineSource, skipWS = True, skipComments = None, packrat = False, lineCount = True):
    """Parse all lines of *lineSource* according to *language*.

    language:     pyPEG language description
    lineSource:   a fileinput.FileInput object (iterable of lines that
                  provides filename() and lineno())
    skipWS:       flag if whitespace should be skipped (default: True)
    skipComments: Python function which returns pyPEG for matching comments
    packrat:      use memoization
    lineCount:    add line number information to AST

    returns: pyAST

    raises:  SyntaxError(reason), if a parsed line is not in language
             SyntaxError(reason), if the language description is illegal
    """
    while callable(language):
        language = language()

    # One (offset, filename, zero-based line) entry per input line.
    lines, chunks, offset = [], [], 0
    for line in lineSource:
        lines.append((offset, lineSource.filename(), lineSource.lineno() - 1))
        chunk = u(line)
        chunks.append(chunk)
        offset += len(chunk)
    orig = "".join(chunks)

    textlen = len(orig)

    p = parser(p=packrat)
    p.textlen = textlen
    p.lines = lines if lineCount else None

    try:
        text = skip(p.skipper, orig, skipWS, skipComments)
        result, text = p.parseLine(text, language, [], skipWS, skipComments)
        if text:
            # Input left over after the grammar matched.
            raise SyntaxError()

    except SyntaxError:
        # restlen == -1 means nothing matched, so the error is at the start.
        parsed = textlen - p.restlen if p.restlen != -1 else 0

        # Index of the last line that starts before the failure position.
        index = -1
        for i, entry in enumerate(lines):
            if entry[0] >= parsed:
                break
            index = i
        index = max(index, 0)

        if lines:
            file, lineNo = lines[index][1], lines[index][2] + 1
        else:
            file, lineNo = "", 1

        source_lines = orig.splitlines()
        lineCont = source_lines[index] if index < len(source_lines) else ""
        raise SyntaxError("syntax error in " + u(file) + ":" + u(lineNo) + ": " + lineCont)

    return result