# YPL parser 1.6
# adapted for Python 3.x

# written by VB.

import codecs
import re
import sys


class keyword(str):
    """Grammar terminal that must equal the whole word at the current position."""


class code(str):
    """Marker string type; parser.parseLine() in this module does not interpret it."""


class ignore(object):
    """Grammar terminal: a regex that must match but contributes nothing to the AST."""

    def __init__(self, regex_text, *args):
        # Extra positional arguments are forwarded to re.compile() (e.g. flags).
        self.regex = re.compile(regex_text, *args)


class _and(object):
    """Lookahead wrapper: the wrapped pattern must match; no input is consumed."""

    def __init__(self, something):
        self.obj = something


class _not(_and):
    """Negative lookahead wrapper: the wrapped pattern must NOT match."""


class Name(str):
    """Symbol name (a str) carrying ``line`` and ``file`` attributes."""

    def __init__(self, *args):
        self.line = 0
        self.file = ""


class Symbol(list):
    """AST node: a two-element list ``[name, what]`` with attribute access."""

    def __init__(self, name, what):
        super().__init__()
        self.__name__ = name
        self.append(name)
        self.what = what
        self.append(what)

    def __call__(self):
        """Return the payload (``what``) of this node."""
        return self.what

    def __str__(self):
        return 'Symbol(' + repr(self.__name__) + ', ' + repr(self.what) + ')'

    def __repr__(self):
        return str(self)


word_regex = re.compile(r"\w+")
rest_regex = re.compile(r".*")

# When true (and Python runs without -O), parser.parseLine() traces to stderr.
print_trace = False


def u(text):
    """Return *text* as a str.

    Exceptions are reduced to their first argument (or "" when they carry
    none), bytes are decoded with the encoding of sys.stdin (falling back
    to UTF-8), and anything else goes through str().
    """
    if isinstance(text, BaseException):
        # An exception raised without arguments has an empty args tuple.
        text = text.args[0] if text.args else ""
    if isinstance(text, bytes):
        # sys.stdin may be None or report no encoding.
        encoding = getattr(sys.stdin, "encoding", None) or "utf-8"
        return codecs.decode(text, encoding)
    # str() hands back an exact str unchanged and flattens str subclasses.
    return str(text)


def skip(skipper, text, skipWS, skipComments):
    """Strip leading whitespace and/or comments from *text*.

    skipper:      parser object used to match the comment pattern
    text:         input text
    skipWS:       if true, leading whitespace is stripped
    skipComments: pattern for comments, or a false value for "no comments"

    returns: the remaining text
    """
    rest = text.lstrip() if skipWS else text
    if skipComments:
        try:
            # Consume comments until the comment pattern stops matching;
            # that failure is what ends the loop.
            while True:
                _, rest = skipper.parseLine(rest, skipComments, [], skipWS, None)
                if skipWS:
                    rest = rest.lstrip()
        except SyntaxError:
            pass
    return rest


def _trace(prefix, pattern, suffix=""):
    """Write a trace line for *pattern* to stderr unless it is unnamed or "comment"."""
    pattern_name = getattr(pattern, "__name__", None)
    if isinstance(pattern_name, str) and pattern_name != "comment":
        sys.stderr.write(prefix + pattern_name + suffix + "\n")


class parser(object):
    """Recursive-descent matcher for grammars built from Python objects."""

    def __init__(self, another=False, p=False):
        """Create a parser.

        another: true when this instance is itself the comment skipper of
                 another parser (it then acts as its own skipper)
        p:       enable packrat memoization
        """
        # Smallest length of unparsed text seen after a match; -1 = no match yet.
        self.restlen = -1
        self.skipper = self if another else parser(True, p)
        # Optional list of (offset, filename, lineno) tuples, see parse().
        self.lines = None
        self.textlen = 0
        self.memory = {}
        self.packrat = p

    def parseLine(self, textline, pattern, resultSoFar=None, skipWS=True, skipComments=None):
        """Match *pattern* at the start of *textline*.

        textline:     text to parse
        pattern:      pyPEG language description
        resultSoFar:  parsing result so far; results are appended to this
                      list in place (default: a new empty list per call)
        skipWS:       flag if whitespace should be skipped (default: True)
        skipComments: pattern for matching comments (default: None)

        returns: pyAST, textrest

        raises: SyntaxError if textline is not in the language described
                by pattern, or if pattern contains an illegal type
        """
        if resultSoFar is None:
            # A fresh list per call; a shared default list would accumulate
            # results across unrelated calls.
            resultSoFar = []

        name = None
        original_pattern = pattern
        memo_key = (len(textline), id(pattern))

        def matched(result, text):
            # Record a successful match: update bookkeeping, fold *result*
            # into resultSoFar and return (resultSoFar, remaining text).
            if __debug__ and print_trace:
                _trace("match: ", original_pattern)

            remaining = len(text)
            if self.restlen == -1 or remaining < self.restlen:
                self.restlen = remaining

            res = resultSoFar
            if name:
                name.line = self.lineNo()
                res.append(Symbol(name, result if result else []))
            elif result:
                # Exact type check on purpose: a Symbol is a list subclass
                # and must be added as one element, not spliced in.
                if type(result) is list:
                    res.extend(result)
                else:
                    res.append(result)
            if self.packrat:
                self.memory[memo_key] = (res, text)
            return res, text

        def fail():
            # Record the failure (when memoizing) and signal "no match".
            if self.packrat:
                self.memory[memo_key] = False
            raise SyntaxError()

        if self.packrat:
            memo = self.memory.get(memo_key)
            if memo:
                return memo
            # NOTE(review): memoized failures (stored as False) are not
            # replayed; the pattern is simply tried again. Keys contain
            # id(pattern), which Python may reuse once an object is freed,
            # so confirm pattern lifetimes before short-circuiting here.

        if callable(pattern):
            if __debug__ and print_trace:
                _trace("testing with ", pattern, ": " + textline[:40])

            # Functions whose name starts with "_" produce no Symbol node.
            if pattern.__name__[0] != "_":
                name = Name(pattern.__name__)

            pattern = pattern()
            if callable(pattern):
                pattern = (pattern,)

        text = skip(self.skipper, textline, skipWS, skipComments)
        pattern_type = type(pattern)

        if pattern_type is str:
            if text.startswith(pattern):
                text = skip(self.skipper, text[len(pattern):], skipWS, skipComments)
                return matched(None, text)
            fail()

        elif pattern_type is keyword:
            word = word_regex.match(text)
            if word and word.group(0) == pattern:
                text = skip(self.skipper, text[len(pattern):], skipWS, skipComments)
                return matched(None, text)
            fail()

        elif pattern_type is _not:
            try:
                self.parseLine(text, pattern.obj, [], skipWS, skipComments)
            except SyntaxError:
                # The wrapped pattern did not match: success, nothing consumed.
                return resultSoFar, textline
            fail()

        elif pattern_type is _and:
            # Raises SyntaxError itself when the wrapped pattern does not match.
            self.parseLine(text, pattern.obj, [], skipWS, skipComments)
            return resultSoFar, textline

        elif pattern_type is type(word_regex) or pattern_type is ignore:
            regex = pattern.regex if pattern_type is ignore else pattern
            found = regex.match(text)
            if not found:
                fail()
            text = skip(self.skipper, text[found.end():], skipWS, skipComments)
            if pattern_type is ignore:
                return matched(None, text)
            return matched(found.group(0), text)

        elif pattern_type is tuple:
            # Sequence. An int element sets the repetition of the NEXT
            # pattern only: n > 0 exactly n times, 0 optional, negative
            # repeat while it matches (-2 additionally requires one match).
            result = []
            n = 1
            for p in pattern:
                if type(p) is int:
                    n = p
                    continue
                if n > 0:
                    for _ in range(n):
                        result, text = self.parseLine(text, p, result, skipWS, skipComments)
                elif n == 0:
                    if text != "":
                        try:
                            result, text = self.parseLine(text, p, result, skipWS, skipComments)
                        except SyntaxError:
                            pass
                else:
                    found = False
                    while True:
                        try:
                            result, text = self.parseLine(text, p, result, skipWS, skipComments)
                        except SyntaxError:
                            break
                        found = True
                    if n == -2 and not found:
                        fail()
                n = 1
            return matched(result, text)

        elif pattern_type is list:
            # Ordered choice: the first alternative that matches wins.
            result = []
            for alternative in pattern:
                try:
                    result, text = self.parseLine(text, alternative, result, skipWS, skipComments)
                except SyntaxError:
                    continue
                return matched(result, text)
            fail()

        else:
            raise SyntaxError("illegal type in grammar: " + u(pattern_type))

    def lineNo(self):
        """Return "filename:line" for the furthest position parsed so far.

        Returns "" when no line table is set, nothing has matched yet, or
        the position cannot be resolved.
        """
        lines = self.lines
        if not lines or self.restlen == -1:
            return ""
        parsed = self.textlen - self.restlen

        # Binary search for an entry whose offset is <= parsed while the
        # following entry's offset is >= parsed.
        left, right = 0, len(lines)
        while True:
            mid = (right + left) // 2
            if lines[mid][0] <= parsed:
                following = mid + 1
                if following >= len(lines):
                    # NOTE(review): the result is read from the FOLLOWING
                    # entry, so a position in the last line yields "".
                    return ""
                if lines[following][0] >= parsed:
                    try:
                        return u(lines[following][1]) + ":" + u(lines[following][2])
                    except Exception:
                        # Line info is diagnostic only; never fail a parse for it.
                        return ""
                left = following
            else:
                right = mid - 1
            if left > right:
                return ""


# plain module API

def parseLine(textline, pattern, resultSoFar=None, skipWS=True, skipComments=None, packrat=False):
    """Parse *textline* with a new parser object.

    textline:     text to parse
    pattern:      pyPEG language description
    resultSoFar:  parsing result so far (default: a new empty list)
    skipWS:       flag if whitespace should be skipped (default: True)
    skipComments: pattern for matching comments
    packrat:      use memoization

    returns: pyAST, textrest

    raises: SyntaxError, see parser.parseLine()
    """
    if resultSoFar is None:
        resultSoFar = []
    p = parser(p=packrat)
    text = skip(p.skipper, textline, skipWS, skipComments)
    return p.parseLine(text, pattern, resultSoFar, skipWS, skipComments)


def parse(language, lineSource, skipWS=True, skipComments=None, packrat=False, lineCount=True):
    """Parse all lines of *lineSource* with *language*.

    language:     pyPEG language description
    lineSource:   iterable of lines that also offers filename() and
                  lineno(), e.g. a fileinput.FileInput object
    skipWS:       flag if whitespace should be skipped (default: True)
    skipComments: pattern for matching comments
    packrat:      use memoization
    lineCount:    add line number information to AST

    returns: pyAST

    raises: SyntaxError("syntax error in FILE:LINE: TEXT") if the input is
            not completely matched or the language description is illegal
    """
    while callable(language):
        language = language()

    # lines holds (start offset, filename, zero-based line number) per line.
    lines, chunks, offset = [], [], 0
    for line in lineSource:
        lines.append((offset, lineSource.filename(), lineSource.lineno() - 1))
        chunk = u(line)
        chunks.append(chunk)
        offset += len(chunk)
    orig = "".join(chunks)
    textlen = len(orig)

    p = parser(p=packrat)
    p.textlen = textlen
    p.lines = lines if lineCount else None

    try:
        text = skip(p.skipper, orig, skipWS, skipComments)
        result, text = p.parseLine(text, language, [], skipWS, skipComments)
        if text:
            # Unparsed input left over.
            raise SyntaxError()

    except SyntaxError as err:
        parsed = textlen - p.restlen
        index, line_number, filename = -1, 0, ""
        # Report the last line that starts before the failure position.
        for start, source_name, zero_based in lines:
            if start >= parsed:
                break
            index += 1
            line_number = zero_based
            filename = source_name
        line_number += 1

        if index < 0 and lines:
            # Failure at the very first position: report the first line.
            index = 0
            filename = lines[0][1]
            line_number = lines[0][2] + 1
        lineCont = chunks[index].rstrip("\r\n") if lines else ""
        raise SyntaxError(
            "syntax error in " + u(filename) + ":" + u(line_number) + ": " + lineCont
        ) from err

    return result