# YPL parser 1.5

# written by VB.

"""pyPEG: a small interpreter for parsing expression grammars.

A grammar is built from plain Python objects:

* ``str`` / unicode text   -- literal text that must follow (no AST output)
* ``keyword``              -- literal that must equal the whole next word
* compiled regex           -- the matched text is added to the AST
* ``ignore``               -- regex whose match is consumed but not recorded
* ``tuple``                -- sequence; an ``int`` item sets the cardinality
                              of the item that follows it
                              (``0`` optional, ``-1`` zero or more,
                              ``-2`` one or more, ``n > 0`` exactly n times)
* ``list``                 -- ordered choice, first matching alternative wins
* ``_and`` / ``_not``      -- positive / negative lookahead
* callable                 -- a rule; it is called to obtain its pattern and,
                              unless its name starts with ``_``, wraps its
                              result in a ``Symbol`` named after it

The module runs on Python 2 and Python 3.
"""

import re
import sys
import codecs

try:
    import exceptions  # Python 2: kept so the module namespace is unchanged
except ImportError:  # Python 3: the exception classes live in ``builtins``
    import builtins as exceptions

try:
    _unicode = unicode  # Python 2 text type
    _bytes = str        # Python 2 byte-string type
except NameError:       # Python 3
    _unicode = str
    _bytes = bytes


class keyword(_unicode):
    """Literal that only matches when it equals the whole next word."""


class code(_unicode):
    """Text subclass; it is not referenced by the parser in this module."""


class ignore(object):
    """Regex terminal whose matched text is consumed but left out of the AST."""

    def __init__(self, regex_text, *args):
        # Extra positional arguments are handed to re.compile() (e.g. flags).
        self.regex = re.compile(regex_text, *args)


class _and(object):
    """Positive lookahead: ``something`` must match, nothing is consumed."""

    def __init__(self, something):
        self.obj = something


class _not(_and):
    """Negative lookahead: ``something`` must NOT match, nothing is consumed."""


class Name(_unicode):
    """Rule name carrying the source position attributes ``line`` and ``file``."""

    def __init__(self, *args):
        self.line = 0
        self.file = u""


class Symbol(list):
    """AST node: the two-element list ``[name, what]``.

    The node is callable and returns ``what``; it also exposes ``__name__``.
    """

    def __init__(self, name, what):
        self.__name__ = name
        self.append(name)
        self.what = what
        self.append(what)

    def __call__(self):
        return self.what

    def __unicode__(self):
        return u'Symbol(' + repr(self.__name__) + u', ' + repr(self.what) + u')'

    def __repr__(self):
        # Call __unicode__ directly: on Python 3 the text constructor would
        # go through list.__str__ and back into __repr__ (endless recursion).
        return self.__unicode__()


word_regex = re.compile(u"\\w+")
rest_regex = re.compile(u".*")
_regex_type = type(word_regex)

# Set to True to write a trace of tested and matched rules to sys.stderr.
print_trace = False


def u(text):
    """Return ``text`` as a text (unicode) string.

    * exception instances are replaced by their first argument
      (``u""`` when they carry none),
    * byte strings are decoded with ``sys.stdin.encoding`` when that is set,
      otherwise as UTF-8,
    * any other object is converted with the text constructor.
    """
    if isinstance(text, BaseException):
        text = text.args[0] if text.args else u""
    if type(text) is _unicode:
        return text
    if isinstance(text, _bytes):
        # sys.stdin may be missing or replaced by an object without an
        # encoding attribute, hence getattr().
        encoding = getattr(sys.stdin, "encoding", None) or "utf-8"
        return codecs.decode(text, encoding)
    return _unicode(text)


def skip(skipper, text, skipWS, skipComments):
    """Strip leading whitespace and/or comments from ``text``.

    skipper:      parser used to match the comment pattern
    text:         text to clean up
    skipWS:       when true, leading whitespace is removed
    skipComments: pattern matching one comment, or a false value

    Returns the remaining text.
    """
    rest = text.lstrip() if skipWS else text
    if skipComments:
        try:
            while True:
                before = len(rest)
                _, rest = skipper.parseLine(rest, skipComments, [], skipWS, None)
                if skipWS:
                    rest = rest.lstrip()
                if len(rest) == before:
                    # The comment pattern matched without consuming anything;
                    # repeating it could never terminate.
                    break
        except SyntaxError:
            # No further comment at this position.
            pass
    return rest


class parser(object):
    """Recursive descent interpreter for pyPEG grammars.

    Attributes:
        restlen:  shortest length of unparsed text seen after a successful
                  match, or -1 while nothing has matched
        skipper:  parser used for matching comments
        lines:    list of ``(offset, filename, line index)`` tuples used by
                  lineNo(), or None to disable position lookups
        textlen:  length of the complete input text
        memory:   memo table used when ``packrat`` is true
        packrat:  flag enabling memoization
    """

    def __init__(self, another=False, p=False):
        """another: true when this instance is itself a comment skipper;
        p: enable packrat memoization."""
        self.restlen = -1
        if not another:
            self.skipper = parser(True, p)
        else:
            self.skipper = self
        self.lines = None
        self.textlen = 0
        self.memory = {}
        self.packrat = p

    def parseLine(self, textline, pattern, resultSoFar=None, skipWS=True, skipComments=None):
        """Match ``pattern`` at the start of ``textline``.

        textline:     text to parse
        pattern:      pyPEG language description
        resultSoFar:  list the parsing result is appended to
                      (default: a new empty list)
        skipWS:       flag if whitespace should be skipped (default: True)
        skipComments: pattern (usually a function returning pyPEG) that
                      matches comments

        returns: pyAST, textrest

        raises: SyntaxError() if textline is detected not being in the
                language described by pattern
                SyntaxError(reason) if pattern is an illegal language
                description
        """
        # A fresh list per call; a shared default list would leak results
        # from one call into the next.
        if resultSoFar is None:
            resultSoFar = []

        name = None
        _textline = textline
        _pattern = pattern

        def R(result, text):
            # Record a successful match of _pattern and build the return value.
            if __debug__ and print_trace:
                try:
                    if _pattern.__name__ != "comment":
                        sys.stderr.write(u"match: " + _pattern.__name__ + u"\n")
                except Exception:
                    # Tracing is best effort (patterns without __name__,
                    # unwritable stderr, ...) and must never break parsing.
                    pass

            if self.restlen == -1:
                self.restlen = len(text)
            else:
                self.restlen = min(self.restlen, len(text))

            res = resultSoFar
            if name:
                name.line = self.lineNo()
                res.append(Symbol(name, result if result else []))
            elif result:
                # Exact type test: a Symbol (list subclass) is one item.
                if type(result) is list:
                    res.extend(result)
                else:
                    res.append(result)

            if self.packrat:
                self.memory[(len(_textline), id(_pattern))] = (res, text)
            return res, text

        def syntaxError():
            # Record the failure (when memoizing) and abort this match.
            if self.packrat:
                self.memory[(len(_textline), id(_pattern))] = False
            raise SyntaxError()

        if self.packrat:
            # Only memoized successes short-circuit; a memoized failure
            # (False) or a missing entry falls through to a normal parse.
            # NOTE(review): entries are keyed by id(pattern); ids of
            # temporary pattern objects may be reused - confirm before
            # relying on packrat mode.
            cached = self.memory.get((len(textline), id(pattern)))
            if cached:
                return cached

        if callable(pattern):
            if __debug__ and print_trace:
                try:
                    if pattern.__name__ != "comment":
                        sys.stderr.write(u"testing with " + pattern.__name__ + u": " + textline[:40] + u"\n")
                except Exception:
                    pass  # best-effort tracing, see R()

            # Rules whose name starts with "_" do not create a Symbol.
            if pattern.__name__[0] != "_":
                name = Name(pattern.__name__)

            pattern = pattern()
            if callable(pattern):
                pattern = (pattern,)

        text = skip(self.skipper, textline, skipWS, skipComments)

        # Exact type tests on purpose: ``keyword`` is a text subclass and
        # must not be treated as a plain literal.
        pattern_type = type(pattern)

        if pattern_type is str or pattern_type is _unicode:
            if text[:len(pattern)] != pattern:
                syntaxError()
            text = skip(self.skipper, text[len(pattern):], skipWS, skipComments)
            return R(None, text)

        elif pattern_type is keyword:
            m = word_regex.match(text)
            if not m or m.group(0) != pattern:
                syntaxError()
            text = skip(self.skipper, text[len(pattern):], skipWS, skipComments)
            return R(None, text)

        elif pattern_type is _not:
            try:
                self.parseLine(text, pattern.obj, [], skipWS, skipComments)
            except SyntaxError:
                # The lookahead failed, so the negative predicate holds.
                return resultSoFar, textline
            syntaxError()

        elif pattern_type is _and:
            # Raises SyntaxError itself when the lookahead does not match.
            self.parseLine(text, pattern.obj, [], skipWS, skipComments)
            return resultSoFar, textline

        elif pattern_type is _regex_type or pattern_type is ignore:
            regex = pattern.regex if pattern_type is ignore else pattern
            m = regex.match(text)
            if not m:
                syntaxError()
            text = skip(self.skipper, text[len(m.group(0)):], skipWS, skipComments)
            if pattern_type is ignore:
                return R(None, text)
            return R(m.group(0), text)

        elif pattern_type is tuple:
            result = []
            n = 1
            for p in pattern:
                if type(p) is int:
                    # Cardinality for the next item.
                    n = p
                    continue
                if n > 0:
                    for _ in range(n):
                        result, text = self.parseLine(text, p, result, skipWS, skipComments)
                elif n == 0:
                    if text != "":
                        try:
                            result, text = self.parseLine(text, p, result, skipWS, skipComments)
                        except SyntaxError:
                            pass
                else:
                    found = False
                    while True:
                        try:
                            result, newText = self.parseLine(text, p, result, skipWS, skipComments)
                        except SyntaxError:
                            break
                        found = True
                        stalled = len(newText) == len(text)
                        text = newText
                        if stalled:
                            # Matched without consuming input; repeating
                            # the item would never terminate.
                            break
                    if n == -2 and not found:
                        syntaxError()
                n = 1
            return R(result, text)

        elif pattern_type is list:
            result = []
            for p in pattern:
                try:
                    result, text = self.parseLine(text, p, result, skipWS, skipComments)
                except SyntaxError:
                    continue
                return R(result, text)
            syntaxError()

        else:
            raise SyntaxError(u"illegal type in grammar: " + u(pattern_type))

    def lineNo(self):
        """Return ``u"<file>:<line>"`` for the furthest position parsed so far.

        The position is ``textlen - restlen``; it is looked up in ``lines``
        with a binary search. Returns ``u""`` when no line table is set,
        when nothing has been matched yet, when the position lies in the
        last recorded line (there is no following entry to read the
        position from) or when no entry fits.
        """
        if not self.lines:
            return u""
        if self.restlen == -1:
            return u""
        parsed = self.textlen - self.restlen

        left, right = 0, len(self.lines)

        while True:
            mid = (right + left) // 2
            if self.lines[mid][0] <= parsed:
                if mid + 1 >= len(self.lines):
                    return u""
                following = self.lines[mid + 1]
                if following[0] >= parsed:
                    try:
                        return u(following[1]) + u":" + u(following[2])
                    except Exception:
                        # e.g. a file name that cannot be decoded
                        return u""
                left = mid + 1
            else:
                right = mid - 1
            if left > right:
                return u""


# plain module API

def parseLine(textline, pattern, resultSoFar=None, skipWS=True, skipComments=None, packrat=False):
    """Parse ``textline`` with ``pattern`` using a new parser instance.

    textline:     text to parse
    pattern:      pyPEG language description
    resultSoFar:  list the parsing result is appended to
                  (default: a new empty list)
    skipWS:       flag if whitespace should be skipped (default: True)
    skipComments: pattern (usually a function returning pyPEG) that
                  matches comments
    packrat:      use memoization

    returns: pyAST, textrest

    raises: SyntaxError, see parser.parseLine()
    """
    if resultSoFar is None:
        resultSoFar = []
    p = parser(p=packrat)
    text = skip(p.skipper, textline, skipWS, skipComments)
    ast, text = p.parseLine(text, pattern, resultSoFar, skipWS, skipComments)
    return ast, text


def parse(language, lineSource, skipWS=True, skipComments=None, packrat=False, lineCount=True):
    """Parse all lines delivered by ``lineSource``.

    language:     pyPEG language description
    lineSource:   iterable of lines that also offers ``filename()`` and
                  ``lineno()``, e.g. a fileinput.FileInput object
    skipWS:       flag if whitespace should be skipped (default: True)
    skipComments: Python function which returns pyPEG for matching comments
    packrat:      use memoization
    lineCount:    add line number information to AST

    returns: pyAST

    raises: SyntaxError(reason), if the input is not in language or is not
            consumed completely; reason names file, line number and the
            text of the offending line
    """
    while callable(language):
        language = language()

    # lines holds (offset of the line in the joined text, file name,
    # zero-based line index) for every input line.
    lines, pieces, offset = [], [], 0
    for line in lineSource:
        lines.append((offset, lineSource.filename(), lineSource.lineno() - 1))
        piece = u(line)
        pieces.append(piece)
        offset += len(piece)
    orig = u"".join(pieces)
    textlen = len(orig)

    p = parser(p=packrat)
    p.textlen = textlen
    p.lines = lines if lineCount else None
    text = skip(p.skipper, orig, skipWS, skipComments)

    try:
        result, text = p.parseLine(text, language, [], skipWS, skipComments)
        if text:
            raise SyntaxError()

    except SyntaxError:
        # While nothing has matched (restlen == -1) the failure is located
        # at the start of the not yet consumed text.
        remaining = p.restlen if p.restlen != -1 else len(text)
        parsed = textlen - remaining

        # Last line that starts before the failure position.
        index = -1
        for i, entry in enumerate(lines):
            if entry[0] >= parsed:
                break
            index = i

        if lines:
            index = max(index, 0)  # failure at the very beginning
            start, fileName, lineIndex = lines[index]
            if index + 1 < len(lines):
                end = lines[index + 1][0]
            else:
                end = textlen
            lineCont = orig[start:end].rstrip(u"\r\n")
            lineNo = lineIndex + 1
        else:
            # Empty input: there is no line to quote.
            fileName, lineNo, lineCont = u"", 1, u""

        raise SyntaxError(u"syntax error in " + u(fileName) + u":" + u(lineNo) + u": " + lineCont)

    return result