pyPEG.py
changeset 52 b4a9a3122abb
parent 22 3a2bd70c01df
child 53 b94d4c5b9496
--- a/pyPEG.py	Wed Aug 29 23:57:58 2018 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,351 +0,0 @@
-# YPL parser 1.5
-
-# written by VB.
-
-import re
-import sys, codecs
-import exceptions
-
-class keyword(unicode): pass
-class code(unicode): pass
-class ignore(object):
-    def __init__(self, regex_text, *args):
-        self.regex = re.compile(regex_text, *args)
-
-class _and(object):
-    def __init__(self, something):
-        self.obj = something
-
-class _not(_and): pass
-
-class Name(unicode):
-    def __init__(self, *args):
-        self.line = 0
-        self.file = u""
-
-class Symbol(list):
-    def __init__(self, name, what):
-        self.__name__ = name
-        self.append(name)
-        self.what = what
-        self.append(what)
-    def __call__(self):
-        return self.what
-    def __unicode__(self):
-        return u'Symbol(' + repr(self.__name__) + ', ' + repr(self.what) + u')'
-    def __repr__(self):
-        return unicode(self)
-
-word_regex = re.compile(ur"\w+")
-rest_regex = re.compile(ur".*")
-
-print_trace = False
-
-def u(text):
-    if isinstance(text, exceptions.BaseException):
-        text = text.args[0]
-    if type(text) is unicode:
-        return text
-    if isinstance(text, str):
-        if sys.stdin.encoding:
-            return codecs.decode(text, sys.stdin.encoding)
-        else:
-            return codecs.decode(text, "utf-8")
-    return unicode(text)
-
-def skip(skipper, text, skipWS, skipComments):
-    if skipWS:
-        t = text.lstrip()
-    else:
-        t = text
-    if skipComments:
-        try:
-            while True:
-                skip, t = skipper.parseLine(t, skipComments, [], skipWS, None)
-                if skipWS:
-                    t = t.lstrip()
-        except: pass
-    return t
-
-class parser(object):
-    def __init__(self, another = False, p = False):
-        self.restlen = -1 
-        if not(another):
-            self.skipper = parser(True, p)
-            self.skipper.packrat = p
-        else:
-            self.skipper = self
-        self.lines = None
-        self.textlen = 0
-        self.memory = {}
-        self.packrat = p
-
-    # parseLine():
-    #   textline:       text to parse
-    #   pattern:        pyPEG language description
-    #   resultSoFar:    parsing result so far (default: blank list [])
-    #   skipWS:         Flag if whitespace should be skipped (default: True)
-    #   skipComments:   Python functions returning pyPEG for matching comments
-    #   
-    #   returns:        pyAST, textrest
-    #
-    #   raises:         SyntaxError(reason) if textline is detected not being in language
-    #                   described by pattern
-    #
-    #                   SyntaxError(reason) if pattern is an illegal language description
-
-    def parseLine(self, textline, pattern, resultSoFar = [], skipWS = True, skipComments = None):
-        name = None
-        _textline = textline
-        _pattern = pattern
-
-        def R(result, text):
-            if __debug__:
-                if print_trace:
-                    try:
-                        if _pattern.__name__ != "comment":
-                            sys.stderr.write(u"match: " + _pattern.__name__ + u"\n")
-                    except: pass
-
-            if self.restlen == -1:
-                self.restlen = len(text)
-            else:
-                self.restlen = min(self.restlen, len(text))
-            res = resultSoFar
-            if name and result:
-                name.line = self.lineNo()
-                res.append(Symbol(name, result))
-            elif name:
-                name.line = self.lineNo()
-                res.append(Symbol(name, []))
-            elif result:
-                if type(result) is type([]):
-                    res.extend(result)
-                else:
-                    res.extend([result])
-            if self.packrat:
-                self.memory[(len(_textline), id(_pattern))] = (res, text)
-            return res, text
-
-        def syntaxError():
-            if self.packrat:
-                self.memory[(len(_textline), id(_pattern))] = False
-            raise SyntaxError()
-
-        if self.packrat:
-            try:
-                result = self.memory[(len(textline), id(pattern))]
-                if result:
-                    return result
-                else:
-                    raise SyntaxError()
-            except: pass
-
-        if callable(pattern):
-            if __debug__:
-                if print_trace:
-                    try:
-                        if pattern.__name__ != "comment":
-                            sys.stderr.write(u"testing with " + pattern.__name__ + u": " + textline[:40] + u"\n")
-                    except: pass
-
-            if pattern.__name__[0] != "_":
-                name = Name(pattern.__name__)
-
-            pattern = pattern()
-            if callable(pattern):
-                pattern = (pattern,)
-
-        text = skip(self.skipper, textline, skipWS, skipComments)
-
-        pattern_type = type(pattern)
-
-        if pattern_type is str or pattern_type is unicode:
-            if text[:len(pattern)] == pattern:
-                text = skip(self.skipper, text[len(pattern):], skipWS, skipComments)
-                return R(None, text)
-            else:
-                syntaxError()
-
-        elif pattern_type is keyword:
-            m = word_regex.match(text)
-            if m:
-                if m.group(0) == pattern:
-                    text = skip(self.skipper, text[len(pattern):], skipWS, skipComments)
-                    return R(None, text)
-                else:
-                    syntaxError()
-            else:
-                syntaxError()
-
-        elif pattern_type is _not:
-            try:
-                r, t = self.parseLine(text, pattern.obj, [], skipWS, skipComments)
-            except:
-                return resultSoFar, textline
-            syntaxError()
-
-        elif pattern_type is _and:
-            r, t = self.parseLine(text, pattern.obj, [], skipWS, skipComments)
-            return resultSoFar, textline
-
-        elif pattern_type is type(word_regex) or pattern_type is ignore:
-            if pattern_type is ignore:
-                pattern = pattern.regex
-            m = pattern.match(text)
-            if m:
-                text = skip(self.skipper, text[len(m.group(0)):], skipWS, skipComments)
-                if pattern_type is ignore:
-                    return R(None, text)
-                else:
-                    return R(m.group(0), text)
-            else:
-                syntaxError()
-
-        elif pattern_type is tuple:
-            result = []
-            n = 1
-            for p in pattern:
-                if type(p) is type(0):
-                    n = p
-                else:
-                    if n>0:
-                        for i in range(n):
-                            result, text = self.parseLine(text, p, result, skipWS, skipComments)
-                    elif n==0:
-                        if text == "":
-                            pass
-                        else:
-                            try:
-                                newResult, newText = self.parseLine(text, p, result, skipWS, skipComments)
-                                result, text = newResult, newText
-                            except SyntaxError:
-                                pass
-                    elif n<0:
-                        found = False
-                        while True:
-                            try:
-                                newResult, newText = self.parseLine(text, p, result, skipWS, skipComments)
-                                result, text, found = newResult, newText, True
-                            except SyntaxError:
-                                break
-                        if n == -2 and not(found):
-                            syntaxError()
-                    n = 1
-            return R(result, text)
-
-        elif pattern_type is list:
-            result = []
-            found = False
-            for p in pattern:
-                try:
-                    result, text = self.parseLine(text, p, result, skipWS, skipComments)
-                    found = True
-                except SyntaxError:
-                    pass
-                if found:
-                    break
-            if found:
-                return R(result, text)
-            else:
-                syntaxError()
-
-        else:
-            raise SyntaxError(u"illegal type in grammar: " + u(pattern_type))
-
-    def lineNo(self):
-        if not(self.lines): return u""
-        if self.restlen == -1: return u""
-        parsed = self.textlen - self.restlen
-
-        left, right = 0, len(self.lines)
-
-        while True:
-            mid = int((right + left) / 2)
-            if self.lines[mid][0] <= parsed:
-                try:
-                    if self.lines[mid + 1][0] >= parsed:
-                        try:
-                            return u(self.lines[mid + 1][1]) + u":" + u(self.lines[mid + 1][2])
-                        except:
-                            return u""
-                    else:
-                        left = mid + 1
-                except:
-                    try:
-                        return u(self.lines[mid + 1][1]) + u":" + u(self.lines[mid + 1][2])
-                    except:
-                        return u""
-            else:
-                right = mid - 1
-            if left > right:
-                return u""
-
-# plain module API
-
-def parseLine(textline, pattern, resultSoFar = [], skipWS = True, skipComments = None, packrat = False):
-    p = parser(p=packrat)
-    text = skip(p.skipper, textline, skipWS, skipComments)
-    ast, text = p.parseLine(text, pattern, resultSoFar, skipWS, skipComments)
-    return ast, text
-
-# parse():
-#   language:       pyPEG language description
-#   lineSource:     a fileinput.FileInput object
-#   skipWS:         Flag if whitespace should be skipped (default: True)
-#   skipComments:   Python function which returns pyPEG for matching comments
-#   packrat:        use memoization
-#   lineCount:      add line number information to AST
-#   
-#   returns:        pyAST
-#
-#   raises:         SyntaxError(reason), if a parsed line is not in language
-#                   SyntaxError(reason), if the language description is illegal
-
-def parse(language, lineSource, skipWS = True, skipComments = None, packrat = False, lineCount = True):
-    lines, lineNo = [], 0
-
-    while callable(language):
-        language = language()
-
-    orig, ld = u"", 0
-    for line in lineSource:
-        if lineSource.isfirstline():
-            ld = 1
-        else:
-            ld += 1
-        lines.append((len(orig), lineSource.filename(), lineSource.lineno() - 1))
-        orig += u(line)
-
-    textlen = len(orig)
-
-    try:
-        p = parser(p=packrat)
-        p.textlen = len(orig)
-        if lineCount:
-            p.lines = lines
-        else:
-            p.line = None
-        text = skip(p.skipper, orig, skipWS, skipComments)
-        result, text = p.parseLine(text, language, [], skipWS, skipComments)
-        if text:
-            raise SyntaxError()
-
-    except SyntaxError, msg:
-        parsed = textlen - p.restlen
-        textlen = 0
-        nn, lineNo, file = 0, 0, u""
-        for n, ld, l in lines:
-            if n >= parsed:
-                break
-            else:
-                lineNo = l
-                nn += 1
-                file = ld
-
-        lineNo += 1
-        nn -= 1
-        lineCont = orig.splitlines()[nn]
-        raise SyntaxError(u"syntax error in " + u(file) + u":" + u(lineNo) + u": " + lineCont)
-
-    return result