pyPEG.py
changeset 41 98a53c3282c3
parent 40 432ab62b2537
child 42 700f4d003349
--- a/pyPEG.py	Mon Nov 04 11:38:34 2019 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,351 +0,0 @@
-# YPL parser 1.6
-# adapted for Python 3.x
-
-# written by VB.
-
-import re
-import sys, codecs
-
-class keyword(str): pass
-class code(str): pass
-class ignore(object):
-    def __init__(self, regex_text, *args):
-        self.regex = re.compile(regex_text, *args)
-
-class _and(object):
-    def __init__(self, something):
-        self.obj = something
-
-class _not(_and): pass
-
-class Name(str):
-    def __init__(self, *args):
-        self.line = 0
-        self.file = ""
-
-class Symbol(list):
-    def __init__(self, name, what):
-        self.__name__ = name
-        self.append(name)
-        self.what = what
-        self.append(what)
-    def __call__(self):
-        return self.what
-    def __str__(self):
-        return 'Symbol(' + repr(self.__name__) + ', ' + repr(self.what) + ')'
-    def __repr__(self):
-        return str(self)
-
-word_regex = re.compile(r"\w+")
-rest_regex = re.compile(r".*")
-
-print_trace = False
-
-def u(text):
-    if isinstance(text, BaseException):
-        text = text.args[0]
-    if type(text) is str:
-        return text
-    if isinstance(text, bytes):
-        if sys.stdin.encoding:
-            return codecs.decode(text, sys.stdin.encoding)
-        else:
-            return codecs.decode(text, "utf-8")
-    return str(text)
-
-def skip(skipper, text, skipWS, skipComments):
-    if skipWS:
-        t = text.lstrip()
-    else:
-        t = text
-    if skipComments:
-        try:
-            while True:
-                skip, t = skipper.parseLine(t, skipComments, [], skipWS, None)
-                if skipWS:
-                    t = t.lstrip()
-        except: pass
-    return t
-
-class parser(object):
-    def __init__(self, another = False, p = False):
-        self.restlen = -1 
-        if not(another):
-            self.skipper = parser(True, p)
-            self.skipper.packrat = p
-        else:
-            self.skipper = self
-        self.lines = None
-        self.textlen = 0
-        self.memory = {}
-        self.packrat = p
-
-    # parseLine():
-    #   textline:       text to parse
-    #   pattern:        pyPEG language description
-    #   resultSoFar:    parsing result so far (default: blank list [])
-    #   skipWS:         Flag if whitespace should be skipped (default: True)
-    #   skipComments:   Python functions returning pyPEG for matching comments
-    #   
-    #   returns:        pyAST, textrest
-    #
-    #   raises:         SyntaxError(reason) if textline is detected not being in language
-    #                   described by pattern
-    #
-    #                   SyntaxError(reason) if pattern is an illegal language description
-
-    def parseLine(self, textline, pattern, resultSoFar = [], skipWS = True, skipComments = None):
-        name = None
-        _textline = textline
-        _pattern = pattern
-
-        def R(result, text):
-            if __debug__:
-                if print_trace:
-                    try:
-                        if _pattern.__name__ != "comment":
-                            sys.stderr.write("match: " + _pattern.__name__ + "\n")
-                    except: pass
-
-            if self.restlen == -1:
-                self.restlen = len(text)
-            else:
-                self.restlen = min(self.restlen, len(text))
-            res = resultSoFar
-            if name and result:
-                name.line = self.lineNo()
-                res.append(Symbol(name, result))
-            elif name:
-                name.line = self.lineNo()
-                res.append(Symbol(name, []))
-            elif result:
-                if type(result) is type([]):
-                    res.extend(result)
-                else:
-                    res.extend([result])
-            if self.packrat:
-                self.memory[(len(_textline), id(_pattern))] = (res, text)
-            return res, text
-
-        def syntaxError():
-            if self.packrat:
-                self.memory[(len(_textline), id(_pattern))] = False
-            raise SyntaxError()
-
-        if self.packrat:
-            try:
-                result = self.memory[(len(textline), id(pattern))]
-                if result:
-                    return result
-                else:
-                    raise SyntaxError()
-            except: pass
-
-        if callable(pattern):
-            if __debug__:
-                if print_trace:
-                    try:
-                        if pattern.__name__ != "comment":
-                            sys.stderr.write("testing with " + pattern.__name__ + ": " + textline[:40] + "\n")
-                    except: pass
-
-            if pattern.__name__[0] != "_":
-                name = Name(pattern.__name__)
-
-            pattern = pattern()
-            if callable(pattern):
-                pattern = (pattern,)
-
-        text = skip(self.skipper, textline, skipWS, skipComments)
-
-        pattern_type = type(pattern)
-
-        if pattern_type is str:
-            if text[:len(pattern)] == pattern:
-                text = skip(self.skipper, text[len(pattern):], skipWS, skipComments)
-                return R(None, text)
-            else:
-                syntaxError()
-
-        elif pattern_type is keyword:
-            m = word_regex.match(text)
-            if m:
-                if m.group(0) == pattern:
-                    text = skip(self.skipper, text[len(pattern):], skipWS, skipComments)
-                    return R(None, text)
-                else:
-                    syntaxError()
-            else:
-                syntaxError()
-
-        elif pattern_type is _not:
-            try:
-                r, t = self.parseLine(text, pattern.obj, [], skipWS, skipComments)
-            except:
-                return resultSoFar, textline
-            syntaxError()
-
-        elif pattern_type is _and:
-            r, t = self.parseLine(text, pattern.obj, [], skipWS, skipComments)
-            return resultSoFar, textline
-
-        elif pattern_type is type(word_regex) or pattern_type is ignore:
-            if pattern_type is ignore:
-                pattern = pattern.regex
-            m = pattern.match(text)
-            if m:
-                text = skip(self.skipper, text[len(m.group(0)):], skipWS, skipComments)
-                if pattern_type is ignore:
-                    return R(None, text)
-                else:
-                    return R(m.group(0), text)
-            else:
-                syntaxError()
-
-        elif pattern_type is tuple:
-            result = []
-            n = 1
-            for p in pattern:
-                if type(p) is type(0):
-                    n = p
-                else:
-                    if n>0:
-                        for i in range(n):
-                            result, text = self.parseLine(text, p, result, skipWS, skipComments)
-                    elif n==0:
-                        if text == "":
-                            pass
-                        else:
-                            try:
-                                newResult, newText = self.parseLine(text, p, result, skipWS, skipComments)
-                                result, text = newResult, newText
-                            except SyntaxError:
-                                pass
-                    elif n<0:
-                        found = False
-                        while True:
-                            try:
-                                newResult, newText = self.parseLine(text, p, result, skipWS, skipComments)
-                                result, text, found = newResult, newText, True
-                            except SyntaxError:
-                                break
-                        if n == -2 and not(found):
-                            syntaxError()
-                    n = 1
-            return R(result, text)
-
-        elif pattern_type is list:
-            result = []
-            found = False
-            for p in pattern:
-                try:
-                    result, text = self.parseLine(text, p, result, skipWS, skipComments)
-                    found = True
-                except SyntaxError:
-                    pass
-                if found:
-                    break
-            if found:
-                return R(result, text)
-            else:
-                syntaxError()
-
-        else:
-            raise SyntaxError("illegal type in grammar: " + u(pattern_type))
-
-    def lineNo(self):
-        if not(self.lines): return ""
-        if self.restlen == -1: return ""
-        parsed = self.textlen - self.restlen
-
-        left, right = 0, len(self.lines)
-
-        while True:
-            mid = int((right + left) / 2)
-            if self.lines[mid][0] <= parsed:
-                try:
-                    if self.lines[mid + 1][0] >= parsed:
-                        try:
-                            return u(self.lines[mid + 1][1]) + ":" + u(self.lines[mid + 1][2])
-                        except:
-                            return ""
-                    else:
-                        left = mid + 1
-                except:
-                    try:
-                        return u(self.lines[mid + 1][1]) + ":" + u(self.lines[mid + 1][2])
-                    except:
-                        return ""
-            else:
-                right = mid - 1
-            if left > right:
-                return ""
-
-# plain module API
-
-def parseLine(textline, pattern, resultSoFar = [], skipWS = True, skipComments = None, packrat = False):
-    p = parser(p=packrat)
-    text = skip(p.skipper, textline, skipWS, skipComments)
-    ast, text = p.parseLine(text, pattern, resultSoFar, skipWS, skipComments)
-    return ast, text
-
-# parse():
-#   language:       pyPEG language description
-#   lineSource:     a fileinput.FileInput object
-#   skipWS:         Flag if whitespace should be skipped (default: True)
-#   skipComments:   Python function which returns pyPEG for matching comments
-#   packrat:        use memoization
-#   lineCount:      add line number information to AST
-#   
-#   returns:        pyAST
-#
-#   raises:         SyntaxError(reason), if a parsed line is not in language
-#                   SyntaxError(reason), if the language description is illegal
-
-def parse(language, lineSource, skipWS = True, skipComments = None, packrat = False, lineCount = True):
-    lines, lineNo = [], 0
-
-    while callable(language):
-        language = language()
-
-    orig, ld = "", 0
-    for line in lineSource:
-        if lineSource.isfirstline():
-            ld = 1
-        else:
-            ld += 1
-        lines.append((len(orig), lineSource.filename(), lineSource.lineno() - 1))
-        orig += u(line)
-
-    textlen = len(orig)
-
-    try:
-        p = parser(p=packrat)
-        p.textlen = len(orig)
-        if lineCount:
-            p.lines = lines
-        else:
-            p.line = None
-        text = skip(p.skipper, orig, skipWS, skipComments)
-        result, text = p.parseLine(text, language, [], skipWS, skipComments)
-        if text:
-            raise SyntaxError()
-
-    except SyntaxError as msg:
-        parsed = textlen - p.restlen
-        textlen = 0
-        nn, lineNo, file = 0, 0, ""
-        for n, ld, l in lines:
-            if n >= parsed:
-                break
-            else:
-                lineNo = l
-                nn += 1
-                file = ld
-
-        lineNo += 1
-        nn -= 1
-        lineCont = orig.splitlines()[nn]
-        raise SyntaxError("syntax error in " + u(file) + ":" + u(lineNo) + ": " + lineCont)
-
-    return result