yml2/pyPEG.py
changeset 41 98a53c3282c3
child 65 d659b8c2ed22
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/yml2/pyPEG.py	Tue Mar 17 10:12:14 2020 +0100
@@ -0,0 +1,351 @@
+# YPL parser 1.6
+# adapted for Python 3.x
+
+# written by VB.
+
+import re
+import sys, codecs
+
+class keyword(str):
+    """String subclass marking a grammar element that must match a whole word.
+
+    parser.parseLine() compares it with the leading word found by word_regex
+    instead of testing for a plain text prefix.
+    """
+class code(str):
+    """String subclass without added behaviour.
+
+    Nothing else in this file refers to it.
+    """
+class ignore(object):
+    """Regex grammar element whose matched text is consumed but not added to the result."""
+
+    def __init__(self, regex_text, *args):
+        # Any extra positional arguments (e.g. flags) are passed on to re.compile().
+        compiled = re.compile(regex_text, *args)
+        self.regex = compiled
+
+class _and(object):
+    """Lookahead wrapper: parser.parseLine() requires the wrapped pattern to
+    match, but returns the text and result unchanged."""
+
+    def __init__(self, something):
+        # The wrapped pattern is read back as .obj by the parser.
+        self.obj = something
+
+class _not(_and):
+    """Negative lookahead wrapper: parser.parseLine() succeeds, consuming
+    nothing, only when the wrapped pattern does not match."""
+
+class Name(str):
+    """Rule name (a str) that additionally carries location attributes."""
+
+    def __init__(self, *args):
+        # The string value itself comes from str.__new__; only the extra
+        # attributes are initialised here.
+        self.line, self.file = 0, ""
+
+class Symbol(list):
+    """AST node: the two-element list [name, what], with both items also
+    available as the attributes __name__ and what."""
+
+    def __init__(self, name, what):
+        super().__init__((name, what))
+        self.__name__ = name
+        self.what = what
+
+    def __call__(self):
+        # Calling a node yields its payload.
+        return self.what
+
+    def __str__(self):
+        return 'Symbol({!r}, {!r})'.format(self.__name__, self.what)
+
+    def __repr__(self):
+        # repr() and str() render identically.
+        return str(self)
+
+# Matches a leading run of word characters; parser.parseLine() uses it to
+# compare whole words against keyword patterns.
+word_regex = re.compile(r"\w+")
+# Matches everything up to the end of the line; not referenced in this file.
+rest_regex = re.compile(r".*")
+
+# When true (and __debug__ is set) parser.parseLine() writes "testing with"
+# and "match:" trace lines to sys.stderr.
+print_trace = False
+
+def u(text):
+    """Return *text* converted to a plain str.
+
+    text:       an exception (its first argument is used, or "" when it has
+                none), a str (returned unchanged), bytes (decoded) or any
+                other object (converted with str())
+
+    returns:    str
+    """
+    if isinstance(text, BaseException):
+        # an exception raised without arguments has an empty args tuple
+        text = text.args[0] if text.args else ""
+    if type(text) is str:
+        return text
+    if isinstance(text, bytes):
+        # sys.stdin may be None or report no encoding; use UTF-8 in that case
+        encoding = getattr(sys.stdin, "encoding", None) or "utf-8"
+        return codecs.decode(text, encoding)
+    return str(text)
+
+def skip(skipper, text, skipWS, skipComments):
+    """Strip leading whitespace and comments from *text*.
+
+    skipper:        object whose parseLine() is used to match comments
+    text:           text to strip
+    skipWS:         flag if leading whitespace should be removed
+    skipComments:   comment pattern handed to skipper.parseLine(); a false
+                    value disables comment skipping
+
+    returns:        the remaining text
+    """
+    t = text.lstrip() if skipWS else text
+    if not skipComments:
+        return t
+    while True:
+        try:
+            _, rest = skipper.parseLine(t, skipComments, [], skipWS, None)
+        except Exception:
+            # No (further) comment at this position. The failed match is what
+            # ends the loop; KeyboardInterrupt and SystemExit still propagate.
+            break
+        if skipWS:
+            rest = rest.lstrip()
+        progressed = len(rest) < len(t)
+        t = rest
+        if not progressed:
+            # a comment pattern that matches without consuming anything would
+            # otherwise keep this loop running forever
+            break
+    return t
+
+class parser(object):
+    """PEG parser matching text against a pyPEG language description.
+
+    Attributes set by __init__:
+      restlen   smallest length of unparsed text seen after a successful
+                match, or -1 while nothing has matched yet
+      skipper   parser handed to skip() for matching comments; a separate
+                instance unless *another* is true, in which case it is self
+      lines     list of (offset, file name, zero-based line number) tuples
+                read by lineNo(), or None
+      textlen   total length of the text being parsed
+      memory    memo table, filled when packrat is enabled
+      packrat   flag enabling memoization
+    """
+
+    def __init__(self, another = False, p = False):
+        self.restlen = -1
+        if not(another):
+            self.skipper = parser(True, p)
+            self.skipper.packrat = p
+        else:
+            self.skipper = self
+        self.lines = None
+        self.textlen = 0
+        self.memory = {}
+        self.packrat = p
+
+    def parseLine(self, textline, pattern, resultSoFar = None, skipWS = True, skipComments = None):
+        """Match *pattern* at the start of *textline*.
+
+        textline:       text to parse
+        pattern:        pyPEG language description
+        resultSoFar:    parsing result so far; matches are added to this list
+                        in place (default: a new empty list for each call)
+        skipWS:         Flag if whitespace should be skipped (default: True)
+        skipComments:   Python function returning pyPEG for matching comments
+
+        returns:        pyAST, textrest
+
+        raises:         SyntaxError() if textline is detected not being in
+                        the language described by pattern
+
+                        SyntaxError(reason) if pattern contains an element of
+                        an unsupported type
+        """
+        if resultSoFar is None:
+            # A default list in the signature would be shared by all calls
+            # and R() below appends to it, leaking results between parses.
+            resultSoFar = []
+        name = None
+        _textline = textline
+        _pattern = pattern
+
+        def R(result, text):
+            # Success path: record the progress made, merge `result` into
+            # resultSoFar and memoize the outcome when packrat is on.
+            if __debug__:
+                if print_trace:
+                    try:
+                        if _pattern.__name__ != "comment":
+                            sys.stderr.write("match: " + _pattern.__name__ + "\n")
+                    except Exception: pass  # tracing is best effort (pattern may have no __name__)
+
+            if self.restlen == -1:
+                self.restlen = len(text)
+            else:
+                self.restlen = min(self.restlen, len(text))
+            res = resultSoFar
+            if name:
+                # a named rule always yields a Symbol, with [] for an empty result
+                name.line = self.lineNo()
+                res.append(Symbol(name, result if result else []))
+            elif result:
+                # exact type test: a Symbol (list subclass) is appended as one item
+                if type(result) is list:
+                    res.extend(result)
+                else:
+                    res.append(result)
+            if self.packrat:
+                self.memory[(len(_textline), id(_pattern))] = (res, text)
+            return res, text
+
+        def syntaxError():
+            # Failure path: memoize the failure when packrat is on, then raise.
+            if self.packrat:
+                self.memory[(len(_textline), id(_pattern))] = False
+            raise SyntaxError()
+
+        if self.packrat:
+            # Only memoized successes are reused. A memoized failure (False)
+            # is ignored and the pattern is evaluated again, as before.
+            memo = self.memory.get((len(textline), id(pattern)))
+            if memo:
+                return memo
+
+        if callable(pattern):
+            # A rule: call it to obtain the pattern it stands for.
+            if __debug__:
+                if print_trace:
+                    try:
+                        if pattern.__name__ != "comment":
+                            sys.stderr.write("testing with " + pattern.__name__ + ": " + textline[:40] + "\n")
+                    except Exception: pass  # tracing is best effort
+
+            # rules whose name starts with "_" do not produce a Symbol
+            if pattern.__name__[0] != "_":
+                name = Name(pattern.__name__)
+
+            pattern = pattern()
+            if callable(pattern):
+                pattern = (pattern,)
+
+        text = skip(self.skipper, textline, skipWS, skipComments)
+
+        pattern_type = type(pattern)
+
+        if pattern_type is str:
+            # plain text that must follow literally
+            if text[:len(pattern)] == pattern:
+                text = skip(self.skipper, text[len(pattern):], skipWS, skipComments)
+                return R(None, text)
+            else:
+                syntaxError()
+
+        elif pattern_type is keyword:
+            # the complete leading word must equal the keyword
+            m = word_regex.match(text)
+            if m and m.group(0) == pattern:
+                text = skip(self.skipper, text[len(pattern):], skipWS, skipComments)
+                return R(None, text)
+            else:
+                syntaxError()
+
+        elif pattern_type is _not:
+            # negative lookahead: succeed, consuming nothing, if obj fails
+            try:
+                r, t = self.parseLine(text, pattern.obj, [], skipWS, skipComments)
+            except SyntaxError:
+                return resultSoFar, textline
+            syntaxError()
+
+        elif pattern_type is _and:
+            # positive lookahead: obj has to match, but nothing is consumed
+            r, t = self.parseLine(text, pattern.obj, [], skipWS, skipComments)
+            return resultSoFar, textline
+
+        elif pattern_type is type(word_regex) or pattern_type is ignore:
+            # regex: the match goes into the result unless wrapped in ignore
+            if pattern_type is ignore:
+                pattern = pattern.regex
+            m = pattern.match(text)
+            if m:
+                text = skip(self.skipper, text[len(m.group(0)):], skipWS, skipComments)
+                if pattern_type is ignore:
+                    return R(None, text)
+                else:
+                    return R(m.group(0), text)
+            else:
+                syntaxError()
+
+        elif pattern_type is tuple:
+            # sequence; an int item sets the repetition count of the item
+            # after it: n > 0 exactly n times, 0 optional, negative as often
+            # as possible (-2 additionally requires at least one match)
+            result = []
+            n = 1
+            for p in pattern:
+                if type(p) is int:
+                    n = p
+                else:
+                    if n>0:
+                        for _ in range(n):
+                            result, text = self.parseLine(text, p, result, skipWS, skipComments)
+                    elif n==0:
+                        if text != "":
+                            try:
+                                newResult, newText = self.parseLine(text, p, result, skipWS, skipComments)
+                                result, text = newResult, newText
+                            except SyntaxError:
+                                pass
+                    elif n<0:
+                        found = False
+                        while True:
+                            try:
+                                newResult, newText = self.parseLine(text, p, result, skipWS, skipComments)
+                            except SyntaxError:
+                                break
+                            stalled = len(newText) >= len(text)
+                            result, text, found = newResult, newText, True
+                            if stalled:
+                                # a match that consumes nothing would be
+                                # repeated forever
+                                break
+                        if n == -2 and not(found):
+                            syntaxError()
+                    n = 1
+            return R(result, text)
+
+        elif pattern_type is list:
+            # ordered choice: the first alternative that matches wins
+            result = []
+            found = False
+            for p in pattern:
+                try:
+                    result, text = self.parseLine(text, p, result, skipWS, skipComments)
+                    found = True
+                except SyntaxError:
+                    pass
+                if found:
+                    break
+            if found:
+                return R(result, text)
+            else:
+                syntaxError()
+
+        else:
+            raise SyntaxError("illegal type in grammar: " + u(pattern_type))
+
+    def lineNo(self):
+        """Return "file:line" for the furthest position parsed so far.
+
+        The position is textlen - restlen. self.lines is expected to hold
+        (offset, file name, zero-based line number) tuples ordered by offset,
+        as built by parse(); the line number is reported one-based.
+
+        returns:        "file:line", or "" if no line table is set, nothing
+                        has been matched yet or the position cannot be
+                        resolved
+        """
+        if not(self.lines): return ""
+        if self.restlen == -1: return ""
+        parsed = self.textlen - self.restlen
+
+        # binary search for the last line that starts at or before `parsed`
+        lo, hi = 0, len(self.lines) - 1
+        while lo < hi:
+            mid = (lo + hi + 1) // 2
+            if self.lines[mid][0] <= parsed:
+                lo = mid
+            else:
+                hi = mid - 1
+
+        entry = self.lines[lo]
+        if entry[0] > parsed:
+            # position lies before the first recorded line
+            return ""
+        try:
+            return u(entry[1]) + ":" + u(entry[2] + 1)
+        except (IndexError, TypeError):
+            return ""
+
+# plain module API
+
+def parseLine(textline, pattern, resultSoFar = None, skipWS = True, skipComments = None, packrat = False):
+    """Match *pattern* at the start of *textline* using a new parser.
+
+    textline:       text to parse
+    pattern:        pyPEG language description
+    resultSoFar:    parsing result so far; matches are added to this list in
+                    place (default: a new empty list for each call)
+    skipWS:         Flag if whitespace should be skipped (default: True)
+    skipComments:   Python function returning pyPEG for matching comments
+    packrat:        use memoization
+
+    returns:        pyAST, textrest
+
+    raises:         SyntaxError, as raised by parser.parseLine()
+    """
+    if resultSoFar is None:
+        # A default list in the signature would be shared by all calls and
+        # is appended to while parsing, so results would pile up.
+        resultSoFar = []
+    p = parser(p=packrat)
+    text = skip(p.skipper, textline, skipWS, skipComments)
+    ast, text = p.parseLine(text, pattern, resultSoFar, skipWS, skipComments)
+    return ast, text
+
+# parse():
+#   language:       pyPEG language description
+#   lineSource:     a fileinput.FileInput object
+#   skipWS:         Flag if whitespace should be skipped (default: True)
+#   skipComments:   Python function which returns pyPEG for matching comments
+#   packrat:        use memoization
+#   lineCount:      add line number information to AST
+#   
+#   returns:        pyAST
+#
+#   raises:         SyntaxError(reason), if a parsed line is not in language
+#                   SyntaxError(reason), if the language description is illegal
+
+def parse(language, lineSource, skipWS = True, skipComments = None, packrat = False, lineCount = True):
+    """Parse all lines delivered by *lineSource* according to *language*.
+
+    language:       pyPEG language description
+    lineSource:     iterable of lines that offers filename() and lineno(),
+                    e.g. a fileinput.FileInput object
+    skipWS:         Flag if whitespace should be skipped (default: True)
+    skipComments:   Python function which returns pyPEG for matching comments
+    packrat:        use memoization
+    lineCount:      add line number information to AST
+
+    returns:        pyAST
+
+    raises:         SyntaxError(reason) naming file, line number and content
+                    of the offending line if parsing fails or text is left
+                    over after the language has been matched
+    """
+    while callable(language):
+        language = language()
+
+    # For each line remember (offset of its first character, file name,
+    # zero-based line number). The text is joined once at the end.
+    lines, pieces, offset = [], [], 0
+    for line in lineSource:
+        piece = u(line)
+        lines.append((offset, lineSource.filename(), lineSource.lineno() - 1))
+        pieces.append(piece)
+        offset += len(piece)
+    orig = "".join(pieces)
+    textlen = offset
+
+    p = parser(p=packrat)
+    p.textlen = textlen
+    if lineCount:
+        p.lines = lines
+
+    text = orig
+    try:
+        text = skip(p.skipper, orig, skipWS, skipComments)
+        result, text = p.parseLine(text, language, [], skipWS, skipComments)
+        if text:
+            raise SyntaxError()
+
+    except SyntaxError:
+        # p.restlen is still -1 if nothing matched at all; the error is then
+        # located at the text left after the initial whitespace/comment skip.
+        rest = len(text) if p.restlen == -1 else p.restlen
+        parsed = textlen - rest
+
+        # the offending line is the last one starting at or before `parsed`
+        index = -1
+        for i, entry in enumerate(lines):
+            if entry[0] > parsed:
+                break
+            index = i
+
+        if index < 0:
+            # no input lines at all
+            file, lineNo, lineCont = "", 1, ""
+        else:
+            start, file, lineNo = lines[index]
+            lineNo += 1
+            end = lines[index + 1][0] if index + 1 < len(lines) else textlen
+            # slice by recorded offsets so the content always belongs to the
+            # reported line, whatever characters the text contains
+            lineCont = orig[start:end].rstrip("\r\n")
+        raise SyntaxError("syntax error in " + u(file) + ":" + u(lineNo) + ": " + lineCont)
+
+    return result