pyPEG.py
author Volker Birk <vb@pep-project.org>
Mon, 04 Nov 2019 11:38:34 +0100
changeset 40 432ab62b2537
parent 31 d3dddb80d1f5
permissions -rw-r--r--
date
31
d3dddb80d1f5 adapting to Python 3
Volker Birk <vb@pep-project.org>
parents: 0
diff changeset
     1
# YPL parser 1.6
d3dddb80d1f5 adapting to Python 3
Volker Birk <vb@pep-project.org>
parents: 0
diff changeset
     2
# adapted for Python 3.x
0
76005e62091d initial commit
Volker Birk <vb@pep-project.org>
parents:
diff changeset
     3
76005e62091d initial commit
Volker Birk <vb@pep-project.org>
parents:
diff changeset
     4
# written by VB.
76005e62091d initial commit
Volker Birk <vb@pep-project.org>
parents:
diff changeset
     5
76005e62091d initial commit
Volker Birk <vb@pep-project.org>
parents:
diff changeset
     6
import re
76005e62091d initial commit
Volker Birk <vb@pep-project.org>
parents:
diff changeset
     7
import sys, codecs
31
d3dddb80d1f5 adapting to Python 3
Volker Birk <vb@pep-project.org>
parents: 0
diff changeset
     8
d3dddb80d1f5 adapting to Python 3
Volker Birk <vb@pep-project.org>
parents: 0
diff changeset
     9
class keyword(str):
    """Grammar element for a keyword.

    parser.parseLine() reads the whole word at the current position and
    accepts it only if that word equals this string.
    """
d3dddb80d1f5 adapting to Python 3
Volker Birk <vb@pep-project.org>
parents: 0
diff changeset
    10
class code(str):
    """Plain ``str`` subclass without any added behaviour.

    NOTE(review): nothing in the visible part of this module uses it;
    presumably a marker type for text that holds code -- confirm with callers.
    """
0
76005e62091d initial commit
Volker Birk <vb@pep-project.org>
parents:
diff changeset
    11
class ignore(object):
    """Grammar element for a regular expression whose match is discarded.

    parser.parseLine() consumes the matched text but adds nothing to the
    result for it.
    """

    def __init__(self, regex_text, *args):
        # Extra positional arguments (e.g. flags) go straight to re.compile().
        compiled = re.compile(regex_text, *args)
        self.regex = compiled
76005e62091d initial commit
Volker Birk <vb@pep-project.org>
parents:
diff changeset
    14
76005e62091d initial commit
Volker Birk <vb@pep-project.org>
parents:
diff changeset
    15
class _and(object):
    """Look-ahead wrapper around a grammar element.

    parser.parseLine() tries the wrapped element and, if it matches, returns
    the incoming result and text unchanged, so no input is consumed.
    """

    def __init__(self, something):
        # The wrapped grammar element, read back by the parser as ``.obj``.
        self.obj = something
76005e62091d initial commit
Volker Birk <vb@pep-project.org>
parents:
diff changeset
    18
76005e62091d initial commit
Volker Birk <vb@pep-project.org>
parents:
diff changeset
    19
class _not(_and):
    """Negative look-ahead wrapper around a grammar element.

    parser.parseLine() succeeds without consuming input when the wrapped
    element does not match, and fails when it does match.
    """
76005e62091d initial commit
Volker Birk <vb@pep-project.org>
parents:
diff changeset
    20
31
d3dddb80d1f5 adapting to Python 3
Volker Birk <vb@pep-project.org>
parents: 0
diff changeset
    21
class Name(str):
    """Name of a grammar rule, with room for position information.

    ``line`` starts as 0 and ``file`` as an empty string;
    parser.parseLine() later overwrites ``line`` with the value returned by
    parser.lineNo().
    """

    def __init__(self, *args):
        # The string value itself is set by str.__new__; only the extra
        # attributes are initialised here.
        self.line, self.file = 0, ""
0
76005e62091d initial commit
Volker Birk <vb@pep-project.org>
parents:
diff changeset
    25
76005e62091d initial commit
Volker Birk <vb@pep-project.org>
parents:
diff changeset
    26
class Symbol(list):
    """AST node: a two-element list ``[name, what]``.

    The same two values are also available as the attributes ``__name__``
    and ``what``; calling the node returns ``what``.
    """

    def __init__(self, name, what):
        self.__name__ = name
        self.what = what
        # The list part holds exactly the name followed by the payload.
        self.extend((name, what))

    def __call__(self):
        """Return the payload of this node."""
        return self.what

    def __str__(self):
        return 'Symbol({0!r}, {1!r})'.format(self.__name__, self.what)

    def __repr__(self):
        # Same text as str(), so nested nodes print readably.
        return str(self)
d3dddb80d1f5 adapting to Python 3
Volker Birk <vb@pep-project.org>
parents: 0
diff changeset
    38
d3dddb80d1f5 adapting to Python 3
Volker Birk <vb@pep-project.org>
parents: 0
diff changeset
    39
# Matches one run of word characters.  parser.parseLine() uses it to read the
# whole word at the current position when matching a keyword, and uses its
# type to recognise compiled regular expressions in a grammar.
word_regex = re.compile(r"\w+")
d3dddb80d1f5 adapting to Python 3
Volker Birk <vb@pep-project.org>
parents: 0
diff changeset
    40
# Matches everything up to (not including) the next newline, possibly nothing.
# NOTE(review): no use of this pattern is visible in this part of the file.
rest_regex = re.compile(r".*")
0
76005e62091d initial commit
Volker Birk <vb@pep-project.org>
parents:
diff changeset
    41
76005e62091d initial commit
Volker Birk <vb@pep-project.org>
parents:
diff changeset
    42
# Set to True to make parser.parseLine() write "testing with ..." and
# "match: ..." trace lines to sys.stderr.  Tracing only happens while
# __debug__ is true.
print_trace = False
76005e62091d initial commit
Volker Birk <vb@pep-project.org>
parents:
diff changeset
    43
76005e62091d initial commit
Volker Birk <vb@pep-project.org>
parents:
diff changeset
    44
def u(text):
    """Return *text* converted to a plain ``str``.

    text:       an exception (its first argument is used), a str, a bytes
                object or any other value
    returns:    - for an exception: the conversion of its first argument,
                  or "" if the exception carries no arguments
                - a value whose type is exactly str: the value itself
                - bytes: decoded with the encoding of sys.stdin, or with
                  UTF-8 if that encoding is not available
                - anything else: str(text)
    """
    if isinstance(text, BaseException):
        # An exception raised without arguments has an empty args tuple;
        # treat it as an empty message instead of failing with IndexError.
        text = text.args[0] if text.args else ""
    if type(text) is str:
        return text
    if isinstance(text, bytes):
        # sys.stdin may be None (e.g. no console attached) or may lack a
        # usable encoding; fall back to UTF-8 in both cases.
        encoding = getattr(sys.stdin, "encoding", None) or "utf-8"
        return codecs.decode(text, encoding)
    return str(text)
0
76005e62091d initial commit
Volker Birk <vb@pep-project.org>
parents:
diff changeset
    55
76005e62091d initial commit
Volker Birk <vb@pep-project.org>
parents:
diff changeset
    56
def skip(skipper, text, skipWS, skipComments):
    """Return *text* with leading whitespace and comments removed.

    skipper:        object whose parseLine() method is used to match comments
    text:           text to strip
    skipWS:         flag if leading whitespace should be removed
    skipComments:   grammar description of a comment, or a false value if
                    comments are not to be skipped

    Comments are consumed repeatedly until the comment pattern no longer
    matches.  Any ordinary exception raised while matching ends the loop
    (best effort); KeyboardInterrupt and SystemExit are not intercepted.
    """
    t = text.lstrip() if skipWS else text
    if skipComments:
        try:
            while True:
                _, rest = skipper.parseLine(t, skipComments, [], skipWS, None)
                if skipWS:
                    rest = rest.lstrip()
                if rest == t:
                    # The comment pattern matched without consuming anything;
                    # repeating it would never terminate.
                    break
                t = rest
        except Exception:
            # Normally a SyntaxError: there is no (further) comment here.
            pass
    return t
76005e62091d initial commit
Volker Birk <vb@pep-project.org>
parents:
diff changeset
    69
76005e62091d initial commit
Volker Birk <vb@pep-project.org>
parents:
diff changeset
    70
class parser(object):
    """Interpreter for pyPEG grammar descriptions.

    Attributes:
        restlen:    smallest length of unparsed text seen after a successful
                    match, or -1 before the first match; read by lineNo()
        skipper:    parser handed to skip() for matching comments; a separate
                    instance unless this parser was created with another=True
        lines:      line table read by lineNo(); None until assigned from
                    outside this class
        textlen:    length of the complete text; 0 until assigned from
                    outside this class
        memory:     packrat memo, keyed by (len(textline), id(pattern))
        packrat:    flag if memoization is used
    """

    def __init__(self, another = False, p = False):
        """Create a parser.

        another:    true if this instance is itself a comment skipper; it
                    then uses itself as its skipper instead of creating one
        p:          flag if packrat memoization should be used
        """
        self.restlen = -1
        if not another:
            self.skipper = parser(True, p)
        else:
            self.skipper = self
        self.lines = None
        self.textlen = 0
        self.memory = {}
        self.packrat = p

    def parseLine(self, textline, pattern, resultSoFar = None, skipWS = True, skipComments = None):
        """Parse the beginning of *textline* according to *pattern*.

        textline:       text to parse
        pattern:        pyPEG language description
        resultSoFar:    parsing result so far; new results are added to this
                        list in place (default: a fresh empty list)
        skipWS:         flag if whitespace should be skipped (default: True)
        skipComments:   Python function returning pyPEG for matching comments

        returns:        pyAST, textrest

        raises:         SyntaxError() if textline is detected not being in
                        the language described by pattern

                        SyntaxError(reason) if pattern is an illegal language
                        description
        """
        # A fresh list per call: a shared default list would carry results
        # over from one call to the next.
        if resultSoFar is None:
            resultSoFar = []
        name = None
        _textline = textline
        _pattern = pattern

        def R(result, text):
            # Record a successful match and build the return value.
            if __debug__:
                if print_trace:
                    try:
                        if _pattern.__name__ != "comment":
                            sys.stderr.write("match: " + _pattern.__name__ + "\n")
                    except Exception:
                        # tracing is best effort; not every pattern has a name
                        pass

            if self.restlen == -1:
                self.restlen = len(text)
            else:
                self.restlen = min(self.restlen, len(text))
            res = resultSoFar
            if name:
                # a named rule always yields a Symbol, even without content
                name.line = self.lineNo()
                res.append(Symbol(name, result if result else []))
            elif result:
                # exact type test: a Symbol is a list subclass but must be
                # added as one item, not spliced
                if type(result) is list:
                    res.extend(result)
                else:
                    res.append(result)
            if self.packrat:
                self.memory[(len(_textline), id(_pattern))] = (res, text)
            return res, text

        def syntaxError():
            # Record the failure (packrat only) and signal "no match".
            if self.packrat:
                self.memory[(len(_textline), id(_pattern))] = False
            raise SyntaxError()

        if self.packrat:
            # NOTE(review): only memoized successes are reused.  A memoized
            # failure (False) is ignored and the pattern is tried again;
            # this keeps the long-standing behaviour.
            cached = self.memory.get((len(textline), id(pattern)))
            if cached:
                return cached

        if callable(pattern):
            if __debug__:
                if print_trace:
                    try:
                        if pattern.__name__ != "comment":
                            sys.stderr.write("testing with " + pattern.__name__ + ": " + textline[:40] + "\n")
                    except Exception:
                        pass

            # rules whose name starts with "_" produce no Symbol of their own
            if pattern.__name__[0] != "_":
                name = Name(pattern.__name__)

            pattern = pattern()
            if callable(pattern):
                pattern = (pattern,)

        text = skip(self.skipper, textline, skipWS, skipComments)

        pattern_type = type(pattern)

        if pattern_type is str:
            if text[:len(pattern)] == pattern:
                text = skip(self.skipper, text[len(pattern):], skipWS, skipComments)
                return R(None, text)
            else:
                syntaxError()

        elif pattern_type is keyword:
            m = word_regex.match(text)
            if m and m.group(0) == pattern:
                text = skip(self.skipper, text[len(pattern):], skipWS, skipComments)
                return R(None, text)
            else:
                syntaxError()

        elif pattern_type is _not:
            try:
                self.parseLine(text, pattern.obj, [], skipWS, skipComments)
            except SyntaxError:
                # the wrapped pattern does not match: the look-ahead succeeds
                # and nothing is consumed
                return resultSoFar, textline
            syntaxError()

        elif pattern_type is _and:
            # a failing look-ahead propagates its SyntaxError to the caller
            self.parseLine(text, pattern.obj, [], skipWS, skipComments)
            return resultSoFar, textline

        elif pattern_type is type(word_regex) or pattern_type is ignore:
            if pattern_type is ignore:
                pattern = pattern.regex
            m = pattern.match(text)
            if m:
                text = skip(self.skipper, text[len(m.group(0)):], skipWS, skipComments)
                if pattern_type is ignore:
                    return R(None, text)
                else:
                    return R(m.group(0), text)
            else:
                syntaxError()

        elif pattern_type is tuple:
            # Sequence.  An int item sets the cardinality of the item that
            # follows it: n > 0 exactly n times, 0 optional, -1 any number,
            # -2 at least once.
            result = []
            n = 1
            for p in pattern:
                if type(p) is int:
                    n = p
                else:
                    if n > 0:
                        for _ in range(n):
                            result, text = self.parseLine(text, p, result, skipWS, skipComments)
                    elif n == 0:
                        if text != "":
                            try:
                                newResult, newText = self.parseLine(text, p, result, skipWS, skipComments)
                                result, text = newResult, newText
                            except SyntaxError:
                                pass
                    elif n < 0:
                        found = False
                        while True:
                            try:
                                newResult, newText = self.parseLine(text, p, result, skipWS, skipComments)
                                result, text, found = newResult, newText, True
                            except SyntaxError:
                                break
                        if n == -2 and not found:
                            syntaxError()
                    n = 1
            return R(result, text)

        elif pattern_type is list:
            # Ordered choice: the first alternative that matches wins.
            result = []
            for p in pattern:
                try:
                    result, text = self.parseLine(text, p, result, skipWS, skipComments)
                except SyntaxError:
                    continue
                return R(result, text)
            syntaxError()

        else:
            raise SyntaxError("illegal type in grammar: " + u(pattern_type))

    def lineNo(self):
        """Return position information for the furthest point parsed so far.

        returns:    "" if no line table is set, if nothing was matched yet or
                    if no position can be determined; otherwise the text
                    "<entry[1]>:<entry[2]>" of the line table entry that
                    follows the last entry whose offset (entry[0]) is not
                    beyond the parse position

        NOTE(review): the entries of self.lines are presumably
        (offset, file name, line number) -- the table is filled in outside
        this class; confirm there.
        """
        if not self.lines:
            return ""
        if self.restlen == -1:
            return ""
        parsed = self.textlen - self.restlen

        def location(index):
            # Format one table entry; any problem yields "".
            try:
                return u(self.lines[index][1]) + ":" + u(self.lines[index][2])
            except Exception:
                return ""

        left, right = 0, len(self.lines)

        # bisect the line table
        while True:
            mid = (right + left) // 2
            if self.lines[mid][0] <= parsed:
                try:
                    reached = self.lines[mid + 1][0] >= parsed
                except Exception:
                    # typically: mid is the last entry, there is no mid + 1
                    return location(mid + 1)
                if reached:
                    return location(mid + 1)
                left = mid + 1
            else:
                right = mid - 1
            if left > right:
                return ""
0
76005e62091d initial commit
Volker Birk <vb@pep-project.org>
parents:
diff changeset
   283
76005e62091d initial commit
Volker Birk <vb@pep-project.org>
parents:
diff changeset
   284
# plain module API
76005e62091d initial commit
Volker Birk <vb@pep-project.org>
parents:
diff changeset
   285
76005e62091d initial commit
Volker Birk <vb@pep-project.org>
parents:
diff changeset
   286
def parseLine(textline, pattern, resultSoFar = None, skipWS = True, skipComments = None, packrat = False):
    """Parse a single piece of text with a freshly created parser.

    textline      text to parse
    pattern       pyPEG language description to match against
    resultSoFar   list handed to parser.parseLine() as the result
                  accumulated so far; a new empty list is used when omitted
    skipWS        flag passed to skip() and parser.parseLine()
    skipComments  passed to skip() and parser.parseLine()
    packrat       passed to parser() as p

    Returns the tuple (ast, text) produced by parser.parseLine().
    """
    # A literal [] default would be one list object shared by every call
    # that omits the argument; it is passed on to parser.parseLine(), so
    # anything stored in it would leak into later calls.
    if resultSoFar is None:
        resultSoFar = []

    p = parser(p=packrat)
    text = skip(p.skipper, textline, skipWS, skipComments)
    return p.parseLine(text, pattern, resultSoFar, skipWS, skipComments)
76005e62091d initial commit
Volker Birk <vb@pep-project.org>
parents:
diff changeset
   291
76005e62091d initial commit
Volker Birk <vb@pep-project.org>
parents:
diff changeset
   292
# parse():
76005e62091d initial commit
Volker Birk <vb@pep-project.org>
parents:
diff changeset
   293
#   language:       pyPEG language description
76005e62091d initial commit
Volker Birk <vb@pep-project.org>
parents:
diff changeset
   294
#   lineSource:     a fileinput.FileInput object
76005e62091d initial commit
Volker Birk <vb@pep-project.org>
parents:
diff changeset
   295
#   skipWS:         Flag if whitespace should be skipped (default: True)
76005e62091d initial commit
Volker Birk <vb@pep-project.org>
parents:
diff changeset
   296
#   skipComments:   Python function which returns pyPEG for matching comments
76005e62091d initial commit
Volker Birk <vb@pep-project.org>
parents:
diff changeset
   297
#   packrat:        use memoization
76005e62091d initial commit
Volker Birk <vb@pep-project.org>
parents:
diff changeset
   298
#   lineCount:      add line number information to AST
76005e62091d initial commit
Volker Birk <vb@pep-project.org>
parents:
diff changeset
   299
#   
76005e62091d initial commit
Volker Birk <vb@pep-project.org>
parents:
diff changeset
   300
#   returns:        pyAST
76005e62091d initial commit
Volker Birk <vb@pep-project.org>
parents:
diff changeset
   301
#
76005e62091d initial commit
Volker Birk <vb@pep-project.org>
parents:
diff changeset
   302
#   raises:         SyntaxError(reason), if a parsed line is not in language
76005e62091d initial commit
Volker Birk <vb@pep-project.org>
parents:
diff changeset
   303
#                   SyntaxError(reason), if the language description is illegal
76005e62091d initial commit
Volker Birk <vb@pep-project.org>
parents:
diff changeset
   304
76005e62091d initial commit
Volker Birk <vb@pep-project.org>
parents:
diff changeset
   305
def parse(language, lineSource, skipWS = True, skipComments = None, packrat = False, lineCount = True):
    """Parse everything lineSource yields as one text according to language.

    language      pyPEG language description; as long as it is callable it
                  is called and replaced by its return value
    lineSource    iterable of lines which also offers filename() and
                  lineno(), e.g. a fileinput.FileInput object
    skipWS        flag passed to skip() and parser.parseLine()
    skipComments  passed to skip() and parser.parseLine()
    packrat       passed to parser() as p
    lineCount     if true, the table of line positions is given to the
                  parser

    Returns the result produced by parser.parseLine().

    Raises SyntaxError("syntax error in <file>:<line>: <line content>") if
    parsing raises SyntaxError or if unparsed text is left over.
    """
    while callable(language):
        language = language()

    # positions: one (offset into the whole text, file name, 0-based line
    #            number) tuple per input line
    # chunks:    the converted text of each input line, same indexing
    positions, chunks, offset = [], [], 0
    for line in lineSource:
        positions.append((offset, lineSource.filename(), lineSource.lineno() - 1))
        chunk = u(line)
        chunks.append(chunk)
        offset += len(chunk)

    # join once instead of growing a string line by line
    orig = "".join(chunks)
    textlen = offset

    # created outside the try block so the handler can always rely on p
    p = parser(p=packrat)
    p.textlen = textlen
    # The original assigned p.line = None in the "no line count" case; the
    # visible parser code reads self.lines, so that looks like a typo.
    # NOTE(review): confirm nothing else reads a "line" attribute.
    p.lines = positions if lineCount else None

    try:
        text = skip(p.skipper, orig, skipWS, skipComments)
        result, text = p.parseLine(text, language, [], skipWS, skipComments)
        if text:
            raise SyntaxError()

    except SyntaxError:
        parsed = textlen - p.restlen

        # find the last line which starts before the error offset
        index, fileName, lineNo = -1, "", 0
        for i, (start, name, number) in enumerate(positions):
            if start >= parsed:
                break
            index, fileName, lineNo = i, name, number

        # Error offset at (or before) the start of the text: report the
        # first line instead of indexing with -1, which would have shown
        # the last line together with an empty file name.
        if index < 0 and positions:
            index, fileName, lineNo = 0, positions[0][1], positions[0][2]

        lineNo += 1
        # Use the line as delivered by lineSource; str.splitlines() splits
        # on more separators than a line reader does, which can shift the
        # index. Empty input has no line to show.
        lineCont = chunks[index].rstrip("\r\n") if chunks else ""
        raise SyntaxError("syntax error in " + u(fileName) + ":" + u(lineNo) + ": " + lineCont)

    return result