# YPL parser 1.5

# written by VB.

import re
import sys
import codecs

try:
    import exceptions  # Python 2 only; kept so the module namespace is unchanged there
except ImportError:
    exceptions = None

try:
    unicode
except NameError:
    # Python 3: text strings are ``str``, byte strings are ``bytes``.
    unicode = str
    _bytes = bytes
else:
    _bytes = str


class keyword(unicode):
    """Grammar literal that has to match one complete word (see ``word_regex``)."""


class code(unicode):
    """Plain text-string subclass; the parsing code below does not use it."""


class ignore(object):
    """Regular expression whose match is consumed but not added to the result."""

    def __init__(self, regex_text, *args):
        self.regex = re.compile(regex_text, *args)


class _and(object):
    """Lookahead: the wrapped pattern has to match, but no input is consumed."""

    def __init__(self, something):
        self.obj = something


class _not(_and):
    """Negative lookahead: the wrapped pattern must not match."""


class Name(unicode):
    """Rule name carrying ``line`` and ``file`` attributes."""

    def __init__(self, *args):
        self.line = 0
        self.file = u""


class Symbol(list):
    """AST node: the two-element list ``[name, what]``.

    The same values are also available as ``__name__`` and ``what``;
    calling the symbol returns ``what``.
    """

    def __init__(self, name, what):
        self.__name__ = name
        self.append(name)
        self.what = what
        self.append(what)

    def __call__(self):
        return self.what

    def __unicode__(self):
        return u'Symbol(' + repr(self.__name__) + ', ' + repr(self.what) + u')'

    def __repr__(self):
        # Calls __unicode__ directly: on Python 3 ``unicode`` is ``str`` and
        # ``str(self)`` would fall back to ``__repr__`` and recurse forever.
        return self.__unicode__()


word_regex = re.compile(u"\\w+")
rest_regex = re.compile(u".*")

print_trace = False


def u(text):
    """Return *text* as a text (unicode) string.

    Exceptions are reduced to their first argument (empty string if they
    have none).  Byte strings are decoded with the encoding of
    ``sys.stdin``, or UTF-8 if that is not set.  Anything else is converted
    with ``unicode()``.
    """
    if isinstance(text, BaseException):
        text = text.args[0] if text.args else u""
    if type(text) is unicode:
        return text
    if isinstance(text, _bytes):
        encoding = getattr(sys.stdin, "encoding", None) or "utf-8"
        return codecs.decode(text, encoding)
    return unicode(text)


def skip(skipper, text, skipWS, skipComments):
    """Strip leading whitespace and comments from *text*.

    skipper:      parser object used to match comments
    text:         text to strip
    skipWS:       if true, leading whitespace is removed
    skipComments: pyPEG pattern matching a comment, or None

    returns: the remaining text
    """
    rest = text.lstrip() if skipWS else text
    if skipComments:
        try:
            while True:
                _ignored, shorter = skipper.parseLine(rest, skipComments, [], skipWS, None)
                if skipWS:
                    shorter = shorter.lstrip()
                # A comment pattern that matches without consuming anything
                # would otherwise keep this loop running forever.
                stalled = len(shorter) >= len(rest)
                rest = shorter
                if stalled:
                    break
        except SyntaxError:
            # No (further) comment at this position.
            pass
    return rest


def _trace(pattern, prefix, detail=None):
    """Best-effort trace line on stderr for patterns that have a ``__name__``."""
    try:
        rule = pattern.__name__
        if rule != "comment":
            message = prefix + rule
            if detail is not None:
                message += u": " + detail[:40]
            sys.stderr.write(message + u"\n")
    except Exception:
        # Tracing must never influence parsing (pattern without a name,
        # encoding trouble on stderr, ...).
        pass


class parser(object):
    """Recursive descent interpreter for pyPEG grammars."""

    def __init__(self, another = False, p = False):
        """another: true for the helper instance that only matches comments
        p:       enable packrat memoization
        """
        self.restlen = -1       # shortest rest seen after a match; -1: no match yet
        if not another:
            self.skipper = parser(True, p)
            self.skipper.packrat = p
        else:
            self.skipper = self
        self.lines = None       # list of (offset, filename, line number - 1) or None
        self.textlen = 0        # length of the complete input text
        self.memory = {}        # packrat cache
        self.packrat = p

    def parseLine(self, textline, pattern, resultSoFar = None, skipWS = True, skipComments = None):
        """Match *pattern* at the beginning of *textline*.

        textline:     text to parse
        pattern:      pyPEG language description
        resultSoFar:  parsing result so far; it is extended in place and
                      returned (default: a new empty list for every call)
        skipWS:       flag if whitespace should be skipped (default: True)
        skipComments: Python function returning pyPEG for matching comments

        returns: pyAST, textrest

        raises: SyntaxError(reason) if textline is detected not being in
                language described by pattern

                SyntaxError(reason) if pattern is an illegal language
                description

        Inside a tuple an integer sets the multiplicity of the next
        element: n > 0 exactly n times, 0 optional, n < 0 as often as it
        matches, and -2 additionally requires at least one match.
        """
        # A fresh list per call; a literal default would be shared by all
        # calls because R() extends the list in place.
        if resultSoFar is None:
            resultSoFar = []

        name = None
        _textline = textline
        _pattern = pattern

        def R(result, text):
            # Record a successful match and merge it into resultSoFar.
            if __debug__ and print_trace:
                _trace(_pattern, u"match: ")

            if self.restlen == -1:
                self.restlen = len(text)
            else:
                self.restlen = min(self.restlen, len(text))

            res = resultSoFar
            if name:
                name.line = self.lineNo()
                res.append(Symbol(name, result if result else []))
            elif result:
                if type(result) is list:
                    res.extend(result)
                else:
                    res.append(result)

            if self.packrat:
                self.memory[(len(_textline), id(_pattern))] = (res, text)
            return res, text

        def syntaxError():
            if self.packrat:
                self.memory[(len(_textline), id(_pattern))] = False
            raise SyntaxError()

        if self.packrat:
            # Only cached successes are reused.  A cached failure (False)
            # falls through and the pattern is evaluated again, which is
            # what this code has always effectively done.
            # NOTE(review): a hit returns the list object that was current
            # when the entry was stored, not the resultSoFar of this call -
            # confirm this is intended before relying on packrat mode.
            cached = self.memory.get((len(textline), id(pattern)))
            if cached:
                return cached

        if callable(pattern):
            if __debug__ and print_trace:
                _trace(pattern, u"testing with ", textline)

            # Rules whose name starts with "_" do not create a Symbol.
            if pattern.__name__[0] != "_":
                name = Name(pattern.__name__)

            pattern = pattern()
            if callable(pattern):
                pattern = (pattern,)

        text = skip(self.skipper, textline, skipWS, skipComments)

        pattern_type = type(pattern)

        if pattern_type is str or pattern_type is unicode:
            if text[:len(pattern)] != pattern:
                syntaxError()
            text = skip(self.skipper, text[len(pattern):], skipWS, skipComments)
            return R(None, text)

        elif pattern_type is keyword:
            m = word_regex.match(text)
            if not m or m.group(0) != pattern:
                syntaxError()
            text = skip(self.skipper, text[len(pattern):], skipWS, skipComments)
            return R(None, text)

        elif pattern_type is _not:
            try:
                self.parseLine(text, pattern.obj, [], skipWS, skipComments)
            except SyntaxError:
                # The forbidden pattern is absent: succeed without consuming.
                return resultSoFar, textline
            syntaxError()

        elif pattern_type is _and:
            # A SyntaxError raised here simply propagates to the caller.
            self.parseLine(text, pattern.obj, [], skipWS, skipComments)
            return resultSoFar, textline

        elif pattern_type is type(word_regex) or pattern_type is ignore:
            regex = pattern.regex if pattern_type is ignore else pattern
            m = regex.match(text)
            if not m:
                syntaxError()
            text = skip(self.skipper, text[len(m.group(0)):], skipWS, skipComments)
            if pattern_type is ignore:
                return R(None, text)
            return R(m.group(0), text)

        elif pattern_type is tuple:
            result = []
            n = 1
            for p in pattern:
                if type(p) is int:
                    n = p
                    continue

                if n > 0:
                    for _i in range(n):
                        result, text = self.parseLine(text, p, result, skipWS, skipComments)
                elif n == 0:
                    if text != "":
                        try:
                            result, text = self.parseLine(text, p, result, skipWS, skipComments)
                        except SyntaxError:
                            pass
                else:
                    found = False
                    while True:
                        try:
                            newResult, newText = self.parseLine(text, p, result, skipWS, skipComments)
                        except SyntaxError:
                            break
                        # A match that consumes nothing would repeat forever.
                        stalled = len(newText) >= len(text)
                        result, text, found = newResult, newText, True
                        if stalled:
                            break
                    if n == -2 and not found:
                        syntaxError()
                n = 1
            return R(result, text)

        elif pattern_type is list:
            # Ordered choice: the first alternative that matches wins.
            result = []
            for p in pattern:
                try:
                    result, text = self.parseLine(text, p, result, skipWS, skipComments)
                except SyntaxError:
                    continue
                return R(result, text)
            syntaxError()

        else:
            raise SyntaxError(u"illegal type in grammar: " + u(pattern_type))

    def lineNo(self):
        """Return u"<file>:<line>" for the current parsing position.

        The position is ``textlen - restlen``.  The entry reported is the
        one following the last line that starts at or before that position.
        Returns u"" if no line table is set, nothing has matched yet, or
        there is no following entry.
        """
        if not self.lines or self.restlen == -1:
            return u""
        parsed = self.textlen - self.restlen

        left, right = 0, len(self.lines)
        while left <= right:
            mid = (left + right) // 2
            if self.lines[mid][0] > parsed:
                right = mid - 1
                continue
            if mid + 1 >= len(self.lines):
                return u""
            following = self.lines[mid + 1]
            if following[0] < parsed:
                left = mid + 1
                continue
            try:
                return u(following[1]) + u":" + u(following[2])
            except Exception:
                return u""
        return u""


# plain module API

def parseLine(textline, pattern, resultSoFar = None, skipWS = True, skipComments = None, packrat = False):
    """Parse *textline* with a new parser object.

    Takes the same arguments as parser.parseLine() plus:

    packrat: use memoization

    returns: pyAST, textrest

    raises: SyntaxError(reason) as parser.parseLine() does
    """
    p = parser(p=packrat)
    text = skip(p.skipper, textline, skipWS, skipComments)
    # resultSoFar=None lets the method create a fresh list for this call.
    ast, text = p.parseLine(text, pattern, resultSoFar, skipWS, skipComments)
    return ast, text


def parse(language, lineSource, skipWS = True, skipComments = None, packrat = False, lineCount = True):
    """Parse everything *lineSource* delivers.

    language:     pyPEG language description
    lineSource:   a fileinput.FileInput object (iteration, filename() and
                  lineno() are used)
    skipWS:       flag if whitespace should be skipped (default: True)
    skipComments: Python function which returns pyPEG for matching comments
    packrat:      use memoization
    lineCount:    add line number information to AST

    returns: pyAST

    raises: SyntaxError(reason), if a parsed line is not in language
            SyntaxError(reason), if the language description is illegal
    """
    while callable(language):
        language = language()

    # lines[i] = (offset of chunk i in the text, file name, line number - 1)
    lines, chunks, offset = [], [], 0
    for line in lineSource:
        chunk = u(line)
        lines.append((offset, lineSource.filename(), lineSource.lineno() - 1))
        chunks.append(chunk)
        offset += len(chunk)
    orig = u"".join(chunks)
    textlen = offset

    p = parser(p=packrat)
    p.textlen = textlen
    p.lines = lines if lineCount else None

    try:
        text = skip(p.skipper, orig, skipWS, skipComments)
        result, text = p.parseLine(text, language, [], skipWS, skipComments)
        if text:
            raise SyntaxError()

    except SyntaxError:
        # Nothing matched at all (restlen == -1): the error is at the start.
        parsed = textlen - p.restlen if p.restlen != -1 else 0

        # Last line that starts before the furthest parsed position.
        # NOTE(review): a position exactly on a line start selects the
        # previous line; kept as it always was - confirm this is wanted.
        hit = -1
        for index, entry in enumerate(lines):
            if entry[0] >= parsed:
                break
            hit = index
        if hit < 0 and lines:
            hit = 0     # error before anything was consumed: first line

        if hit >= 0:
            file, lineNo = lines[hit][1], lines[hit][2]
            lineCont = chunks[hit].rstrip(u"\r\n")
        else:
            # Empty input: there is no line to show.
            file, lineNo, lineCont = u"", 0, u""
        lineNo += 1

        raise SyntaxError(u"syntax error in " + u(file) + u":" + u(lineNo) + u": " + lineCont)

    return result