h@41: # YPL parser 1.6
h@41: # adapted for Python 3.x
h@41: 
h@41: # written by VB.
h@41: 
h@41: import re
h@41: import sys, codecs
h@41: 
class keyword(str):
    """Grammar literal that has to match one complete word.

    parser.parseLine() dispatches on the exact pattern type: a ``keyword`` is
    compared with the whole word found by ``word_regex`` at the current
    position, while a plain ``str`` pattern is only compared as a prefix.
    """
class code(str):
    """Plain ``str`` subclass used as a type tag.

    NOTE(review): nothing in the visible part of this module reads this type;
    presumably callers use it to mark code fragments - confirm against users.
    """
class ignore(object):
    """Regular-expression pattern whose matched text is dropped from the result.

    parser.parseLine() matches ``self.regex`` like any compiled regex but
    records nothing for it.
    """

    def __init__(self, regex_text, *args):
        # Extra positional arguments (e.g. flags) go straight to re.compile().
        compiled = re.compile(regex_text, *args)
        self.regex = compiled
h@41: 
h@41: class _and(object):
h@41:     def __init__(self, something):
h@41:         self.obj = something
h@41: 
class _not(_and):
    """Negative lookahead wrapper: the wrapped pattern must NOT match here.

    parser.parseLine() succeeds, consuming nothing, only when parsing
    ``self.obj`` at the current position fails.
    """
h@41: 
class Name(str):
    """Rule name (a ``str``) that also carries source position attributes.

    ``line`` starts as 0 and is overwritten by parser.parseLine() with the
    value of parser.lineNo(); ``file`` starts as "" and is not written
    anywhere in the visible part of this module.
    """

    def __init__(self, *args):
        # The string value itself is set by str.__new__; only the extra
        # attributes are initialised here.
        self.file = ""
        self.line = 0
h@41: 
class Symbol(list):
    """AST node: a two-element list ``[name, what]``.

    The same two values are also reachable as ``symbol.__name__`` and
    ``symbol.what``; calling the symbol returns ``what``.
    """

    def __init__(self, name, what):
        self.__name__ = name
        self.what = what
        # The list content mirrors the two attributes, name first.
        self.extend((name, what))

    def __call__(self):
        return self.what

    def __str__(self):
        return 'Symbol({0!r}, {1!r})'.format(self.__name__, self.what)

    def __repr__(self):
        # repr() deliberately follows str() so nested symbols print alike.
        return str(self)
h@41: 
# Matches one run of word characters; parser.parseLine() uses it to read the
# whole word at the current position when checking a keyword pattern.
word_regex = re.compile(r"\w+")
# Matches everything up to the next newline (possibly nothing); not referenced
# in the visible part of this module.
rest_regex = re.compile(r".*")

# When true (and Python is not running with -O, i.e. __debug__ is set),
# parser.parseLine() writes "testing with ..." / "match: ..." lines to stderr.
print_trace = False
h@41: 
h@41: def u(text):
h@41:     if isinstance(text, BaseException):
h@41:         text = text.args[0]
h@41:     if type(text) is str:
h@41:         return text
h@41:     if isinstance(text, bytes):
h@41:         if sys.stdin.encoding:
h@41:             return codecs.decode(text, sys.stdin.encoding)
h@41:         else:
h@41:             return codecs.decode(text, "utf-8")
h@41:     return str(text)
h@41: 
def skip(skipper, text, skipWS, skipComments):
    """Strip leading whitespace and comments from *text*.

    skipper:       object whose parseLine() is used to match comments
    text:          text to clean up
    skipWS:        flag: strip leading whitespace
    skipComments:  pattern describing a comment; falsy to skip no comments

    returns:       the remaining text

    Comments are consumed one after another until skipper.parseLine() signals
    "no match" by raising SyntaxError. Any other exception is propagated.
    """
    t = text.lstrip() if skipWS else text
    if skipComments:
        try:
            while True:
                # Only the remaining text matters; the parse result is dropped.
                _, rest = skipper.parseLine(t, skipComments, [], skipWS, None)
                if skipWS:
                    rest = rest.lstrip()
                if rest == t:
                    # The comment pattern matched without consuming anything;
                    # repeating the same call could never terminate.
                    break
                t = rest
        except SyntaxError:
            # No (further) comment at this position.
            pass
    return t
h@41: 
class parser(object):
    """Backtracking matcher that interprets a pyPEG grammar description.

    Attributes:
      restlen   smallest amount of unparsed text left behind by any successful
                match so far, -1 before the first match; the furthest position
                reached is therefore ``textlen - restlen``
      skipper   parser handed to skip() for matching comments; a parser
                created with ``another=True`` is its own skipper
      lines     None, or a list of (start offset, file name, line number - 1)
                entries, one per input line, as built by parse()
      textlen   length of the complete input text (set by parse())
      memory    packrat memo: (len(textline), id(pattern)) -> (result, rest),
                or False for a recorded failure
      packrat   true to enable memoization
    """

    def __init__(self, another = False, p = False):
        self.restlen = -1
        if another:
            self.skipper = self
        else:
            # The nested constructor already stores p as the skipper's
            # packrat flag.
            self.skipper = parser(True, p)
        self.lines = None
        self.textlen = 0
        self.memory = {}
        self.packrat = p

    # parseLine():
    #   textline:       text to parse
    #   pattern:        pyPEG language description
    #   resultSoFar:    parsing result so far (default: a new empty list)
    #   skipWS:         Flag if whitespace should be skipped (default: True)
    #   skipComments:   Python function returning pyPEG for matching comments
    #
    #   returns:        pyAST, textrest
    #
    #   raises:         SyntaxError(reason) if textline is detected not being in language
    #                   described by pattern
    #
    #                   SyntaxError(reason) if pattern is an illegal language description
    #
    #   pattern kinds:  callable        called to obtain the real pattern; unless its
    #                                   name starts with "_" the match is wrapped in a
    #                                   Symbol carrying that name
    #                   str             literal prefix of the text
    #                   keyword         literal that must equal the whole next word
    #                   _and / _not     lookahead, consumes nothing
    #                   compiled regex  matched text becomes part of the result
    #                   ignore          like a regex, but the matched text is dropped
    #                   tuple           sequence; an int item sets the multiplicity of
    #                                   the next item: n > 0 exactly n times, 0 optional,
    #                                   negative any number of times, -2 at least once
    #                   list            alternatives, the first one that matches wins

    def parseLine(self, textline, pattern, resultSoFar = None, skipWS = True, skipComments = None):
        """Match pattern at the start of textline; return (pyAST, textrest)."""
        # R() appends to resultSoFar in place, so a shared default list would
        # carry results over from one call to the next.
        if resultSoFar is None:
            resultSoFar = []
        name = None
        # The memo key uses the arguments as passed in, before pattern is
        # replaced by the value a grammar function returns.
        _textline = textline
        _pattern = pattern

        def R(result, text):
            # Success exit: record progress, add result to resultSoFar,
            # memoize, and return (resultSoFar, text).
            if __debug__:
                if print_trace:
                    try:
                        if _pattern.__name__ != "comment":
                            sys.stderr.write("match: " + _pattern.__name__ + "\n")
                    except Exception:
                        # Tracing is best effort (e.g. the pattern has no
                        # __name__) and must never break parsing.
                        pass

            if self.restlen == -1:
                self.restlen = len(text)
            else:
                self.restlen = min(self.restlen, len(text))
            res = resultSoFar
            if name and result:
                name.line = self.lineNo()
                res.append(Symbol(name, result))
            elif name:
                name.line = self.lineNo()
                res.append(Symbol(name, []))
            elif result:
                if type(result) is list:
                    res.extend(result)
                else:
                    res.append(result)
            if self.packrat:
                self.memory[(len(_textline), id(_pattern))] = (res, text)
            return res, text

        def syntaxError():
            # Failure exit: memoize the failure and raise SyntaxError.
            if self.packrat:
                self.memory[(len(_textline), id(_pattern))] = False
            raise SyntaxError()

        if self.packrat:
            # Only memoized successes are replayed. A recorded failure (False)
            # is not honoured and the pattern is parsed again; this keeps the
            # effective behaviour of the former bare "except: pass" here,
            # which swallowed the SyntaxError raised for a recorded failure.
            memo = self.memory.get((len(textline), id(pattern)))
            if memo:
                return memo

        if callable(pattern):
            if __debug__:
                if print_trace:
                    try:
                        if pattern.__name__ != "comment":
                            sys.stderr.write("testing with " + pattern.__name__ + ": " + textline[:40] + "\n")
                    except Exception:
                        # Best-effort tracing, see R().
                        pass

            if pattern.__name__[0] != "_":
                name = Name(pattern.__name__)

            pattern = pattern()
            if callable(pattern):
                # A rule that returns a single rule is treated as a
                # one-element sequence.
                pattern = (pattern,)

        text = skip(self.skipper, textline, skipWS, skipComments)

        pattern_type = type(pattern)

        if pattern_type is str:
            if text[:len(pattern)] == pattern:
                text = skip(self.skipper, text[len(pattern):], skipWS, skipComments)
                return R(None, text)
            else:
                syntaxError()

        elif pattern_type is keyword:
            m = word_regex.match(text)
            if m:
                if m.group(0) == pattern:
                    text = skip(self.skipper, text[len(pattern):], skipWS, skipComments)
                    return R(None, text)
                else:
                    syntaxError()
            else:
                syntaxError()

        elif pattern_type is _not:
            try:
                self.parseLine(text, pattern.obj, [], skipWS, skipComments)
            except SyntaxError:
                # The wrapped pattern does not match: success, and nothing is
                # consumed (the original textline is handed back).
                return resultSoFar, textline
            syntaxError()

        elif pattern_type is _and:
            # A failure of the wrapped pattern propagates as SyntaxError; on
            # success nothing is consumed.
            self.parseLine(text, pattern.obj, [], skipWS, skipComments)
            return resultSoFar, textline

        elif pattern_type is type(word_regex) or pattern_type is ignore:
            if pattern_type is ignore:
                pattern = pattern.regex
            m = pattern.match(text)
            if m:
                text = skip(self.skipper, text[len(m.group(0)):], skipWS, skipComments)
                if pattern_type is ignore:
                    return R(None, text)
                else:
                    return R(m.group(0), text)
            else:
                syntaxError()

        elif pattern_type is tuple:
            result = []
            n = 1
            for p in pattern:
                if type(p) is int:
                    # Multiplicity for the next non-int item.
                    n = p
                else:
                    if n > 0:
                        for i in range(n):
                            result, text = self.parseLine(text, p, result, skipWS, skipComments)
                    elif n == 0:
                        if text == "":
                            pass
                        else:
                            try:
                                newResult, newText = self.parseLine(text, p, result, skipWS, skipComments)
                                result, text = newResult, newText
                            except SyntaxError:
                                pass
                    elif n < 0:
                        found = False
                        while True:
                            try:
                                newResult, newText = self.parseLine(text, p, result, skipWS, skipComments)
                                result, text, found = newResult, newText, True
                            except SyntaxError:
                                break
                        if n == -2 and not found:
                            syntaxError()
                    n = 1
            return R(result, text)

        elif pattern_type is list:
            result = []
            found = False
            for p in pattern:
                try:
                    result, text = self.parseLine(text, p, result, skipWS, skipComments)
                    found = True
                except SyntaxError:
                    pass
                if found:
                    break
            if found:
                return R(result, text)
            else:
                syntaxError()

        else:
            raise SyntaxError("illegal type in grammar: " + u(pattern_type))

    def lineNo(self):
        """Return "file:line" for the furthest position parsed so far.

        Returns "" when no line table is set, when nothing has matched yet,
        or when no line can be determined.
        """
        if not self.lines: return ""
        if self.restlen == -1: return ""
        parsed = self.textlen - self.restlen

        count = len(self.lines)
        left, right = 0, count

        # Binary search for a line that starts at or before parsed and whose
        # successor (if there is one) starts at or after parsed.
        while left <= right:
            mid = (left + right) // 2
            if self.lines[mid][0] > parsed:
                right = mid - 1
                continue
            if mid + 1 >= count or self.lines[mid + 1][0] >= parsed:
                # Report the line's own entry. The last line has no successor
                # to read from, and a successor may belong to another file.
                try:
                    return u(self.lines[mid][1]) + ":" + u(self.lines[mid][2] + 1)
                except Exception:
                    # Position info is only diagnostic; never fail a parse
                    # over a malformed line table entry.
                    return ""
            left = mid + 1
        return ""
h@41: 
h@41: # plain module API
h@41: 
def parseLine(textline, pattern, resultSoFar = None, skipWS = True, skipComments = None, packrat = False):
    """Parse one piece of text with a freshly created parser.

    textline:      text to parse
    pattern:       pyPEG language description
    resultSoFar:   list the result is appended to (default: a new empty list)
    skipWS:        flag: skip whitespace
    skipComments:  pattern for matching comments, or None
    packrat:       use memoization

    returns:       pyAST, textrest

    raises:        SyntaxError if textline does not match pattern or pattern
                   is an illegal language description
    """
    # The parser appends to resultSoFar in place; a shared default list would
    # make every call return the results of all previous calls as well.
    if resultSoFar is None:
        resultSoFar = []
    p = parser(p=packrat)
    text = skip(p.skipper, textline, skipWS, skipComments)
    ast, text = p.parseLine(text, pattern, resultSoFar, skipWS, skipComments)
    return ast, text
h@41: 
h@41: # parse():
h@41: #   language:       pyPEG language description
h@41: #   lineSource:     a fileinput.FileInput object
h@41: #   skipWS:         Flag if whitespace should be skipped (default: True)
h@41: #   skipComments:   Python function which returns pyPEG for matching comments
h@41: #   packrat:        use memoization
h@41: #   lineCount:      add line number information to AST
h@41: #   
h@41: #   returns:        pyAST
h@41: #
h@41: #   raises:         SyntaxError(reason), if a parsed line is not in language
h@41: #                   SyntaxError(reason), if the language description is illegal
h@41: 
def parse(language, lineSource, skipWS = True, skipComments = None, packrat = False, lineCount = True):
    """Read all lines from lineSource and parse them as one text.

    language:      pyPEG language description; callables are resolved by
                   calling them until a non-callable pattern is obtained
    lineSource:    iterable of lines that also provides filename() and
                   lineno(), e.g. a fileinput.FileInput object
    skipWS:        flag: skip whitespace
    skipComments:  pattern for matching comments, or None
    packrat:       use memoization
    lineCount:     add line number information to the AST

    returns:       pyAST

    raises:        SyntaxError("syntax error in <file>:<line>: <line text>")
                   if the text does not match language, the language
                   description is illegal, or unparsed text is left over
    """
    while callable(language):
        language = language()

    # One (start offset, file name, line number - 1) entry per input line,
    # plus the decoded line itself for error messages.
    lines, pieces, offset = [], [], 0
    for line in lineSource:
        lines.append((offset, lineSource.filename(), lineSource.lineno() - 1))
        piece = u(line)
        pieces.append(piece)
        offset += len(piece)

    orig = "".join(pieces)
    textlen = offset

    p = parser(p=packrat)
    p.textlen = textlen
    p.lines = lines if lineCount else None

    try:
        text = skip(p.skipper, orig, skipWS, skipComments)
        result, text = p.parseLine(text, language, [], skipWS, skipComments)
        if text:
            raise SyntaxError()

    except SyntaxError:
        # Furthest offset reached by any successful match; restlen is still
        # -1 when nothing matched at all, i.e. the error is at the start.
        parsed = 0 if p.restlen == -1 else textlen - p.restlen

        # Report the last line that starts before that offset. When there is
        # none (error at offset 0), report the first line rather than
        # indexing with -1, which would select the last line.
        at = 0
        for index, entry in enumerate(lines):
            if entry[0] >= parsed:
                break
            at = index

        if lines:
            file, lineNo = lines[at][1], lines[at][2] + 1
            lineCont = pieces[at].rstrip("\r\n")
        else:
            # Empty input: there is no line to quote.
            file, lineNo, lineCont = "", 1, ""
        raise SyntaxError("syntax error in " + u(file) + ":" + u(lineNo) + ": " + lineCont)

    return result