yml2/pyPEG.py
changeset 58 a218553807ab
parent 52 b4a9a3122abb
child 65 d659b8c2ed22
equal deleted inserted replaced
57:2f4ad3800a3f 58:a218553807ab
     1 # YPL parser 1.5
     1 # YPL parser 1.6
       
     2 # adapted for Python 3.x
     2 
     3 
     3 # written by VB.
     4 # written by VB.
     4 
     5 
     5 import re
     6 import re
     6 import sys, codecs
     7 import sys, codecs
     7 import exceptions
     8 
     8 
     9 class keyword(str): pass
     9 class keyword(unicode): pass
    10 class code(str): pass
    10 class code(unicode): pass
       
    11 class ignore(object):
    11 class ignore(object):
    12     def __init__(self, regex_text, *args):
    12     def __init__(self, regex_text, *args):
    13         self.regex = re.compile(regex_text, *args)
    13         self.regex = re.compile(regex_text, *args)
    14 
    14 
    15 class _and(object):
    15 class _and(object):
    16     def __init__(self, something):
    16     def __init__(self, something):
    17         self.obj = something
    17         self.obj = something
    18 
    18 
    19 class _not(_and): pass
    19 class _not(_and): pass
    20 
    20 
    21 class Name(unicode):
    21 class Name(str):
    22     def __init__(self, *args):
    22     def __init__(self, *args):
    23         self.line = 0
    23         self.line = 0
    24         self.file = u""
    24         self.file = ""
    25 
    25 
    26 class Symbol(list):
    26 class Symbol(list):
    27     def __init__(self, name, what):
    27     def __init__(self, name, what):
    28         self.__name__ = name
    28         self.__name__ = name
    29         self.append(name)
    29         self.append(name)
    30         self.what = what
    30         self.what = what
    31         self.append(what)
    31         self.append(what)
    32     def __call__(self):
    32     def __call__(self):
    33         return self.what
    33         return self.what
    34     def __unicode__(self):
    34     def __str__(self):
    35         return u'Symbol(' + repr(self.__name__) + ', ' + repr(self.what) + u')'
    35         return 'Symbol(' + repr(self.__name__) + ', ' + repr(self.what) + ')'
    36     def __repr__(self):
    36     def __repr__(self):
    37         return unicode(self)
    37         return str(self)
    38 
    38 
    39 word_regex = re.compile(ur"\w+")
    39 word_regex = re.compile(r"\w+")
    40 rest_regex = re.compile(ur".*")
    40 rest_regex = re.compile(r".*")
    41 
    41 
    42 print_trace = False
    42 print_trace = False
    43 
    43 
    44 def u(text):
    44 def u(text):
    45     if isinstance(text, exceptions.BaseException):
    45     if isinstance(text, BaseException):
    46         text = text.args[0]
    46         text = text.args[0]
    47     if type(text) is unicode:
    47     if type(text) is str:
    48         return text
    48         return text
    49     if isinstance(text, str):
    49     if isinstance(text, bytes):
    50         if sys.stdin.encoding:
    50         if sys.stdin.encoding:
    51             return codecs.decode(text, sys.stdin.encoding)
    51             return codecs.decode(text, sys.stdin.encoding)
    52         else:
    52         else:
    53             return codecs.decode(text, "utf-8")
    53             return codecs.decode(text, "utf-8")
    54     return unicode(text)
    54     return str(text)
    55 
    55 
    56 def skip(skipper, text, skipWS, skipComments):
    56 def skip(skipper, text, skipWS, skipComments):
    57     if skipWS:
    57     if skipWS:
    58         t = text.lstrip()
    58         t = text.lstrip()
    59     else:
    59     else:
   102         def R(result, text):
   102         def R(result, text):
   103             if __debug__:
   103             if __debug__:
   104                 if print_trace:
   104                 if print_trace:
   105                     try:
   105                     try:
   106                         if _pattern.__name__ != "comment":
   106                         if _pattern.__name__ != "comment":
   107                             sys.stderr.write(u"match: " + _pattern.__name__ + u"\n")
   107                             sys.stderr.write("match: " + _pattern.__name__ + "\n")
   108                     except: pass
   108                     except: pass
   109 
   109 
   110             if self.restlen == -1:
   110             if self.restlen == -1:
   111                 self.restlen = len(text)
   111                 self.restlen = len(text)
   112             else:
   112             else:
   144         if callable(pattern):
   144         if callable(pattern):
   145             if __debug__:
   145             if __debug__:
   146                 if print_trace:
   146                 if print_trace:
   147                     try:
   147                     try:
   148                         if pattern.__name__ != "comment":
   148                         if pattern.__name__ != "comment":
   149                             sys.stderr.write(u"testing with " + pattern.__name__ + u": " + textline[:40] + u"\n")
   149                             sys.stderr.write("testing with " + pattern.__name__ + ": " + textline[:40] + "\n")
   150                     except: pass
   150                     except: pass
   151 
   151 
   152             if pattern.__name__[0] != "_":
   152             if pattern.__name__[0] != "_":
   153                 name = Name(pattern.__name__)
   153                 name = Name(pattern.__name__)
   154 
   154 
   158 
   158 
   159         text = skip(self.skipper, textline, skipWS, skipComments)
   159         text = skip(self.skipper, textline, skipWS, skipComments)
   160 
   160 
   161         pattern_type = type(pattern)
   161         pattern_type = type(pattern)
   162 
   162 
   163         if pattern_type is str or pattern_type is unicode:
   163         if pattern_type is str:
   164             if text[:len(pattern)] == pattern:
   164             if text[:len(pattern)] == pattern:
   165                 text = skip(self.skipper, text[len(pattern):], skipWS, skipComments)
   165                 text = skip(self.skipper, text[len(pattern):], skipWS, skipComments)
   166                 return R(None, text)
   166                 return R(None, text)
   167             else:
   167             else:
   168                 syntaxError()
   168                 syntaxError()
   249                 return R(result, text)
   249                 return R(result, text)
   250             else:
   250             else:
   251                 syntaxError()
   251                 syntaxError()
   252 
   252 
   253         else:
   253         else:
   254             raise SyntaxError(u"illegal type in grammar: " + u(pattern_type))
   254             raise SyntaxError("illegal type in grammar: " + u(pattern_type))
   255 
   255 
   256     def lineNo(self):
   256     def lineNo(self):
   257         if not(self.lines): return u""
   257         if not(self.lines): return ""
   258         if self.restlen == -1: return u""
   258         if self.restlen == -1: return ""
   259         parsed = self.textlen - self.restlen
   259         parsed = self.textlen - self.restlen
   260 
   260 
   261         left, right = 0, len(self.lines)
   261         left, right = 0, len(self.lines)
   262 
   262 
   263         while True:
   263         while True:
   264             mid = int((right + left) / 2)
   264             mid = int((right + left) / 2)
   265             if self.lines[mid][0] <= parsed:
   265             if self.lines[mid][0] <= parsed:
   266                 try:
   266                 try:
   267                     if self.lines[mid + 1][0] >= parsed:
   267                     if self.lines[mid + 1][0] >= parsed:
   268                         try:
   268                         try:
   269                             return u(self.lines[mid + 1][1]) + u":" + u(self.lines[mid + 1][2])
   269                             return u(self.lines[mid + 1][1]) + ":" + u(self.lines[mid + 1][2])
   270                         except:
   270                         except:
   271                             return u""
   271                             return ""
   272                     else:
   272                     else:
   273                         left = mid + 1
   273                         left = mid + 1
   274                 except:
   274                 except:
   275                     try:
   275                     try:
   276                         return u(self.lines[mid + 1][1]) + u":" + u(self.lines[mid + 1][2])
   276                         return u(self.lines[mid + 1][1]) + ":" + u(self.lines[mid + 1][2])
   277                     except:
   277                     except:
   278                         return u""
   278                         return ""
   279             else:
   279             else:
   280                 right = mid - 1
   280                 right = mid - 1
   281             if left > right:
   281             if left > right:
   282                 return u""
   282                 return ""
   283 
   283 
   284 # plain module API
   284 # plain module API
   285 
   285 
   286 def parseLine(textline, pattern, resultSoFar = [], skipWS = True, skipComments = None, packrat = False):
   286 def parseLine(textline, pattern, resultSoFar = [], skipWS = True, skipComments = None, packrat = False):
   287     p = parser(p=packrat)
   287     p = parser(p=packrat)
   306     lines, lineNo = [], 0
   306     lines, lineNo = [], 0
   307 
   307 
   308     while callable(language):
   308     while callable(language):
   309         language = language()
   309         language = language()
   310 
   310 
   311     orig, ld = u"", 0
   311     orig, ld = "", 0
   312     for line in lineSource:
   312     for line in lineSource:
   313         if lineSource.isfirstline():
   313         if lineSource.isfirstline():
   314             ld = 1
   314             ld = 1
   315         else:
   315         else:
   316             ld += 1
   316             ld += 1
   329         text = skip(p.skipper, orig, skipWS, skipComments)
   329         text = skip(p.skipper, orig, skipWS, skipComments)
   330         result, text = p.parseLine(text, language, [], skipWS, skipComments)
   330         result, text = p.parseLine(text, language, [], skipWS, skipComments)
   331         if text:
   331         if text:
   332             raise SyntaxError()
   332             raise SyntaxError()
   333 
   333 
   334     except SyntaxError, msg:
   334     except SyntaxError as msg:
   335         parsed = textlen - p.restlen
   335         parsed = textlen - p.restlen
   336         textlen = 0
   336         textlen = 0
   337         nn, lineNo, file = 0, 0, u""
   337         nn, lineNo, file = 0, 0, ""
   338         for n, ld, l in lines:
   338         for n, ld, l in lines:
   339             if n >= parsed:
   339             if n >= parsed:
   340                 break
   340                 break
   341             else:
   341             else:
   342                 lineNo = l
   342                 lineNo = l
   344                 file = ld
   344                 file = ld
   345 
   345 
   346         lineNo += 1
   346         lineNo += 1
   347         nn -= 1
   347         nn -= 1
   348         lineCont = orig.splitlines()[nn]
   348         lineCont = orig.splitlines()[nn]
   349         raise SyntaxError(u"syntax error in " + u(file) + u":" + u(lineNo) + u": " + lineCont)
   349         raise SyntaxError("syntax error in " + u(file) + ":" + u(lineNo) + ": " + lineCont)
   350 
   350 
   351     return result
   351     return result