yml2/grammar.py
changeset 72 e52ee17bca47
child 74 c3c5a089072a
equal deleted inserted replaced
71:adb8b1a1d517 72:e52ee17bca47
       
     1 # YML 2.7.0 language definition
       
     2 
       
     3 # written by VB.
       
     4 
       
     5 import re
       
     6 from .pyPEG import keyword, _and, _not
       
     7 
       
# pyPEG:
#
#   basestring:     terminal symbol (characters)
#   keyword:        terminal symbol (keyword)
#   matchobj:       terminal symbols (regex, use for scanning symbols)
#   function:       named non-terminal symbol, recursive definition
#                   if you don't want naming in output, precede name with an underscore
#   tuple:          production sequence
#   integer:        count in production sequence:
#                    0: following element is optional
#                   -1: following element can be omitted or repeated endlessly
#                   -2: following element is required and can be repeated endlessly
#   list:           options, choose one of them
#   _not:           next element in production sequence is matched only if this one would not match
#   _and:           next element in production sequence is matched only if this one would match, too
       
    23 
       
    24 newSyntax = True
       
    25 
       
    26 def oldSyntax():
       
    27     global newSyntax
       
    28     newSyntax = False
       
    29 
       
    30 def _if(cond, val):
       
    31     if cond:
       
    32         return val
       
    33     else:
       
    34         return ()
       
    35 
       
    36 def listing(x):     return x, -1, (",", x)
       
# Shorthand used throughout this module for compiling regular expressions.
r = re.compile

# Two comment forms: "//" to the end of the line, and "/* ... */", which may
# span lines because re.S lets "." match newlines.
comment = [r(r"//.*"), r(r"/\*.*?\*/", re.S)]
# Source text of the symbol pattern: a word character that is not a digit,
# followed by any number of word characters or colons.
_symbol = r"(?=\D)\w(\w|:)*"
symbol = r(_symbol, re.U)
# Each pattern below is the symbol pattern behind a fixed prefix:
# "*", "**", "%" and "&" respectively.
pointer = r(r"\*" + _symbol, re.U)
ppointer = r(r"\*\*" + _symbol, re.U)
macro = r(r"\%" + _symbol, re.U)
reference = r(r"\&" + _symbol, re.U)
       
    46 
       
# Alternation of the characters allowed at the start of a name.
# NOTE(review): the ranges look like the XML 1.0 NameStartChar production
# without its supplementary-plane range -- confirm against the spec.
NameStartChar = r''':|[A-Z]|_|[a-z]|[\u00C0-\u00D6]|[\u00D8-\u00F6]|[\u00F8-\u02FF]|[\u0370-\u037D]|[\u037F-\u1FFF]|[\u200C-\u200D]|[\u2070-\u218F]|[\u2C00-\u2FEF]|[\u3001-\uD7FF]|[\uF900-\uFDCF]|[\uFDF0-\uFFFD]'''
# Characters allowed after the first one: everything in NameStartChar plus
# "-", ".", digits and a few further code points.
NameChar = NameStartChar + r'''|-|\.|[0-9]|\u00B7|[\u0300-\u036F]|[\u203F-\u2040]'''

# One start character followed by any number of name characters.
_xmlSymbol = "(" + NameStartChar + ")(" + NameChar + ")*"
xmlSymbol = r(_xmlSymbol)
# Either a single "-" or a name as matched by _xmlSymbol.
aliasSymbol = r(r"-|(" + _xmlSymbol + r")")
       
    53 
       
# Literal alternatives, tried in this order:
#   1. a """...""" string (re.S, so it may span lines),
#   2. a '''...''' string (re.S, so it may span lines),
#   3. "0x" plus lowercase hex digits, a decimal number with optional
#      leading "-", or a "..." / '...' string that stays on one line.
literal = [r(r'""".*?"""', re.S), r(r"'''.*?'''", re.S), r(r"""0x[a-f0-9]+|-?\d+\.\d*|-?\.\d+|-?\d+|".*?"|'.*?'""")]
# A file name is either quoted (single or double quotes, the quote character
# itself excluded from the name) or a run of characters containing neither
# whitespace nor ";".
filename = [("'", r(r"[^']*"), "'"), ('"', r(r'[^"]*'), '"'), r(r"[^\s;]+")]
# One or more whitespace characters.
ws = r(r"\s+", re.U)
       
    57 
       
    58 def pyExp():        return "!", r(r"(!=|\\!|[^!])+"), "!"
       
    59 value = [literal, pyExp]
       
    60 
       
    61 def tagQuote():     return r(r"\].*|\<.*?\>")
       
    62 def lineQuote():    return r(r"\|.*")
       
    63 def quote():        return [r(r"\d*>.*"), (literal, 0, [";", "."])]
       
    64 def parm():         return [([xmlSymbol, pyExp, pointer, macro], "=", [value, pointer, symbol]), value, pointer]
       
    65 def parm_eq():      return [xmlSymbol, pyExp, pointer, macro], "=", [value, pointer, symbol]
       
    66 parm_eq.__name__ = "parm"
       
# Shared head of a function, as a production sequence:
#   - one of symbol, ppointer, pointer or reference;
#   - if newSyntax is true when this line runs: zero or more
#     "[" parm-list "]" groups (otherwise an empty tuple);
#   - optionally "(" parm-list ")";
#   - optionally a bare parm-list;
#   - zero or more parm_eq.
# This is evaluated once, at module load.
_func = [symbol, ppointer, pointer, reference], _if(newSyntax, (-1, ("[", listing(parm), "]"))), 0, ("(", listing(parm), ")"), 0, listing(parm), -1, parm_eq
       
    68 def pythonCall():   return keyword("python"), _func, [";", "."]
       
    69 def declParm():     return [pointer, macro, xmlSymbol], 0, ("=", [literal, symbol])
       
    70 def alias():        return keyword("alias"), aliasSymbol
       
    71 def descend():      return r(r"[+@*]" + _symbol, re.U)
       
    72 def base():         return keyword("is"), symbol
       
    73 def shape():        return symbol
       
    74 def decl():         return symbol, 0, base, 0, ("<", listing(shape), ">"), -1, descend, _if(newSyntax, (-1, ("[", 0, listing(declParm), "]"))), 0, ("(", 0, listing(declParm), ")"), 0, alias, 0, content
       
    75 def python():       return [r(r"!!.*?!!", re.S), r(r"!.*")]
       
    76 def operator():     return 0, keyword("define"), keyword("operator"), literal, keyword("as"), r(r".*")
       
    77 def constant():     return 0, keyword("define"), [pointer, symbol], "=", literal, 0, [";", "."]
       
    78 def in_ns():        return keyword("in"), xmlSymbol, [_decl, ("{", -2, _decl, "}")]
       
    79 _decl = keyword("decl"), listing(decl), [";", "."]
       
    80 def textsection():  return r(r'(\|\|(\>*)(.*?)\|\|(\>*))\s*$', re.S | re.M)
       
    81 def textsectionu(): return r(r'(\>\>.*?\>\>)', re.S)
       
    82 def include():      return keyword("include"), 0, reverse, 0, [ktext, kxml], [(kpointer, pointer), filename], 0, [";", "."]
       
    83 def func():         return _func, 0, content
       
    84 def funclist():     return listing(func)
       
# A command: a funclist optionally followed by ';' or '.'.
_cmd = funclist, 0, [";", "."]
# Alternatives allowed as one item of content; order is the order in which
# the options are listed for the parser.
_inner = [include, textsection, textsectionu, pythonCall, _cmd, quote, lineQuote, tagQuote, pyExp]
# Braced content: '{', zero or more _inner items, '}'.
_cc = "{", -1, _inner, "}"
       
    88 def content_plain(): return [ (_l, 0, _p, 0, _b, 0, _cc), (_p, 0, _b, 0, _cc), (_b, 0, _cc), _cc ]
       
    89 content_plain.__name__ = "content"
       
    90 def func_plain():   return _func, 0, content_plain
       
    91 func_plain.__name__ = "func"
       
    92 def flist_plain():  return -2, func_plain
       
    93 flist_plain.__name__ = "funclist"
       
    94 def xbase():        return flist_plain
       
    95 def generic():      return flist_plain
       
    96 def subscript():    return flist_plain
       
    97 def parentheses():  return "(", 0, funclist, ")"
       
    98 def fparm():        return flist_plain
       
    99 
       
# Building blocks of the content rules. The _if(...) calls run once, when
# this module is loaded, using the value newSyntax has at that moment.

# '<' generic-list '>' under the new syntax, otherwise an empty tuple.
_l = _if(newSyntax, ("<", listing(generic), ">"))
# A pair: the first entry is the parentheses rule under the old syntax, the
# second is '(' [fparm-list] ')' under the new syntax; the entry that does
# not apply is an empty tuple.
_p = _if(not newSyntax, parentheses), _if(newSyntax, ("(", 0, listing(fparm), ")"))
# ':' followed by a comma-separated list of xbase.
_b = (":", listing(xbase))
# Content body: either a single _inner item or the braced form _cc.
_c = [_inner, _cc]
       
   104 
       
   105 def content():      return [ (_l, 0, _p, 0, _b, 0, _c), (_p, 0, _b, 0, _c), (_b, 0, _c), _c ]
       
   106 def reverse():      return keyword("reverse")
       
   107 def ktext():        return keyword("text")
       
   108 def kxml():         return keyword("xml")
       
   109 def kpointer():     return keyword("pointer")
       
   110 def ymlCStyle():    return -1, [_decl, in_ns, include, python, operator, constant, tagQuote, lineQuote, quote, _cmd]