yml2proc
changeset 0 76005e62091d
child 4 e7b49a65cf3a
equal deleted inserted replaced
-1:000000000000 0:76005e62091d
       
     1 #!/usr/bin/env python
       
     2 # vim: set fileencoding=utf-8 :
       
     3 
       
     4 """\
       
     5 YML/YSLT 2 processor version 5.4
       
     6 Copyleft (c), 2009-2011 Volker Birk  http://fdik.org/yml/
       
     7 
       
     8 """
       
     9 
       
    10 import sys, os, codecs, locale
       
    11 import fileinput, unicodedata
       
    12 from optparse import OptionParser
       
    13 
       
    14 try:
       
    15     from lxml import etree
       
    16 except:
       
    17     sys.stderr.write("This program needs lxml, see http://codespeak.net/lxml/\n")
       
    18     sys.exit(1)
       
    19 
       
    20 from yml2 import ymlCStyle, comment, oldSyntax
       
    21 from pyPEG import parse, u
       
    22 import backend
       
    23 
       
    24 def printInfo(option, opt_str, value, parser):
       
    25     sys.stdout.write(__doc__)
       
    26     sys.exit(0)
       
    27 
       
    28 class YMLAssert(Exception): pass
       
    29 
       
    30 def w(msg):
       
    31     if isinstance(msg, BaseException):
       
    32         try:
       
    33             msg = str(msg) + "\n"
       
    34         except:
       
    35             msg = u(msg) + u"\n"
       
    36     if type(msg) is unicode:
       
    37         msg = codecs.encode(msg, sys.stderr.encoding)
       
    38     sys.stderr.write(msg)
       
    39 
       
    40 optParser = OptionParser()
       
    41 optParser.add_option("-C", "--old-syntax", action="store_true", dest="old_syntax",
       
    42         help="syntax of YML 2 version 1.x (compatibility mode)", default=False)
       
    43 optParser.add_option("-D", "--emit-linenumbers", action="store_true", dest="emitlinenumbers",
       
    44         help="emit line numbers into the resulting XML for debugging purposes", default=False)
       
    45 optParser.add_option("--debug", action="store_true", dest="trace",
       
    46         help="switch on tracing to stderr", default=False)
       
    47 optParser.add_option("-d", "--paramdict", dest="params", metavar="PARAMS",
       
    48         help="call X/YSLT script with dictionary PARAMS as parameters")
       
    49 optParser.add_option("-e", "--xpath", dest="xpath", metavar="XPATH",
       
    50         help="execute XPath expression XPATH and print result")
       
    51 optParser.add_option("-E", "--encoding", dest="encoding", metavar="ENCODING", default=locale.getdefaultlocale()[1],
       
    52         help="encoding of input files (default to locale)")
       
    53 optParser.add_option("-I", "--include", dest="includePathText", metavar="INCLUDE_PATH",
       
    54         help="precede YML_PATH by a colon separated INCLUDE_PATH to search for include files")
       
    55 optParser.add_option("-m", "--omit-empty-parm-tags", action="store_true", dest="omitemptyparm",
       
    56         help="does nothing (only there for compatibility reasons)", default=False)
       
    57 optParser.add_option("-M", "--empty-input-document", action="store_true", dest="emptyinput",
       
    58         help="use an empty input document", default=False)
       
    59 optParser.add_option("-n", "--normalization", dest="normalization", metavar="NORMALIZATION", default="NFC",
       
    60         help="Unicode normalization (none, NFD, NFKD, NFC, NFKC, FCD, default is NFC)")
       
    61 optParser.add_option("-o", "--output", dest="outputFile", metavar="FILE",
       
    62         help="place output in file FILE")
       
    63 optParser.add_option("-p", "--parse-only", action="store_true", dest="parseonly",
       
    64         help="parse only, then output pyAST as text to stdout", default=False)
       
    65 optParser.add_option("-P", "--pretty", action="store_true", default=False,
       
    66         help="pretty print output adding whitespace")
       
    67 optParser.add_option("-s", "--stringparamdict", dest="stringparams", metavar="STRINGPARAMS",
       
    68         help="call X/YSLT script with dictionary STRINGPARAMS as string parameters")
       
    69 optParser.add_option("-x", "--xml", action="store_true", default=False,
       
    70         help="input document is XML already")
       
    71 optParser.add_option("-X", "--xslt", dest="xslt", metavar="XSLTSCRIPT",
       
    72         help="execute XSLT script XSLTSCRIPT")
       
    73 optParser.add_option("-y", "--yslt", dest="yslt", metavar="YSLTSCRIPT",
       
    74         help="execute YSLT script YSLTSCRIPT")
       
    75 optParser.add_option("-Y", "--xml2yml", action="store_true", default=False,
       
    76         help="convert XML to normalized YML code")
       
    77 optParser.add_option("-V", "--version", action="callback", callback=printInfo, help="show version info and exit")
       
    78 (options, args) = optParser.parse_args()
       
    79 
       
    80 if options.old_syntax:
       
    81     oldSyntax()
       
    82 
       
    83 if options.trace:
       
    84     backend.enable_tracing = True
       
    85 
       
    86 if options.emitlinenumbers:
       
    87     backend.emitlinenumbers = True
       
    88 
       
    89 if options.includePathText:
       
    90     backend.includePath = options.includePathText.split(':')
       
    91 
       
    92 backend.encoding = options.encoding
       
    93 
       
    94 dirs = os.environ.get('YML_PATH', '.').split(':')
       
    95 backend.includePath.extend(dirs)
       
    96 
       
    97 if options.xml2yml:
       
    98     for directory in backend.includePath:
       
    99         try:
       
   100             name = directory + "/xml2yml.ysl2"
       
   101             f = open(name, "r")
       
   102             f.close()
       
   103             break
       
   104         except:
       
   105             pass
       
   106 
       
   107     options.yslt = name
       
   108     options.xml = True
       
   109 
       
   110 if  (options.xslt and options.yslt) or (options.xslt and options.xpath) or (options.yslt and options.xpath):
       
   111     sys.stderr.write("Cannot combine --xpath, --xslt and --yslt params\n")
       
   112     sys.exit(1)
       
   113 
       
   114 try:
       
   115     ymlC = ymlCStyle()
       
   116 
       
   117     rtext = u""
       
   118 
       
   119     if not options.emptyinput:
       
   120         files = fileinput.input(args, mode="rU", openhook=fileinput.hook_encoded(options.encoding))
       
   121         # fileinput suffer from two nasty bugs :
       
   122         # - ignoring open hook with stdin
       
   123         # - iterator requires ctrl-D to be pressed twice on siome platform
       
   124         if args in [[],['-']] :
       
   125             files._files=[]
       
   126             files._buffer=[unicode(line, options.encoding) for line in sys.stdin.readlines()]
       
   127 
       
   128         if options.xml:
       
   129             rtext = ""
       
   130             for line in files:
       
   131                 rtext += line
       
   132         else:
       
   133             result = parse(ymlC, files, True, comment)
       
   134             if options.parseonly:
       
   135                 print result
       
   136                 sys.exit(0)
       
   137             else:
       
   138                 rtext = backend.finish(result)
       
   139 
       
   140     if not rtext:
       
   141         rtext = u"<empty/>"
       
   142 
       
   143     def ymldebug(context, text):
       
   144         if options.trace:
       
   145             sys.stderr.write("Debug: " + codecs.encode(u(text), options.encoding) + "\n")
       
   146         return ""
       
   147 
       
   148     def ymlassert(context, value, msg):
       
   149         if options.trace:
       
   150             if not value:
       
   151                 raise YMLAssert(msg)
       
   152         return ""
       
   153 
       
   154     ymlns = etree.FunctionNamespace("http://fdik.org/yml")
       
   155     ymlns.prefix = "yml"
       
   156     ymlns['debug'] = ymldebug
       
   157     ymlns['assert'] = ymlassert
       
   158 
       
   159     if options.xpath:
       
   160         tree = etree.fromstring(rtext)
       
   161         ltree = tree.xpath(codecs.decode(options.xpath, options.encoding))
       
   162         rtext = u""
       
   163         try:
       
   164             for rtree in ltree:
       
   165                 rtext += etree.tostring(rtree, pretty_print=options.pretty, encoding=unicode)
       
   166         except:
       
   167             rtext = ltree
       
   168 
       
   169     elif options.yslt or options.xslt:
       
   170         params = {}
       
   171 
       
   172         if options.yslt:
       
   173             backend.clearAll()
       
   174             yscript = fileinput.input(options.yslt, mode="rU", openhook=fileinput.hook_encoded(options.encoding))
       
   175             yresult = parse(ymlC, yscript, True, comment)
       
   176             ytext = backend.finish(yresult)
       
   177         else:
       
   178             yscript = fileinput.input(options.xslt, mode="rU")
       
   179             ytext = ""
       
   180             for line in yscript:
       
   181                 ytext += line
       
   182 
       
   183         doc = etree.fromstring(rtext)
       
   184 
       
   185         xsltree = etree.XML(ytext, base_url=os.path.abspath(yscript.filename()))
       
   186         transform = etree.XSLT(xsltree)
       
   187         
       
   188         if options.params:
       
   189             params = eval(options.params)
       
   190             for key, value in params.iteritems():
       
   191                 if type(value) != unicode:
       
   192                     params[key] = u(value)
       
   193         if options.stringparams:
       
   194             for key, value in eval(options.stringparams).iteritems():
       
   195                 params[key] = u"'" + u(value) + u"'"
       
   196 
       
   197         rresult = transform(doc, **params)
       
   198         # lxml is somewhat buggy
       
   199         try:
       
   200             rtext = u(rresult)
       
   201         except:
       
   202             rtext = etree.tostring(rresult, encoding=unicode)
       
   203             if not rtext:
       
   204                 rtext = codecs.decode(str(rresult), "utf-8")
       
   205 
       
   206     if options.normalization != "none":
       
   207         rtext = unicodedata.normalize(options.normalization, rtext)
       
   208 
       
   209     if options.pretty:
       
   210         plaintext = etree.tostring(etree.fromstring(rtext), pretty_print=True, xml_declaration=True, encoding=options.encoding)
       
   211     else:
       
   212         if isinstance(rtext, unicode):
       
   213             plaintext = codecs.encode(rtext, options.encoding)
       
   214         else:
       
   215             plaintext = str(rtext)
       
   216 
       
   217     try:
       
   218         if plaintext[-1] == "\n":
       
   219             plaintext = plaintext[:-1]
       
   220     except: pass
       
   221 
       
   222     if options.outputFile and options.outputFile != "-":
       
   223         outfile = open(options.outputFile, "w")
       
   224         outfile.write(plaintext)
       
   225         outfile.close()
       
   226     else:
       
   227         print plaintext
       
   228 
       
   229 except KeyboardInterrupt:
       
   230     w("\n")
       
   231     sys.exit(1)
       
   232 except YMLAssert, msg:
       
   233     w(u"YML Assertion failed: " + u(msg) + u"\n")
       
   234     sys.exit(2)
       
   235 except KeyError, msg:
       
   236     w(u"not found: " + u(msg) + u"\n")
       
   237     sys.exit(4)
       
   238 except LookupError, msg:
       
   239     w(u"not found: " + u(msg) + u"\n")
       
   240     sys.exit(4)
       
   241 except etree.XMLSyntaxError, e:
       
   242     log = e.error_log.filter_from_level(etree.ErrorLevels.FATAL)
       
   243     for entry in log:
       
   244         w(u"XML error: " + u(entry.message) + u"\n")
       
   245     sys.exit(5)
       
   246 except Exception, msg:
       
   247     w(msg)
       
   248     sys.exit(5)