yml2proc
author Volker Birk <vb@pep.foundation>
Tue, 25 Apr 2017 17:44:53 +0200
changeset 8 b49a28a57f67
parent 7 f81a4471bc28
child 11 7026ccc7a60e
permissions -rwxr-xr-x
typo
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :

"""\
YML/YSLT 2 processor version 5.5
Copyleft (c), 2009-2011 Volker Birk  http://fdik.org/yml/

"""

import sys, os, codecs, locale
import fileinput, unicodedata
from optparse import OptionParser

try:
    from lxml import etree
except:
    sys.stderr.write("This program needs lxml, see http://codespeak.net/lxml/\n")
    sys.exit(1)

from yml2 import ymlCStyle, comment, oldSyntax
from pyPEG import parse, u
import backend

def printInfo(option, opt_str, value, parser):
    """optparse callback: print the module's version banner and exit.

    The four parameters are the ones optparse hands to every option
    callback; none of them is used here. The function never returns,
    it terminates the program with exit status 0.
    """
    banner = __doc__
    sys.stdout.write(banner)
    raise SystemExit(0)

class YMLAssert(Exception):
    """Signals a failed assertion raised from the yml:assert extension function."""

def w(msg):
    """Write a message to stderr.

    msg may be a byte string, a unicode string or an exception instance.
    An exception is converted to its message text followed by a newline.
    Unicode text is encoded for the stderr stream before it is written.
    """
    if isinstance(msg, BaseException):
        try:
            msg = str(msg) + "\n"
        except Exception:
            # str() cannot render the exception (e.g. it carries non-ASCII
            # unicode text); fall back to the unicode conversion helper
            msg = u(msg) + u"\n"
    if isinstance(msg, unicode):
        # sys.stderr.encoding is None when stderr is not a terminal (pipe,
        # redirected file), so fall back to UTF-8 instead of failing.
        # "replace" keeps error reporting alive for unencodable characters.
        encoding = getattr(sys.stderr, "encoding", None) or "utf-8"
        msg = codecs.encode(msg, encoding, "replace")
    sys.stderr.write(msg)

def _defaultEncoding():
    """Return the encoding named by the locale, or "utf-8" if there is none.

    locale.getdefaultlocale() reports None as encoding when it cannot
    determine one (e.g. in the C locale); passing None on as the input
    encoding would break every later decode/encode call.
    """
    try:
        encoding = locale.getdefaultlocale()[1]
    except ValueError:
        # presumably raised for locale settings Python cannot parse;
        # treat that like "no encoding known"
        encoding = None
    return encoding or "utf-8"

# Command line definition. The parsed values end up in the module level
# names "options" (switches) and "args" (input file names).
optParser = OptionParser()
optParser.add_option("-C", "--old-syntax", action="store_true", dest="old_syntax",
        help="syntax of YML 2 version 1.x (compatibility mode)", default=False)
optParser.add_option("-D", "--emit-linenumbers", action="store_true", dest="emitlinenumbers",
        help="emit line numbers into the resulting XML for debugging purposes", default=False)
optParser.add_option("--debug", action="store_true", dest="trace",
        help="switch on tracing to stderr", default=False)
optParser.add_option("-d", "--paramdict", dest="params", metavar="PARAMS",
        help="call X/YSLT script with dictionary PARAMS as parameters")
optParser.add_option("-e", "--xpath", dest="xpath", metavar="XPATH",
        help="execute XPath expression XPATH and print result")
optParser.add_option("-E", "--encoding", dest="encoding", metavar="ENCODING", default=_defaultEncoding(),
        help="encoding of input files (default to locale)")
optParser.add_option("-I", "--include", dest="includePathText", metavar="INCLUDE_PATH",
        help="precede YML_PATH by a colon separated INCLUDE_PATH to search for include files")
optParser.add_option("-m", "--omit-empty-parm-tags", action="store_true", dest="omitemptyparm",
        help="does nothing (only there for compatibility reasons)", default=False)
optParser.add_option("-M", "--empty-input-document", action="store_true", dest="emptyinput",
        help="use an empty input document", default=False)
# NOTE(review): the value is handed to unicodedata.normalize() later on,
# which documents only NFC, NFKC, NFD and NFKD -- confirm whether "FCD"
# should stay in the help text.
optParser.add_option("-n", "--normalization", dest="normalization", metavar="NORMALIZATION", default="NFC",
        help="Unicode normalization (none, NFD, NFKD, NFC, NFKC, FCD, default is NFC)")
optParser.add_option("-o", "--output", dest="outputFile", metavar="FILE",
        help="place output in file FILE")
optParser.add_option("-p", "--parse-only", action="store_true", dest="parseonly",
        help="parse only, then output pyAST as text to stdout", default=False)
optParser.add_option("-P", "--pretty", action="store_true", default=False,
        help="pretty print output adding whitespace")
optParser.add_option("-s", "--stringparamdict", dest="stringparams", metavar="STRINGPARAMS",
        help="call X/YSLT script with dictionary STRINGPARAMS as string parameters")
optParser.add_option("-x", "--xml", action="store_true", default=False,
        help="input document is XML already")
optParser.add_option("-X", "--xslt", dest="xslt", metavar="XSLTSCRIPT",
        help="execute XSLT script XSLTSCRIPT")
optParser.add_option("-y", "--yslt", dest="yslt", metavar="YSLTSCRIPT",
        help="execute YSLT script YSLTSCRIPT")
optParser.add_option("-Y", "--xml2yml", action="store_true", default=False,
        help="convert XML to normalized YML code")
optParser.add_option("-V", "--version", action="callback", callback=printInfo, help="show version info and exit")
(options, args) = optParser.parse_args()

# Hand the global switches over to the parser and the backend.
if options.old_syntax:
    oldSyntax()

if options.trace:
    backend.enable_tracing = True

if options.emitlinenumbers:
    backend.emitlinenumbers = True

if options.includePathText:
    backend.includePath = options.includePathText.split(':')

backend.encoding = options.encoding

# The directories of YML_PATH (default: the current directory) are searched
# after the ones given with -I.
dirs = os.environ.get('YML_PATH', '.').split(':')
backend.includePath.extend(dirs)

if options.xml2yml:
    # --xml2yml runs the first readable xml2yml.ysl2 found on the include
    # path as YSLT script over an XML input document
    for directory in backend.includePath:
        name = directory + "/xml2yml.ysl2"
        try:
            open(name, "r").close()
        except (IOError, OSError):
            continue
        break
    else:
        # Fail here with a clear message instead of passing on a path that
        # is known not to be readable; 5 is the exit status the later,
        # failing open of that path would have produced.
        sys.stderr.write("xml2yml.ysl2 not found in include path\n")
        sys.exit(5)

    options.yslt = name
    options.xml = True

# --xpath, --xslt and --yslt are mutually exclusive
if sum(1 for chosen in (options.xslt, options.yslt, options.xpath) if chosen) > 1:
    sys.stderr.write("Cannot combine --xpath, --xslt and --yslt params\n")
    sys.exit(1)

# Main program: read the input document, optionally evaluate an XPath
# expression or run an XSLT/YSLT script on it, then write the result.
# Exit status: 1 interrupted, 2 YML assertion failed, 4 lookup failed,
# 5 XML syntax error or any other error.
try:
    ymlC = ymlCStyle()

    rtext = u""

    if not options.emptyinput:
        files = fileinput.input(args, mode="rU", openhook=fileinput.hook_encoded(options.encoding))
        # fileinput suffers from two nasty bugs:
        # - it ignores the open hook when reading from stdin
        # - its iterator requires ctrl-D to be pressed twice on some platforms
        # so stdin is read and decoded here and handed over as prefilled buffer
        if args in [[], ['-']]:
            files._files = []
            files._buffer = [unicode(line, options.encoding) for line in sys.stdin.readlines()]

        if options.xml:
            # input is XML already: take it verbatim
            rtext = "".join(files)
        else:
            result = parse(ymlC, files, True, comment)
            if options.parseonly:
                print(result)
                sys.exit(0)
            else:
                rtext = backend.finish(result)

    if not rtext:
        rtext = u"<empty/>"

    def ymldebug(context, text):
        """XPath extension function yml:debug: trace text to stderr.

        Writes only if tracing is switched on; always returns "".
        context is the first argument of the extension function call
        and is not used.
        """
        if options.trace:
            sys.stderr.write("Debug: " + codecs.encode(u(text), options.encoding) + "\n")
        return ""

    def ymlassert(context, value, msg):
        """XPath extension function yml:assert.

        Raises YMLAssert(msg) if tracing is switched on and value is
        false; returns "" otherwise. context is not used.
        """
        if options.trace:
            if not value:
                raise YMLAssert(msg)
        return ""

    ymlns = etree.FunctionNamespace("http://fdik.org/yml")
    ymlns.prefix = "yml"
    ymlns['debug'] = ymldebug
    ymlns['assert'] = ymlassert

    if options.xpath:
        tree = etree.fromstring(rtext)
        ltree = tree.xpath(codecs.decode(options.xpath, options.encoding))
        rtext = u""
        try:
            for rtree in ltree:
                rtext += etree.tostring(rtree, pretty_print=options.pretty, encoding=unicode)
        except Exception:
            # the result cannot be serialized node by node (not iterable or
            # not made of elements): output the raw XPath result instead
            rtext = ltree

    elif options.yslt or options.xslt:
        params = {}

        if options.yslt:
            backend.clearAll()
            yscript = fileinput.input(options.yslt, mode="rU", openhook=fileinput.hook_encoded(options.encoding))
            yresult = parse(ymlC, yscript, True, comment)
            ytext = backend.finish(yresult)
        else:
            yscript = fileinput.input(options.xslt, mode="rU")
            ytext = "".join(yscript)

        doc = etree.fromstring(rtext)

        xsltree = etree.XML(ytext, base_url=os.path.abspath(yscript.filename()))
        transform = etree.XSLT(xsltree)

        # NOTE(security): eval() executes arbitrary Python code taken from
        # the command line. Never feed -d/-s from untrusted input.
        if options.params:
            params = eval(options.params)
            for key, value in list(params.items()):
                if not isinstance(value, unicode):
                    params[key] = u(value)
        if options.stringparams:
            for key, value in eval(options.stringparams).items():
                # quote the value so that it is passed as a string literal
                params[key] = u"'" + u(value) + u"'"

        rresult = transform(doc, **params)
        # lxml is somewhat buggy
        try:
            rtext = u(rresult)
        except Exception:
            rtext = etree.tostring(rresult, encoding=unicode)
            if not rtext:
                rtext = codecs.decode(str(rresult), "utf-8")

    # unicodedata.normalize() accepts unicode text only; an XPath result
    # may be a number, a boolean or a byte string, which is output as is
    if options.normalization != "none" and isinstance(rtext, unicode):
        rtext = unicodedata.normalize(options.normalization, rtext)

    if options.pretty:
        plaintext = etree.tostring(etree.fromstring(rtext), pretty_print=True, xml_declaration=True, encoding=options.encoding)
    elif isinstance(rtext, unicode):
        plaintext = codecs.encode(rtext, options.encoding)
    else:
        plaintext = str(rtext)

    # strip one trailing newline; print() adds one again on stdout
    if plaintext[-1:] == "\n":
        plaintext = plaintext[:-1]

    if options.outputFile and options.outputFile != "-":
        # the with statement closes the file even if writing fails
        with open(options.outputFile, "w") as outfile:
            outfile.write(plaintext)
    else:
        print(plaintext)

except KeyboardInterrupt:
    w("\n")
    sys.exit(1)
except YMLAssert as msg:
    w(u"YML Assertion failed: " + u(msg) + u"\n")
    sys.exit(2)
except LookupError as msg:
    # covers KeyError as well, which is a subclass of LookupError
    w(u"not found: " + u(msg) + u"\n")
    sys.exit(4)
except etree.XMLSyntaxError as e:
    log = e.error_log.filter_from_level(etree.ErrorLevels.FATAL)
    for entry in log:
        w(u"XML error: " + u(entry.message) + u"\n")
    sys.exit(5)
except Exception as msg:
    w(msg)
    sys.exit(5)