yml2/yml2proc.py
author Claudio Luck <claudio.luck@pep.foundation>
Wed, 18 Mar 2020 19:20:01 +0100
changeset 58 a218553807ab
parent 57 2f4ad3800a3f
child 60 b19d0a86651b
permissions -rwxr-xr-x
pypackage: Merge 2.6.2
#!/usr/bin/env python3
# vim: set fileencoding=utf-8 :

"""\
YML/YSLT 2 processor version 6.2
Copyleft (c), 2009-2019 Volker Birk  http://fdik.org/yml/

"""

import sys, os, codecs, locale
import fileinput, unicodedata
from optparse import OptionParser

try:
    from lxml import etree
except ImportError:
    # Only a missing (or unimportable) lxml should produce this hint; a bare
    # except would also swallow KeyboardInterrupt and SystemExit.
    sys.stderr.write("This program needs lxml, see https://lxml.de/\n")
    sys.exit(1)

from yml2 import ymlCStyle, comment, oldSyntax
from pyPEG import parse, u
import backend

def printInfo(option, opt_str, value, parser):
    """optparse callback for -V/--version: show the banner and quit.

    The four parameters are the standard optparse callback arguments; none
    of them is used.  The module docstring is written to stdout and the
    process exits with status 0.
    """
    banner = __doc__
    sys.stdout.write(banner)
    raise SystemExit(0)

class YMLAssert(Exception):
    """Raised by the yml:assert() extension function when its value is false."""

def w(msg):
    """Write *msg* to stderr.

    A string is written verbatim.  An exception instance is converted to its
    text form first (falling back to pyPEG's u() if str() fails) and gets a
    trailing newline appended.
    """
    if not isinstance(msg, BaseException):
        sys.stderr.write(msg)
        return

    try:
        rendered = str(msg)
    except:
        rendered = u(msg)
    sys.stderr.write(rendered + "\n")

def _build_option_parser():
    """Return the OptionParser describing the yml2proc command line."""
    # locale.getdefaultlocale() is deprecated and may report None as the
    # encoding, which later breaks codecs.encode(); ask for the preferred
    # encoding instead and fall back to UTF-8 if nothing is reported.
    default_encoding = locale.getpreferredencoding(False) or "utf-8"

    optParser = OptionParser()
    optParser.add_option("-C", "--old-syntax", action="store_true", dest="old_syntax",
            help="syntax of YML 2 version 1.x (compatibility mode)", default=False)
    optParser.add_option("-D", "--emit-linenumbers", action="store_true", dest="emitlinenumbers",
            help="emit line numbers into the resulting XML for debugging purposes", default=False)
    optParser.add_option("--debug", action="store_true", dest="trace",
            help="switch on tracing to stderr", default=False)
    optParser.add_option("-d", "--paramdict", dest="params", metavar="PARAMS",
            help="call X/YSLT script with dictionary PARAMS as parameters")
    optParser.add_option("-e", "--xpath", dest="xpath", metavar="XPATH",
            help="execute XPath expression XPATH and print result")
    optParser.add_option("-E", "--encoding", dest="encoding", metavar="ENCODING", default=default_encoding,
            help="encoding of input files (default to locale)")
    optParser.add_option("-I", "--include", dest="includePathText", metavar="INCLUDE_PATH",
            help="precede YML_PATH by a colon separated INCLUDE_PATH to search for include files")
    optParser.add_option("-m", "--omit-empty-parm-tags", action="store_true", dest="omitemptyparm",
            help="does nothing (only there for compatibility reasons)", default=False)
    optParser.add_option("-M", "--empty-input-document", action="store_true", dest="emptyinput",
            help="use an empty input document", default=False)
    optParser.add_option("-n", "--normalization", dest="normalization", metavar="NORMALIZATION", default="NFC",
            help="Unicode normalization (none, NFD, NFKD, NFC, NFKC, FCD, default is NFC)")
    optParser.add_option("-o", "--output", dest="outputFile", metavar="FILE",
            help="place output in file FILE")
    optParser.add_option("-p", "--parse-only", action="store_true", dest="parseonly",
            help="parse only, then output pyAST as text to stdout", default=False)
    optParser.add_option("-P", "--pretty", action="store_true", default=False,
            help="pretty print output adding whitespace")
    optParser.add_option("-s", "--stringparamdict", dest="stringparams", metavar="STRINGPARAMS",
            help="call X/YSLT script with dictionary STRINGPARAMS as string parameters")
    optParser.add_option("-x", "--xml", action="store_true", default=False,
            help="input document is XML already")
    optParser.add_option("-X", "--xslt", dest="xslt", metavar="XSLTSCRIPT",
            help="execute XSLT script XSLTSCRIPT")
    optParser.add_option("-y", "--yslt", dest="yslt", metavar="YSLTSCRIPT",
            help="execute YSLT script YSLTSCRIPT")
    optParser.add_option("-Y", "--xml2yml", action="store_true", default=False,
            help="convert XML to normalized YML code")
    optParser.add_option("-V", "--version", action="callback", callback=printInfo, help="show version info and exit")
    return optParser


def _xpath_result_to_text(result, pretty):
    """Turn the value returned by an lxml xpath() call into one string.

    Element-like items of a list result are serialized with etree.tostring();
    every other item, and any non-list result, is converted with str().
    """
    if not isinstance(result, list):
        return str(result)
    return "".join(
        etree.tostring(item, pretty_print=pretty, encoding="unicode")
        if etree.iselement(item) else str(item)
        for item in result
    )


def main():
    """Run the YML/YSLT processor as a command-line program.

    Parses the command line, configures the backend module, reads the input
    (YML, or XML with --xml), optionally applies an XPath expression or an
    XSLT/YSLT script, normalizes the result and writes it to the output file
    or to stdout.

    Exits with status 1 on conflicting options or keyboard interrupt, 2 on a
    failed yml:assert(), 4 on lookup errors and 5 on XML syntax errors or any
    other exception.  Returns None when processing completes.
    """
    (options, args) = _build_option_parser().parse_args()

    if options.old_syntax:
        oldSyntax()

    if options.trace:
        backend.enable_tracing = True

    if options.emitlinenumbers:
        backend.emitlinenumbers = True

    if options.includePathText:
        backend.includePath = options.includePathText.split(':')

    backend.encoding = options.encoding

    dirs = os.environ.get('YML_PATH', '.').split(':')
    backend.includePath.extend(dirs)

    if options.xml2yml:
        # Use the first readable xml2yml.ysl2 on the include path.  If none is
        # readable the last candidate is kept, so the failure is reported when
        # the script is actually opened further down.
        name = None
        for directory in backend.includePath:
            name = directory + "/xml2yml.ysl2"
            try:
                with open(name, "r"):
                    break
            except OSError:
                continue

        options.yslt = name
        options.xml = True

    # --xpath, --xslt and --yslt are mutually exclusive.
    if sum(bool(opt) for opt in (options.xslt, options.yslt, options.xpath)) > 1:
        sys.stderr.write("Cannot combine --xpath, --xslt and --yslt params\n")
        sys.exit(1)

    try:
        ymlC = ymlCStyle()

        rtext = ""

        if not options.emptyinput:
            # "rU" is rejected by fileinput on current Python versions; plain
            # text mode already provides universal newlines.
            files = fileinput.input(args, mode="r", openhook=fileinput.hook_encoded(options.encoding))

            if options.xml:
                rtext = "".join(files)
            else:
                result = parse(ymlC, files, True, comment)
                if options.parseonly:
                    print(result)
                    sys.exit(0)
                rtext = backend.finish(result)

        if not rtext:
            rtext = "<empty/>"

        def ymldebug(context, text):
            # yml:debug() extension function: trace output, empty result.
            if options.trace:
                sys.stderr.write("Debug: " + u(text) + "\n")
            return ""

        def ymlassert(context, value, msg):
            # yml:assert() extension function: only checked while tracing.
            if options.trace:
                if not value:
                    raise YMLAssert(msg)
            return ""

        ymlns = etree.FunctionNamespace("http://fdik.org/yml")
        ymlns.prefix = "yml"
        ymlns['debug'] = ymldebug
        ymlns['assert'] = ymlassert

        if options.xpath:
            tree = etree.fromstring(rtext)
            # Command-line arguments are already text on Python 3, so the
            # expression needs no decoding.
            ltree = tree.xpath(options.xpath)
            rtext = _xpath_result_to_text(ltree, options.pretty)

        elif options.yslt or options.xslt:
            params = {}

            if options.yslt:
                backend.clearAll()
                yscript = fileinput.input(options.yslt, mode="r", openhook=fileinput.hook_encoded(options.encoding))
                yresult = parse(ymlC, yscript, True, comment)
                ytext = backend.finish(yresult)
            else:
                yscript = fileinput.input(options.xslt, mode="r")
                ytext = "".join(yscript)

            doc = etree.fromstring(rtext)

            xsltree = etree.XML(ytext, base_url=os.path.abspath(yscript.filename()))
            transform = etree.XSLT(xsltree)

            # NOTE(review): eval() executes arbitrary Python taken from the
            # command line (-d / -s); only pass trusted values here.
            if options.params:
                params = eval(options.params)
                for key, value in params.items():
                    if not isinstance(value, str):
                        params[key] = u(value)
            if options.stringparams:
                for key, value in eval(options.stringparams).items():
                    params[key] = "'" + u(value) + "'"

            rresult = transform(doc, **params)
            # lxml is somewhat buggy
            try:
                rtext = u(rresult)
            except Exception:
                rtext = etree.tostring(rresult, encoding="unicode")
                if not rtext:
                    rtext = bytes(rresult).decode("utf-8")

        if options.normalization != "none":
            rtext = unicodedata.normalize(options.normalization, rtext)

        # needs_newline is only consulted for stdout.  The former check
        # 'plaintext[-1] == "\n"' compared a byte with a str and never fired,
        # so text that already ended in a newline got a second one on stdout.
        # File output is left byte-for-byte as before.
        if options.pretty:
            plaintext = etree.tostring(etree.fromstring(rtext), pretty_print=True, xml_declaration=True, encoding=options.encoding)
            # No newline was ever appended after pretty-printed output.
            needs_newline = False
        elif isinstance(rtext, str):
            plaintext = codecs.encode(rtext, options.encoding)
            needs_newline = not rtext.endswith("\n")
        else:
            plaintext = rtext
            needs_newline = True

        if options.outputFile and options.outputFile != "-":
            with open(options.outputFile, "wb") as outfile:
                outfile.write(plaintext)
        else:
            sys.stdout.buffer.write(plaintext)
            if needs_newline:
                print()

    except KeyboardInterrupt:
        w("\n")
        sys.exit(1)
    except YMLAssert as msg:
        w("YML Assertion failed: " + u(msg) + "\n")
        sys.exit(2)
    except LookupError as msg:
        # KeyError is a LookupError, so this single handler covers both.
        w("not found: " + u(msg) + "\n")
        sys.exit(4)
    except etree.XMLSyntaxError as e:
        log = e.error_log.filter_from_level(etree.ErrorLevels.FATAL)
        for entry in log:
            w("XML error: " + u(entry.message) + "\n")
        sys.exit(5)
    except Exception as msg:
        w(msg)
        sys.exit(5)


if __name__ == "__main__":
    # Script entry point: main()'s return value becomes the exit status.
    raise SystemExit(main())