yml2proc
author Volker Birk <vb@pep-project.org>
Thu, 08 Apr 2021 14:37:11 +0200
changeset 72 e52ee17bca47
parent 70 58043c7bcc51
permissions -rwxr-xr-x
grammar in own file
#!/usr/bin/env python3
# vim: set fileencoding=utf-8 :

"""\
YML/YSLT 2 processor version 6.2
Copyleft (c), 2009-2020 Volker Birk  http://fdik.org/yml/

"""

import sys, os, codecs, locale
import fileinput, unicodedata
from optparse import OptionParser

try:
    from lxml import etree
except ImportError:
    # Only a missing lxml installation is reported here; any other error
    # (including Ctrl-C during import) propagates unchanged.
    sys.stderr.write("This program needs lxml, see http://codespeak.net/lxml/\n")
    sys.exit(1)

from yml2.grammar import ymlCStyle, comment, oldSyntax
from yml2.pyPEG import parse, u
import yml2.backend as backend

# Directory containing the yml2 backend module. main() appends it to the
# include search path after the entries taken from the YML_PATH environment
# variable.
YML_DEFAULT_PATH = [os.path.dirname(backend.__file__)]

def printInfo(option, opt_str, value, parser):
    """optparse callback: print this module's docstring and exit with status 0.

    The four parameters are the ones optparse hands to every callback;
    none of them is used.
    """
    banner = __doc__
    sys.stdout.write(banner)
    raise SystemExit(0)

class YMLAssert(Exception):
    """Signals a failed YML assertion; main() reports it and exits with status 2."""

def w(msg):
    """Write ``msg`` to stderr.

    A string is written unchanged. An exception instance is converted to
    its text and terminated with a newline before being written.
    """
    if isinstance(msg, BaseException):
        try:
            msg = str(msg) + "\n"
        except Exception:
            # str() on the exception failed; fall back to pyPEG's u().
            # Only Exception is caught so that KeyboardInterrupt/SystemExit
            # raised during the conversion are not swallowed.
            msg = u(msg) + "\n"
    sys.stderr.write(msg)

def _build_option_parser():
    """Return an OptionParser configured with all yml2proc command line options."""
    optParser = OptionParser()
    optParser.add_option("-C", "--old-syntax", action="store_true", dest="old_syntax",
            help="syntax of YML 2 version 1.x (compatibility mode)", default=False)
    optParser.add_option("-D", "--emit-linenumbers", action="store_true", dest="emitlinenumbers",
            help="emit line numbers into the resulting XML for debugging purposes", default=False)
    optParser.add_option("--debug", action="store_true", dest="trace",
            help="switch on tracing to stderr", default=False)
    optParser.add_option("-d", "--paramdict", dest="params", metavar="PARAMS",
            help="call X/YSLT script with dictionary PARAMS as parameters")
    optParser.add_option("-e", "--xpath", dest="xpath", metavar="XPATH",
            help="execute XPath expression XPATH and print result")
    optParser.add_option("-E", "--encoding", dest="encoding", metavar="ENCODING", default=locale.getdefaultlocale()[1],
            help="encoding of input files (default to locale)")
    optParser.add_option("-I", "--include", dest="includePathText", metavar="INCLUDE_PATH",
            help="precede YML_PATH by a colon separated INCLUDE_PATH to search for include files")
    optParser.add_option("-m", "--omit-empty-parm-tags", action="store_true", dest="omitemptyparm",
            help="does nothing (only there for compatibility reasons)", default=False)
    optParser.add_option("-M", "--empty-input-document", action="store_true", dest="emptyinput",
            help="use an empty input document", default=False)
    optParser.add_option("-n", "--normalization", dest="normalization", metavar="NORMALIZATION", default="NFC",
            help="Unicode normalization (none, NFD, NFKD, NFC, NFKC, FCD, default is NFC)")
    optParser.add_option("-o", "--output", dest="outputFile", metavar="FILE",
            help="place output in file FILE")
    optParser.add_option("-p", "--parse-only", action="store_true", dest="parseonly",
            help="parse only, then output pyAST as text to stdout", default=False)
    optParser.add_option("-P", "--pretty", action="store_true", default=False,
            help="pretty print output adding whitespace")
    optParser.add_option("-s", "--stringparamdict", dest="stringparams", metavar="STRINGPARAMS",
            help="call X/YSLT script with dictionary STRINGPARAMS as string parameters")
    optParser.add_option("-x", "--xml", action="store_true", default=False,
            help="input document is XML already")
    optParser.add_option("-X", "--xslt", dest="xslt", metavar="XSLTSCRIPT",
            help="execute XSLT script XSLTSCRIPT")
    optParser.add_option("-y", "--yslt", dest="yslt", metavar="YSLTSCRIPT",
            help="execute YSLT script YSLTSCRIPT")
    optParser.add_option("-Y", "--xml2yml", action="store_true", default=False,
            help="convert XML to normalized YML code")
    optParser.add_option("-V", "--version", action="callback", callback=printInfo, help="show version info and exit")
    return optParser


def main():
    """Run the YML/YSLT processor as a command line program.

    Reads the input documents named on the command line (or stdin), turns
    them into XML text, optionally applies an XPath expression or an
    XSLT/YSLT stylesheet, normalizes the result and writes it to stdout or
    to the file given with -o.

    Exit status as set by this function:
        1  conflicting options, xml2yml.ysl2 not found, or keyboard interrupt
        2  a YML assertion failed
        4  a LookupError (including KeyError) was raised
        5  XML syntax error or any other exception
    """
    (options, args) = _build_option_parser().parse_args()

    if options.old_syntax:
        oldSyntax()

    if options.trace:
        backend.enable_tracing = True

    if options.emitlinenumbers:
        backend.emitlinenumbers = True

    if options.includePathText:
        backend.includePath = options.includePathText.split(':')

    backend.encoding = options.encoding

    dirs = os.environ.get('YML_PATH', '.').split(':') + YML_DEFAULT_PATH
    backend.includePath.extend(dirs)

    if options.xml2yml:
        # --xml2yml is a shortcut for "-x -y <first xml2yml.ysl2 on the include path>"
        for directory in backend.includePath:
            name = os.path.join(directory, "xml2yml.ysl2")
            if os.path.isfile(name):
                options.yslt = name
                options.xml = True
                break
        else:
            sys.stderr.write("Error: Stylesheet xml2yml.ysl2 required for --xml2yml not found\n")
            sys.stderr.write("Please check your YML_PATH\n")
            sys.exit(1)

    if  (options.xslt and options.yslt) or (options.xslt and options.xpath) or (options.yslt and options.xpath):
        sys.stderr.write("Cannot combine --xpath, --xslt and --yslt params\n")
        sys.exit(1)

    try:
        ymlC = ymlCStyle()

        rtext = ""

        if not options.emptyinput:
            # mode "r": the "U" flag is no longer accepted by fileinput on
            # current Python 3; text mode already translates newlines
            files = fileinput.input(args, mode="r", openhook=fileinput.hook_encoded(options.encoding))

            if options.xml:
                rtext = "".join(files)
            else:
                result = parse(ymlC, files, True, comment)
                if options.parseonly:
                    print(result)
                    sys.exit(0)
                else:
                    rtext = backend.finish(result)

        if not rtext:
            rtext = "<empty/>"

        def ymldebug(context, text):
            """Extension function yml:debug(): echo text to stderr when tracing."""
            if options.trace:
                # sys.stderr is a text stream, so the message is written as str
                sys.stderr.write("Debug: " + u(text) + "\n")
            return ""

        def ymlassert(context, value, msg):
            """Extension function yml:assert(): raise YMLAssert when tracing and value is false."""
            if options.trace:
                if not value:
                    raise YMLAssert(msg)
            return ""

        ymlns = etree.FunctionNamespace("http://fdik.org/yml")
        ymlns.prefix = "yml"
        ymlns['debug'] = ymldebug
        ymlns['assert'] = ymlassert

        if options.xpath:
            tree = etree.fromstring(rtext)
            xpath = options.xpath
            # command line arguments are already str; decode only real bytes
            if isinstance(xpath, bytes):
                xpath = codecs.decode(xpath, options.encoding)
            ltree = tree.xpath(xpath)
            rtext = ""
            try:
                for rtree in ltree:
                    rtext += etree.tostring(rtree, pretty_print=options.pretty, encoding="unicode")
            except Exception:
                # the result could not be serialized item by item (it is not
                # a list of nodes); pass it through unchanged
                rtext = ltree

        elif options.yslt or options.xslt:
            params = {}

            if options.yslt:
                backend.clearAll()
                yscript = fileinput.input(options.yslt, mode="r", openhook=fileinput.hook_encoded(options.encoding))
                yresult = parse(ymlC, yscript, True, comment)
                ytext = backend.finish(yresult)
            else:
                yscript = fileinput.input(options.xslt, mode="r")
                ytext = "".join(yscript)

            doc = etree.fromstring(rtext)

            xsltree = etree.XML(ytext, base_url=os.path.abspath(yscript.filename()))
            transform = etree.XSLT(xsltree)

            # NOTE(security): both parameter dictionaries are evaluated with
            # eval(), so -d/-s execute arbitrary Python taken from the command
            # line. Only pass trusted values.
            if options.params:
                params = eval(options.params)
                for key, value in params.items():
                    if type(value) is not str:
                        params[key] = u(value)
            if options.stringparams:
                for key, value in eval(options.stringparams).items():
                    # wrap in quotes so the value is an XPath string literal
                    params[key] = "'" + u(value) + "'"

            rresult = transform(doc, **params)
            # lxml is somewhat buggy
            try:
                rtext = u(rresult)
            except Exception:
                rtext = etree.tostring(rresult, encoding="unicode")
                if not rtext:
                    rtext = str(rresult)

        if options.normalization != "none":
            rtext = unicodedata.normalize(options.normalization, rtext)

        if options.pretty:
            plaintext = etree.tostring(etree.fromstring(rtext), pretty_print=True, xml_declaration=True, encoding=options.encoding)
        else:
            if isinstance(rtext, str):
                plaintext = codecs.encode(rtext, options.encoding)
            else:
                plaintext = rtext

        # NOTE(review): plaintext is normally bytes here, and indexing bytes
        # yields an int, so this comparison with "\n" does not match and a
        # trailing newline is kept. The stdout branch below appears to
        # account for that (no extra newline for --pretty), so the behavior
        # is deliberately left unchanged.
        try:
            if plaintext[-1] == "\n":
                plaintext = plaintext[:-1]
        except (IndexError, TypeError):
            # empty or non-indexable output: nothing to strip
            pass

        if options.outputFile and options.outputFile != "-":
            with open(options.outputFile, "wb") as outfile:
                outfile.write(plaintext)
        else:
            sys.stdout.buffer.write(plaintext)
            if not options.pretty:
                print()

    except KeyboardInterrupt:
        w("\n")
        sys.exit(1)
    except YMLAssert as msg:
        w("YML Assertion failed: " + u(msg) + "\n")
        sys.exit(2)
    except LookupError as msg:
        # KeyError is a LookupError, so this single handler covers both
        w("not found: " + u(msg) + "\n")
        sys.exit(4)
    except etree.XMLSyntaxError as e:
        log = e.error_log.filter_from_level(etree.ErrorLevels.FATAL)
        for entry in log:
            w("XML error: " + u(entry.message) + "\n")
        sys.exit(5)
    except Exception as msg:
        w(msg)
        sys.exit(5)


# Run the processor only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()