yml2/yml2proc
changeset 54 cefcaac752c9
parent 53 b94d4c5b9496
equal deleted inserted replaced
53:b94d4c5b9496 54:cefcaac752c9
    37         msg = codecs.encode(msg, sys.stderr.encoding)
    37         msg = codecs.encode(msg, sys.stderr.encoding)
    38     sys.stderr.write(msg)
    38     sys.stderr.write(msg)
    39 
    39 
    40 
    40 
    41 def main():
    41 def main():
    42     pass
    42     optParser = OptionParser()
    43 
    43     optParser.add_option("-C", "--old-syntax", action="store_true", dest="old_syntax",
    44 optParser = OptionParser()
    44             help="syntax of YML 2 version 1.x (compatibility mode)", default=False)
    45 optParser.add_option("-C", "--old-syntax", action="store_true", dest="old_syntax",
    45     optParser.add_option("-D", "--emit-linenumbers", action="store_true", dest="emitlinenumbers",
    46         help="syntax of YML 2 version 1.x (compatibility mode)", default=False)
    46             help="emit line numbers into the resulting XML for debugging purposes", default=False)
    47 optParser.add_option("-D", "--emit-linenumbers", action="store_true", dest="emitlinenumbers",
    47     optParser.add_option("--debug", action="store_true", dest="trace",
    48         help="emit line numbers into the resulting XML for debugging purposes", default=False)
    48             help="switch on tracing to stderr", default=False)
    49 optParser.add_option("--debug", action="store_true", dest="trace",
    49     optParser.add_option("-d", "--paramdict", dest="params", metavar="PARAMS",
    50         help="switch on tracing to stderr", default=False)
    50             help="call X/YSLT script with dictionary PARAMS as parameters")
    51 optParser.add_option("-d", "--paramdict", dest="params", metavar="PARAMS",
    51     optParser.add_option("-e", "--xpath", dest="xpath", metavar="XPATH",
    52         help="call X/YSLT script with dictionary PARAMS as parameters")
    52             help="execute XPath expression XPATH and print result")
    53 optParser.add_option("-e", "--xpath", dest="xpath", metavar="XPATH",
    53     optParser.add_option("-E", "--encoding", dest="encoding", metavar="ENCODING", default=locale.getdefaultlocale()[1],
    54         help="execute XPath expression XPATH and print result")
    54             help="encoding of input files (default to locale)")
    55 optParser.add_option("-E", "--encoding", dest="encoding", metavar="ENCODING", default=locale.getdefaultlocale()[1],
    55     optParser.add_option("-I", "--include", dest="includePathText", metavar="INCLUDE_PATH",
    56         help="encoding of input files (default to locale)")
    56             help="precede YML_PATH by a colon separated INCLUDE_PATH to search for include files")
    57 optParser.add_option("-I", "--include", dest="includePathText", metavar="INCLUDE_PATH",
    57     optParser.add_option("-m", "--omit-empty-parm-tags", action="store_true", dest="omitemptyparm",
    58         help="precede YML_PATH by a colon separated INCLUDE_PATH to search for include files")
    58             help="does nothing (only there for compatibility reasons)", default=False)
    59 optParser.add_option("-m", "--omit-empty-parm-tags", action="store_true", dest="omitemptyparm",
    59     optParser.add_option("-M", "--empty-input-document", action="store_true", dest="emptyinput",
    60         help="does nothing (only there for compatibility reasons)", default=False)
    60             help="use an empty input document", default=False)
    61 optParser.add_option("-M", "--empty-input-document", action="store_true", dest="emptyinput",
    61     optParser.add_option("-n", "--normalization", dest="normalization", metavar="NORMALIZATION", default="NFC",
    62         help="use an empty input document", default=False)
    62             help="Unicode normalization (none, NFD, NFKD, NFC, NFKC, FCD, default is NFC)")
    63 optParser.add_option("-n", "--normalization", dest="normalization", metavar="NORMALIZATION", default="NFC",
    63     optParser.add_option("-o", "--output", dest="outputFile", metavar="FILE",
    64         help="Unicode normalization (none, NFD, NFKD, NFC, NFKC, FCD, default is NFC)")
    64             help="place output in file FILE")
    65 optParser.add_option("-o", "--output", dest="outputFile", metavar="FILE",
    65     optParser.add_option("-p", "--parse-only", action="store_true", dest="parseonly",
    66         help="place output in file FILE")
    66             help="parse only, then output pyAST as text to stdout", default=False)
    67 optParser.add_option("-p", "--parse-only", action="store_true", dest="parseonly",
    67     optParser.add_option("-P", "--pretty", action="store_true", default=False,
    68         help="parse only, then output pyAST as text to stdout", default=False)
    68             help="pretty print output adding whitespace")
    69 optParser.add_option("-P", "--pretty", action="store_true", default=False,
    69     optParser.add_option("-s", "--stringparamdict", dest="stringparams", metavar="STRINGPARAMS",
    70         help="pretty print output adding whitespace")
    70             help="call X/YSLT script with dictionary STRINGPARAMS as string parameters")
    71 optParser.add_option("-s", "--stringparamdict", dest="stringparams", metavar="STRINGPARAMS",
    71     optParser.add_option("-x", "--xml", action="store_true", default=False,
    72         help="call X/YSLT script with dictionary STRINGPARAMS as string parameters")
    72             help="input document is XML already")
    73 optParser.add_option("-x", "--xml", action="store_true", default=False,
    73     optParser.add_option("-X", "--xslt", dest="xslt", metavar="XSLTSCRIPT",
    74         help="input document is XML already")
    74             help="execute XSLT script XSLTSCRIPT")
    75 optParser.add_option("-X", "--xslt", dest="xslt", metavar="XSLTSCRIPT",
    75     optParser.add_option("-y", "--yslt", dest="yslt", metavar="YSLTSCRIPT",
    76         help="execute XSLT script XSLTSCRIPT")
    76             help="execute YSLT script YSLTSCRIPT")
    77 optParser.add_option("-y", "--yslt", dest="yslt", metavar="YSLTSCRIPT",
    77     optParser.add_option("-Y", "--xml2yml", action="store_true", default=False,
    78         help="execute YSLT script YSLTSCRIPT")
    78             help="convert XML to normalized YML code")
    79 optParser.add_option("-Y", "--xml2yml", action="store_true", default=False,
    79     optParser.add_option("-V", "--version", action="callback", callback=printInfo, help="show version info and exit")
    80         help="convert XML to normalized YML code")
    80     (options, args) = optParser.parse_args()
    81 optParser.add_option("-V", "--version", action="callback", callback=printInfo, help="show version info and exit")
    81 
    82 (options, args) = optParser.parse_args()
    82     if options.old_syntax:
    83 
    83         oldSyntax()
    84 if options.old_syntax:
    84 
    85     oldSyntax()
    85     if options.trace:
    86 
    86         backend.enable_tracing = True
    87 if options.trace:
    87 
    88     backend.enable_tracing = True
    88     if options.emitlinenumbers:
    89 
    89         backend.emitlinenumbers = True
    90 if options.emitlinenumbers:
    90 
    91     backend.emitlinenumbers = True
    91     if options.includePathText:
    92 
    92         backend.includePath = options.includePathText.split(':')
    93 if options.includePathText:
    93 
    94     backend.includePath = options.includePathText.split(':')
    94     backend.encoding = options.encoding
    95 
    95 
    96 backend.encoding = options.encoding
    96     dirs = os.environ.get('YML_PATH', '.').split(':')
    97 
    97     backend.includePath.extend(dirs)
    98 dirs = os.environ.get('YML_PATH', '.').split(':')
    98 
    99 backend.includePath.extend(dirs)
    99     if options.xml2yml:
   100 
   100         for directory in backend.includePath:
   101 if options.xml2yml:
   101             try:
   102     for directory in backend.includePath:
   102                 name = directory + "/xml2yml.ysl2"
       
   103                 f = open(name, "r")
       
   104                 f.close()
       
   105                 break
       
   106             except:
       
   107                 pass
       
   108 
       
   109         options.yslt = name
       
   110         options.xml = True
       
   111 
       
   112     if  (options.xslt and options.yslt) or (options.xslt and options.xpath) or (options.yslt and options.xpath):
       
   113         sys.stderr.write("Cannot combine --xpath, --xslt and --yslt params\n")
       
   114         sys.exit(1)
       
   115 
       
   116     try:
       
   117         ymlC = ymlCStyle()
       
   118 
       
   119         rtext = u""
       
   120 
       
   121         if not options.emptyinput:
       
   122             files = fileinput.input(args, mode="rU", openhook=fileinput.hook_encoded(options.encoding))
       
   123 
       
   124             if options.xml:
       
   125                 rtext = ""
       
   126                 for line in files:
       
   127                     rtext += line
       
   128             else:
       
   129                 result = parse(ymlC, files, True, comment)
       
   130                 if options.parseonly:
       
   131                     print(result)
       
   132                     sys.exit(0)
       
   133                 else:
       
   134                     rtext = backend.finish(result)
       
   135 
       
   136         if not rtext:
       
   137             rtext = u"<empty/>"
       
   138 
       
   139         def ymldebug(context, text):
       
   140             if options.trace:
       
   141                 sys.stderr.write("Debug: " + codecs.encode(u(text), options.encoding) + "\n")
       
   142             return ""
       
   143 
       
   144         def ymlassert(context, value, msg):
       
   145             if options.trace:
       
   146                 if not value:
       
   147                     raise YMLAssert(msg)
       
   148             return ""
       
   149 
       
   150         ymlns = etree.FunctionNamespace("http://fdik.org/yml")
       
   151         ymlns.prefix = "yml"
       
   152         ymlns['debug'] = ymldebug
       
   153         ymlns['assert'] = ymlassert
       
   154 
       
   155         if options.xpath:
       
   156             tree = etree.fromstring(rtext)
       
   157             ltree = tree.xpath(codecs.decode(options.xpath, options.encoding))
       
   158             rtext = u""
       
   159             try:
       
   160                 for rtree in ltree:
       
   161                     rtext += etree.tostring(rtree, pretty_print=options.pretty, encoding=unicode)
       
   162             except:
       
   163                 rtext = ltree
       
   164 
       
   165         elif options.yslt or options.xslt:
       
   166             params = {}
       
   167 
       
   168             if options.yslt:
       
   169                 backend.clearAll()
       
   170                 yscript = fileinput.input(options.yslt, mode="rU", openhook=fileinput.hook_encoded(options.encoding))
       
   171                 yresult = parse(ymlC, yscript, True, comment)
       
   172                 ytext = backend.finish(yresult)
       
   173             else:
       
   174                 yscript = fileinput.input(options.xslt, mode="rU")
       
   175                 ytext = ""
       
   176                 for line in yscript:
       
   177                     ytext += line
       
   178 
       
   179             doc = etree.fromstring(rtext)
       
   180 
       
   181             xsltree = etree.XML(ytext, base_url=os.path.abspath(yscript.filename()))
       
   182             transform = etree.XSLT(xsltree)
       
   183             
       
   184             if options.params:
       
   185                 params = eval(options.params)
       
   186                 for key, value in params.iteritems():
       
   187                     if type(value) != unicode:
       
   188                         params[key] = u(value)
       
   189             if options.stringparams:
       
   190                 for key, value in eval(options.stringparams).iteritems():
       
   191                     params[key] = u"'" + u(value) + u"'"
       
   192 
       
   193             rresult = transform(doc, **params)
       
   194             # lxml is somewhat buggy
       
   195             try:
       
   196                 rtext = u(rresult)
       
   197             except:
       
   198                 rtext = etree.tostring(rresult, encoding=unicode)
       
   199                 if not rtext:
       
   200                     rtext = codecs.decode(str(rresult), "utf-8")
       
   201 
       
   202         if options.normalization != "none":
       
   203             rtext = unicodedata.normalize(options.normalization, rtext)
       
   204 
       
   205         if options.pretty:
       
   206             plaintext = etree.tostring(etree.fromstring(rtext), pretty_print=True, xml_declaration=True, encoding=options.encoding)
       
   207         else:
       
   208             if isinstance(rtext, unicode):
       
   209                 plaintext = codecs.encode(rtext, options.encoding)
       
   210             else:
       
   211                 plaintext = str(rtext)
       
   212 
   103         try:
   213         try:
   104             name = directory + "/xml2yml.ysl2"
   214             if plaintext[-1] == "\n":
   105             f = open(name, "r")
   215                 plaintext = plaintext[:-1]
   106             f.close()
   216         except: pass
   107             break
   217 
   108         except:
   218         if options.outputFile and options.outputFile != "-":
   109             pass
   219             outfile = open(options.outputFile, "w")
   110 
   220             outfile.write(plaintext)
   111     options.yslt = name
   221             outfile.close()
   112     options.xml = True
       
   113 
       
   114 if  (options.xslt and options.yslt) or (options.xslt and options.xpath) or (options.yslt and options.xpath):
       
   115     sys.stderr.write("Cannot combine --xpath, --xslt and --yslt params\n")
       
   116     sys.exit(1)
       
   117 
       
   118 try:
       
   119     ymlC = ymlCStyle()
       
   120 
       
   121     rtext = u""
       
   122 
       
   123     if not options.emptyinput:
       
   124         files = fileinput.input(args, mode="rU", openhook=fileinput.hook_encoded(options.encoding))
       
   125 
       
   126         if options.xml:
       
   127             rtext = ""
       
   128             for line in files:
       
   129                 rtext += line
       
   130         else:
   222         else:
   131             result = parse(ymlC, files, True, comment)
   223             print(plaintext)
   132             if options.parseonly:
   224 
   133                 print(result)
   225     except KeyboardInterrupt:
   134                 sys.exit(0)
   226         w("\n")
   135             else:
   227         sys.exit(1)
   136                 rtext = backend.finish(result)
   228     except YMLAssert as msg:
   137 
   229         w(u"YML Assertion failed: " + u(msg) + u"\n")
   138     if not rtext:
   230         sys.exit(2)
   139         rtext = u"<empty/>"
   231     except KeyError as msg:
   140 
   232         w(u"not found: " + u(msg) + u"\n")
   141     def ymldebug(context, text):
   233         sys.exit(4)
   142         if options.trace:
   234     except LookupError as msg:
   143             sys.stderr.write("Debug: " + codecs.encode(u(text), options.encoding) + "\n")
   235         w(u"not found: " + u(msg) + u"\n")
   144         return ""
   236         sys.exit(4)
   145 
   237     except etree.XMLSyntaxError as e:
   146     def ymlassert(context, value, msg):
   238         log = e.error_log.filter_from_level(etree.ErrorLevels.FATAL)
   147         if options.trace:
   239         for entry in log:
   148             if not value:
   240             w(u"XML error: " + u(entry.message) + u"\n")
   149                 raise YMLAssert(msg)
   241         sys.exit(5)
   150         return ""
   242     except Exception as msg:
   151 
   243         w(msg)
   152     ymlns = etree.FunctionNamespace("http://fdik.org/yml")
   244         sys.exit(5)
   153     ymlns.prefix = "yml"
       
   154     ymlns['debug'] = ymldebug
       
   155     ymlns['assert'] = ymlassert
       
   156 
       
   157     if options.xpath:
       
   158         tree = etree.fromstring(rtext)
       
   159         ltree = tree.xpath(codecs.decode(options.xpath, options.encoding))
       
   160         rtext = u""
       
   161         try:
       
   162             for rtree in ltree:
       
   163                 rtext += etree.tostring(rtree, pretty_print=options.pretty, encoding=unicode)
       
   164         except:
       
   165             rtext = ltree
       
   166 
       
   167     elif options.yslt or options.xslt:
       
   168         params = {}
       
   169 
       
   170         if options.yslt:
       
   171             backend.clearAll()
       
   172             yscript = fileinput.input(options.yslt, mode="rU", openhook=fileinput.hook_encoded(options.encoding))
       
   173             yresult = parse(ymlC, yscript, True, comment)
       
   174             ytext = backend.finish(yresult)
       
   175         else:
       
   176             yscript = fileinput.input(options.xslt, mode="rU")
       
   177             ytext = ""
       
   178             for line in yscript:
       
   179                 ytext += line
       
   180 
       
   181         doc = etree.fromstring(rtext)
       
   182 
       
   183         xsltree = etree.XML(ytext, base_url=os.path.abspath(yscript.filename()))
       
   184         transform = etree.XSLT(xsltree)
       
   185         
       
   186         if options.params:
       
   187             params = eval(options.params)
       
   188             for key, value in params.iteritems():
       
   189                 if type(value) != unicode:
       
   190                     params[key] = u(value)
       
   191         if options.stringparams:
       
   192             for key, value in eval(options.stringparams).iteritems():
       
   193                 params[key] = u"'" + u(value) + u"'"
       
   194 
       
   195         rresult = transform(doc, **params)
       
   196         # lxml is somewhat buggy
       
   197         try:
       
   198             rtext = u(rresult)
       
   199         except:
       
   200             rtext = etree.tostring(rresult, encoding=unicode)
       
   201             if not rtext:
       
   202                 rtext = codecs.decode(str(rresult), "utf-8")
       
   203 
       
   204     if options.normalization != "none":
       
   205         rtext = unicodedata.normalize(options.normalization, rtext)
       
   206 
       
   207     if options.pretty:
       
   208         plaintext = etree.tostring(etree.fromstring(rtext), pretty_print=True, xml_declaration=True, encoding=options.encoding)
       
   209     else:
       
   210         if isinstance(rtext, unicode):
       
   211             plaintext = codecs.encode(rtext, options.encoding)
       
   212         else:
       
   213             plaintext = str(rtext)
       
   214 
       
   215     try:
       
   216         if plaintext[-1] == "\n":
       
   217             plaintext = plaintext[:-1]
       
   218     except: pass
       
   219 
       
   220     if options.outputFile and options.outputFile != "-":
       
   221         outfile = open(options.outputFile, "w")
       
   222         outfile.write(plaintext)
       
   223         outfile.close()
       
   224     else:
       
   225         print(plaintext)
       
   226 
       
   227 except KeyboardInterrupt:
       
   228     w("\n")
       
   229     sys.exit(1)
       
   230 except YMLAssert as msg:
       
   231     w(u"YML Assertion failed: " + u(msg) + u"\n")
       
   232     sys.exit(2)
       
   233 except KeyError as msg:
       
   234     w(u"not found: " + u(msg) + u"\n")
       
   235     sys.exit(4)
       
   236 except LookupError as msg:
       
   237     w(u"not found: " + u(msg) + u"\n")
       
   238     sys.exit(4)
       
   239 except etree.XMLSyntaxError as e:
       
   240     log = e.error_log.filter_from_level(etree.ErrorLevels.FATAL)
       
   241     for entry in log:
       
   242         w(u"XML error: " + u(entry.message) + u"\n")
       
   243     sys.exit(5)
       
   244 except Exception as msg:
       
   245     w(msg)
       
   246     sys.exit(5)
       
   247 
   245 
   248 
   246 
   249 if __name__ == "__main__":
   247 if __name__ == "__main__":
   250     sys.exit(main())
   248     sys.exit(main())
   251 
   249