|
1 #!/usr/bin/env python |
|
2 # vim: set fileencoding=utf-8 : |
|
3 |
|
4 """\ |
|
5 YML/YSLT 2 processor version 5.4 |
|
6 Copyleft (c), 2009-2011 Volker Birk http://fdik.org/yml/ |
|
7 |
|
8 """ |
|
9 |
|
10 import sys, os, codecs, locale |
|
11 import fileinput, unicodedata |
|
12 from optparse import OptionParser |
|
13 |
|
14 try: |
|
15 from lxml import etree |
|
16 except: |
|
17 sys.stderr.write("This program needs lxml, see http://codespeak.net/lxml/\n") |
|
18 sys.exit(1) |
|
19 |
|
20 from yml2 import ymlCStyle, comment, oldSyntax |
|
21 from pyPEG import parse, u |
|
22 import backend |
|
23 |
|
def printInfo(option, opt_str, value, parser):
    """optparse callback: print this module's docstring and exit.

    Registered for the -V/--version option.  The four parameters are the
    ones optparse hands to every callback; none of them is used here.

    Raises:
        SystemExit: always, with exit status 0.
    """
    banner = __doc__
    sys.stdout.write(banner)
    raise SystemExit(0)
|
27 |
|
class YMLAssert(Exception):
    """Signals a failed YML assertion; the exception argument is the message."""
|
29 |
|
def w(msg):
    """Write a diagnostic message to stderr.

    Args:
        msg: a byte string, a unicode string, or an exception instance.
            An exception is converted to its message text with a newline
            appended; strings are written as given.

    Unicode text is encoded before it is written.  sys.stderr.encoding may
    be None (for instance when stderr is redirected), so the locale's
    preferred encoding and finally ASCII are used as fallbacks, and
    characters that cannot be encoded are replaced instead of raising from
    inside the error reporter.
    """
    if isinstance(msg, BaseException):
        try:
            msg = str(msg) + "\n"
        except Exception:
            # str() failed on the exception (presumably a non-ASCII
            # message); fall back to pyPEG's u() conversion.
            msg = u(msg) + u"\n"
    if isinstance(msg, unicode):
        encoding = (getattr(sys.stderr, "encoding", None)
                    or locale.getpreferredencoding()
                    or "ascii")
        msg = codecs.encode(msg, encoding, "replace")
    sys.stderr.write(msg)
|
39 |
|
# Command line definition.  Options are registered in the order in which
# they appear in the --help output.
optParser = OptionParser()
optParser.add_option(
    "-C", "--old-syntax",
    action="store_true", dest="old_syntax", default=False,
    help="syntax of YML 2 version 1.x (compatibility mode)")
optParser.add_option(
    "-D", "--emit-linenumbers",
    action="store_true", dest="emitlinenumbers", default=False,
    help="emit line numbers into the resulting XML for debugging purposes")
optParser.add_option(
    "--debug",
    action="store_true", dest="trace", default=False,
    help="switch on tracing to stderr")
optParser.add_option(
    "-d", "--paramdict",
    dest="params", metavar="PARAMS",
    help="call X/YSLT script with dictionary PARAMS as parameters")
optParser.add_option(
    "-e", "--xpath",
    dest="xpath", metavar="XPATH",
    help="execute XPath expression XPATH and print result")
# locale.getdefaultlocale() may report None as the encoding (e.g. in the
# C/POSIX locale).  A None encoding cannot be used by the encode/decode
# calls made with options.encoding, so fall back to UTF-8 in that case.
optParser.add_option(
    "-E", "--encoding",
    dest="encoding", metavar="ENCODING",
    default=locale.getdefaultlocale()[1] or "UTF-8",
    help="encoding of input files (default to locale)")
optParser.add_option(
    "-I", "--include",
    dest="includePathText", metavar="INCLUDE_PATH",
    help="precede YML_PATH by a colon separated INCLUDE_PATH to search for include files")
optParser.add_option(
    "-m", "--omit-empty-parm-tags",
    action="store_true", dest="omitemptyparm", default=False,
    help="does nothing (only there for compatibility reasons)")
optParser.add_option(
    "-M", "--empty-input-document",
    action="store_true", dest="emptyinput", default=False,
    help="use an empty input document")
optParser.add_option(
    "-n", "--normalization",
    dest="normalization", metavar="NORMALIZATION", default="NFC",
    help="Unicode normalization (none, NFD, NFKD, NFC, NFKC, FCD, default is NFC)")
optParser.add_option(
    "-o", "--output",
    dest="outputFile", metavar="FILE",
    help="place output in file FILE")
optParser.add_option(
    "-p", "--parse-only",
    action="store_true", dest="parseonly", default=False,
    help="parse only, then output pyAST as text to stdout")
optParser.add_option(
    "-P", "--pretty",
    action="store_true", default=False,
    help="pretty print output adding whitespace")
optParser.add_option(
    "-s", "--stringparamdict",
    dest="stringparams", metavar="STRINGPARAMS",
    help="call X/YSLT script with dictionary STRINGPARAMS as string parameters")
optParser.add_option(
    "-x", "--xml",
    action="store_true", default=False,
    help="input document is XML already")
optParser.add_option(
    "-X", "--xslt",
    dest="xslt", metavar="XSLTSCRIPT",
    help="execute XSLT script XSLTSCRIPT")
optParser.add_option(
    "-y", "--yslt",
    dest="yslt", metavar="YSLTSCRIPT",
    help="execute YSLT script YSLTSCRIPT")
optParser.add_option(
    "-Y", "--xml2yml",
    action="store_true", default=False,
    help="convert XML to normalized YML code")
optParser.add_option(
    "-V", "--version",
    action="callback", callback=printInfo,
    help="show version info and exit")

# options: the parsed switches; args: the remaining positional arguments
# (the input files).
(options, args) = optParser.parse_args()
|
79 |
|
# Apply the parsed command line switches to the parser and the backend.
if options.old_syntax:
    oldSyntax()

if options.trace:
    backend.enable_tracing = True

if options.emitlinenumbers:
    backend.emitlinenumbers = True

# -I replaces the backend's include path; the YML_PATH directories
# (default: the current directory) are appended to it in either case.
if options.includePathText:
    backend.includePath = options.includePathText.split(':')

backend.encoding = options.encoding

dirs = os.environ.get('YML_PATH', '.').split(':')
backend.includePath.extend(dirs)

if options.xml2yml:
    # -Y is a shortcut for "-x -y xml2yml.ysl2": use the script from the
    # first include directory in which it can be opened for reading.
    for directory in backend.includePath:
        name = directory + "/xml2yml.ysl2"
        try:
            f = open(name, "r")
        except (IOError, OSError):
            # not readable here, try the next directory
            continue
        f.close()
        break

    # If no directory holds the script, name is the last candidate tried;
    # the failure then surfaces when that path is opened for processing.
    options.yslt = name
    options.xml = True

# --xpath, --xslt and --yslt are mutually exclusive.
if (options.xslt and options.yslt) or (options.xslt and options.xpath) or (options.yslt and options.xpath):
    sys.stderr.write("Cannot combine --xpath, --xslt and --yslt params\n")
    sys.exit(1)
|
113 |
|
# Main processing pipeline:
#   1. read the input (YML source, XML, or nothing with -M) into rtext
#   2. optionally evaluate an XPath expression or run an X/YSLT script on it
#   3. normalize, encode and write the result
# Every failure is reported through w() and mapped to an exit status.
try:
    ymlC = ymlCStyle()

    rtext = u""

    if not options.emptyinput:
        files = fileinput.input(args, mode="rU", openhook=fileinput.hook_encoded(options.encoding))
        # fileinput suffers from two nasty bugs:
        #  - it ignores the open hook when reading stdin
        #  - its iterator requires ctrl-D to be pressed twice on some platforms
        # so stdin is read and decoded here and handed to fileinput through
        # its private _files/_buffer attributes.
        if args in [[], ['-']]:
            files._files = []
            files._buffer = [unicode(line, options.encoding) for line in sys.stdin.readlines()]

        if options.xml:
            # the input is XML already: take it over unchanged
            rtext = "".join(files)
        else:
            result = parse(ymlC, files, True, comment)
            if options.parseonly:
                print(result)
                sys.exit(0)
            else:
                rtext = backend.finish(result)

    if not rtext:
        rtext = u"<empty/>"

    def ymldebug(context, text):
        """yml:debug() extension function: echo text to stderr when tracing."""
        if options.trace:
            sys.stderr.write("Debug: " + codecs.encode(u(text), options.encoding) + "\n")
        return ""

    def ymlassert(context, value, msg):
        """yml:assert() extension function: raise YMLAssert(msg) when
        tracing is on and value is false."""
        if options.trace:
            if not value:
                raise YMLAssert(msg)
        return ""

    # make both functions callable from XPath as yml:debug() / yml:assert()
    ymlns = etree.FunctionNamespace("http://fdik.org/yml")
    ymlns.prefix = "yml"
    ymlns['debug'] = ymldebug
    ymlns['assert'] = ymlassert

    if options.xpath:
        tree = etree.fromstring(rtext)
        ltree = tree.xpath(codecs.decode(options.xpath, options.encoding))
        rtext = u""
        try:
            for rtree in ltree:
                rtext += etree.tostring(rtree, pretty_print=options.pretty, encoding=unicode)
        except Exception:
            # the result could not be serialized item by item (presumably
            # a string, number or boolean result rather than a node list):
            # output the raw result instead
            rtext = ltree

    elif options.yslt or options.xslt:
        params = {}

        if options.yslt:
            # compile the YSLT script to XSLT text first
            backend.clearAll()
            yscript = fileinput.input(options.yslt, mode="rU", openhook=fileinput.hook_encoded(options.encoding))
            yresult = parse(ymlC, yscript, True, comment)
            ytext = backend.finish(yresult)
        else:
            yscript = fileinput.input(options.xslt, mode="rU")
            ytext = "".join(yscript)

        doc = etree.fromstring(rtext)

        xsltree = etree.XML(ytext, base_url=os.path.abspath(yscript.filename()))
        transform = etree.XSLT(xsltree)

        # NOTE(security): the -d and -s arguments are evaluated as Python
        # expressions with eval(); only pass trusted values on the command
        # line.
        if options.params:
            params = eval(options.params)
            for key, value in params.iteritems():
                if not isinstance(value, unicode):
                    params[key] = u(value)
        if options.stringparams:
            for key, value in eval(options.stringparams).iteritems():
                # wrap in quotes so the value is passed as an XPath string literal
                params[key] = u"'" + u(value) + u"'"

        rresult = transform(doc, **params)
        # lxml is somewhat buggy
        try:
            rtext = u(rresult)
        except Exception:
            rtext = etree.tostring(rresult, encoding=unicode)
            if not rtext:
                rtext = codecs.decode(str(rresult), "utf-8")

    # unicodedata.normalize() only accepts unicode objects; a raw XPath
    # result of another type is passed through unchanged instead of
    # aborting with a TypeError.
    if options.normalization != "none" and isinstance(rtext, unicode):
        rtext = unicodedata.normalize(options.normalization, rtext)

    if options.pretty:
        plaintext = etree.tostring(etree.fromstring(rtext), pretty_print=True, xml_declaration=True, encoding=options.encoding)
    else:
        if isinstance(rtext, unicode):
            plaintext = codecs.encode(rtext, options.encoding)
        else:
            plaintext = str(rtext)

    # drop one trailing newline (the slice is safe for empty output);
    # print adds one back when writing to stdout
    if plaintext[-1:] == "\n":
        plaintext = plaintext[:-1]

    if options.outputFile and options.outputFile != "-":
        outfile = open(options.outputFile, "w")
        try:
            outfile.write(plaintext)
        finally:
            # close the file even when the write fails
            outfile.close()
    else:
        print(plaintext)

except KeyboardInterrupt:
    w("\n")
    sys.exit(1)
except YMLAssert as msg:
    w(u"YML Assertion failed: " + u(msg) + u"\n")
    sys.exit(2)
except LookupError as msg:
    # also covers KeyError, which is a subclass of LookupError
    w(u"not found: " + u(msg) + u"\n")
    sys.exit(4)
except etree.XMLSyntaxError as e:
    log = e.error_log.filter_from_level(etree.ErrorLevels.FATAL)
    for entry in log:
        w(u"XML error: " + u(entry.message) + u"\n")
    sys.exit(5)
except Exception as msg:
    w(msg)
    sys.exit(5)