1 #!/usr/bin/env python |
2 # vim: set fileencoding=utf-8 : |
3 |
4 """\ |
5 YML/YSLT 2 processor version 5.4 |
6 Copyleft (c), 2009-2011 Volker Birk http://fdik.org/yml/ |
7 |
8 """ |
9 |
10 import sys, os, codecs, locale |
11 import fileinput, unicodedata |
12 from optparse import OptionParser |
13 |
14 try: |
15 from lxml import etree |
16 except: |
17 sys.stderr.write("This program needs lxml, see http://codespeak.net/lxml/\n") |
18 sys.exit(1) |
19 |
20 from yml2 import ymlCStyle, comment, oldSyntax |
21 from pyPEG import parse, u |
22 import backend |
23 |
def printInfo(option, opt_str, value, parser):
    """optparse callback for -V/--version: print the module docstring, exit 0.

    The four parameters are the standard optparse callback arguments; none
    of them is used.
    """
    banner = __doc__
    sys.stdout.write(banner)
    raise SystemExit(0)
27 |
class YMLAssert(Exception):
    """Raised by the yml:assert extension function when an assertion fails.

    The top-level handler reports it as "YML Assertion failed" and exits
    with status 2.
    """
29 |
def w(msg):
    """Write a message to stderr.

    msg may be an exception, a unicode string or a byte string. An
    exception is converted to its text followed by a newline; unicode is
    encoded before writing; a byte string is written unchanged.
    """
    if isinstance(msg, BaseException):
        try:
            msg = str(msg) + "\n"
        except Exception:
            # str() can fail (e.g. UnicodeEncodeError for a non-ASCII
            # message); fall back to the unicode conversion helper.
            msg = u(msg) + u"\n"
    if isinstance(msg, unicode):
        # sys.stderr.encoding is None when stderr is not a terminal (pipe or
        # file); fall back to the locale's preferred encoding, then UTF-8.
        encoding = (getattr(sys.stderr, "encoding", None)
                    or locale.getpreferredencoding()
                    or "utf-8")
        # "replace" keeps the error reporter from raising on characters the
        # target encoding cannot represent.
        msg = msg.encode(encoding, "replace")
    sys.stderr.write(msg)
39 |
# Command-line interface. The parsed switches end up in `options`, the
# remaining positional arguments (the input files) in `args`.
optParser = OptionParser()
optParser.add_option("-C", "--old-syntax", action="store_true", dest="old_syntax",
        help="syntax of YML 2 version 1.x (compatibility mode)", default=False)
optParser.add_option("-D", "--emit-linenumbers", action="store_true", dest="emitlinenumbers",
        help="emit line numbers into the resulting XML for debugging purposes", default=False)
optParser.add_option("--debug", action="store_true", dest="trace",
        help="switch on tracing to stderr", default=False)
optParser.add_option("-d", "--paramdict", dest="params", metavar="PARAMS",
        help="call X/YSLT script with dictionary PARAMS as parameters")
optParser.add_option("-e", "--xpath", dest="xpath", metavar="XPATH",
        help="execute XPath expression XPATH and print result")
# locale.getdefaultlocale() yields None for the encoding when it cannot be
# determined (e.g. LANG=C); fall back to UTF-8 so options.encoding is always
# a usable codec name for the encode/decode calls further down.
optParser.add_option("-E", "--encoding", dest="encoding", metavar="ENCODING",
        default=locale.getdefaultlocale()[1] or "utf-8",
        help="encoding of input files (default to locale)")
optParser.add_option("-I", "--include", dest="includePathText", metavar="INCLUDE_PATH",
        help="precede YML_PATH by a colon separated INCLUDE_PATH to search for include files")
optParser.add_option("-m", "--omit-empty-parm-tags", action="store_true", dest="omitemptyparm",
        help="does nothing (only there for compatibility reasons)", default=False)
optParser.add_option("-M", "--empty-input-document", action="store_true", dest="emptyinput",
        help="use an empty input document", default=False)
# NOTE(review): the help text lists FCD, but the value is passed straight to
# unicodedata.normalize(), which only accepts NFC, NFKC, NFD and NFKD.
optParser.add_option("-n", "--normalization", dest="normalization", metavar="NORMALIZATION", default="NFC",
        help="Unicode normalization (none, NFD, NFKD, NFC, NFKC, FCD, default is NFC)")
optParser.add_option("-o", "--output", dest="outputFile", metavar="FILE",
        help="place output in file FILE")
optParser.add_option("-p", "--parse-only", action="store_true", dest="parseonly",
        help="parse only, then output pyAST as text to stdout", default=False)
optParser.add_option("-P", "--pretty", action="store_true", default=False,
        help="pretty print output adding whitespace")
optParser.add_option("-s", "--stringparamdict", dest="stringparams", metavar="STRINGPARAMS",
        help="call X/YSLT script with dictionary STRINGPARAMS as string parameters")
optParser.add_option("-x", "--xml", action="store_true", default=False,
        help="input document is XML already")
optParser.add_option("-X", "--xslt", dest="xslt", metavar="XSLTSCRIPT",
        help="execute XSLT script XSLTSCRIPT")
optParser.add_option("-y", "--yslt", dest="yslt", metavar="YSLTSCRIPT",
        help="execute YSLT script YSLTSCRIPT")
optParser.add_option("-Y", "--xml2yml", action="store_true", default=False,
        help="convert XML to normalized YML code")
optParser.add_option("-V", "--version", action="callback", callback=printInfo, help="show version info and exit")
(options, args) = optParser.parse_args()
79 |
# Apply the parsed command-line options to the parser and the backend module.
if options.old_syntax:
    oldSyntax()

if options.trace:
    backend.enable_tracing = True

if options.emitlinenumbers:
    backend.emitlinenumbers = True

if options.includePathText:
    backend.includePath = options.includePathText.split(':')

backend.encoding = options.encoding

# Directories from YML_PATH (default: the current directory) are searched
# after any directories given with -I.
dirs = os.environ.get('YML_PATH', '.').split(':')
backend.includePath.extend(dirs)

if options.xml2yml:
    # --xml2yml runs the xml2yml.ysl2 script on XML input; the script is
    # taken from the first include-path directory where it can be opened.
    for directory in backend.includePath:
        name = directory + "/xml2yml.ysl2"
        try:
            # Opening the file is the readability probe.
            f = open(name, "r")
        except (IOError, OSError):
            continue
        f.close()
        break
    else:
        # Previously the last path tried was used silently and the failure
        # only surfaced later when the script was opened; report it here.
        sys.stderr.write("xml2yml.ysl2 not found in include path\n")
        sys.exit(5)

    options.yslt = name
    options.xml = True

# --xpath, --xslt and --yslt are mutually exclusive.
if (options.xslt and options.yslt) or (options.xslt and options.xpath) or (options.yslt and options.xpath):
    sys.stderr.write("Cannot combine --xpath, --xslt and --yslt params\n")
    sys.exit(1)
113 |
114 try: |
115 ymlC = ymlCStyle() |
116 |
117 rtext = u"" |
118 |
119 if not options.emptyinput: |
120 files = fileinput.input(args, mode="rU", openhook=fileinput.hook_encoded(options.encoding)) |
121 # fileinput suffer from two nasty bugs : |
122 # - ignoring open hook with stdin |
123 # - iterator requires ctrl-D to be pressed twice on siome platform |
124 if args in [[],['-']] : |
125 files._files=[] |
126 files._buffer=[unicode(line, options.encoding) for line in sys.stdin.readlines()] |
127 |
128 if options.xml: |
129 rtext = "" |
130 for line in files: |
131 rtext += line |
132 else: |
133 result = parse(ymlC, files, True, comment) |
134 if options.parseonly: |
135 print result |
136 sys.exit(0) |
137 else: |
138 rtext = backend.finish(result) |
139 |
140 if not rtext: |
141 rtext = u"<empty/>" |
142 |
143 def ymldebug(context, text): |
144 if options.trace: |
145 sys.stderr.write("Debug: " + codecs.encode(u(text), options.encoding) + "\n") |
146 return "" |
147 |
148 def ymlassert(context, value, msg): |
149 if options.trace: |
150 if not value: |
151 raise YMLAssert(msg) |
152 return "" |
153 |
154 ymlns = etree.FunctionNamespace("http://fdik.org/yml") |
155 ymlns.prefix = "yml" |
156 ymlns['debug'] = ymldebug |
157 ymlns['assert'] = ymlassert |
158 |
159 if options.xpath: |
160 tree = etree.fromstring(rtext) |
161 ltree = tree.xpath(codecs.decode(options.xpath, options.encoding)) |
162 rtext = u"" |
163 try: |
164 for rtree in ltree: |
165 rtext += etree.tostring(rtree, pretty_print=options.pretty, encoding=unicode) |
166 except: |
167 rtext = ltree |
168 |
169 elif options.yslt or options.xslt: |
170 params = {} |
171 |
172 if options.yslt: |
173 backend.clearAll() |
174 yscript = fileinput.input(options.yslt, mode="rU", openhook=fileinput.hook_encoded(options.encoding)) |
175 yresult = parse(ymlC, yscript, True, comment) |
176 ytext = backend.finish(yresult) |
177 else: |
178 yscript = fileinput.input(options.xslt, mode="rU") |
179 ytext = "" |
180 for line in yscript: |
181 ytext += line |
182 |
183 doc = etree.fromstring(rtext) |
184 |
185 xsltree = etree.XML(ytext, base_url=os.path.abspath(yscript.filename())) |
186 transform = etree.XSLT(xsltree) |
187 |
188 if options.params: |
189 params = eval(options.params) |
190 for key, value in params.iteritems(): |
191 if type(value) != unicode: |
192 params[key] = u(value) |
193 if options.stringparams: |
194 for key, value in eval(options.stringparams).iteritems(): |
195 params[key] = u"'" + u(value) + u"'" |
196 |
197 rresult = transform(doc, **params) |
198 # lxml is somewhat buggy |
199 try: |
200 rtext = u(rresult) |
201 except: |
202 rtext = etree.tostring(rresult, encoding=unicode) |
203 if not rtext: |
204 rtext = codecs.decode(str(rresult), "utf-8") |
205 |
206 if options.normalization != "none": |
207 rtext = unicodedata.normalize(options.normalization, rtext) |
208 |
209 if options.pretty: |
210 plaintext = etree.tostring(etree.fromstring(rtext), pretty_print=True, xml_declaration=True, encoding=options.encoding) |
211 else: |
212 if isinstance(rtext, unicode): |
213 plaintext = codecs.encode(rtext, options.encoding) |
214 else: |
215 plaintext = str(rtext) |
216 |
217 try: |
218 if plaintext[-1] == "\n": |
219 plaintext = plaintext[:-1] |
220 except: pass |
221 |
222 if options.outputFile and options.outputFile != "-": |
223 outfile = open(options.outputFile, "w") |
224 outfile.write(plaintext) |
225 outfile.close() |
226 else: |
227 print plaintext |
228 |
229 except KeyboardInterrupt: |
230 w("\n") |
231 sys.exit(1) |
232 except YMLAssert, msg: |
233 w(u"YML Assertion failed: " + u(msg) + u"\n") |
234 sys.exit(2) |
235 except KeyError, msg: |
236 w(u"not found: " + u(msg) + u"\n") |
237 sys.exit(4) |
238 except LookupError, msg: |
239 w(u"not found: " + u(msg) + u"\n") |
240 sys.exit(4) |
241 except etree.XMLSyntaxError, e: |
242 log = e.error_log.filter_from_level(etree.ErrorLevels.FATAL) |
243 for entry in log: |
244 w(u"XML error: " + u(entry.message) + u"\n") |
245 sys.exit(5) |
246 except Exception, msg: |
247 w(msg) |
248 sys.exit(5) |