0
|
1 |
#!/usr/bin/env python
|
|
2 |
# vim: set fileencoding=utf-8 :
|
|
3 |
|
|
4 |
"""\
|
4
|
5 |
YML/YSLT 2 processor version 5.5
|
0
|
6 |
Copyleft (c), 2009-2011 Volker Birk http://fdik.org/yml/
|
|
7 |
|
|
8 |
"""
|
|
9 |
|
|
10 |
import sys, os, codecs, locale
|
|
11 |
import fileinput, unicodedata
|
|
12 |
from optparse import OptionParser
|
|
13 |
|
|
14 |
try:
|
|
15 |
from lxml import etree
|
|
16 |
except:
|
|
17 |
sys.stderr.write("This program needs lxml, see http://codespeak.net/lxml/\n")
|
|
18 |
sys.exit(1)
|
|
19 |
|
|
20 |
from yml2 import ymlCStyle, comment, oldSyntax
|
|
21 |
from pyPEG import parse, u
|
|
22 |
import backend
|
|
23 |
|
|
24 |
def printInfo(option, opt_str, value, parser):
    """optparse callback for -V/--version: print the module banner and quit.

    The four parameters are the standard optparse callback arguments; none
    of them is used. The module docstring is written to stdout and the
    process then exits with status 0.
    """
    banner = __doc__
    sys.stdout.write(banner)
    raise SystemExit(0)
|
|
27 |
|
|
28 |
class YMLAssert(Exception):
    """Raised by the yml:assert extension function when an assertion fails.

    The top-level handler of this script reports it on stderr and exits
    with status 2.
    """
|
|
29 |
|
|
30 |
def w(msg):
    """Write a diagnostic message to stderr.

    msg may be a byte string, a unicode string or an exception instance.
    An exception is converted to its text plus a trailing newline; unicode
    text is encoded before it is written.
    """
    if isinstance(msg, BaseException):
        try:
            msg = str(msg) + "\n"
        except Exception:
            # str() can fail (e.g. UnicodeEncodeError for non-ASCII text);
            # fall back to pyPEG's u() helper, which presumably yields
            # unicode -- verify against pyPEG.
            msg = u(msg) + u"\n"
    if isinstance(msg, unicode):
        # sys.stderr.encoding is None when stderr is not a terminal (pipe,
        # file redirect); passing None to codecs.encode() would raise while
        # we are trying to report an error, so fall back to UTF-8.
        encoding = getattr(sys.stderr, "encoding", None) or "utf-8"
        # "replace" keeps the reporter from failing on characters the
        # target encoding cannot represent.
        msg = codecs.encode(msg, encoding, "replace")
    sys.stderr.write(msg)
|
|
39 |
|
|
40 |
# Command line definition. Options without an explicit dest (-P, -x, -Y) are
# stored by optparse under the name derived from their long option.
optParser = OptionParser()
optParser.add_option(
    "-C", "--old-syntax", action="store_true", dest="old_syntax",
    help="syntax of YML 2 version 1.x (compatibility mode)", default=False)
optParser.add_option(
    "-D", "--emit-linenumbers", action="store_true", dest="emitlinenumbers",
    help="emit line numbers into the resulting XML for debugging purposes", default=False)
optParser.add_option(
    "--debug", action="store_true", dest="trace",
    help="switch on tracing to stderr", default=False)
optParser.add_option(
    "-d", "--paramdict", dest="params", metavar="PARAMS",
    help="call X/YSLT script with dictionary PARAMS as parameters")
optParser.add_option(
    "-e", "--xpath", dest="xpath", metavar="XPATH",
    help="execute XPath expression XPATH and print result")
# locale.getdefaultlocale() may report no encoding at all (for example in the
# C locale). A None encoding would break every later encode/decode call, so
# fall back to UTF-8 in that case.
optParser.add_option(
    "-E", "--encoding", dest="encoding", metavar="ENCODING",
    default=locale.getdefaultlocale()[1] or "utf-8",
    help="encoding of input files (default to locale)")
optParser.add_option(
    "-I", "--include", dest="includePathText", metavar="INCLUDE_PATH",
    help="precede YML_PATH by a colon separated INCLUDE_PATH to search for include files")
optParser.add_option(
    "-m", "--omit-empty-parm-tags", action="store_true", dest="omitemptyparm",
    help="does nothing (only there for compatibility reasons)", default=False)
optParser.add_option(
    "-M", "--empty-input-document", action="store_true", dest="emptyinput",
    help="use an empty input document", default=False)
optParser.add_option(
    "-n", "--normalization", dest="normalization", metavar="NORMALIZATION", default="NFC",
    help="Unicode normalization (none, NFD, NFKD, NFC, NFKC, FCD, default is NFC)")
optParser.add_option(
    "-o", "--output", dest="outputFile", metavar="FILE",
    help="place output in file FILE")
optParser.add_option(
    "-p", "--parse-only", action="store_true", dest="parseonly",
    help="parse only, then output pyAST as text to stdout", default=False)
optParser.add_option(
    "-P", "--pretty", action="store_true", default=False,
    help="pretty print output adding whitespace")
optParser.add_option(
    "-s", "--stringparamdict", dest="stringparams", metavar="STRINGPARAMS",
    help="call X/YSLT script with dictionary STRINGPARAMS as string parameters")
optParser.add_option(
    "-x", "--xml", action="store_true", default=False,
    help="input document is XML already")
optParser.add_option(
    "-X", "--xslt", dest="xslt", metavar="XSLTSCRIPT",
    help="execute XSLT script XSLTSCRIPT")
optParser.add_option(
    "-y", "--yslt", dest="yslt", metavar="YSLTSCRIPT",
    help="execute YSLT script YSLTSCRIPT")
optParser.add_option(
    "-Y", "--xml2yml", action="store_true", default=False,
    help="convert XML to normalized YML code")
optParser.add_option(
    "-V", "--version", action="callback", callback=printInfo,
    help="show version info and exit")
(options, args) = optParser.parse_args()
|
|
79 |
|
|
80 |
# Push the parsed command line switches into the yml2/backend modules.
if options.old_syntax:
    oldSyntax()

if options.trace:
    backend.enable_tracing = True

if options.emitlinenumbers:
    backend.emitlinenumbers = True

if options.includePathText:
    backend.includePath = options.includePathText.split(':')

backend.encoding = options.encoding

# Directories from YML_PATH (default: current directory) are searched after
# any directories given with -I.
dirs = os.environ.get('YML_PATH', '.').split(':')
backend.includePath.extend(dirs)

if options.xml2yml:
    # Use the first xml2yml.ysl2 on the include path that can be opened.
    # If none can, name keeps the last candidate and the failure surfaces
    # later, when the script file is actually read.
    for directory in backend.includePath:
        name = directory + "/xml2yml.ysl2"
        try:
            open(name, "r").close()
        except EnvironmentError:
            # Missing or unreadable here; try the next directory. Only I/O
            # errors are skipped so that Ctrl-C and programming errors are
            # no longer silently swallowed.
            continue
        break

    options.yslt = name
    options.xml = True

# --xpath, --xslt and --yslt are mutually exclusive.
if (options.xslt and options.yslt) or (options.xslt and options.xpath) or (options.yslt and options.xpath):
    sys.stderr.write("Cannot combine --xpath, --xslt and --yslt params\n")
    sys.exit(1)
|
|
113 |
|
|
114 |
# Main program: read the input document, optionally evaluate an XPath
# expression or run an XSLT/YSLT script on it, then emit the result.
# Exit codes: 1 interrupted, 2 YML assertion failed, 4 lookup failure,
# 5 XML syntax error or any other exception.
try:
    ymlC = ymlCStyle()

    rtext = u""

    if not options.emptyinput:
        files = fileinput.input(args, mode="rU", openhook=fileinput.hook_encoded(options.encoding))
        # fileinput suffers from two nasty bugs:
        # - it ignores the open hook when reading stdin
        # - its iterator requires ctrl-D to be pressed twice on some platforms
        # so stdin is read and decoded here and handed to fileinput's buffer.
        if args in [[], ['-']]:
            files._files = []
            files._buffer = [unicode(line, options.encoding) for line in sys.stdin.readlines()]

        if options.xml:
            # Input is XML already: take it verbatim.
            rtext = "".join(files)
        else:
            result = parse(ymlC, files, True, comment)
            if options.parseonly:
                print(result)
                sys.exit(0)
            rtext = backend.finish(result)

    if not rtext:
        rtext = u"<empty/>"

    def ymldebug(context, text):
        """yml:debug extension function: trace text to stderr when --debug is on.

        Always returns the empty string.
        """
        if options.trace:
            sys.stderr.write("Debug: " + codecs.encode(u(text), options.encoding) + "\n")
        return ""

    def ymlassert(context, value, msg):
        """yml:assert extension function: raise YMLAssert(msg) if value is false.

        The check is only performed when --debug is on. Returns the empty
        string otherwise.
        """
        if options.trace and not value:
            raise YMLAssert(msg)
        return ""

    ymlns = etree.FunctionNamespace("http://fdik.org/yml")
    ymlns.prefix = "yml"
    ymlns['debug'] = ymldebug
    ymlns['assert'] = ymlassert

    if options.xpath:
        tree = etree.fromstring(rtext)
        ltree = tree.xpath(codecs.decode(options.xpath, options.encoding))
        try:
            # Node-set result: serialize every node and concatenate.
            rtext = u"".join(
                etree.tostring(rtree, pretty_print=options.pretty, encoding=unicode)
                for rtree in ltree
            )
        except Exception:
            # Result is not an iterable of serializable nodes (for example a
            # number, a boolean or a string): keep the raw value.
            rtext = ltree

    elif options.yslt or options.xslt:
        params = {}

        if options.yslt:
            backend.clearAll()
            yscript = fileinput.input(options.yslt, mode="rU", openhook=fileinput.hook_encoded(options.encoding))
            yresult = parse(ymlC, yscript, True, comment)
            ytext = backend.finish(yresult)
        else:
            yscript = fileinput.input(options.xslt, mode="rU")
            ytext = "".join(yscript)

        doc = etree.fromstring(rtext)

        xsltree = etree.XML(ytext, base_url=os.path.abspath(yscript.filename()))
        transform = etree.XSLT(xsltree)

        # NOTE(security): eval() executes arbitrary Python taken from the
        # command line. That is acceptable only while the invoking user is
        # trusted; never pass untrusted text to -d / -s.
        if options.params:
            params = eval(options.params)
            for key, value in params.iteritems():
                if not isinstance(value, unicode):
                    params[key] = u(value)
        if options.stringparams:
            for key, value in eval(options.stringparams).iteritems():
                # Quote the value so that XSLT receives it as a string literal.
                params[key] = u"'" + u(value) + u"'"

        rresult = transform(doc, **params)
        # lxml is somewhat buggy
        try:
            rtext = u(rresult)
        except Exception:
            rtext = etree.tostring(rresult, encoding=unicode)
            if not rtext:
                rtext = codecs.decode(str(rresult), "utf-8")

    # unicodedata.normalize() only accepts unicode text. An XPath result may
    # be a number, boolean, byte string or list, which is passed through
    # untouched and rendered with str() below.
    if options.normalization != "none" and isinstance(rtext, unicode):
        rtext = unicodedata.normalize(options.normalization, rtext)

    if options.pretty:
        plaintext = etree.tostring(etree.fromstring(rtext), pretty_print=True, xml_declaration=True, encoding=options.encoding)
    elif isinstance(rtext, unicode):
        plaintext = codecs.encode(rtext, options.encoding)
    else:
        plaintext = str(rtext)

    # Drop one trailing newline: print adds its own.
    if plaintext.endswith("\n"):
        plaintext = plaintext[:-1]

    if options.outputFile and options.outputFile != "-":
        # The context manager closes the file even if the write fails.
        with open(options.outputFile, "w") as outfile:
            outfile.write(plaintext)
    else:
        print(plaintext)

except KeyboardInterrupt:
    w("\n")
    sys.exit(1)
except YMLAssert as msg:
    w(u"YML Assertion failed: " + u(msg) + u"\n")
    sys.exit(2)
except LookupError as msg:
    # KeyError is a subclass of LookupError, so this also covers it.
    w(u"not found: " + u(msg) + u"\n")
    sys.exit(4)
except etree.XMLSyntaxError as e:
    log = e.error_log.filter_from_level(etree.ErrorLevels.FATAL)
    for entry in log:
        w(u"XML error: " + u(entry.message) + u"\n")
    sys.exit(5)
except Exception as msg:
    w(msg)
    sys.exit(5)
|