|
1 # |
|
2 # minixsv, Release 0.3 |
|
3 # file: xmlifbase.py |
|
4 # |
|
5 # abstract XML interface class |
|
6 # |
|
7 # history: |
|
8 # 2004-09-09 rl created |
|
9 # 2004-09-22 rl XML interface classes completely re-designed |
|
10 # 2004-09-23 rl added filename and line number support |
|
11 # 2004-09-29 rl URL processing added |
|
12 # 2004-10-01 rl URL processing improved |
|
13 # 2004-10-03 rl xPath processing moved from pyxsval.py |
|
14 # 2004-10-12 rl XML text processing added |
|
15 # |
|
16 # Copyright (c) 2004 by Roland Leuthe. All rights reserved. |
|
17 # |
|
18 # -------------------------------------------------------------------- |
|
19 # The minixsv XML schema validator is |
|
20 # |
|
21 # Copyright (c) 2004 by Roland Leuthe |
|
22 # |
|
23 # By obtaining, using, and/or copying this software and/or its |
|
24 # associated documentation, you agree that you have read, understood, |
|
25 # and will comply with the following terms and conditions: |
|
26 # |
|
27 # Permission to use, copy, modify, and distribute this software and |
|
28 # its associated documentation for any purpose and without fee is |
|
29 # hereby granted, provided that the above copyright notice appears in |
|
30 # all copies, and that both that copyright notice and this permission |
|
31 # notice appear in supporting documentation, and that the name of |
|
32 # the author not be used in advertising or publicity |
|
33 # pertaining to distribution of the software without specific, written |
|
34 # prior permission. |
|
35 # |
|
36 # THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD |
|
37 # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- |
|
38 # ABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR |
|
39 # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY |
|
40 # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, |
|
41 # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS |
|
42 # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE |
|
43 # OF THIS SOFTWARE. |
|
44 # -------------------------------------------------------------------- |
|
45 |
|
46 |
|
47 import string |
|
48 import os |
|
49 import re |
|
50 import urllib |
|
51 import urlparse |
|
52 |
|
53 |
|
54 _reSplitUrlApplication = re.compile (r"(file|http|ftp|gopher):(.+)") # "file:///d:\test.xml" => "file" + "///d:\test.xml" |
|
55 |
|
56 |
|
########################################
# define XML interface base class
# All methods that are not implemented here have to be overridden by the derived class!
#
|
61 |
|
class XmlInterfaceBase:
    """Abstract base class of the XML interface.

    Every method that raises NotImplementedError has to be overridden by the
    derived class.
    """

    def __init__(self, verbose):
        """Remember 'verbose'; a true value makes the include processing
        print the URL of every included file."""
        self.verbose = verbose

    ##########################################################
    # parsing

    def parse(self, file, baseUrl):
        """Call the parser for 'file' and return the XML tree of the parsed file.

        'file' may be a file path or a URI.  Has to be overridden.
        """
        raise NotImplementedError

    def parseString(self, text):
        """Call the parser for 'text' and return the XML tree of the parsed
        XML text string.  Has to be overridden.
        """
        raise NotImplementedError

    def extParse(self, file, baseUrl):
        """Call the parser for 'file' and process all include directives.

        Returns the XML tree wrapper of the parsed XML file.
        """
        return self._expandIncludes(self.parse(file, baseUrl))

    def extParseString(self, text):
        """Call the parser for 'text' and process all include directives.

        Returns the XML tree wrapper of the parsed XML text string.
        """
        return self._expandIncludes(self.parseString(text))

    def _expandIncludes(self, treeWrapper):
        """Replace every 'include' child of the root node by the included file.

        The 'include' elements are looked up in the namespace of the root
        tag.  For each of them the file named by its 'schemaLocation'
        attribute is inserted (relative to the absolute URL of the root
        node) and the include element itself is removed afterwards.
        Returns 'treeWrapper'.
        """
        rootNodeWrapper = treeWrapper.getRootNode()
        nsAlias = self.extractNamespaceAlias(rootNodeWrapper.getTagName())
        rootAbsUrl = rootNodeWrapper.getAbsUrl()

        for includeNodeWrapper in rootNodeWrapper.getChildrenNS(nsAlias, "include"):
            includeUrl = includeNodeWrapper.getAttribute("schemaLocation")
            if self.verbose:
                # single parenthesised argument: same output on Python 2 and 3
                print("including %s..." % (includeUrl,))
            treeWrapper.insertSubtree(includeNodeWrapper, includeUrl, rootAbsUrl)
            rootNodeWrapper.removeChild(includeNodeWrapper)
        return treeWrapper

    ##########################################################
    # name handling

    def extractNamespaceAlias(self, ncName):
        """Return the namespace part of 'ncName' (including the trailing
        '}' or ':'), or "" if 'ncName' has no namespace part."""
        return self._splitNamespaceLocalName(ncName)[0]

    def extractLocalName(self, ncName):
        """Return the local tag name of 'ncName' (without namespace part)."""
        return self._splitNamespaceLocalName(ncName)[1]

    def addNamespaceAlias(self, namespaceAlias, localName):
        """Return 'localName' prefixed by 'namespaceAlias'.

        'namespaceAlias' is prepended as is, so it already has to contain
        its separator (as returned by extractNamespaceAlias).
        """
        if namespaceAlias != "":
            return namespaceAlias + localName
        return localName

    ###############################################################
    # PRIVATE methods
    ###############################################################

    def _convertToUrl(self, file):
        """Convert input parameter 'file' into a valid URL.

        - absolute 'file:' URL: every ':' behind the scheme is replaced by '|'
        - other absolute URL (http, ftp, gopher): returned unchanged
        - relative path: returned unchanged (treated as relative URL)
        - absolute local path: converted by urllib.pathname2url and
          prefixed with 'file:'
        """
        matchObject = _reSplitUrlApplication.match(file)
        if matchObject:
            # given file is an absolute URL
            if matchObject.group(1) != 'file':
                return file
            # replace ':' by '|' in the path string
            return "file:" + matchObject.group(2).replace(':', '|')
        if not os.path.isabs(file):
            # given file is a relative URL
            return file
        # given file is not a valid URL => treated as local filename
        return "file:" + urllib.pathname2url(file)

    def _convertToAbsUrl(self, url, baseUrl):
        """Return 'url' as absolute URL.

        A URL that already has a scheme is returned unchanged.  Otherwise it
        is joined with 'baseUrl' or, if 'baseUrl' is empty, resolved against
        the current working directory as a 'file:' URL.
        """
        application = urlparse.urlsplit(url)[0]
        if application != '':
            return url
        if baseUrl != "":
            return urlparse.urljoin(baseUrl, url)
        return "file:" + urllib.pathname2url(os.path.join(os.getcwd(), url))

    def _splitNamespaceLocalName(self, ncName):
        """Split 'ncName' into (namespaceAlias, localName).

        The name is split behind the first '}' or, if there is none, behind
        the first ':'; the separator stays part of the namespace alias.
        Without any separator the namespace alias is "".
        """
        for separator in ('}', ':'):
            namespaceEndIndex = ncName.find(separator)
            if namespaceEndIndex != -1:
                return ncName[:namespaceEndIndex + 1], ncName[namespaceEndIndex + 1:]
        return "", ncName
|
193 |
|
194 |
|
195 |
|
########################################
# define tree wrapper base class
# All methods that are not implemented here have to be overridden by the derived class!
#
|
200 |
|
class TreeWrapperBase:
    """Abstract base class wrapping a parsed XML tree.

    insertSubtree() and getRootNode() have to be overridden by the derived
    class.
    """

    def __init__(self, xmlIf, tree):
        # keep the XML interface and the wrapped tree object
        self.xmlIf, self.tree = xmlIf, tree

    def insertSubtree(self, nextSibling, file, baseUrl):
        """Include the given XML/XSD file 'file' into the XML tree before
        'nextSibling' ('nextSibling' is not removed!).

        Returns the extended XML tree (containing the included XML/XSD file).
        Note: the root tag of the tree and of 'file' must match!
        Has to be overridden.
        """
        raise NotImplementedError

    def getRootNode(self):
        """Return the root node of the XML tree.  Has to be overridden."""
        raise NotImplementedError

    def getTree(self):
        """Return the wrapped tree object handed to the constructor."""
        wrappedTree = self.tree
        return wrappedTree
|
230 |
|
########################################
# define node wrapper base class
# All methods that are not implemented here have to be overridden by the derived class!
#
|
235 |
|
class ElementWrapperBase:
    """Abstract base class wrapping one element node of an XML tree.

    Methods raising NotImplementedError have to be overridden by the derived
    class; all other methods are implemented on top of them.
    """

    def __init__(self, xmlIf, treeWrapper, element):
        self.xmlIf = xmlIf              # XML interface, used for the name handling
        self.treeWrapper = treeWrapper  # tree wrapper handed in by the creator
        self.element = element          # wrapped element; source of line numbers and URLs

    ##########################################################
    # methods to be overridden by the derived class

    def getTagName(self):
        """Return the tag name of this node."""
        raise NotImplementedError

    def getChildren(self, filterTag=None):
        """Return the child element nodes (list) of this node.

        'filterTag' is optional, 'filterTag' = '*' must be supported.
        """
        raise NotImplementedError

    def getElementsByTagName(self, filterTag=None):
        """Return all descendants of this node whose tag matches 'filterTag'.

        'filterTag' is optional, 'filterTag' = '*' must be supported.
        """
        raise NotImplementedError

    def removeChild(self, childNodeWrapper):
        """Remove the given child node from the children of this node."""
        raise NotImplementedError

    def getAttributeDict(self):
        """Return a dictionary with all attributes of this node."""
        raise NotImplementedError

    def getAttribute(self, attributeName):
        """Return the value of attribute 'attributeName'
        or None if there is no suitable attribute."""
        raise NotImplementedError

    def hasAttribute(self, attributeName):
        """Return 1 if attribute 'attributeName' exists, 0 if not."""
        raise NotImplementedError

    def setAttribute(self, attributeName, attributeValue):
        """Set the value of attribute 'attributeName' to 'attributeValue'.

        If the attribute does not yet exist, it will be created.
        """
        raise NotImplementedError

    def getElementValue(self):
        """Return the element value of this node."""
        raise NotImplementedError

    def setElementValue(self, value):
        """Set the element value of this node to 'value'."""
        raise NotImplementedError

    ##########################################################
    # name handling

    def getLocalName(self):
        """Return the local tag name of this node (without namespace)."""
        return self.xmlIf.extractLocalName(self.getTagName())

    def getNamespaceURI(self):
        """Return the namespace part of the tag name of this node."""
        # the helper lives on the XML interface; calling it as a bare global
        # function raised a NameError
        return self.xmlIf.extractNamespaceAlias(self.getTagName())

    def _qualifyFilterTag(self, namespaceAlias, filterTag):
        """Prefix the local name 'filterTag' with 'namespaceAlias';
        the wildcards None and '*' are passed through unchanged."""
        if filterTag not in (None, '*'):
            return self.xmlIf.addNamespaceAlias(namespaceAlias, filterTag)
        return filterTag

    ##########################################################
    # child / descendant access

    def getChildrenNS(self, namespaceAlias, filterTag=None):
        """Return the child element nodes (list) of this node.

        'filterTag' (local tag name) is optional, 'filterTag' = '*' must be
        supported; 'namespaceAlias' has to contain the corresponding namespace.
        """
        return self.getChildren(self._qualifyFilterTag(namespaceAlias, filterTag))

    def getFirstChild(self, filterTag=None):
        """Return the first child element of this node
        or None if there is no suitable child element.

        'filterTag' is optional, 'filterTag' = '*' must be supported.
        """
        children = self.getChildren(filterTag)
        if children:
            return children[0]
        return None

    def getFirstChildNS(self, namespaceAlias, filterTag=None):
        """Return the first child element of this node
        or None if there is no suitable child element.

        'filterTag' (local tag name) is optional, 'filterTag' = '*' must be
        supported; 'namespaceAlias' has to contain the corresponding namespace.
        """
        return self.getFirstChild(self._qualifyFilterTag(namespaceAlias, filterTag))

    def getElementsByTagNameNS(self, namespaceAlias, filterTag=None):
        """Return all descendants of this node whose tag matches the local
        name 'filterTag' in the given namespace.

        'filterTag' (local tag name) is optional, 'filterTag' = '*' must be
        supported; 'namespaceAlias' has to contain the corresponding namespace.
        """
        return self.getElementsByTagName(self._qualifyFilterTag(namespaceAlias, filterTag))

    def getAttributeOrDefault(self, attributeName, default):
        """Return the attribute value if the attribute exists, 'default' if not."""
        if self.hasAttribute(attributeName):
            return self.getAttribute(attributeName)
        return default

    ##########################################################
    # position information, read from the wrapped element

    def getStartLineNumber(self):
        """Return the start line number of the element node in the XML file."""
        return self.element.startLineNumber

    def getEndLineNumber(self):
        """Return the end line number of the element node in the XML file."""
        return self.element.endLineNumber

    def getUrl(self):
        """Return the URL of the XML file the node belongs to."""
        return self.element.url

    def getAbsUrl(self):
        """Return the 'absUrl' attribute of the wrapped element."""
        return self.element.absUrl

    def getFilePath(self):
        """Return the file path of the XML file the node belongs to."""
        return self.element.filePath

    ########################################
    # XPath processing

    def getXPathList(self, xPath, defaultNamespace):
        """Retrieve the node list and the attribute list for the given XPath.

        Supported syntax:
        - several alternative paths separated by '|'
        - an optional leading './/' (the first element step is then searched
          among all descendants of this node)
        - steps separated by '/': '.', an element name, '@name' or '@*'
        Namespace prefixes inside 'xPath' are ignored; every element step is
        looked up in 'defaultNamespace'.

        Returns the tuple (selectedNodeList, selectedAttributeList): paths
        containing an attribute step contribute attribute values to the
        second list, all other paths contribute nodes to the first one.
        Raises IOError if a path contains an empty step.
        """
        selectedNodeList = []
        selectedAttributeList = []
        for xRelPath in xPath.split("|"):
            descendantOrSelf = xRelPath[:3] == ".//"
            if descendantOrSelf:
                xRelPath = xRelPath[3:]
            # no namespaces supported!
            localStepList = [self.xmlIf.extractLocalName(step)
                             for step in xRelPath.split("/")]
            childList = [self]
            isAttributeList = False
            for localStep in localStepList:
                if localStep == "":
                    raise IOError("Invalid xPath '%s'!" % (xRelPath,))
                if localStep == ".":
                    continue
                stepChildList = []
                if localStep[0] == '@':
                    attrName = localStep[1:]
                    for childNode in childList:
                        if attrName == '*':
                            stepChildList.extend(childNode.getAttributeDict().values())
                        elif childNode.hasAttribute(attrName):
                            stepChildList.append(childNode.getAttribute(attrName))
                    isAttributeList = True
                elif descendantOrSelf:
                    # only the first element step searches all descendants
                    descendantOrSelf = False
                    stepChildList = self.getElementsByTagNameNS(defaultNamespace, localStep)
                else:
                    for childNode in childList:
                        stepChildList.extend(childNode.getChildrenNS(defaultNamespace, localStep))
                childList = stepChildList

            if isAttributeList:
                selectedAttributeList.extend(childList)
            else:
                selectedNodeList.extend(childList)
        return selectedNodeList, selectedAttributeList
|
463 |
|
464 |
|
465 |