--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/minixsv/xmlifbase.py Wed Jan 31 16:31:39 2007 +0100
@@ -0,0 +1,465 @@
+#
+# minixsv, Release 0.3
+# file: xmlifbase.py
+#
+# abstract XML interface class
+#
+# history:
+# 2004-09-09 rl created
+# 2004-09-22 rl XML interface classes completely re-designed
+# 2004-09-23 rl added filename and line number support
+# 2004-09-29 rl URL processing added
+# 2004-10-01 rl URL processing improved
+# 2004-10-03 rl xPath processing moved from pyxsval.py
+# 2004-10-12 rl XML text processing added
+#
+# Copyright (c) 2004 by Roland Leuthe. All rights reserved.
+#
+# --------------------------------------------------------------------
+# The minixsv XML schema validator is
+#
+# Copyright (c) 2004 by Roland Leuthe
+#
+# By obtaining, using, and/or copying this software and/or its
+# associated documentation, you agree that you have read, understood,
+# and will comply with the following terms and conditions:
+#
+# Permission to use, copy, modify, and distribute this software and
+# its associated documentation for any purpose and without fee is
+# hereby granted, provided that the above copyright notice appears in
+# all copies, and that both that copyright notice and this permission
+# notice appear in supporting documentation, and that the name of
+# the author not be used in advertising or publicity
+# pertaining to distribution of the software without specific, written
+# prior permission.
+#
+# THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
+# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
+# ABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR
+# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
+# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+# OF THIS SOFTWARE.
+# --------------------------------------------------------------------
+
+
+import string
+import os
+import re
+import urllib
+import urlparse
+
+
+_reSplitUrlApplication = re.compile (r"(file|http|ftp|gopher):(.+)") # splits "<scheme>:<rest>", e.g. "file:///d:\test.xml" => group(1) = "file", group(2) = "///d:\test.xml"
+
+
+########################################
+# define XML interface base class
+# All methods not implemented here have to be overridden by the derived class!
+#
+
+class XmlInterfaceBase:
+
+    def __init__(self, verbose):
+        # 'verbose' enables the progress output printed by _expandIncludes
+        self.verbose = verbose
+
+    ##########################################################
+    # calls the parser for 'file'
+    # returns the respective XML tree for the parsed XML file
+    # 'file' may be a file path or an URI
+
+    def parse (self, file, baseUrl):
+        raise NotImplementedError
+
+    ##########################################################
+    # calls the parser for 'text'
+    # returns the respective XML tree for the parsed XML text string
+
+    def parseString (self, text):
+        raise NotImplementedError
+
+    ##########################################################
+    # calls the parser for 'file' and processes all include directives
+    # returns the respective XML tree wrapper for the parsed XML file
+
+    def extParse (self, file, baseUrl):
+        treeWrapper = self.parse (file, baseUrl)
+        return self._expandIncludes (treeWrapper)
+
+    ##########################################################
+    # for each 'include' child of the root node (root's namespace alias):
+    # insert the subtree given by its 'schemaLocation', then drop the node
+
+    def _expandIncludes (self, treeWrapper):
+        rootNodeWrapper = treeWrapper.getRootNode()
+        nsAlias = self.extractNamespaceAlias(rootNodeWrapper.getTagName())
+
+        for includeNodeWrapper in rootNodeWrapper.getChildrenNS(nsAlias, "include"):
+            includeUrl = includeNodeWrapper.getAttribute("schemaLocation")
+            if self.verbose:
+                print ("including %s..." %(includeUrl))
+            treeWrapper.insertSubtree (includeNodeWrapper, includeUrl, rootNodeWrapper.getAbsUrl())
+            rootNodeWrapper.removeChild (includeNodeWrapper)
+        return treeWrapper
+
+    ##########################################################
+    # calls the parser for 'text' and processes all include directives
+    # returns the respective XML tree wrapper for the parsed XML text string
+
+    def extParseString (self, text):
+        treeWrapper = self.parseString (text)
+        return self._expandIncludes (treeWrapper)
+
+    ##########################################################
+    # returns namespace part of given 'ncName'
+    # (including the trailing '}' or ':', "" if there is none)
+
+    def extractNamespaceAlias (self, ncName):
+        return self._splitNamespaceLocalName (ncName)[0]
+
+    ##########################################################
+    # returns local tag name of given 'ncName'
+
+    def extractLocalName (self, ncName):
+        return self._splitNamespaceLocalName (ncName)[1]
+
+    ##########################################################
+    # add namespace alias to localName
+    # an empty or missing (None) alias leaves localName unchanged
+
+    def addNamespaceAlias (self, namespaceAlias, localName):
+        if not namespaceAlias:
+            return localName
+        return namespaceAlias + localName
+
+    ###############################################################
+    # PRIVATE methods
+    ###############################################################
+
+    ##########################################################
+    # convert input parameter 'file' into a valid URL
+
+    def _convertToUrl (self, file):
+        matchObject = _reSplitUrlApplication.match(file)
+        if matchObject:
+            # given file is an absolute URL
+            if matchObject.group(1) == 'file':
+                # replace ':' by '|' in the path string
+                url = "file:" + matchObject.group(2).replace(':', '|')
+            else:
+                url = file
+        elif not os.path.isabs(file):
+            # given file is a relative URL
+            url = file
+        else:
+            # given file is not a valid URL => treated as local filename
+            url = "file:" + urllib.pathname2url (file)
+
+        return url
+
+    ##########################################################
+    # make 'url' absolute: join with 'baseUrl' or (if empty) with the cwd
+
+    def _convertToAbsUrl (self, url, baseUrl):
+        application = urlparse.urlsplit(url)[0]
+        if application == '':
+            if baseUrl != "":
+                url = urlparse.urljoin (baseUrl, url)
+            else:
+                url = "file:" + urllib.pathname2url (os.path.join(os.getcwd(), url))
+        return url
+
+    ##########################################################
+    # split 'ncName' into namespace and local name
+
+    def _splitNamespaceLocalName (self, ncName):
+        # '{uri}local' (expanded name) takes precedence over 'prefix:local';
+        # in both cases the delimiter stays attached to the returned alias
+        namespaceEndIndex = ncName.find ('}')
+        if namespaceEndIndex == -1:
+            namespaceEndIndex = ncName.find (':')
+
+        if namespaceEndIndex == -1:
+            # no namespace part at all
+            return "", ncName
+
+        # slice once: alias includes the delimiter, local name starts after it
+        splitIndex = namespaceEndIndex + 1
+        namespaceAlias = ncName[:splitIndex]
+        localName = ncName[splitIndex:]
+        return namespaceAlias, localName
+
+
+
+########################################
+# define tree wrapper base class
+# All methods not implemented here have to be overridden by the derived class!
+#
+
+class TreeWrapperBase:
+
+    def __init__(self, xmlIf, tree):
+        # remember the XML interface and the tree handed in by the caller
+        self.xmlIf, self.tree = xmlIf, tree
+
+    ##########################################################
+    # abstract: include the XML/XSD file 'file' into the XML tree
+    # in front of 'nextSibling' (which itself is kept, not removed)
+    # and return the extended XML tree (containing the included file)
+    #
+    # Note: the root tags of the tree and of 'file' must match!
+
+    def insertSubtree (self, nextSibling, file, baseUrl):
+        raise NotImplementedError()
+
+
+    ##########################################################
+    # abstract: return the root node of the XML tree
+
+    def getRootNode (self):
+        raise NotImplementedError()
+
+
+    ##########################################################
+    # return the tree object stored by the constructor
+
+    def getTree (self):
+        return self.tree
+
+########################################
+# define node wrapper base class
+# All methods not implemented here have to be overridden by the derived class!
+#
+
+class ElementWrapperBase:
+
+    def __init__(self, xmlIf, treeWrapper, element):
+        self.xmlIf = xmlIf
+        self.treeWrapper = treeWrapper
+        self.element = element
+
+    ##########################################################
+    # returns tag name of this XML node (abstract)
+
+    def getTagName (self):
+        raise NotImplementedError
+
+
+    ##########################################################
+    # returns child element nodes (list) of this XML node (abstract)
+    # 'filterTag' is optional, 'filterTag' = '*' must be supported
+
+    def getChildren (self, filterTag=None):
+        raise NotImplementedError
+
+
+    ##########################################################
+    # returns all descendants of this node whose tag matches 'filterTag' (abstract)
+    # 'filterTag' is optional, 'filterTag' = '*' must be supported
+
+    def getElementsByTagName (self, filterTag=None):
+        raise NotImplementedError
+
+
+    ##########################################################
+    # removes given child node from the children of this node (abstract)
+
+    def removeChild (self, childNodeWrapper):
+        raise NotImplementedError
+
+
+    ##########################################################
+    # returns dictionary with all attributes of this node (abstract)
+
+    def getAttributeDict (self):
+        raise NotImplementedError
+
+
+    ##########################################################
+    # returns attribute value of given attributeName
+    # or None if there is no suitable attribute (abstract)
+
+    def getAttribute (self, attributeName):
+        raise NotImplementedError
+
+    ##########################################################
+    # returns 1 if attribute 'attributeName' exists
+    #         0 if not (abstract)
+
+    def hasAttribute (self, attributeName):
+        raise NotImplementedError
+
+
+    ##########################################################
+    # sets value of attribute 'attributeName' to 'attributeValue'
+    # if the attribute does not yet exist, it will be created (abstract)
+
+    def setAttribute (self, attributeName, attributeValue):
+        raise NotImplementedError
+
+
+    ##########################################################
+    # returns element value of this node (abstract)
+
+    def getElementValue (self):
+        raise NotImplementedError
+
+
+    ##########################################################
+    # sets element value of this node to 'value' (abstract)
+
+    def setElementValue (self, value):
+        raise NotImplementedError
+
+
+    ##########################################################
+    # returns local tag name of this XML node (without namespace)
+
+    def getLocalName (self):
+        return self.xmlIf.extractLocalName (self.getTagName())
+
+
+    ##########################################################
+    # returns namespace part of the tag name of this XML node
+
+    def getNamespaceURI (self):
+        return self.xmlIf.extractNamespaceAlias (self.getTagName())
+
+
+    ##########################################################
+    # returns child element nodes (list) of this XML node
+    # 'filterTag' (localTagName) is optional, 'filterTag' = '*' must be supported
+    # 'namespaceAlias' has to contain corresponding namespace
+
+    def getChildrenNS (self, namespaceAlias, filterTag=None):
+        if filterTag not in (None, '*'):
+            filterTag = self.xmlIf.addNamespaceAlias(namespaceAlias, filterTag)
+        return self.getChildren(filterTag)
+
+
+    ##########################################################
+    # returns first child element of this XML node
+    # or None if there is no suitable child element
+    # 'filterTag' is optional, 'filterTag' = '*' must be supported
+
+    def getFirstChild (self, filterTag=None):
+        children = self.getChildren(filterTag)
+        if children:
+            return children[0]
+        # any empty result (not only an empty list) means: no suitable child
+        return None
+
+
+    ##########################################################
+    # returns first child element of this XML node
+    # or None if there is no suitable child element
+    # 'filterTag' (localTagName) is optional, 'filterTag' = '*' must be supported
+    # 'namespaceAlias' has to contain corresponding namespace
+
+    def getFirstChildNS (self, namespaceAlias, filterTag=None):
+        if filterTag not in (None, '*'):
+            filterTag = self.xmlIf.addNamespaceAlias(namespaceAlias, filterTag)
+        return self.getFirstChild(filterTag)
+
+
+    ##########################################################
+    # returns all descendants of node whose tag match 'filterTag' of the given namespace
+    # 'filterTag' (localTagName) is optional, 'filterTag' = '*' must be supported
+    # 'namespaceAlias' has to contain corresponding namespace
+
+    def getElementsByTagNameNS (self, namespaceAlias, filterTag=None):
+        if filterTag not in (None, '*'):
+            filterTag = self.xmlIf.addNamespaceAlias(namespaceAlias, filterTag)
+        return self.getElementsByTagName (filterTag)
+
+
+    ##########################################################
+    # returns attribute if it exists or default value if not
+
+    def getAttributeOrDefault (self, attributeName, default):
+        if self.hasAttribute (attributeName):
+            return self.getAttribute (attributeName)
+        else:
+            return default
+
+    ##########################################################
+    # returns the start line number stored on the wrapped element
+
+    def getStartLineNumber (self):
+        return self.element.startLineNumber
+
+
+    ##########################################################
+    # returns the end line number stored on the wrapped element
+
+    def getEndLineNumber (self):
+        return self.element.endLineNumber
+
+
+    ##########################################################
+    # returns the URL / absolute URL stored on the wrapped element
+
+    def getUrl (self):
+        return self.element.url
+
+    def getAbsUrl (self):
+        return self.element.absUrl
+
+
+    ##########################################################
+    # returns the file path stored on the wrapped element
+
+    def getFilePath (self):
+        return self.element.filePath
+
+
+    ########################################
+    # returns (node list, attribute value list) selected by 'xPath'; raises IOError on an empty step
+
+    def getXPathList (self, xPath, defaultNamespace):
+        selectedNodeList = []
+        selectedAttributeList = []
+        # the alternatives separated by '|' are evaluated one after the other
+        for xRelPath in xPath.split ("|"):
+            descendantOrSelf = 0
+            if xRelPath[:3] == ".//":
+                descendantOrSelf = 1
+                xRelPath = xRelPath[3:]
+            # no namespaces supported!
+            xPathLocalStepList = [self.xmlIf.extractLocalName(step) for step in xRelPath.split ("/")]
+            childList = [self,]
+            isAttributeList = 0
+            for localStep in xPathLocalStepList:
+                stepChildList = []
+                if localStep == "":
+                    raise IOError ("Invalid xPath '%s'!" %(xRelPath))
+                elif localStep == ".":
+                    continue
+                elif localStep[0] == '@':
+                    attrName = localStep[1:]
+                    for childNode in childList:
+                        if attrName == '*':
+                            stepChildList.extend(childNode.getAttributeDict().values())
+                        elif childNode.hasAttribute(attrName):
+                            stepChildList.append (childNode.getAttribute(attrName))
+                    childList = stepChildList
+                    isAttributeList = 1
+                else:
+                    if descendantOrSelf:
+                        descendantOrSelf = 0
+                        stepChildList = self.getElementsByTagNameNS(defaultNamespace, localStep)
+                    else:
+                        for childNode in childList:
+                            stepChildList.extend (childNode.getChildrenNS(defaultNamespace, localStep))
+                    childList = stepChildList
+
+            if isAttributeList:
+                selectedAttributeList.extend (childList)
+            else:
+                selectedNodeList.extend (childList)
+        return selectedNodeList, selectedAttributeList
+
+
+