#
# minixsv, Release 0.3
# file: xmlifbase.py
#
# abstract XML interface class
#
# history:
# 2004-09-09 rl   created
# 2004-09-22 rl   XML interface classes completely re-designed
# 2004-09-23 rl   added filename and line number support
# 2004-09-29 rl   URL processing added
# 2004-10-01 rl   URL processing improved
# 2004-10-03 rl   xPath processing moved from pyxsval.py
# 2004-10-12 rl   XML text processing added
#
# Copyright (c) 2004 by Roland Leuthe.  All rights reserved.
#
# --------------------------------------------------------------------
# The minixsv XML schema validator is
#
# Copyright (c) 2004 by Roland Leuthe
#
# By obtaining, using, and/or copying this software and/or its
# associated documentation, you agree that you have read, understood,
# and will comply with the following terms and conditions:
#
# Permission to use, copy, modify, and distribute this software and
# its associated documentation for any purpose and without fee is
# hereby granted, provided that the above copyright notice appears in
# all copies, and that both that copyright notice and this permission
# notice appear in supporting documentation, and that the name of
# the author not be used in advertising or publicity
# pertaining to distribution of the software without specific, written
# prior permission.
#
# THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
# ABILITY AND FITNESS.  IN NO EVENT SHALL THE AUTHOR
# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
# OF THIS SOFTWARE.
# --------------------------------------------------------------------


import os
import re
import string
import urllib
import urlparse


# Splits an absolute URL into its scheme and the remainder, e.g.
# "file:///d:\test.xml" => "file" + "///d:\test.xml"
_reSplitUrlApplication = re.compile(r"(file|http|ftp|gopher):(.+)")


########################################
# define XML interface base class
# All not implemented methods have to be overloaded by the derived class!!
#

class XmlInterfaceBase:

    def __init__(self, verbose):
        # 'verbose' is only evaluated for truthiness (progress output
        # while include directives are expanded)
        self.verbose = verbose

    ##########################################################
    #  calls the parser for 'file'
    #  returns the respective XML tree for the parsed XML file
    #  'file' may be a file path or an URI
    #  (abstract: must be implemented by the derived class)

    def parse(self, file, baseUrl):
        raise NotImplementedError


    ##########################################################
    #  calls the parser for 'text'
    #  returns the respective XML tree for the parsed XML text string
    #  (abstract: must be implemented by the derived class)

    def parseString(self, text):
        raise NotImplementedError


    ##########################################################
    #  calls the parser for 'file' and processes all include directives
    #  returns the respective XML tree wrapper for the parsed XML file

    def extParse(self, file, baseUrl):
        treeWrapper = self.parse(file, baseUrl)
        return self._expandIncludes(treeWrapper)

    ##########################################################
    #  replaces every <include> child of the root node (looked up in the
    #  namespace of the root tag) by the subtree referenced through its
    #  'schemaLocation' attribute
    #  returns the same (modified) tree wrapper

    def _expandIncludes(self, treeWrapper):
        rootNodeWrapper = treeWrapper.getRootNode()
        nsAlias = self.extractNamespaceAlias(rootNodeWrapper.getTagName())

        for includeNodeWrapper in rootNodeWrapper.getChildrenNS(nsAlias, "include"):
            includeUrl = includeNodeWrapper.getAttribute("schemaLocation")
            if self.verbose:
                # a single parenthesised expression prints identically with
                # the Python 2 print statement and the print() function
                print("including %s..." % (includeUrl,))
            # the included subtree is inserted in front of the include
            # node, afterwards the include node itself is dropped
            treeWrapper.insertSubtree(includeNodeWrapper, includeUrl, rootNodeWrapper.getAbsUrl())
            rootNodeWrapper.removeChild(includeNodeWrapper)
        return treeWrapper

    ##########################################################
    #  calls the parser for 'text' and processes all include directives
    #  returns the respective XML tree wrapper for the parsed XML text string

    def extParseString(self, text):
        treeWrapper = self.parseString(text)
        return self._expandIncludes(treeWrapper)


    ##########################################################
    #  returns namespace part of given 'ncName'
    #  (including the trailing '}' or ':', "" if there is none)

    def extractNamespaceAlias(self, ncName):
        return self._splitNamespaceLocalName(ncName)[0]


    ##########################################################
    #  returns local tag name of given 'ncName'

    def extractLocalName(self, ncName):
        return self._splitNamespaceLocalName(ncName)[1]


    ##########################################################
    #  add namespace alias to localName
    #  'namespaceAlias' is expected to already contain its separator
    #  (it is prepended without any additional character)

    def addNamespaceAlias(self, namespaceAlias, localName):
        if namespaceAlias != "":
            return namespaceAlias + localName
        return localName


    ###############################################################
    # PRIVATE methods
    ###############################################################

    ##########################################################
    #  convert input parameter 'file' into a valid URL
    #  - "file:" URLs: every ':' in the part after the scheme is replaced by '|'
    #  - other absolute URLs (http, ftp, gopher) are returned unchanged
    #  - relative paths are returned unchanged (treated as relative URLs)
    #  - absolute local paths are converted into a "file:" URL

    def _convertToUrl(self, file):
        matchObject = _reSplitUrlApplication.match(file)
        if matchObject:
            # given file is an absolute URL
            if matchObject.group(1) != 'file':
                return file
            # replace ':' by '|' in the path string
            return "file:" + matchObject.group(2).replace(':', '|')

        if not os.path.isabs(file):
            # given file is a relative URL
            return file

        # given file is not a valid URL => treated as local filename
        return "file:" + urllib.pathname2url(file)

    ##########################################################
    #  convert 'url' into an absolute URL
    #  an URL without scheme is resolved against 'baseUrl' or, if
    #  'baseUrl' is empty, against the current working directory
    #  an URL which already has a scheme is returned unchanged

    def _convertToAbsUrl(self, url, baseUrl):
        application = urlparse.urlsplit(url)[0]
        if application == '':
            if baseUrl != "":
                url = urlparse.urljoin(baseUrl, url)
            else:
                url = "file:" + urllib.pathname2url(os.path.join(os.getcwd(), url))
        return url

    ##########################################################
    #  split 'ncName' into namespace and local name
    #  "{uri}local" => ("{uri}", "local")
    #  "alias:local" => ("alias:", "local")
    #  "local"       => ("", "local")

    def _splitNamespaceLocalName(self, ncName):
        # '}' takes precedence over ':' because a namespace URI written as
        # "{uri}" may itself contain colons
        for separator in ('}', ':'):
            namespaceEndIndex = ncName.find(separator)
            if namespaceEndIndex != -1:
                return ncName[:namespaceEndIndex + 1], ncName[namespaceEndIndex + 1:]
        return "", ncName



########################################
# define tree wrapper base class
# All not implemented methods have to be overloaded by the derived class!!
#

class TreeWrapperBase:

    def __init__(self, xmlIf, tree):
        self.xmlIf = xmlIf
        self.tree = tree

    ##########################################################
    #  includes the given XML/XSD file 'file' into the XML tree 'tree'
    #  before 'nextSibling' ('nextSibling' is not removed!)
    #  returns the extended XML tree (containing included XML/XSD file)
    #
    #  Note: Root tag of 'tree' and 'file' must match!
    #  (abstract: must be implemented by the derived class)

    def insertSubtree(self, nextSibling, file, baseUrl):
        raise NotImplementedError


    ##########################################################
    #  returns root node of given XML tree 'tree'
    #  (abstract: must be implemented by the derived class)

    def getRootNode(self):
        raise NotImplementedError


    ##########################################################
    #  returns 'tree' of given XML tree

    def getTree(self):
        return self.tree

########################################
# define node wrapper base class
# All not implemented methods have to be overloaded by the derived class!!
#

class ElementWrapperBase:

    def __init__(self, xmlIf, treeWrapper, element):
        self.xmlIf = xmlIf
        self.treeWrapper = treeWrapper
        self.element = element

    ##########################################################
    #  returns tag name of given XML node 'node'
    #  (abstract: must be implemented by the derived class)

    def getTagName(self):
        raise NotImplementedError


    ##########################################################
    #  returns child element nodes (list) of given XML node 'node'
    #  'filterTag' is optional, 'filterTag' = '*' must be supported
    #  (abstract: must be implemented by the derived class)

    def getChildren(self, filterTag=None):
        raise NotImplementedError


    ##########################################################
    #  returns all descendants of node whose tag match 'tagName'
    #  'filterTag' is optional, 'filterTag' = '*' must be supported
    #  (abstract: must be implemented by the derived class)

    def getElementsByTagName(self, filterTag=None):
        raise NotImplementedError


    ##########################################################
    #  remove given child node from children of current node
    #  (abstract: must be implemented by the derived class)

    def removeChild(self, childNodeWrapper):
        raise NotImplementedError


    ##########################################################
    #  returns dictionary with all attributes of this node
    #  (abstract: must be implemented by the derived class)

    def getAttributeDict(self):
        raise NotImplementedError


    ##########################################################
    #  returns attribute value of given attributeName
    #  or None if there is no suitable attribute
    #  (abstract: must be implemented by the derived class)

    def getAttribute(self, attributeName):
        raise NotImplementedError

    ##########################################################
    #  returns 1 if attribute 'attributeName' exists
    #          0 if not
    #  (abstract: must be implemented by the derived class)

    def hasAttribute(self, attributeName):
        raise NotImplementedError


    ##########################################################
    #  sets value of attribute 'attributeName' to 'attributeValue'
    #  if the attribute does not yet exist, it will be created
    #  (abstract: must be implemented by the derived class)

    def setAttribute(self, attributeName, attributeValue):
        raise NotImplementedError


    ##########################################################
    #  returns element value of this node
    #  (abstract: must be implemented by the derived class)

    def getElementValue(self):
        raise NotImplementedError


    ##########################################################
    #  sets element value of this node to 'value'
    #  (abstract: must be implemented by the derived class)

    def setElementValue(self, value):
        raise NotImplementedError


    ##########################################################
    #  returns local tag name of given XML node 'node' (without namespace)

    def getLocalName(self):
        return self.xmlIf.extractLocalName(self.getTagName())


    ##########################################################
    #  returns namespace part of the tag name of this node
    #  (as delivered by extractNamespaceAlias() of the XML interface)

    def getNamespaceURI(self):
        # extractNamespaceAlias is a method of the XML interface object;
        # calling it as a bare name raised a NameError
        return self.xmlIf.extractNamespaceAlias(self.getTagName())


    ##########################################################
    #  returns child element nodes (list) of given XML node 'node'
    #  'filterTag' (localTagName) is optional, 'filterTag' = '*' must be supported
    #  'namespaceAlias' has to contain corresponding namespace

    def getChildrenNS(self, namespaceAlias, filterTag=None):
        return self.getChildren(self._qualifyFilterTag(namespaceAlias, filterTag))


    ##########################################################
    #  returns first child element of given XML node 'node'
    #  or None if there is no suitable child element
    #  'filterTag' is optional, 'filterTag' = '*' must be supported

    def getFirstChild(self, filterTag=None):
        children = self.getChildren(filterTag)
        # truthiness test also copes with empty sequences other than []
        if children:
            return children[0]
        return None


    ##########################################################
    #  returns first child element of given XML node 'node'
    #  or None if there is no suitable child element
    #  'filterTag' (localTagName) is optional, 'filterTag' = '*' must be supported
    #  'namespaceAlias' has to contain corresponding namespace

    def getFirstChildNS(self, namespaceAlias, filterTag=None):
        return self.getFirstChild(self._qualifyFilterTag(namespaceAlias, filterTag))


    ##########################################################
    #  returns all descendants of node whose tag match 'localName' of the given namespace
    #  'filterTag' (localTagName) is optional, 'filterTag' = '*' must be supported
    #  'namespaceAlias' has to contain corresponding namespace

    def getElementsByTagNameNS(self, namespaceAlias, filterTag=None):
        return self.getElementsByTagName(self._qualifyFilterTag(namespaceAlias, filterTag))


    ##########################################################
    #  returns attribute if it exists or default value if not

    def getAttributeOrDefault(self, attributeName, default):
        if self.hasAttribute(attributeName):
            return self.getAttribute(attributeName)
        return default

    ##########################################################
    #  returns the current start line number of the element node in the XML file

    def getStartLineNumber(self):
        return self.element.startLineNumber


    ##########################################################
    #  returns the current end line number of the element node in the XML file

    def getEndLineNumber(self):
        return self.element.endLineNumber


    ##########################################################
    #  returns the URL of the XML file the node belongs to

    def getUrl(self):
        return self.element.url

    def getAbsUrl(self):
        return self.element.absUrl


    ##########################################################
    #  returns the file path of the XML file the node belongs to

    def getFilePath(self):
        return self.element.filePath


    ########################################
    # retrieve node list or attribute list for specified XPath
    #
    # supported syntax:
    #  - alternatives separated by '|'
    #  - an optional leading './/' (the first element step is then
    #    searched among all descendants of this node)
    #  - steps separated by '/': '.', a tag name, '@name' or '@*'
    #  - namespace prefixes of the steps are stripped, all element steps
    #    are looked up in 'defaultNamespace'
    #
    # returns the tuple (list of selected nodes, list of selected attribute values)
    # raises IOError if the XPath contains an empty step

    def getXPathList(self, xPath, defaultNamespace):
        selectedNodeList = []
        selectedAttributeList = []
        for xRelPath in xPath.split("|"):
            descendantOrSelf = xRelPath[:3] == ".//"
            if descendantOrSelf:
                xRelPath = xRelPath[3:]
            # no namespaces supported!
            xPathLocalStepList = [self.xmlIf.extractLocalName(step)
                                  for step in xRelPath.split("/")]
            childList = [self]
            isAttributeList = False
            for localStep in xPathLocalStepList:
                if localStep == "":
                    raise IOError("Invalid xPath '%s'!" % (xRelPath,))
                if localStep == ".":
                    # "self" step: current node set stays as it is
                    continue

                stepChildList = []
                if localStep[0] == '@':
                    # attribute step: collect attribute values of the current nodes
                    attrName = localStep[1:]
                    for childNode in childList:
                        if attrName == '*':
                            stepChildList.extend(childNode.getAttributeDict().values())
                        elif childNode.hasAttribute(attrName):
                            stepChildList.append(childNode.getAttribute(attrName))
                    isAttributeList = True
                elif descendantOrSelf:
                    # only the first element step after './/' searches all descendants
                    descendantOrSelf = False
                    stepChildList = self.getElementsByTagNameNS(defaultNamespace, localStep)
                else:
                    for childNode in childList:
                        stepChildList.extend(childNode.getChildrenNS(defaultNamespace, localStep))
                childList = stepChildList

            if isAttributeList:
                selectedAttributeList.extend(childList)
            else:
                selectedNodeList.extend(childList)
        return selectedNodeList, selectedAttributeList


    ###############################################################
    # PRIVATE methods
    ###############################################################

    ##########################################################
    #  prepends 'namespaceAlias' to 'filterTag'
    #  the wildcards None and '*' are passed through unchanged

    def _qualifyFilterTag(self, namespaceAlias, filterTag):
        if filterTag in (None, '*'):
            return filterTag
        return self.xmlIf.addNamespaceAlias(namespaceAlias, filterTag)