diff -r 000000000000 -r b622defdfd98 minixsv/xmlifbase.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/minixsv/xmlifbase.py Wed Jan 31 16:31:39 2007 +0100 @@ -0,0 +1,465 @@ +# +# minixsv, Release 0.3 +# file: xmlifbase.py +# +# abstract XML interface class +# +# history: +# 2004-09-09 rl created +# 2004-09-22 rl XML interface classes completely re-designed +# 2004-09-23 rl added filename and line number support +# 2004-09-29 rl URL processing added +# 2004-10-01 rl URL processing improved +# 2004-10-03 rl xPath processing moved from pyxsval.py +# 2004-10-12 rl XML text processing added +# +# Copyright (c) 2004 by Roland Leuthe. All rights reserved. +# +# -------------------------------------------------------------------- +# The minixsv XML schema validator is +# +# Copyright (c) 2004 by Roland Leuthe +# +# By obtaining, using, and/or copying this software and/or its +# associated documentation, you agree that you have read, understood, +# and will comply with the following terms and conditions: +# +# Permission to use, copy, modify, and distribute this software and +# its associated documentation for any purpose and without fee is +# hereby granted, provided that the above copyright notice appears in +# all copies, and that both that copyright notice and this permission +# notice appear in supporting documentation, and that the name of +# the author not be used in advertising or publicity +# pertaining to distribution of the software without specific, written +# prior permission. +# +# THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD +# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- +# ABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR +# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY +# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, +# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS +# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE +# OF THIS SOFTWARE. 
# --------------------------------------------------------------------


import string
import os
import re
import urllib

# 'urlparse' and 'urllib.pathname2url' moved in Python 3; bind the same
# callables under stable names so the code below runs on both versions.
try:
    import urlparse
    _pathname2url = urllib.pathname2url
except ImportError:
    import urllib.parse as urlparse
    from urllib.request import pathname2url as _pathname2url


_reSplitUrlApplication = re.compile (r"(file|http|ftp|gopher):(.+)") # "file:///d:\test.xml" => "file" + "///d:\test.xml"


########################################
# define XML interface base class
# All not implemented methods have to be overloaded by the derived class!!
#

class XmlInterfaceBase:

    ##########################################################
    # 'verbose': if true, _expandIncludes prints every included URL

    def __init__(self, verbose):
        self.verbose = verbose


    ##########################################################
    # calls the parser for 'file'
    # returns the respective XML tree for the parsed XML file
    # 'file' may be a file path or an URI
    # (abstract: raises NotImplementedError unless overloaded)

    def parse (self, file, baseUrl):
        raise NotImplementedError


    ##########################################################
    # calls the parser for 'text'
    # returns the respective XML tree for the parsed XML text string
    # (abstract: raises NotImplementedError unless overloaded)

    def parseString (self, text):
        raise NotImplementedError


    ##########################################################
    # calls the parser for 'file' and processes all include directives
    # returns the respective XML tree wrapper for the parsed XML file

    def extParse (self, file, baseUrl):
        treeWrapper = self.parse (file, baseUrl)
        return self._expandIncludes (treeWrapper)


    ##########################################################
    # replaces every "include" child of the root node (looked up in the
    # namespace of the root tag) by the subtree referenced through its
    # "schemaLocation" attribute
    # returns the (modified) 'treeWrapper'

    def _expandIncludes (self, treeWrapper):
        rootNodeWrapper = treeWrapper.getRootNode()
        nsAlias = self.extractNamespaceAlias(rootNodeWrapper.getTagName())

        # iterate over a snapshot: children are removed inside the loop, which
        # would skip entries if the implementation handed out a live list
        for includeNodeWrapper in list(rootNodeWrapper.getChildrenNS(nsAlias, "include")):
            includeUrl = includeNodeWrapper.getAttribute("schemaLocation")
            if self.verbose:
                print ("including %s..." % (includeUrl,))
            treeWrapper.insertSubtree (includeNodeWrapper, includeUrl, rootNodeWrapper.getAbsUrl())
            rootNodeWrapper.removeChild (includeNodeWrapper)
        return treeWrapper


    ##########################################################
    # calls the parser for 'text' and processes all include directives
    # returns the respective XML tree wrapper for the parsed XML text string

    def extParseString (self, text):
        treeWrapper = self.parseString (text)
        return self._expandIncludes (treeWrapper)


    ##########################################################
    # returns namespace part of given 'ncName'
    # (including the trailing '}' or ':', "" if there is none)

    def extractNamespaceAlias (self, ncName):
        return self._splitNamespaceLocalName (ncName)[0]


    ##########################################################
    # returns local tag name of given 'ncName'

    def extractLocalName (self, ncName):
        return self._splitNamespaceLocalName (ncName)[1]


    ##########################################################
    # add namespace alias to localName
    # 'namespaceAlias' is prepended as it is (no separator is inserted)

    def addNamespaceAlias (self, namespaceAlias, localName):
        if namespaceAlias != "":
            return namespaceAlias + localName
        else:
            return localName


    ###############################################################
    # PRIVATE methods
    ###############################################################

    ##########################################################
    # convert input parameter 'file' into a valid URL

    def _convertToUrl (self, file):
        matchObject = _reSplitUrlApplication.match(file)
        if matchObject:
            # given file is an absolute URL
            if matchObject.group(1) == 'file':
                # replace ':' by '|' in the path string
                url = "file:" + matchObject.group(2).replace(':', '|')
            else:
                url = file
        elif not os.path.isabs(file):
            # given file is a relative URL
            url = file
        else:
            # given file is not a valid URL => treated as local filename
            url = "file:" + _pathname2url (file)

        return url


    ##########################################################
    # returns 'url' unchanged if it already carries a scheme,
    # otherwise resolves it against 'baseUrl' or, if 'baseUrl' is "",
    # against the current working directory (as "file:" URL)

    def _convertToAbsUrl (self, url, baseUrl):
        application = urlparse.urlsplit(url)[0]
        if application == '':
            if baseUrl != "":
                url = urlparse.urljoin (baseUrl, url)
            else:
                url = "file:" + _pathname2url (os.path.join(os.getcwd(), url))
        return url


    ##########################################################
    # split 'ncName' into namespace and local name
    # the first '}' is tried as separator first, then the first ':';
    # the separator stays part of the returned namespace alias
    # returns ("", ncName) if neither separator is present

    def _splitNamespaceLocalName (self, ncName):
        namespaceEndIndex = ncName.find ('}')
        if namespaceEndIndex == -1:
            namespaceEndIndex = ncName.find (':')
        if namespaceEndIndex == -1:
            return "", ncName
        return ncName[:namespaceEndIndex+1], ncName[namespaceEndIndex+1:]



########################################
# define tree wrapper base class
# All not implemented methods have to be overloaded by the derived class!!
#

class TreeWrapperBase:

    ##########################################################
    # 'xmlIf': XML interface object, 'tree': wrapped XML tree

    def __init__(self, xmlIf, tree):
        self.xmlIf = xmlIf
        self.tree = tree


    ##########################################################
    # includes the given XML/XSD file 'file' into the XML tree 'tree'
    # before 'nextSibling' ('nextSibling' is not removed!)
    # returns the extended XML tree (containing included XML/XSD file)
    #
    # Note: Root tag of 'tree' and 'file' must match!
    # (abstract: raises NotImplementedError unless overloaded)

    def insertSubtree (self, nextSibling, file, baseUrl):
        raise NotImplementedError


    ##########################################################
    # returns root node of given XML tree 'tree'
    # (abstract: raises NotImplementedError unless overloaded)

    def getRootNode (self):
        raise NotImplementedError


    ##########################################################
    # returns 'tree' of given XML tree

    def getTree (self):
        return self.tree


########################################
# define node wrapper base class
# All not implemented methods have to be overloaded by the derived class!!
#

class ElementWrapperBase:

    ##########################################################
    # 'xmlIf': XML interface object (used for namespace handling)
    # 'treeWrapper': tree wrapper the node belongs to
    # 'element': wrapped element; the line number / URL / file path
    #            getters below read their values from its attributes

    def __init__(self, xmlIf, treeWrapper, element):
        self.xmlIf = xmlIf
        self.treeWrapper = treeWrapper
        self.element = element


    ##########################################################
    # returns tag name of given XML node 'node'
    # (abstract: raises NotImplementedError unless overloaded)

    def getTagName (self):
        raise NotImplementedError


    ##########################################################
    # returns child element nodes (list) of given XML node 'node'
    # 'filterTag' is optional, 'filterTag' = '*' must be supported
    # (abstract: raises NotImplementedError unless overloaded)

    def getChildren (self, filterTag=None):
        raise NotImplementedError


    ##########################################################
    # returns all descendants of node whose tag match 'tagName'
    # 'filterTag' is optional, 'filterTag' = '*' must be supported
    # (abstract: raises NotImplementedError unless overloaded)

    def getElementsByTagName (self, filterTag=None):
        raise NotImplementedError


    ##########################################################
    # remove given child node from children of current node
    # (abstract: raises NotImplementedError unless overloaded)

    def removeChild (self, childNodeWrapper):
        raise NotImplementedError


    ##########################################################
    # returns dictionary with all attributes of this node
    # (abstract: raises NotImplementedError unless overloaded)

    def getAttributeDict (self):
        raise NotImplementedError


    ##########################################################
    # returns attribute value of given attributeName
    # or None if there is no suitable attribute
    # (abstract: raises NotImplementedError unless overloaded)

    def getAttribute (self, attributeName):
        raise NotImplementedError


    ##########################################################
    # returns 1 if attribute 'attributeName' exists
    #         0 if not
    # (abstract: raises NotImplementedError unless overloaded)

    def hasAttribute (self, attributeName):
        raise NotImplementedError


    ##########################################################
    # sets value of attribute 'attributeName' to 'attributeValue'
    # if the attribute does not yet exist, it will be created
    # (abstract: raises NotImplementedError unless overloaded)

    def setAttribute (self, attributeName, attributeValue):
        raise NotImplementedError


    ##########################################################
    # returns element value of this node
    # (abstract: raises NotImplementedError unless overloaded)

    def getElementValue (self):
        raise NotImplementedError


    ##########################################################
    # sets element value of this node to 'value'
    # (abstract: raises NotImplementedError unless overloaded)

    def setElementValue (self, value):
        raise NotImplementedError


    ##########################################################
    # returns local tag name of given XML node 'node' (without namespace)

    def getLocalName (self):
        return self.xmlIf.extractLocalName (self.getTagName())


    ##########################################################
    # returns namespace of tag name of given XML node 'node'

    def getNamespaceURI (self):
        # extractNamespaceAlias is a method of the XML interface object,
        # not a module level function
        return self.xmlIf.extractNamespaceAlias (self.getTagName())


    ##########################################################
    # returns child element nodes (list) of given XML node 'node'
    # 'filterTag' (localTagName) is optional, 'filterTag' = '*' must be supported
    # 'namespaceAlias' has to contain corresponding namespace

    def getChildrenNS (self, namespaceAlias, filterTag=None):
        if filterTag not in (None, '*'):
            filterTag = self.xmlIf.addNamespaceAlias(namespaceAlias, filterTag)
        return self.getChildren(filterTag)


    ##########################################################
    # returns first child element of given XML node 'node'
    # or None if there is no suitable child element
    # 'filterTag' is optional, 'filterTag' = '*' must be supported

    def getFirstChild (self, filterTag=None):
        children = self.getChildren(filterTag)
        if children:
            return children[0]
        return None


    ##########################################################
    # returns first child element of given XML node 'node'
    # or None if there is no suitable child element
    # 'filterTag' (localTagName) is optional, 'filterTag' = '*' must be supported
    # 'namespaceAlias' has to contain corresponding namespace

    def getFirstChildNS (self, namespaceAlias, filterTag=None):
        if filterTag not in (None, '*'):
            filterTag = self.xmlIf.addNamespaceAlias(namespaceAlias, filterTag)
        return self.getFirstChild(filterTag)


    ##########################################################
    # returns all descendants of node whose tag match 'localName' of the given namespace
    # 'filterTag' (localTagName) is optional, 'filterTag' = '*' must be supported
    # 'namespaceAlias' has to contain corresponding namespace

    def getElementsByTagNameNS (self, namespaceAlias, filterTag=None):
        if filterTag not in (None, '*'):
            filterTag = self.xmlIf.addNamespaceAlias(namespaceAlias, filterTag)
        return self.getElementsByTagName (filterTag)


    ##########################################################
    # returns attribute if it exists or default value if not

    def getAttributeOrDefault (self, attributeName, default):
        if self.hasAttribute (attributeName):
            return self.getAttribute (attributeName)
        else:
            return default


    ##########################################################
    # returns the current start line number of the element node in the XML file

    def getStartLineNumber (self):
        return self.element.startLineNumber


    ##########################################################
    # returns the current end line number of the element node in the XML file

    def getEndLineNumber (self):
        return self.element.endLineNumber


    ##########################################################
    # returns the URL of the XML file the node belongs to

    def getUrl (self):
        return self.element.url


    ##########################################################
    # returns the 'absUrl' attribute of the wrapped element

    def getAbsUrl (self):
        return self.element.absUrl


    ##########################################################
    # returns the file path of the XML file the node belongs to

    def getFilePath (self):
        return self.element.filePath


    ########################################
    # retrieve node list or attribute list for specified XPath
    #
    # 'xPath' may consist of several alternatives separated by '|';
    # each alternative is a '/'-separated list of steps:
    #   "."      stays at the current nodes
    #   "@name"  selects the values of attribute 'name' ("@*": all values)
    #   "name"   selects the children 'name' in 'defaultNamespace'
    # a leading ".//" makes the first element step select descendants
    # of this node instead of children
    # returns tuple (selectedNodeList, selectedAttributeList)
    # raises IOError if a step is empty

    def getXPathList (self, xPath, defaultNamespace):
        selectedNodeList = []
        selectedAttributeList = []
        for xRelPath in xPath.split ("|"):
            descendantOrSelf = 0
            if xRelPath[:3] == ".//":
                descendantOrSelf = 1
                xRelPath = xRelPath[3:]
            # no namespaces supported!
            xPathLocalStepList = [self.xmlIf.extractLocalName(step)
                                  for step in xRelPath.split ("/")]
            childList = [self,]
            isAttributeList = 0
            for localStep in xPathLocalStepList:
                stepChildList = []
                if localStep == "":
                    raise IOError ("Invalid xPath '%s'!" %(xRelPath))
                elif localStep == ".":
                    continue
                elif localStep[0] == '@':
                    attrName = localStep[1:]
                    for childNode in childList:
                        if attrName == '*':
                            stepChildList.extend(childNode.getAttributeDict().values())
                        elif childNode.hasAttribute(attrName):
                            stepChildList.append (childNode.getAttribute(attrName))
                    childList = stepChildList
                    isAttributeList = 1
                else:
                    if descendantOrSelf:
                        # only the first element step searches all descendants
                        descendantOrSelf = 0
                        stepChildList = self.getElementsByTagNameNS(defaultNamespace, localStep)
                    else:
                        for childNode in childList:
                            stepChildList.extend (childNode.getChildrenNS(defaultNamespace, localStep))
                    childList = stepChildList

            if isAttributeList:
                selectedAttributeList.extend (childList)
            else:
                selectedNodeList.extend (childList)
        return selectedNodeList, selectedAttributeList