diff -r aaa80b48bead -r dcfe7c07ba1c minixsv/xmlifbase.py --- a/minixsv/xmlifbase.py Tue Jan 22 10:57:41 2008 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,465 +0,0 @@ -# -# minixsv, Release 0.3 -# file: xmlifbase.py -# -# abstract XML interface class -# -# history: -# 2004-09-09 rl created -# 2004-09-22 rl XML interface classes completely re-designed -# 2004-09-23 rl added filename and line number support -# 2004-09-29 rl URL processing added -# 2004-10-01 rl URL processing improved -# 2004-10-03 rl xPath processing moved from pyxsval.py -# 2004-10-12 rl XML text processing added -# -# Copyright (c) 2004 by Roland Leuthe. All rights reserved. -# -# -------------------------------------------------------------------- -# The minixsv XML schema validator is -# -# Copyright (c) 2004 by Roland Leuthe -# -# By obtaining, using, and/or copying this software and/or its -# associated documentation, you agree that you have read, understood, -# and will comply with the following terms and conditions: -# -# Permission to use, copy, modify, and distribute this software and -# its associated documentation for any purpose and without fee is -# hereby granted, provided that the above copyright notice appears in -# all copies, and that both that copyright notice and this permission -# notice appear in supporting documentation, and that the name of -# the author not be used in advertising or publicity -# pertaining to distribution of the software without specific, written -# prior permission. -# -# THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD -# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- -# ABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR -# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY -# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, -# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS -# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE -# OF THIS SOFTWARE. 
# --------------------------------------------------------------------


import os
import re
import string  # no longer used below; kept so the module namespace is unchanged
import urllib

try:
    # Python 2 module layout
    import urlparse
    from urllib import pathname2url as _pathname2url
except ImportError:
    # Python 3 module layout
    import urllib.parse as urlparse
    from urllib.request import pathname2url as _pathname2url


# "file:///d:\test.xml" => "file" + "///d:\test.xml"
_reSplitUrlApplication = re.compile(r"(file|http|ftp|gopher):(.+)")


########################################
# define XML interface base class
# All not implemented methods have to be overloaded by the derived class!!
#

class XmlInterfaceBase:
    """Abstract XML interface.

    Methods that raise NotImplementedError have to be overridden by the
    derived (parser specific) class.
    """

    def __init__(self, verbose):
        """Store the 'verbose' flag (a true value enables progress output)."""
        self.verbose = verbose

    def parse(self, file, baseUrl):
        """Call the parser for 'file' and return the respective XML tree.

        'file' may be a file path or an URI.
        """
        raise NotImplementedError

    def parseString(self, text):
        """Call the parser for the XML text string 'text' and return the
        respective XML tree."""
        raise NotImplementedError

    def extParse(self, file, baseUrl):
        """Parse 'file', process all include directives and return the
        resulting XML tree wrapper."""
        treeWrapper = self.parse(file, baseUrl)
        return self._expandIncludes(treeWrapper)

    def _expandIncludes(self, treeWrapper):
        """Replace every 'include' child of the root node by the subtree of
        the file named in its 'schemaLocation' attribute.

        Returns the (modified) 'treeWrapper'.
        """
        rootNodeWrapper = treeWrapper.getRootNode()
        nsAlias = self.extractNamespaceAlias(rootNodeWrapper.getTagName())

        # Iterate over a snapshot: the loop body removes children from the
        # root node, which must not disturb the iteration if the
        # implementation hands out its live child list.
        includeNodes = list(rootNodeWrapper.getChildrenNS(nsAlias, "include"))
        for includeNodeWrapper in includeNodes:
            includeUrl = includeNodeWrapper.getAttribute("schemaLocation")
            if self.verbose:
                # single parenthesised argument: valid as Python 2 statement
                # and as Python 3 function call
                print("including %s..." % (includeUrl,))
            treeWrapper.insertSubtree(includeNodeWrapper, includeUrl,
                                      rootNodeWrapper.getAbsUrl())
            rootNodeWrapper.removeChild(includeNodeWrapper)
        return treeWrapper

    def extParseString(self, text):
        """Parse the XML text string 'text', process all include directives
        and return the resulting XML tree wrapper."""
        treeWrapper = self.parseString(text)
        return self._expandIncludes(treeWrapper)

    def extractNamespaceAlias(self, ncName):
        """Return the namespace part of 'ncName'.

        The returned alias includes its terminating '}' or ':' character
        (e.g. "xsd:" for "xsd:element"); it is "" if there is no namespace.
        """
        namespaceAlias, _ = self._splitNamespaceLocalName(ncName)
        return namespaceAlias

    def extractLocalName(self, ncName):
        """Return the local tag name of 'ncName' (without namespace part)."""
        _, localName = self._splitNamespaceLocalName(ncName)
        return localName

    def addNamespaceAlias(self, namespaceAlias, localName):
        """Prefix 'localName' with 'namespaceAlias'.

        'namespaceAlias' is prepended verbatim, i.e. it has to contain its
        terminating '}' or ':' already; "" leaves 'localName' unchanged.
        """
        if namespaceAlias != "":
            return namespaceAlias + localName
        return localName

    ###############################################################
    # PRIVATE methods
    ###############################################################

    def _convertToUrl(self, file):
        """Convert input parameter 'file' into a URL string."""
        matchObject = _reSplitUrlApplication.match(file)
        if matchObject:
            # given file is an absolute URL
            if matchObject.group(1) == 'file':
                # replace ':' by '|' in the path string
                return "file:" + matchObject.group(2).replace(':', '|')
            return file
        if not os.path.isabs(file):
            # given file is a relative URL
            return file
        # given file is not a valid URL => treated as local filename
        return "file:" + _pathname2url(file)

    def _convertToAbsUrl(self, url, baseUrl):
        """Return 'url' as absolute URL.

        A URL that already has a scheme is returned unchanged. Otherwise it
        is joined with 'baseUrl', or - if 'baseUrl' is "" - interpreted as
        file path relative to the current working directory.
        """
        application = urlparse.urlsplit(url)[0]
        if application == '':
            if baseUrl != "":
                url = urlparse.urljoin(baseUrl, url)
            else:
                url = "file:" + _pathname2url(os.path.join(os.getcwd(), url))
        return url

    def _splitNamespaceLocalName(self, ncName):
        """Split 'ncName' into (namespaceAlias, localName).

        A '}' terminated namespace ("{uri}name") takes precedence over a ':'
        terminated prefix ("prefix:name"); the terminator stays part of the
        alias. Without namespace the alias is "".
        """
        for terminator in ('}', ':'):
            namespaceEndIndex = ncName.find(terminator)
            if namespaceEndIndex != -1:
                return (ncName[:namespaceEndIndex + 1],
                        ncName[namespaceEndIndex + 1:])
        return "", ncName


########################################
# define tree wrapper base class
# All not implemented methods have to be overloaded by the derived class!!
#

class TreeWrapperBase:
    """Abstract wrapper around a parsed XML tree."""

    def __init__(self, xmlIf, tree):
        """Store the XML interface 'xmlIf' and the wrapped 'tree'."""
        self.xmlIf = xmlIf
        self.tree = tree

    def insertSubtree(self, nextSibling, file, baseUrl):
        """Include the given XML/XSD file 'file' into the XML tree before
        'nextSibling' ('nextSibling' is not removed!).

        Returns the extended XML tree (containing included XML/XSD file).

        Note: Root tag of the tree and of 'file' must match!
        """
        raise NotImplementedError

    def getRootNode(self):
        """Return the root node of the wrapped XML tree."""
        raise NotImplementedError

    def getTree(self):
        """Return the wrapped tree object."""
        return self.tree


########################################
# define node wrapper base class
# All not implemented methods have to be overloaded by the derived class!!
#

class ElementWrapperBase:
    """Abstract wrapper around a single XML element node."""

    def __init__(self, xmlIf, treeWrapper, element):
        """Store the XML interface, the owning tree wrapper and the wrapped
        'element'."""
        self.xmlIf = xmlIf
        self.treeWrapper = treeWrapper
        self.element = element

    def getTagName(self):
        """Return the tag name of this node."""
        raise NotImplementedError

    def getChildren(self, filterTag=None):
        """Return the child element nodes (list) of this node.

        'filterTag' is optional, 'filterTag' = '*' must be supported.
        """
        raise NotImplementedError

    def getElementsByTagName(self, filterTag=None):
        """Return all descendants of this node whose tag matches 'filterTag'.

        'filterTag' is optional, 'filterTag' = '*' must be supported.
        """
        raise NotImplementedError

    def removeChild(self, childNodeWrapper):
        """Remove the given child node from the children of this node."""
        raise NotImplementedError

    def getAttributeDict(self):
        """Return a dictionary with all attributes of this node."""
        raise NotImplementedError

    def getAttribute(self, attributeName):
        """Return the value of attribute 'attributeName' or None if there is
        no suitable attribute."""
        raise NotImplementedError

    def hasAttribute(self, attributeName):
        """Return 1 if attribute 'attributeName' exists, 0 if not."""
        raise NotImplementedError

    def setAttribute(self, attributeName, attributeValue):
        """Set attribute 'attributeName' to 'attributeValue'; the attribute
        is created if it does not yet exist."""
        raise NotImplementedError

    def getElementValue(self):
        """Return the element value of this node."""
        raise NotImplementedError

    def setElementValue(self, value):
        """Set the element value of this node to 'value'."""
        raise NotImplementedError

    def getLocalName(self):
        """Return the local tag name of this node (without namespace)."""
        return self.xmlIf.extractLocalName(self.getTagName())

    def getNamespaceURI(self):
        """Return the namespace part of this node's tag name, as produced by
        the XML interface's extractNamespaceAlias()."""
        # The helper lives on the XML interface object; calling it as a bare
        # name raised NameError.
        return self.xmlIf.extractNamespaceAlias(self.getTagName())

    def getChildrenNS(self, namespaceAlias, filterTag=None):
        """Return the child element nodes (list) of this node.

        'filterTag' (local tag name) is optional, 'filterTag' = '*' must be
        supported; 'namespaceAlias' has to contain the corresponding
        namespace.
        """
        if filterTag not in (None, '*'):
            filterTag = self.xmlIf.addNamespaceAlias(namespaceAlias, filterTag)
        return self.getChildren(filterTag)

    def getFirstChild(self, filterTag=None):
        """Return the first child element of this node or None if there is
        no suitable child element.

        'filterTag' is optional, 'filterTag' = '*' must be supported.
        """
        children = self.getChildren(filterTag)
        # truthiness instead of "!= []" so any empty sequence yields None
        if children:
            return children[0]
        return None

    def getFirstChildNS(self, namespaceAlias, filterTag=None):
        """Return the first child element of this node or None if there is
        no suitable child element.

        'filterTag' (local tag name) is optional, 'filterTag' = '*' must be
        supported; 'namespaceAlias' has to contain the corresponding
        namespace.
        """
        if filterTag not in (None, '*'):
            filterTag = self.xmlIf.addNamespaceAlias(namespaceAlias, filterTag)
        return self.getFirstChild(filterTag)

    def getElementsByTagNameNS(self, namespaceAlias, filterTag=None):
        """Return all descendants of this node whose tag matches the local
        name 'filterTag' of the given namespace.

        'filterTag' (local tag name) is optional, 'filterTag' = '*' must be
        supported; 'namespaceAlias' has to contain the corresponding
        namespace.
        """
        if filterTag not in (None, '*'):
            filterTag = self.xmlIf.addNamespaceAlias(namespaceAlias, filterTag)
        return self.getElementsByTagName(filterTag)

    def getAttributeOrDefault(self, attributeName, default):
        """Return the attribute value if the attribute exists, otherwise
        'default'."""
        if self.hasAttribute(attributeName):
            return self.getAttribute(attributeName)
        return default

    def getStartLineNumber(self):
        """Return the start line number of the element node in the XML file
        (read from the wrapped element's 'startLineNumber' attribute)."""
        return self.element.startLineNumber

    def getEndLineNumber(self):
        """Return the end line number of the element node in the XML file
        (read from the wrapped element's 'endLineNumber' attribute)."""
        return self.element.endLineNumber

    def getUrl(self):
        """Return the URL of the XML file the node belongs to."""
        return self.element.url

    def getAbsUrl(self):
        """Return the wrapped element's 'absUrl' attribute."""
        return self.element.absUrl

    def getFilePath(self):
        """Return the file path of the XML file the node belongs to."""
        return self.element.filePath

    def getXPathList(self, xPath, defaultNamespace):
        """Retrieve the node list and attribute list for the given XPath.

        'xPath' may contain several alternatives separated by '|'. Each
        alternative is a '/' separated list of steps, optionally preceded by
        './/' (first step is then searched among all descendants). A step is
        '.', an element name, '@name' or '@*'. Namespace prefixes inside the
        steps are stripped; elements are looked up in 'defaultNamespace'.

        Returns the tuple (selectedNodeList, selectedAttributeList).
        Raises IOError for an empty step.
        """
        selectedNodeList = []
        selectedAttributeList = []
        for xRelPath in xPath.split("|"):
            descendantOrSelf = xRelPath[:3] == ".//"
            if descendantOrSelf:
                xRelPath = xRelPath[3:]
            # no namespaces supported!
            xPathLocalStepList = [self.xmlIf.extractLocalName(step)
                                  for step in xRelPath.split("/")]
            childList = [self]
            isAttributeList = False
            for localStep in xPathLocalStepList:
                if localStep == "":
                    raise IOError("Invalid xPath '%s'!" % (xRelPath,))
                if localStep == ".":
                    continue

                stepChildList = []
                if localStep[0] == '@':
                    attrName = localStep[1:]
                    for childNode in childList:
                        if attrName == '*':
                            stepChildList.extend(
                                childNode.getAttributeDict().values())
                        elif childNode.hasAttribute(attrName):
                            stepChildList.append(
                                childNode.getAttribute(attrName))
                    isAttributeList = True
                elif descendantOrSelf:
                    # './/' only applies to the first element step
                    descendantOrSelf = False
                    stepChildList = self.getElementsByTagNameNS(
                        defaultNamespace, localStep)
                else:
                    for childNode in childList:
                        stepChildList.extend(
                            childNode.getChildrenNS(defaultNamespace, localStep))
                childList = stepChildList

            if isAttributeList:
                selectedAttributeList.extend(childList)
            else:
                selectedNodeList.extend(childList)
        return selectedNodeList, selectedAttributeList