minixsv/xmlifbase.py
changeset 0 b622defdfd98
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/minixsv/xmlifbase.py	Wed Jan 31 16:31:39 2007 +0100
@@ -0,0 +1,465 @@
+#
+# minixsv, Release 0.3
+# file: xmlifbase.py
+#
+# abstract XML interface class
+#
+# history:
+# 2004-09-09 rl   created
+# 2004-09-22 rl   XML interface classes completely re-designed
+# 2004-09-23 rl   added filename and line number support
+# 2004-09-29 rl   URL processing added
+# 2004-10-01 rl   URL processing improved
+# 2004-10-03 rl   xPath processing moved from pyxsval.py
+# 2004-10-12 rl   XML text processing added
+#
+# Copyright (c) 2004 by Roland Leuthe.  All rights reserved.
+#
+# --------------------------------------------------------------------
+# The minixsv XML schema validator is
+#
+# Copyright (c) 2004 by Roland Leuthe
+#
+# By obtaining, using, and/or copying this software and/or its
+# associated documentation, you agree that you have read, understood,
+# and will comply with the following terms and conditions:
+#
+# Permission to use, copy, modify, and distribute this software and
+# its associated documentation for any purpose and without fee is
+# hereby granted, provided that the above copyright notice appears in
+# all copies, and that both that copyright notice and this permission
+# notice appear in supporting documentation, and that the name of
+# the author not be used in advertising or publicity
+# pertaining to distribution of the software without specific, written
+# prior permission.
+#
+# THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
+# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
+# ABILITY AND FITNESS.  IN NO EVENT SHALL THE AUTHOR
+# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
+# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+# OF THIS SOFTWARE.
+# --------------------------------------------------------------------
+
+
+import string
+import os
+import re
+import urllib
+import urlparse
+
+
+_reSplitUrlApplication = re.compile (r"(file|http|ftp|gopher):(.+)") # splits a URL into scheme and remainder, e.g. "file:///d:\test.xml" => "file" + "///d:\test.xml"; other schemes do not match
+
+
+########################################
+# define XML interface base class
+# All methods that are not implemented here have to be overridden by the derived class!!
+#
+
+class XmlInterfaceBase:
+
+    def __init__(self, verbose):
+        # true value => _expandIncludes() reports every processed include
+        self.verbose = verbose
+
+    ##########################################################
+    #  calls the parser for 'file' ('file' may be a file path or a URI)
+    #  must return a tree wrapper: extParse() calls getRootNode() and
+    #  insertSubtree() on the result; abstract here (NotImplementedError)
+
+    def parse (self, file, baseUrl):
+        raise NotImplementedError
+
+
+    ##########################################################
+    #  calls the parser for the XML text string 'text'
+    #  must return a tree wrapper like parse(); abstract here
+
+    def parseString (self, text):
+        raise NotImplementedError
+
+
+    ##########################################################
+    #  calls the parser for 'file' and processes all include directives
+    #  returns the respective XML tree wrapper for the parsed XML file
+
+    def extParse (self, file, baseUrl):
+        treeWrapper = self.parse (file, baseUrl)
+        return self._expandIncludes (treeWrapper)
+
+    def _expandIncludes (self, treeWrapper):
+        rootNodeWrapper = treeWrapper.getRootNode()
+        nsAlias = self.extractNamespaceAlias(rootNodeWrapper.getTagName())
+        # replace each "include" child of the root (same namespace alias) by the document it refers to
+        for includeNodeWrapper in rootNodeWrapper.getChildrenNS(nsAlias, "include"):
+            includeUrl = includeNodeWrapper.getAttribute("schemaLocation")
+            if self.verbose:
+                print ("including %s..." % (includeUrl,))  # parenthesised form is valid in Python 2 and 3
+            treeWrapper.insertSubtree (includeNodeWrapper, includeUrl, rootNodeWrapper.getAbsUrl())
+            rootNodeWrapper.removeChild (includeNodeWrapper)
+        return treeWrapper
+
+    ##########################################################
+    #  calls the parser for 'text' and processes all include directives
+    #  returns the respective XML tree wrapper for the parsed XML text string
+
+    def extParseString (self, text):
+        treeWrapper = self.parseString (text)
+        return self._expandIncludes (treeWrapper)
+
+
+    ##########################################################
+    #  returns namespace part of given 'ncName' including its delimiter ('}' or ':'), "" if there is none
+
+    def extractNamespaceAlias (self, ncName):
+        namespaceAlias, localName = self._splitNamespaceLocalName (ncName)
+        return namespaceAlias
+
+
+    ##########################################################
+    #  returns local tag name of given 'ncName' (the part behind the namespace)
+
+    def extractLocalName (self, ncName):
+        namespaceAlias, localName = self._splitNamespaceLocalName (ncName)
+        return localName
+
+
+    ##########################################################
+    #  add namespace alias to localName
+
+    def addNamespaceAlias (self, namespaceAlias, localName):
+        # the alias already carries its delimiter (e.g. "xsd:"); an empty or None alias adds nothing
+        if namespaceAlias:
+            return namespaceAlias + localName
+        return localName
+
+
+    ###############################################################
+    # PRIVATE methods
+    ###############################################################
+
+    ##########################################################
+    #  convert input parameter 'file' into a valid URL
+
+    def _convertToUrl (self, file):
+        matchObject = _reSplitUrlApplication.match(file)
+        if matchObject:
+            # given file is an absolute URL
+            if matchObject.group(1) == 'file':
+                # replace ':' by '|' in the path string (e.g. "///d:\x" => "///d|\x")
+                url = "file:" + matchObject.group(2).replace(':', '|')
+            else:
+                url = file
+        elif not os.path.isabs(file):
+            # given file is a relative URL
+            url = file
+        else:
+            # given file is not a valid URL => treated as local filename
+            url = "file:" + urllib.pathname2url (file)
+
+        return url
+
+    def _convertToAbsUrl (self, url, baseUrl):
+        # no scheme => relative URL: resolve against 'baseUrl' or, if that is empty, the current directory
+        if urlparse.urlsplit(url)[0] == '':
+            if baseUrl != "":
+                url = urlparse.urljoin (baseUrl, url)
+            else:
+                url = "file:" + urllib.pathname2url (os.path.join(os.getcwd(), url))
+        return url
+
+    ##########################################################
+    #  split 'ncName' into namespace alias and local name, returned as a tuple
+
+    def _splitNamespaceLocalName (self, ncName):
+        # the "{uri}local" form is checked before "alias:local"; the alias keeps
+        # its delimiter so that alias + local name restores 'ncName'
+        namespaceEndIndex = ncName.find ('}')
+        if namespaceEndIndex != -1:
+            namespaceAlias = ncName[:namespaceEndIndex+1]
+            localName      = ncName[namespaceEndIndex+1:]
+        else:
+            namespaceEndIndex = ncName.find (':')
+            if namespaceEndIndex != -1:
+                namespaceAlias = ncName[:namespaceEndIndex+1]
+                localName      = ncName[namespaceEndIndex+1:]
+            else:
+                namespaceAlias = ""
+                localName      = ncName
+        return namespaceAlias, localName
+
+
+
+########################################
+# define tree wrapper base class
+# All methods that are not implemented here have to be overridden by the derived class!!
+#
+
+class TreeWrapperBase:
+
+    def __init__(self, xmlIf, tree):
+        self.tree = tree        # wrapped tree object, handed out by getTree()
+        self.xmlIf = xmlIf      # XML interface object passed in by the creator
+
+    ##########################################################
+    #  inserts the XML/XSD file 'file' into the wrapped tree in front of
+    #  'nextSibling' (which itself stays in the tree); 'baseUrl' comes from the caller
+    #  returns the extended XML tree (now containing the included file)
+    #
+    #  Note: the root tag of the tree and the root tag of 'file' must match!
+
+    def insertSubtree (self, nextSibling, file, baseUrl):
+        raise NotImplementedError()
+
+
+    ##########################################################
+    #  returns the root node of the wrapped XML tree (abstract here)
+
+    def getRootNode (self):
+        raise NotImplementedError()
+
+
+    ##########################################################
+    #  returns the tree object that was passed to the constructor
+
+    def getTree (self):
+        return self.tree
+
+########################################
+# define node wrapper base class
+# All methods that are not implemented here have to be overridden by the derived class!!
+#
+
+class ElementWrapperBase:
+
+    def __init__(self, xmlIf, treeWrapper, element):
+        self.xmlIf           = xmlIf
+        self.treeWrapper     = treeWrapper
+        self.element         = element
+
+    ##########################################################
+    #  returns tag name of this node (abstract here)
+
+    def getTagName (self):
+        raise NotImplementedError
+
+
+    ##########################################################
+    #  returns child element nodes (list) of this node
+    #  'filterTag' is optional, 'filterTag' = '*' must be supported
+
+    def getChildren (self, filterTag=None):
+        raise NotImplementedError
+
+
+    ##########################################################
+    #  returns all descendants of this node whose tag matches 'filterTag'
+    #  'filterTag' is optional, 'filterTag' = '*' must be supported
+
+    def getElementsByTagName (self, filterTag=None):
+        raise NotImplementedError
+
+
+    ##########################################################
+    #  remove given child node from children of current node
+
+    def removeChild (self, childNodeWrapper):
+        raise NotImplementedError
+
+
+    ##########################################################
+    #  returns dictionary with all attributes of this node
+
+    def getAttributeDict (self):
+        raise NotImplementedError
+
+
+    ##########################################################
+    #  returns attribute value of given attributeName
+    #  or None if there is no suitable attribute
+
+    def getAttribute (self, attributeName):
+        raise NotImplementedError
+
+    ##########################################################
+    #  returns 1 if attribute 'attributeName' exists
+    #          0 if not
+
+    def hasAttribute (self, attributeName):
+        raise NotImplementedError
+
+
+    ##########################################################
+    #  sets value of attribute 'attributeName' to 'attributeValue'
+    #  if the attribute does not yet exist, it will be created
+
+    def setAttribute (self, attributeName, attributeValue):
+        raise NotImplementedError
+
+
+    ##########################################################
+    #  returns element value of this node
+
+    def getElementValue (self):
+        raise NotImplementedError
+
+
+    ##########################################################
+    #  sets element value of this node to 'value'
+
+    def setElementValue (self, value):
+        raise NotImplementedError
+
+
+    ##########################################################
+    #  returns local tag name of this node (without namespace)
+
+    def getLocalName (self):
+        return self.xmlIf.extractLocalName (self.getTagName())
+
+
+    ##########################################################
+    #  returns namespace part of the tag name of this node (resolved via self.xmlIf)
+
+    def getNamespaceURI (self):
+        return self.xmlIf.extractNamespaceAlias (self.getTagName())
+
+
+    ##########################################################
+    #  returns child element nodes (list) of this node
+    #  'filterTag' (localTagName) is optional, 'filterTag' = '*' must be supported
+    #  'namespaceAlias' has to contain corresponding namespace
+
+    def getChildrenNS (self, namespaceAlias, filterTag=None):
+        if filterTag not in (None, '*'):
+            filterTag = self.xmlIf.addNamespaceAlias(namespaceAlias, filterTag)
+        return self.getChildren(filterTag)
+
+
+    ##########################################################
+    #  returns first child element of this node
+    #  or None if there is no suitable child element
+    #  'filterTag' is optional, 'filterTag' = '*' must be supported
+
+    def getFirstChild (self, filterTag=None):
+        children = self.getChildren(filterTag)
+        # any empty result (empty sequence or None) means: no suitable child
+        if children:
+            return children[0]
+        return None
+
+
+    ##########################################################
+    #  returns first child element of this node
+    #  or None if there is no suitable child element
+    #  'filterTag' (localTagName) is optional, 'filterTag' = '*' must be supported
+    #  'namespaceAlias' has to contain corresponding namespace
+
+    def getFirstChildNS (self, namespaceAlias, filterTag=None):
+        if filterTag not in (None, '*'):
+            filterTag = self.xmlIf.addNamespaceAlias(namespaceAlias, filterTag)
+        return self.getFirstChild(filterTag)
+
+
+    ##########################################################
+    #  returns all descendants of this node whose tag matches 'filterTag' in the given namespace
+    #  'filterTag' (localTagName) is optional, 'filterTag' = '*' must be supported
+    #  'namespaceAlias' has to contain corresponding namespace
+
+    def getElementsByTagNameNS (self, namespaceAlias, filterTag=None):
+        if filterTag not in (None, '*'):
+            filterTag = self.xmlIf.addNamespaceAlias(namespaceAlias, filterTag)
+        return self.getElementsByTagName (filterTag)
+
+
+    ##########################################################
+    #  returns attribute if it exists or default value if not
+
+    def getAttributeOrDefault (self, attributeName, default):
+        if self.hasAttribute (attributeName):
+            return self.getAttribute (attributeName)
+        else:
+            return default
+
+    ##########################################################
+    #  returns the current start line number of the element node in the XML file
+
+    def getStartLineNumber (self):
+        return self.element.startLineNumber
+
+
+    ##########################################################
+    #  returns the current end line number of the element node in the XML file
+
+    def getEndLineNumber (self):
+        return self.element.endLineNumber
+
+
+    ##########################################################
+    #  returns the URL of the XML file the node belongs to
+
+    def getUrl (self):
+        return self.element.url
+
+    def getAbsUrl (self):
+        return self.element.absUrl
+
+
+    ##########################################################
+    #  returns the file path of the XML file the node belongs to
+
+    def getFilePath (self):
+        return self.element.filePath
+
+
+    ########################################
+    # retrieve node list and attribute list for specified XPath ('|' unions, '/' steps, leading './/', '.', '@attr', '@*')
+
+    def getXPathList (self, xPath, defaultNamespace):
+        selectedNodeList = []
+        selectedAttributeList = []
+        xPathList = xPath.split ("|")
+        for xRelPath in xPathList:
+            descendantOrSelf = 0
+            if xRelPath[:3] == ".//":
+                descendantOrSelf = 1
+                xRelPath = xRelPath[3:]
+            # no namespaces supported! (each step is reduced to its local name)
+            xPathLocalStepList = [self.xmlIf.extractLocalName(step) for step in xRelPath.split ("/")]
+            childList = [self,]
+            isAttributeList = 0
+            for localStep in xPathLocalStepList:
+                stepChildList = []
+                if localStep == "":
+                    raise IOError ("Invalid xPath '%s'!" %(xRelPath))
+                elif localStep == ".":
+                    continue
+                elif localStep[0] == '@':
+                    for childNode in childList:
+                        attrName = localStep[1:]
+                        if attrName == '*':
+                            stepChildList.extend(childNode.getAttributeDict().values())
+                        elif childNode.hasAttribute(attrName):
+                            stepChildList.append (childNode.getAttribute(attrName))
+                    childList = stepChildList
+                    isAttributeList = 1
+                else:
+                    if descendantOrSelf:
+                        descendantOrSelf = 0
+                        stepChildList = self.getElementsByTagNameNS(defaultNamespace, localStep)
+                    else:
+                        for childNode in childList:
+                            stepChildList.extend (childNode.getChildrenNS(defaultNamespace, localStep))
+                    childList = stepChildList
+
+            if isAttributeList:
+                selectedAttributeList.extend (childList)
+            else:
+                selectedNodeList.extend (childList)
+        return selectedNodeList, selectedAttributeList
+
+
+