# minixsv/xmlifbase.py
# author lbessard
# Tue, 10 Jul 2007 14:29:31 +0200
# changeset 31 d833bf7567b1
# parent 0 b622defdfd98
# permissions -rw-r--r--
# Bug on variable location fixed
#
# minixsv, Release 0.3
# file: xmlifbase.py
#
# abstract XML interface class
#
# history:
# 2004-09-09 rl   created
# 2004-09-22 rl   XML interface classes completely re-designed
# 2004-09-23 rl   added filename and line number support
# 2004-09-29 rl   URL processing added
# 2004-10-01 rl   URL processing improved
# 2004-10-03 rl   xPath processing moved from pyxsval.py
# 2004-10-12 rl   XML text processing added
#
# Copyright (c) 2004 by Roland Leuthe.  All rights reserved.
#
# --------------------------------------------------------------------
# The minixsv XML schema validator is
#
# Copyright (c) 2004 by Roland Leuthe
#
# By obtaining, using, and/or copying this software and/or its
# associated documentation, you agree that you have read, understood,
# and will comply with the following terms and conditions:
#
# Permission to use, copy, modify, and distribute this software and
# its associated documentation for any purpose and without fee is
# hereby granted, provided that the above copyright notice appears in
# all copies, and that both that copyright notice and this permission
# notice appear in supporting documentation, and that the name of
# the author not be used in advertising or publicity
# pertaining to distribution of the software without specific, written
# prior permission.
#
# THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
# ABILITY AND FITNESS.  IN NO EVENT SHALL THE AUTHOR
# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
# OF THIS SOFTWARE.
# --------------------------------------------------------------------


import string
import os
import re
import urllib
import urlparse


# Recognises an absolute URL of one of the applications (schemes) file, http,
# ftp or gopher.  group(1) is the application name, group(2) is everything
# that follows the first "application:" prefix (at least one character).
_reSplitUrlApplication = re.compile (r"(file|http|ftp|gopher):(.+)") # "file:///d:\test.xml" => "file" + "///d:\test.xml"


########################################
# define XML interface base class
# All methods that are not implemented here must be overridden by the derived class!
#

class XmlInterfaceBase:

    def __init__(self, verbose):
        # a true 'verbose' makes _expandIncludes report every included schema
        self.verbose = verbose

    ##########################################################
    #  calls the parser for 'file'
    #  returns the respective XML tree for the parsed XML file
    #  'file' may be a file path or an URI
    #  (abstract: must be implemented by the derived class)

    def parse(self, file, baseUrl):
        raise NotImplementedError


    ##########################################################
    #  calls the parser for 'text'
    #  returns the respective XML tree for the parsed XML text string
    #  (abstract: must be implemented by the derived class)

    def parseString(self, text):
        raise NotImplementedError


    ##########################################################
    #  calls the parser for 'file' and processes all include directives
    #  returns the respective XML tree wrapper for the parsed XML file

    def extParse(self, file, baseUrl):
        treeWrapper = self.parse(file, baseUrl)
        return self._expandIncludes(treeWrapper)

    ##########################################################
    #  replaces every "include" child of the root node by the document its
    #  "schemaLocation" attribute refers to
    #  returns the (modified) 'treeWrapper' itself

    def _expandIncludes(self, treeWrapper):
        rootNodeWrapper = treeWrapper.getRootNode()
        # include directives are searched with the same namespace alias as the root tag
        nsAlias = self.extractNamespaceAlias(rootNodeWrapper.getTagName())

        # NOTE(review): children are removed while the returned list is iterated;
        # this is only safe if getChildrenNS() hands out a fresh list - confirm in the derived classes
        for includeNodeWrapper in rootNodeWrapper.getChildrenNS(nsAlias, "include"):
            includeUrl = includeNodeWrapper.getAttribute("schemaLocation")
            if self.verbose:
                # single parenthesised argument: identical output on Python 2 and 3
                print("including %s..." % (includeUrl,))
            # the subtree is inserted in front of the include node, then the include node is dropped
            treeWrapper.insertSubtree(includeNodeWrapper, includeUrl, rootNodeWrapper.getAbsUrl())
            rootNodeWrapper.removeChild(includeNodeWrapper)
        return treeWrapper

    ##########################################################
    #  calls the parser for 'text' and processes all include directives
    #  returns the respective XML tree wrapper for the parsed XML text string

    def extParseString(self, text):
        treeWrapper = self.parseString(text)
        return self._expandIncludes(treeWrapper)


    ##########################################################
    #  returns namespace part of given 'ncName'
    #  (including its trailing '}' or ':', or "" if there is none)

    def extractNamespaceAlias(self, ncName):
        return self._splitNamespaceLocalName(ncName)[0]


    ##########################################################
    #  returns local tag name of given 'ncName'

    def extractLocalName(self, ncName):
        return self._splitNamespaceLocalName(ncName)[1]


    ##########################################################
    #  add namespace alias to localName
    #  'namespaceAlias' is expected to carry its own separator ('}' or ':')

    def addNamespaceAlias(self, namespaceAlias, localName):
        if namespaceAlias != "":
            return namespaceAlias + localName
        return localName


    ###############################################################
    # PRIVATE methods
    ###############################################################

    ##########################################################
    #  convert input parameter 'file' into a valid URL

    def _convertToUrl(self, file):
        matchObject = _reSplitUrlApplication.match(file)
        if matchObject:
            # given file is an absolute URL
            if matchObject.group(1) != 'file':
                return file
            # replace ':' by '|' in the path string
            return "file:" + matchObject.group(2).replace(':', '|')

        if not os.path.isabs(file):
            # given file is a relative URL => returned unchanged
            return file

        # given file is not a valid URL => treated as local filename
        return "file:" + urllib.pathname2url(file)

    ##########################################################
    #  convert 'url' into an absolute URL
    #  an 'url' that already has an application (scheme) is returned unchanged;
    #  otherwise it is joined with 'baseUrl' or, if 'baseUrl' is empty,
    #  resolved against the current working directory

    def _convertToAbsUrl(self, url, baseUrl):
        application = urlparse.urlsplit(url)[0]
        if application != '':
            return url
        if baseUrl != "":
            return urlparse.urljoin(baseUrl, url)
        return "file:" + urllib.pathname2url(os.path.join(os.getcwd(), url))

    ##########################################################
    #  split 'ncName' into namespace and local name
    #  returns tuple (namespaceAlias, localName); the alias keeps its
    #  terminating '}' or ':' and is "" if 'ncName' has no namespace part

    def _splitNamespaceLocalName(self, ncName):
        # '}' is checked first: with a '}' present the first ':' is never used as separator
        for separator in ('}', ':'):
            namespaceEndIndex = ncName.find(separator)
            if namespaceEndIndex != -1:
                return ncName[:namespaceEndIndex+1], ncName[namespaceEndIndex+1:]
        return "", ncName



########################################
# define tree wrapper base class
# All methods that are not implemented here must be overridden by the derived class!
#

class TreeWrapperBase:

    def __init__(self, xmlIf, tree):
        # remember the XML interface together with the wrapped tree object
        self.xmlIf, self.tree = xmlIf, tree

    ##########################################################
    #  abstract: include the XML/XSD file 'file' into the wrapped XML tree
    #  in front of 'nextSibling' ('nextSibling' itself is not removed!)
    #  expected to return the extended XML tree (containing the included file)
    #
    #  Note: Root tags of the wrapped tree and of 'file' must match!

    def insertSubtree (self, nextSibling, file, baseUrl):
        raise NotImplementedError()


    ##########################################################
    #  abstract: expected to return the root node of the wrapped XML tree

    def getRootNode (self):
        raise NotImplementedError()


    ##########################################################
    #  returns the tree object handed to the constructor

    def getTree (self):
        wrappedTree = self.tree
        return wrappedTree

########################################
# define node wrapper base class
# All methods that are not implemented here must be overridden by the derived class!
#

class ElementWrapperBase:

    def __init__(self, xmlIf, treeWrapper, element):
        self.xmlIf           = xmlIf        # XML interface used for namespace handling
        self.treeWrapper     = treeWrapper  # tree wrapper this node belongs to
        self.element         = element      # wrapped element object

    ##########################################################
    #  returns tag name of this node
    #  (abstract: must be implemented by the derived class)

    def getTagName(self):
        raise NotImplementedError


    ##########################################################
    #  returns child element nodes (list) of this node
    #  'filterTag' is optional, 'filterTag' = '*' must be supported
    #  (abstract: must be implemented by the derived class)

    def getChildren(self, filterTag=None):
        raise NotImplementedError


    ##########################################################
    #  returns all descendants of this node whose tag matches 'filterTag'
    #  'filterTag' is optional, 'filterTag' = '*' must be supported
    #  (abstract: must be implemented by the derived class)

    def getElementsByTagName(self, filterTag=None):
        raise NotImplementedError


    ##########################################################
    #  remove given child node from children of current node
    #  (abstract: must be implemented by the derived class)

    def removeChild(self, childNodeWrapper):
        raise NotImplementedError


    ##########################################################
    #  returns dictionary with all attributes of this node
    #  (abstract: must be implemented by the derived class)

    def getAttributeDict(self):
        raise NotImplementedError


    ##########################################################
    #  returns attribute value of given attributeName
    #  or None if there is no suitable attribute
    #  (abstract: must be implemented by the derived class)

    def getAttribute(self, attributeName):
        raise NotImplementedError

    ##########################################################
    #  returns 1 if attribute 'attributeName' exists
    #          0 if not
    #  (abstract: must be implemented by the derived class)

    def hasAttribute(self, attributeName):
        raise NotImplementedError


    ##########################################################
    #  sets value of attribute 'attributeName' to 'attributeValue'
    #  if the attribute does not yet exist, it will be created
    #  (abstract: must be implemented by the derived class)

    def setAttribute(self, attributeName, attributeValue):
        raise NotImplementedError


    ##########################################################
    #  returns element value of this node
    #  (abstract: must be implemented by the derived class)

    def getElementValue(self):
        raise NotImplementedError


    ##########################################################
    #  sets element value of this node to 'value'
    #  (abstract: must be implemented by the derived class)

    def setElementValue(self, value):
        raise NotImplementedError


    ##########################################################
    #  returns local tag name of this node (without namespace)

    def getLocalName(self):
        return self.xmlIf.extractLocalName(self.getTagName())


    ##########################################################
    #  returns namespace part of the tag name of this node
    #  (whatever xmlIf.extractNamespaceAlias() yields for the tag name)

    def getNamespaceURI(self):
        # the helper lives on the XML interface object; calling it as a bare
        # function raised a NameError
        return self.xmlIf.extractNamespaceAlias(self.getTagName())


    ##########################################################
    #  returns child element nodes (list) of this node
    #  'filterTag' (localTagName) is optional, 'filterTag' = '*' must be supported
    #  'namespaceAlias' has to contain corresponding namespace

    def getChildrenNS(self, namespaceAlias, filterTag=None):
        return self.getChildren(self._qualifyFilterTag(namespaceAlias, filterTag))


    ##########################################################
    #  returns first child element of this node
    #  or None if there is no suitable child element
    #  'filterTag' is optional, 'filterTag' = '*' must be supported

    def getFirstChild(self, filterTag=None):
        children = self.getChildren(filterTag)
        if children:
            return children[0]
        return None


    ##########################################################
    #  returns first child element of this node
    #  or None if there is no suitable child element
    #  'filterTag' (localTagName) is optional, 'filterTag' = '*' must be supported
    #  'namespaceAlias' has to contain corresponding namespace

    def getFirstChildNS(self, namespaceAlias, filterTag=None):
        return self.getFirstChild(self._qualifyFilterTag(namespaceAlias, filterTag))


    ##########################################################
    #  returns all descendants of node whose tag match 'localName' of the given namespace
    #  'filterTag' (localTagName) is optional, 'filterTag' = '*' must be supported
    #  'namespaceAlias' has to contain corresponding namespace

    def getElementsByTagNameNS(self, namespaceAlias, filterTag=None):
        return self.getElementsByTagName(self._qualifyFilterTag(namespaceAlias, filterTag))


    ##########################################################
    #  returns attribute if it exists or default value if not

    def getAttributeOrDefault(self, attributeName, default):
        if self.hasAttribute(attributeName):
            return self.getAttribute(attributeName)
        return default

    ##########################################################
    #  returns the current start line number of the element node in the XML file

    def getStartLineNumber(self):
        return self.element.startLineNumber


    ##########################################################
    #  returns the current end line number of the element node in the XML file

    def getEndLineNumber(self):
        return self.element.endLineNumber


    ##########################################################
    #  returns the URL of the XML file the node belongs to

    def getUrl(self):
        return self.element.url

    ##########################################################
    #  returns the 'absUrl' attribute stored on the wrapped element

    def getAbsUrl(self):
        return self.element.absUrl


    ##########################################################
    #  returns the file path of the XML file the node belongs to

    def getFilePath(self):
        return self.element.filePath


    ########################################
    # retrieve node list or attribute list for specified XPath
    #
    # 'xPath' may consist of several relative paths separated by '|';
    # each path is a '/' separated list of steps: '.', a tag name,
    # '@attributeName' or '@*'; a leading './/' makes the first tag step
    # search all descendants instead of the direct children
    # 'defaultNamespace' is the namespace alias used for all tag steps
    #
    # returns tuple (selectedNodeList, selectedAttributeList)
    # raises IOError if a path contains an empty step

    def getXPathList(self, xPath, defaultNamespace):
        selectedNodeList = []
        selectedAttributeList = []
        for xRelPath in xPath.split("|"):
            descendantOrSelf = xRelPath[:3] == ".//"
            if descendantOrSelf:
                xRelPath = xRelPath[3:]
            # no namespaces supported! => every step is reduced to its local name
            xPathLocalStepList = [self.xmlIf.extractLocalName(step) for step in xRelPath.split("/")]
            childList = [self]
            isAttributeList = False
            for localStep in xPathLocalStepList:
                if localStep == "":
                    raise IOError("Invalid xPath '%s'!" % (xRelPath,))
                if localStep == ".":
                    continue

                stepChildList = []
                if localStep[0] == '@':
                    # attribute step: collect attribute values of all current nodes
                    attrName = localStep[1:]
                    for childNode in childList:
                        if attrName == '*':
                            stepChildList.extend(childNode.getAttributeDict().values())
                        elif childNode.hasAttribute(attrName):
                            stepChildList.append(childNode.getAttribute(attrName))
                    isAttributeList = True
                elif descendantOrSelf:
                    # only the first tag step after './/' searches all descendants of this node
                    descendantOrSelf = False
                    stepChildList = self.getElementsByTagNameNS(defaultNamespace, localStep)
                else:
                    for childNode in childList:
                        stepChildList.extend(childNode.getChildrenNS(defaultNamespace, localStep))
                childList = stepChildList

            if isAttributeList:
                selectedAttributeList.extend(childList)
            else:
                selectedNodeList.extend(childList)
        return selectedNodeList, selectedAttributeList


    ###############################################################
    # PRIVATE methods
    ###############################################################

    ##########################################################
    #  prefixes 'filterTag' with 'namespaceAlias'
    #  None and '*' are passed through unchanged

    def _qualifyFilterTag(self, namespaceAlias, filterTag):
        if filterTag not in (None, '*'):
            return self.xmlIf.addNamespaceAlias(namespaceAlias, filterTag)
        return filterTag