#
# minixsv, Release 0.3
# file: xmlifbase.py
#
# abstract XML interface class
#
# history:
# 2004-09-09 rl created
# 2004-09-22 rl XML interface classes completely re-designed
# 2004-09-23 rl added filename and line number support
# 2004-09-29 rl URL processing added
# 2004-10-01 rl URL processing improved
# 2004-10-03 rl xPath processing moved from pyxsval.py
# 2004-10-12 rl XML text processing added
#
# Copyright (c) 2004 by Roland Leuthe. All rights reserved.
#
# --------------------------------------------------------------------
# The minixsv XML schema validator is
#
# Copyright (c) 2004 by Roland Leuthe
#
# By obtaining, using, and/or copying this software and/or its
# associated documentation, you agree that you have read, understood,
# and will comply with the following terms and conditions:
#
# Permission to use, copy, modify, and distribute this software and
# its associated documentation for any purpose and without fee is
# hereby granted, provided that the above copyright notice appears in
# all copies, and that both that copyright notice and this permission
# notice appear in supporting documentation, and that the name of
# the author not be used in advertising or publicity
# pertaining to distribution of the software without specific, written
# prior permission.
#
# THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
# ABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR
# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
# OF THIS SOFTWARE.
# --------------------------------------------------------------------
import string
import os
import re
import urllib
import urlparse
_reSplitUrlApplication = re.compile (r"(file|http|ftp|gopher):(.+)") # "file:///d:\test.xml" => "file" + "///d:\test.xml"
########################################
# define XML interface base class
# All methods that are not implemented here must be overridden by the derived class!
#
class XmlInterfaceBase:
    """Abstract base class of the XML interface.

    The methods 'parse' and 'parseString' raise NotImplementedError and
    have to be provided by the derived class. All other methods are
    implemented on top of them and of the tree/node wrapper objects.
    """

    def __init__(self, verbose):
        """Store the 'verbose' flag (a true value enables progress output)."""
        self.verbose = verbose

    ##########################################################
    # parser entry points (to be provided by the derived class)

    def parse(self, file, baseUrl):
        """Call the parser for 'file' and return the respective XML tree.

        'file' may be a file path or a URI.
        Raises NotImplementedError unless overridden.
        """
        raise NotImplementedError

    def parseString(self, text):
        """Call the parser for the XML text string 'text' and return the
        respective XML tree.

        Raises NotImplementedError unless overridden.
        """
        raise NotImplementedError

    ##########################################################
    # parsing including the processing of include directives

    def extParse(self, file, baseUrl):
        """Parse 'file' and process all include directives.

        Returns the XML tree wrapper for the parsed XML file.
        """
        treeWrapper = self.parse(file, baseUrl)
        return self._expandIncludes(treeWrapper)

    def extParseString(self, text):
        """Parse the XML text string 'text' and process all include directives.

        Returns the XML tree wrapper for the parsed XML text.
        """
        treeWrapper = self.parseString(text)
        return self._expandIncludes(treeWrapper)

    def _expandIncludes(self, treeWrapper):
        """Replace each 'include' child of the root node by the referenced file.

        For every 'include' child (in the namespace of the root tag) the
        value of its 'schemaLocation' attribute is passed to
        treeWrapper.insertSubtree() together with the absolute URL of the
        root node; afterwards the include node is removed from the root.
        Returns the given 'treeWrapper'.
        """
        rootNodeWrapper = treeWrapper.getRootNode()
        nsAlias = self.extractNamespaceAlias(rootNodeWrapper.getTagName())
        # Iterate over a snapshot: the loop body removes children from the
        # root node, and an implementation may hand out its live child list,
        # in which case removing while iterating would skip include nodes.
        includeNodeWrappers = list(rootNodeWrapper.getChildrenNS(nsAlias, "include"))
        for includeNodeWrapper in includeNodeWrappers:
            includeUrl = includeNodeWrapper.getAttribute("schemaLocation")
            if self.verbose:
                # single parenthesized argument: same output on Python 2 and 3
                print("including %s..." % (includeUrl,))
            treeWrapper.insertSubtree(includeNodeWrapper, includeUrl,
                                      rootNodeWrapper.getAbsUrl())
            rootNodeWrapper.removeChild(includeNodeWrapper)
        return treeWrapper

    ##########################################################
    # namespace helpers

    def extractNamespaceAlias(self, ncName):
        """Return the namespace part of 'ncName' ("" if there is none).

        The returned part includes its terminating '}' or ':'.
        """
        return self._splitNamespaceLocalName(ncName)[0]

    def extractLocalName(self, ncName):
        """Return the local tag name of 'ncName' (without namespace part)."""
        return self._splitNamespaceLocalName(ncName)[1]

    def addNamespaceAlias(self, namespaceAlias, localName):
        """Return 'localName' prefixed by 'namespaceAlias'.

        'namespaceAlias' is expected in the form returned by
        extractNamespaceAlias(), i.e. already terminated by '}' or ':'.
        An empty (or None) alias returns 'localName' unchanged.
        """
        if namespaceAlias:
            return namespaceAlias + localName
        return localName

    ###############################################################
    # PRIVATE methods
    ###############################################################

    def _convertToUrl(self, file):
        """Convert the input parameter 'file' into a URL.

        - absolute URL with application 'file': every ':' in the part after
          'file:' is replaced by '|'
        - absolute URL with another recognized application: returned unchanged
        - relative path: returned unchanged (treated as relative URL)
        - absolute local path: converted by urllib.pathname2url and
          prefixed with 'file:'
        """
        matchObject = _reSplitUrlApplication.match(file)
        if matchObject:
            # given file is an absolute URL
            if matchObject.group(1) == 'file':
                # replace ':' by '|' in the path string
                url = "file:" + matchObject.group(2).replace(':', '|')
            else:
                url = file
        elif not os.path.isabs(file):
            # given file is a relative URL
            url = file
        else:
            # given file is not a valid URL => treated as local filename
            url = "file:" + urllib.pathname2url(file)
        return url

    def _convertToAbsUrl(self, url, baseUrl):
        """Return 'url' as absolute URL.

        A 'url' that already has an application (scheme) part is returned
        unchanged. Otherwise it is joined with 'baseUrl' or, if 'baseUrl'
        is empty, resolved against the current working directory as a
        'file:' URL.
        """
        application = urlparse.urlsplit(url)[0]
        if application == '':
            if baseUrl != "":
                url = urlparse.urljoin(baseUrl, url)
            else:
                url = "file:" + urllib.pathname2url(os.path.join(os.getcwd(), url))
        return url

    def _splitNamespaceLocalName(self, ncName):
        """Split 'ncName' into namespace part and local name.

        The name is split after the first '}' if there is one, otherwise
        after the first ':'. The separator stays at the end of the namespace
        part. Without any separator the namespace part is "".
        Returns the tuple (namespaceAlias, localName).
        """
        # '}' has priority over ':' because a name in '{uri}local' form
        # may itself contain colons inside the braces.
        for separator in ('}', ':'):
            namespaceEndIndex = ncName.find(separator)
            if namespaceEndIndex != -1:
                return ncName[:namespaceEndIndex + 1], ncName[namespaceEndIndex + 1:]
        return "", ncName
########################################
# define tree wrapper base class
# All methods that are not implemented here must be overridden by the derived class!
#
class TreeWrapperBase:
    """Abstract wrapper around a parsed XML tree.

    Holds the XML interface object and the wrapped tree. The methods
    'insertSubtree' and 'getRootNode' raise NotImplementedError and have
    to be provided by the derived class.
    """

    def __init__(self, xmlIf, tree):
        """Remember the XML interface 'xmlIf' and the wrapped 'tree'."""
        self.xmlIf, self.tree = xmlIf, tree

    def getTree(self):
        """Return the wrapped 'tree' object."""
        return self.tree

    def getRootNode(self):
        """Return the root node of the wrapped XML tree.

        Raises NotImplementedError unless overridden.
        """
        raise NotImplementedError()

    def insertSubtree(self, nextSibling, file, baseUrl):
        """Include the XML/XSD file 'file' into the wrapped XML tree
        before 'nextSibling' ('nextSibling' itself is not removed).

        Returns the extended XML tree (containing the included file).
        Note: the root tags of the wrapped tree and of 'file' must match!
        Raises NotImplementedError unless overridden.
        """
        raise NotImplementedError()
########################################
# define node wrapper base class
# All methods that are not implemented here must be overridden by the derived class!
#
class ElementWrapperBase:
    """Abstract wrapper around a single XML element node.

    Holds the XML interface object, the owning tree wrapper and the wrapped
    element. Methods raising NotImplementedError have to be provided by the
    derived class; the remaining methods are implemented on top of them.
    """

    def __init__(self, xmlIf, treeWrapper, element):
        """Remember the XML interface, the tree wrapper and the wrapped element."""
        self.xmlIf = xmlIf
        self.treeWrapper = treeWrapper
        self.element = element

    ##########################################################
    # methods to be provided by the derived class

    def getTagName(self):
        """Return the tag name of this node."""
        raise NotImplementedError

    def getChildren(self, filterTag=None):
        """Return the child element nodes (list) of this node.

        'filterTag' is optional, 'filterTag' = '*' must be supported.
        """
        raise NotImplementedError

    def getElementsByTagName(self, filterTag=None):
        """Return all descendants of this node whose tag matches 'filterTag'.

        'filterTag' is optional, 'filterTag' = '*' must be supported.
        """
        raise NotImplementedError

    def removeChild(self, childNodeWrapper):
        """Remove the given child node from the children of this node."""
        raise NotImplementedError

    def getAttributeDict(self):
        """Return a dictionary with all attributes of this node."""
        raise NotImplementedError

    def getAttribute(self, attributeName):
        """Return the value of attribute 'attributeName'
        or None if there is no suitable attribute.
        """
        raise NotImplementedError

    def hasAttribute(self, attributeName):
        """Return 1 if attribute 'attributeName' exists, 0 if not."""
        raise NotImplementedError

    def setAttribute(self, attributeName, attributeValue):
        """Set the value of attribute 'attributeName' to 'attributeValue'.

        If the attribute does not yet exist, it will be created.
        """
        raise NotImplementedError

    def getElementValue(self):
        """Return the element value of this node."""
        raise NotImplementedError

    def setElementValue(self, value):
        """Set the element value of this node to 'value'."""
        raise NotImplementedError

    ##########################################################
    # tag name helpers

    def getLocalName(self):
        """Return the local tag name of this node (without namespace)."""
        return self.xmlIf.extractLocalName(self.getTagName())

    def getNamespaceURI(self):
        """Return the namespace part of the tag name of this node."""
        # extractNamespaceAlias is a method of the XML interface object,
        # not a module-level function, so it is called through self.xmlIf.
        return self.xmlIf.extractNamespaceAlias(self.getTagName())

    def _qualifyFilterTag(self, namespaceAlias, filterTag):
        """Return 'filterTag' prefixed with 'namespaceAlias'.

        The wildcard values None and '*' are passed through unchanged.
        """
        if filterTag in (None, '*'):
            return filterTag
        return self.xmlIf.addNamespaceAlias(namespaceAlias, filterTag)

    ##########################################################
    # child / descendant access

    def getChildrenNS(self, namespaceAlias, filterTag=None):
        """Return the child element nodes (list) of this node.

        'filterTag' (local tag name) is optional, '*' must be supported.
        'namespaceAlias' has to contain the corresponding namespace.
        """
        return self.getChildren(self._qualifyFilterTag(namespaceAlias, filterTag))

    def getFirstChild(self, filterTag=None):
        """Return the first child element of this node
        or None if there is no suitable child element.

        'filterTag' is optional, 'filterTag' = '*' must be supported.
        """
        children = self.getChildren(filterTag)
        if children:
            return children[0]
        return None

    def getFirstChildNS(self, namespaceAlias, filterTag=None):
        """Return the first child element of this node
        or None if there is no suitable child element.

        'filterTag' (local tag name) is optional, '*' must be supported.
        'namespaceAlias' has to contain the corresponding namespace.
        """
        return self.getFirstChild(self._qualifyFilterTag(namespaceAlias, filterTag))

    def getElementsByTagNameNS(self, namespaceAlias, filterTag=None):
        """Return all descendants of this node whose tag matches the local
        name 'filterTag' of the given namespace.

        'filterTag' (local tag name) is optional, '*' must be supported.
        'namespaceAlias' has to contain the corresponding namespace.
        """
        return self.getElementsByTagName(self._qualifyFilterTag(namespaceAlias, filterTag))

    def getAttributeOrDefault(self, attributeName, default):
        """Return the attribute value if the attribute exists, else 'default'."""
        if self.hasAttribute(attributeName):
            return self.getAttribute(attributeName)
        return default

    ##########################################################
    # source position information (read from the wrapped element)

    def getStartLineNumber(self):
        """Return the 'startLineNumber' attribute of the wrapped element
        (start line number of the element node in the XML file).
        """
        return self.element.startLineNumber

    def getEndLineNumber(self):
        """Return the 'endLineNumber' attribute of the wrapped element
        (end line number of the element node in the XML file).
        """
        return self.element.endLineNumber

    def getUrl(self):
        """Return the 'url' attribute of the wrapped element
        (URL of the XML file the node belongs to).
        """
        return self.element.url

    def getAbsUrl(self):
        """Return the 'absUrl' attribute of the wrapped element."""
        return self.element.absUrl

    def getFilePath(self):
        """Return the 'filePath' attribute of the wrapped element
        (file path of the XML file the node belongs to).
        """
        return self.element.filePath

    ########################################
    # XPath processing

    def getXPathList(self, xPath, defaultNamespace):
        """Retrieve the node list or attribute list for the specified XPath.

        Only a small XPath subset is handled:
        - alternatives separated by '|'
        - steps separated by '/'
        - a leading './/' (all descendants for the first element step)
        - the step '.' (skipped)
        - attribute steps '@name' and '@*'
        Namespace prefixes inside the steps are stripped; element steps are
        always looked up in 'defaultNamespace'.

        Returns the tuple (selectedNodeList, selectedAttributeList); the
        result of each alternative goes to the attribute list if it
        contained an attribute step, otherwise to the node list.
        Raises IOError if a step of the XPath is empty.
        """
        selectedNodeList = []
        selectedAttributeList = []
        for xRelPath in xPath.split("|"):
            descendantOrSelf = xRelPath[:3] == ".//"
            if descendantOrSelf:
                xRelPath = xRelPath[3:]
            # no namespaces supported!
            xPathLocalStepList = [self.xmlIf.extractLocalName(step)
                                  for step in xRelPath.split("/")]
            childList = [self]
            isAttributeList = False
            for localStep in xPathLocalStepList:
                if localStep == "":
                    raise IOError("Invalid xPath '%s'!" % (xRelPath))
                if localStep == ".":
                    continue
                stepChildList = []
                if localStep[0] == '@':
                    # attribute step: collect attribute values of the current nodes
                    attrName = localStep[1:]
                    for childNode in childList:
                        if attrName == '*':
                            stepChildList.extend(childNode.getAttributeDict().values())
                        elif childNode.hasAttribute(attrName):
                            stepChildList.append(childNode.getAttribute(attrName))
                    isAttributeList = True
                elif descendantOrSelf:
                    # './/' only applies to the first element step
                    descendantOrSelf = False
                    stepChildList = self.getElementsByTagNameNS(defaultNamespace, localStep)
                else:
                    for childNode in childList:
                        stepChildList.extend(childNode.getChildrenNS(defaultNamespace, localStep))
                childList = stepChildList
            if isAttributeList:
                selectedAttributeList.extend(childList)
            else:
                selectedNodeList.extend(childList)
        return selectedNodeList, selectedAttributeList