minixsv/xmlifbase.py
changeset 0 b622defdfd98
equal deleted inserted replaced
-1:000000000000 0:b622defdfd98
       
     1 #
       
     2 # minixsv, Release 0.3
       
     3 # file: xmlifbase.py
       
     4 #
       
     5 # abstract XML interface class
       
     6 #
       
     7 # history:
       
     8 # 2004-09-09 rl   created
       
     9 # 2004-09-22 rl   XML interface classes completely re-designed
       
    10 # 2004-09-23 rl   added filename and line number support
       
    11 # 2004-09-29 rl   URL processing added
       
    12 # 2004-10-01 rl   URL processing improved
       
    13 # 2004-10-03 rl   xPath processing moved from pyxsval.py
       
    14 # 2004-10-12 rl   XML text processing added
       
    15 #
       
    16 # Copyright (c) 2004 by Roland Leuthe.  All rights reserved.
       
    17 #
       
    18 # --------------------------------------------------------------------
       
    19 # The minixsv XML schema validator is
       
    20 #
       
    21 # Copyright (c) 2004 by Roland Leuthe
       
    22 #
       
    23 # By obtaining, using, and/or copying this software and/or its
       
    24 # associated documentation, you agree that you have read, understood,
       
    25 # and will comply with the following terms and conditions:
       
    26 #
       
    27 # Permission to use, copy, modify, and distribute this software and
       
    28 # its associated documentation for any purpose and without fee is
       
    29 # hereby granted, provided that the above copyright notice appears in
       
    30 # all copies, and that both that copyright notice and this permission
       
    31 # notice appear in supporting documentation, and that the name of
       
    32 # the author not be used in advertising or publicity
       
    33 # pertaining to distribution of the software without specific, written
       
    34 # prior permission.
       
    35 #
       
    36 # THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
       
    37 # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
       
    38 # ABILITY AND FITNESS.  IN NO EVENT SHALL THE AUTHOR
       
    39 # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
       
    40 # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
       
    41 # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
       
    42 # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
       
    43 # OF THIS SOFTWARE.
       
    44 # --------------------------------------------------------------------
       
    45 
       
    46 
       
    47 import string
       
    48 import os
       
    49 import re
       
    50 import urllib
       
    51 import urlparse
       
    52 
       
    53 
       
    54 _reSplitUrlApplication = re.compile (r"(file|http|ftp|gopher):(.+)") # "file:///d:\test.xml" => "file" + "///d:\test.xml"
       
    55 
       
    56 
       
    57 ########################################
       
    58 # define XML interface base class
       
     59 # All methods that are not implemented here must be overridden by the derived class!
       
    60 #
       
    61 
       
    62 class XmlInterfaceBase:
       
    63 
       
    64     def __init__(self, verbose):
       
    65         self.verbose = verbose
       
    66         pass
       
    67 
       
    68     ##########################################################
       
    69     #  calls the parser for 'file'
       
    70     #  returns the respective XML tree for the parsed XML file
       
    71     #  'file' may be a file path or an URI
       
    72 
       
    73     def parse (self, file, baseUrl):
       
    74         raise NotImplementedError
       
    75 
       
    76 
       
    77     ##########################################################
       
    78     #  calls the parser for 'text'
       
    79     #  returns the respective XML tree for the parsed XML text string
       
    80 
       
    81     def parseString (self, text):
       
    82         raise NotImplementedError
       
    83 
       
    84 
       
    85     ##########################################################
       
    86     #  calls the parser for 'file' and processes all include directives
       
    87     #  returns the respective XML tree wrapper for the parsed XML file
       
    88 
       
    89     def extParse (self, file, baseUrl):
       
    90         treeWrapper = self.parse (file, baseUrl)
       
    91         return self._expandIncludes (treeWrapper)
       
    92 
       
    93     def _expandIncludes (self, treeWrapper):
       
    94         rootNodeWrapper = treeWrapper.getRootNode()
       
    95         nsAlias  = self.extractNamespaceAlias(rootNodeWrapper.getTagName())
       
    96 
       
    97         for includeNodeWrapper in rootNodeWrapper.getChildrenNS(nsAlias, "include"):
       
    98             includeUrl = includeNodeWrapper.getAttribute("schemaLocation")
       
    99             if self.verbose:
       
   100                 print "including %s..." %(includeUrl)
       
   101             treeWrapper.insertSubtree (includeNodeWrapper, includeUrl, rootNodeWrapper.getAbsUrl())
       
   102             rootNodeWrapper.removeChild (includeNodeWrapper)
       
   103         return treeWrapper
       
   104 
       
   105     ##########################################################
       
   106     #  calls the parser for 'file' and processes all include directives
       
   107     #  returns the respective XML tree wrapper for the parsed XML file
       
   108 
       
   109     def extParseString (self, text):
       
   110         treeWrapper = self.parseString (text)
       
   111         return self._expandIncludes (treeWrapper)
       
   112 
       
   113 
       
   114     ##########################################################
       
   115     #  returns namespace part of given 'ncName'
       
   116 
       
   117     def extractNamespaceAlias (self, ncName):
       
   118         namespaceAlias, localName = self._splitNamespaceLocalName (ncName)
       
   119         return namespaceAlias
       
   120 
       
   121 
       
   122     ##########################################################
       
   123     #  returns local tag name of given 'ncName'
       
   124 
       
   125     def extractLocalName (self, ncName):
       
   126         namespaceAlias, localName = self._splitNamespaceLocalName (ncName)
       
   127         return localName
       
   128 
       
   129 
       
   130     ##########################################################
       
   131     #  add namespace alias to localName
       
   132 
       
   133     def addNamespaceAlias (self, namespaceAlias, localName):
       
   134         if namespaceAlias != "":
       
   135             return namespaceAlias + localName
       
   136         else:
       
   137             return localName
       
   138 
       
   139 
       
   140     ###############################################################
       
   141     # PRIVATE methods
       
   142     ###############################################################
       
   143 
       
   144     ##########################################################
       
   145     #  convert input parameter 'file' into a valid URL
       
   146 
       
   147     def _convertToUrl (self, file):
       
   148         matchObject = _reSplitUrlApplication.match(file)
       
   149         if matchObject:
       
   150             # given file is an absolute URL
       
   151             if matchObject.group(1) == 'file':
       
   152                 path = re.sub(':', '|', matchObject.group(2)) # replace ':' by '|' in the path string
       
   153                 url = "file:" + path
       
   154             else:
       
   155                 url = file
       
   156         elif not os.path.isabs(file):
       
   157             # given file is a relative URL
       
   158             url = file
       
   159         else:
       
   160             # given file is not a valid URL => treated as local filename
       
   161             url = "file:" + urllib.pathname2url (file)
       
   162 
       
   163         return url
       
   164 
       
   165     def _convertToAbsUrl (self, url, baseUrl):
       
   166         application = urlparse.urlsplit(url)[0]
       
   167         if application == '':
       
   168             if baseUrl != "":
       
   169                 url = urlparse.urljoin (baseUrl, url)
       
   170             else:
       
   171                 url = "file:" + urllib.pathname2url (os.path.join(os.getcwd(), url))
       
   172         return url
       
   173 
       
   174     ##########################################################
       
   175     #  split 'ncName' into namespace and local name
       
   176 
       
   177     def _splitNamespaceLocalName (self, ncName):
       
   178         namespaceAlias = None
       
   179 
       
   180         namespaceEndIndex = string.find (ncName, '}')
       
   181         if namespaceEndIndex != -1:
       
   182             namespaceAlias = ncName[:namespaceEndIndex+1]
       
   183             localName      = ncName[namespaceEndIndex+1:]
       
   184         else:
       
   185             namespaceEndIndex = string.find (ncName, ':')
       
   186             if namespaceEndIndex != -1:
       
   187                 namespaceAlias = ncName[:namespaceEndIndex+1]
       
   188                 localName      = ncName[namespaceEndIndex+1:]
       
   189             else:
       
   190                 namespaceAlias = ""
       
   191                 localName      = ncName
       
   192         return namespaceAlias, localName
       
   193 
       
   194 
       
   195 
       
   196 ########################################
       
   197 # define tree wrapper base class
       
    198 # All methods that are not implemented here must be overridden by the derived class!
       
   199 #
       
   200 
       
   201 class TreeWrapperBase:
       
   202 
       
   203     def __init__(self, xmlIf, tree):
       
   204         self.xmlIf        = xmlIf
       
   205         self.tree         = tree
       
   206 
       
   207     ##########################################################
       
   208     #  includes the given XML/XSD file 'file' into the XML tree 'tree'
       
   209     #  before 'nextSibling' ('nextSibling' is not removed!)
       
   210     #  returns the extended XML tree (containing included XML/XSD file)
       
   211     #
       
   212     #  Note: Root tag of 'tree' and 'file' must match!
       
   213 
       
   214     def insertSubtree (self, nextSibling, file, baseUrl):
       
   215         raise NotImplementedError
       
   216 
       
   217 
       
   218     ##########################################################
       
   219     #  returns root node of given XML tree 'tree'
       
   220 
       
   221     def getRootNode (self):
       
   222         raise NotImplementedError
       
   223 
       
   224 
       
   225     ##########################################################
       
   226     #  returns 'tree' of given XML tree
       
   227 
       
   228     def getTree (self):
       
   229         return self.tree
       
   230 
       
   231 ########################################
       
   232 # define node wrapper base class
       
    233 # All methods that are not implemented here must be overridden by the derived class!
       
   234 #
       
   235 
       
   236 class ElementWrapperBase:
       
   237 
       
   238     def __init__(self, xmlIf, treeWrapper, element):
       
   239         self.xmlIf           = xmlIf
       
   240         self.treeWrapper     = treeWrapper
       
   241         self.element         = element
       
   242 
       
   243     ##########################################################
       
   244     #  returns tag name of given XML node 'node'
       
   245 
       
   246     def getTagName (self):
       
   247         raise NotImplementedError
       
   248 
       
   249 
       
   250     ##########################################################
       
   251     #  returns child element nodes (list) of given XML node 'node'
       
   252     #  'filterTag' is optional, 'filterTag' = '*' must be supported
       
   253 
       
   254     def getChildren (self, filterTag=None):
       
   255         raise NotImplementedError
       
   256 
       
   257 
       
   258     ##########################################################
       
   259     #  returns all descendants of node whose tag match 'tagName'
       
   260     #  'filterTag' is optional, 'filterTag' = '*' must be supported
       
   261 
       
   262     def getElementsByTagName (self, filterTag=None):
       
   263         raise NotImplementedError
       
   264 
       
   265 
       
   266     ##########################################################
       
   267     #  remove given child node from children of current node
       
   268 
       
   269     def removeChild (self, childNodeWrapper):
       
   270         raise NotImplementedError
       
   271 
       
   272 
       
   273     ##########################################################
       
   274     #  returns dictionary with all attributes of this node
       
   275 
       
   276     def getAttributeDict (self):
       
   277         raise NotImplementedError
       
   278 
       
   279 
       
   280     ##########################################################
       
   281     #  returns attribute value of given attributeName
       
   282     #  or None if there is no suitable attribute
       
   283 
       
   284     def getAttribute (self, attributeName):
       
   285         raise NotImplementedError
       
   286 
       
   287     ##########################################################
       
   288     #  returns 1 if attribute 'attributeName' exists
       
   289     #          0 if not
       
   290 
       
   291     def hasAttribute (self, attributeName):
       
   292         raise NotImplementedError
       
   293 
       
   294 
       
   295     ##########################################################
       
   296     #  sets value of attribute 'attributeName' to 'attributeValue'
       
   297     #  if the attribute does not yet exist, it will be created
       
   298 
       
   299     def setAttribute (self, attributeName, attributeValue):
       
   300         raise NotImplementedError
       
   301 
       
   302 
       
   303     ##########################################################
       
   304     #  returns element value of this node
       
   305 
       
   306     def getElementValue (self):
       
   307         raise NotImplementedError
       
   308 
       
   309 
       
   310     ##########################################################
       
   311     #  sets element value of this node to 'value'
       
   312 
       
   313     def setElementValue (self, value):
       
   314         raise NotImplementedError
       
   315 
       
   316 
       
   317     ##########################################################
       
   318     #  returns local tag name of given XML node 'node' (without namespace)
       
   319 
       
   320     def getLocalName (self):
       
   321         return self.xmlIf.extractLocalName (self.getTagName())
       
   322 
       
   323 
       
   324     ##########################################################
       
   325     #  returns namespace of tag name of given XML node 'node'
       
   326 
       
   327     def getNamespaceURI (self):
       
   328         return extractNamespaceAlias (self.getTagName())
       
   329 
       
   330 
       
   331     ##########################################################
       
   332     #  returns child element nodes (list) of given XML node 'node'
       
   333     #  'filterTag' (localTagName) is optional, 'filterTag' = '*' must be supported
       
   334     #  'namespaceAlias' has to contain corresponding namespace
       
   335 
       
   336     def getChildrenNS (self, namespaceAlias, filterTag=None):
       
   337         if not filterTag in (None, '*'):
       
   338             filterTag = self.xmlIf.addNamespaceAlias(namespaceAlias, filterTag)
       
   339         return self.getChildren(filterTag)
       
   340 
       
   341 
       
   342     ##########################################################
       
   343     #  returns first child element of given XML node 'node'
       
   344     #  or None if there is no suitable child element
       
   345     #  'filterTag' is optional, 'filterTag' = '*' must be supported
       
   346 
       
   347     def getFirstChild (self, filterTag=None):
       
   348         children = self.getChildren(filterTag)
       
   349         if children != []:
       
   350             return children[0]
       
   351         else:
       
   352             return None
       
   353 
       
   354 
       
   355     ##########################################################
       
   356     #  returns first child element of given XML node 'node'
       
   357     #  or None if there is no suitable child element
       
   358     #  'filterTag' (localTagName) is optional, 'filterTag' = '*' must be supported
       
   359     #  'namespaceAlias' has to contain corresponding namespace
       
   360 
       
   361     def getFirstChildNS (self, namespaceAlias, filterTag=None):
       
   362         if not filterTag in (None, '*'):
       
   363             filterTag = self.xmlIf.addNamespaceAlias(namespaceAlias, filterTag)
       
   364         return self.getFirstChild(filterTag)
       
   365 
       
   366 
       
   367     ##########################################################
       
   368     #  returns all descendants of node whose tag match 'localName' of the given namespace
       
   369     #  'filterTag' (localTagName) is optional, 'filterTag' = '*' must be supported
       
   370     #  'namespaceAlias' has to contain corresponding namespace
       
   371 
       
   372     def getElementsByTagNameNS (self, namespaceAlias, filterTag=None):
       
   373         if not filterTag in (None, '*'):
       
   374             filterTag = self.xmlIf.addNamespaceAlias(namespaceAlias, filterTag)
       
   375         return self.getElementsByTagName (filterTag)
       
   376 
       
   377 
       
   378     ##########################################################
       
   379     #  returns attribute if it exists or default value if not
       
   380 
       
   381     def getAttributeOrDefault (self, attributeName, default):
       
   382         if self.hasAttribute (attributeName):
       
   383             return self.getAttribute (attributeName)
       
   384         else:
       
   385             return default
       
   386 
       
   387     ##########################################################
       
   388     #  returns the current start line number of the element node in the XML file
       
   389 
       
   390     def getStartLineNumber (self):
       
   391         return self.element.startLineNumber
       
   392 
       
   393 
       
   394     ##########################################################
       
   395     #  returns the current end line number of the element node in the XML file
       
   396 
       
   397     def getEndLineNumber (self):
       
   398         return self.element.endLineNumber
       
   399 
       
   400 
       
   401     ##########################################################
       
   402     #  returns the URL of the XML file the node belongs to
       
   403 
       
   404     def getUrl (self):
       
   405         return self.element.url
       
   406     
       
   407     def getAbsUrl (self):
       
   408         return self.element.absUrl
       
   409 
       
   410 
       
   411     ##########################################################
       
   412     #  returns the file path of the XML file the node belongs to
       
   413 
       
   414     def getFilePath (self):
       
   415         return self.element.filePath
       
   416 
       
   417 
       
   418     ########################################
       
   419     # retrieve node list or attribute list for specified XPath
       
   420 
       
   421     def getXPathList (self, xPath, defaultNamespace):
       
   422         selectedNodeList = []
       
   423         selectedAttributeList = []
       
   424         xPathList = string.split (xPath, "|")
       
   425         for xRelPath in xPathList:
       
   426             descendantOrSelf = 0
       
   427             if xRelPath[:3] == ".//":
       
   428                 descendantOrSelf = 1
       
   429                 xRelPath = xRelPath[3:]
       
   430             # no namespaces supported!
       
   431             xPathLocalStepList = map(lambda xPath: self.xmlIf.extractLocalName(xPath), string.split (xRelPath, "/"))
       
   432             childList = [self,]
       
   433             isAttributeList = 0
       
   434             for localStep in xPathLocalStepList:
       
   435                 stepChildList = []
       
   436                 if localStep == "":
       
   437                     raise IOError ("Invalid xPath '%s'!" %(xRelPath))
       
   438                 elif localStep == ".":
       
   439                     continue
       
   440                 elif localStep[0] == '@':
       
   441                     for childNode in childList:
       
   442                         attrName = localStep[1:]
       
   443                         if attrName == '*':
       
   444                             stepChildList.extend(childNode.getAttributeDict().values())
       
   445                         elif childNode.hasAttribute(attrName):
       
   446                             stepChildList.append (childNode.getAttribute(attrName))
       
   447                     childList = stepChildList
       
   448                     isAttributeList = 1
       
   449                 else:
       
   450                     if descendantOrSelf:
       
   451                         descendantOrSelf = 0
       
   452                         stepChildList = self.getElementsByTagNameNS(defaultNamespace, localStep)
       
   453                     else:
       
   454                         for childNode in childList:
       
   455                             stepChildList.extend (childNode.getChildrenNS(defaultNamespace, localStep))
       
   456                     childList = stepChildList
       
   457 
       
   458             if isAttributeList:
       
   459                 selectedAttributeList.extend (childList)
       
   460             else:
       
   461                 selectedNodeList.extend (childList)
       
   462         return selectedNodeList, selectedAttributeList
       
   463 
       
   464 
       
   465