minixsv/pyxsval.py
changeset 0 b622defdfd98
equal deleted inserted replaced
-1:000000000000 0:b622defdfd98
       
     1 #
       
     2 # minixsv, Release 0.3
       
     3 # file: pyxsval.py
       
     4 #
       
     5 # API for XML schema validator
       
     6 #
       
     7 # history:
       
     8 # 2004-09-09 rl   created
       
     9 # 2004-09-29 rl   adapted to re-designed XML interface classes,
       
    10 #                 ErrorHandler separated, URL processing added, some bugs fixed
       
    11 # 2004-10-07 rl   Validator classes extracted into separate files
       
    12 # 2004-10-12 rl   API re-worked, XML text processing added
       
    13 #
       
    14 # Copyright (c) 2004 by Roland Leuthe.  All rights reserved.
       
    15 #
       
    16 # --------------------------------------------------------------------
       
    17 # The minixsv XML schema validator is
       
    18 #
       
    19 # Copyright (c) 2004 by Roland Leuthe
       
    20 #
       
    21 # By obtaining, using, and/or copying this software and/or its
       
    22 # associated documentation, you agree that you have read, understood,
       
    23 # and will comply with the following terms and conditions:
       
    24 #
       
    25 # Permission to use, copy, modify, and distribute this software and
       
    26 # its associated documentation for any purpose and without fee is
       
    27 # hereby granted, provided that the above copyright notice appears in
       
    28 # all copies, and that both that copyright notice and this permission
       
    29 # notice appear in supporting documentation, and that the name of
       
    30 # the author not be used in advertising or publicity
       
    31 # pertaining to distribution of the software without specific, written
       
    32 # prior permission.
       
    33 #
       
    34 # THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
       
    35 # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
       
    36 # ABILITY AND FITNESS.  IN NO EVENT SHALL THE AUTHOR
       
    37 # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
       
    38 # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
       
    39 # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
       
    40 # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
       
    41 # OF THIS SOFTWARE.
       
    42 # --------------------------------------------------------------------
       
    43 
       
    44 __all__ = [
       
    45     # public symbols
       
    46     "parseAndValidate",
       
    47     "parseAndValidateString",
       
    48     "parseAndValidateXmlInput",
       
    49     "parseAndValidateXmlInputString",
       
    50     "parseAndValidateXmlSchema",
       
    51     "parseAndValidateXmlSchemaString",
       
    52     "XsValidator",
       
    53     "XMLIF_MINIDOM",
       
    54     "XMLIF_ELEMENTTREE"
       
    55     ]
       
    56 
       
    57 
       
import os
import sys

from xsvalErrorHandler import *
from xsvalBase import XsValBase
from xsvalSchema import XsValSchema
       
    61 
       
    62 
       
    63 XMLIF_MINIDOM     = "XMLIF_MINIDOM"
       
    64 XMLIF_ELEMENTTREE = "XMLIF_ELEMENTTREE"
       
    65 
       
    66 
       
    67 _XS_VAL_DEFAULT_ERROR_LIMIT = 20
       
    68 
       
    69 _OWNDIR = os.path.dirname(__file__)
       
    70 
       
    71 
       
    72 ########################################
       
    73 # convenience function for validating 
       
    74 # 1. XML schema file
       
    75 # 2. XML input file
       
    76 # If xsdFile is specified, it will be used for validation
       
    77 # If xsdFile=None, the schemaLocation attribute is expected in the root tag of the XML input file
       
    78 #
       
    79 def parseAndValidate (inputFile, xsdFile=None, **kw):
       
    80     return parseAndValidateXmlInput (inputFile, xsdFile, 1, **kw)
       
    81 
       
    82 
       
    83 ########################################
       
    84 # convenience function for validating 
       
    85 # 1. text string containing the XML schema
       
    86 # 2. text string containing the XML input
       
    87 # If xsdText is given, it will be used for validation
       
    88 # If xsdText=None, the schemaLocation attribute is expected in the root tag of the XML input
       
    89 #
       
    90 def parseAndValidateString (inputText, xsdText=None, **kw):
       
    91     return parseAndValidateXmlInputString (inputText, xsdText, 1, **kw)
       
    92 
       
    93 
       
    94 ########################################
       
    95 # factory for validating 
       
    96 # 1. XML schema file (only if validateSchema=1)
       
    97 # 2. XML input file
       
    98 # If xsdFile is specified, it will be used for validation
       
    99 # If xsdFile=None, the schemaLocation attribute is expected in the root tag of the XML input file
       
   100 #
       
   101 def parseAndValidateXmlInput (inputFile, xsdFile=None, validateSchema=0, **kw):
       
   102     xsValidator = XsValidator (**kw)
       
   103     # parse XML input file
       
   104     inputTreeWrapper = xsValidator.parse (inputFile)
       
   105     # validate XML input file
       
   106     return xsValidator.validateXmlInput (inputFile, inputTreeWrapper, xsdFile, validateSchema)
       
   107 
       
   108 
       
   109 ########################################
       
   110 # factory for validating 
       
   111 # 1. text string containing the XML schema (only if validateSchema=1)
       
   112 # 2. text string containing the XML input
       
   113 # If xsdText is given, it will be used for validation
       
   114 # If xsdText=None, the schemaLocation attribute is expected in the root tag of the XML input
       
   115 #
       
   116 def parseAndValidateXmlInputString (inputText, xsdText=None, validateSchema=0, **kw):
       
   117     xsValidator = XsValidator (**kw)
       
   118     # parse XML input text string
       
   119     inputTreeWrapper = xsValidator.parseString (inputText)
       
   120     # validate XML input text string
       
   121     return xsValidator.validateXmlInputString (inputTreeWrapper, xsdText, validateSchema)
       
   122 
       
   123 
       
   124 ########################################
       
   125 # factory for validating only given XML schema file
       
   126 #
       
   127 def parseAndValidateXmlSchema (xsdFile, **kw):
       
   128     xsValidator = XsValidator (**kw)
       
   129     # parse XML schema file
       
   130     xsdTreeWrapper = xsValidator.parse (xsdFile)
       
   131     # validate XML schema file
       
   132     return xsValidator.validateXmlSchema (xsdFile, xsdTreeWrapper)
       
   133 
       
   134 
       
   135 ########################################
       
   136 # factory for validating only given XML schema file
       
   137 #
       
   138 def parseAndValidateXmlSchemaString (xsdText, **kw):
       
   139     xsValidator = XsValidator (**kw)
       
   140     # parse XML schema
       
   141     xsdTreeWrapper = xsValidator.parseString (xsdText)
       
   142     # validate XML schema
       
   143     return xsValidator.validateXmlSchema ("", xsdTreeWrapper)
       
   144 
       
   145 
       
   146 ########################################
       
   147 # XML schema validator class
       
   148 #
       
   149 class XsValidator:
       
   150     def __init__(self, xmlIfClass=XMLIF_MINIDOM,
       
   151                  warningProc=IGNORE_WARNINGS, errorLimit=_XS_VAL_DEFAULT_ERROR_LIMIT, verbose=0):
       
   152 
       
   153         self.warningProc   = warningProc
       
   154         self.errorLimit    = errorLimit
       
   155         self.verbose       = verbose
       
   156 
       
   157         # enable interface to minidom
       
   158         if xmlIfClass == XMLIF_MINIDOM:
       
   159             import minidomif
       
   160             self.xmlIf = minidomif.MiniDomInterface(verbose)
       
   161 
       
   162         # enable interface to elementtree
       
   163         elif xmlIfClass == XMLIF_ELEMENTTREE:
       
   164             import elemtreeif
       
   165             self.xmlIf = elemtreeif.ElementTreeInterface(verbose)
       
   166 
       
   167         # create error handler
       
   168         self.errorHandler  = ErrorHandler (errorLimit, warningProc, verbose)
       
   169 
       
   170 
       
   171     ########################################
       
   172     # parse XML file
       
   173     # 'file' may be a filepath or an URI
       
   174     #
       
   175     def parse (self, file, baseUrl=""):
       
   176         try:
       
   177             self._verbosePrint ("Parsing %s..." %(file))
       
   178             treeWrapper = self.xmlIf.extParse(file, baseUrl)
       
   179         except AssertionError, errstr:
       
   180             self.errorHandler.raiseError (errstr)
       
   181         return treeWrapper
       
   182 
       
   183 
       
   184     ########################################
       
   185     # parse text string containing XML 
       
   186     #
       
   187     def parseString (self, text):
       
   188         try:
       
   189             self._verbosePrint ("Parsing XML text string...")
       
   190             treeWrapper = self.xmlIf.extParseString(text)
       
   191         except AssertionError, errstr:
       
   192             self.errorHandler.raiseError (errstr)
       
   193         return treeWrapper
       
   194 
       
   195 
       
   196     ########################################
       
   197     # validate XML input
       
   198     #
       
   199     def validateXmlInput (self, xmlInputFile, inputTreeWrapper, xsdFile=None, validateSchema=0):
       
   200         # parse XML schema file
       
   201         if xsdFile != None:
       
   202             xsdTreeWrapper = self.parse (xsdFile)
       
   203         else:
       
   204             # a schemaLocation attribute is expected in the root tag of the XML input file
       
   205             xsdFile = self._retrieveReferencedXsdFile (inputTreeWrapper)
       
   206             xsdTreeWrapper = self.parse (xsdFile, inputTreeWrapper.getRootNode().getAbsUrl())
       
   207 
       
   208         return self._validateXmlInput (xmlInputFile, xsdFile, inputTreeWrapper, xsdTreeWrapper, validateSchema)
       
   209 
       
   210 
       
   211     ########################################
       
   212     # validate XML input
       
   213     #
       
   214     def validateXmlInputString (self, inputTreeWrapper, xsdText=None, validateSchema=0):
       
   215         # parse XML schema file
       
   216         if xsdText != None:
       
   217             xsdFile = "schema text"
       
   218             xsdTreeWrapper = self.parseString (xsdText)
       
   219         else:
       
   220             # a schemaLocation attribute is expected in the root tag of the XML input file
       
   221             xsdFile = self._retrieveReferencedXsdFile (inputTreeWrapper)
       
   222             xsdTreeWrapper = self.parse (xsdFile, inputTreeWrapper.getRootNode().getAbsUrl())
       
   223 
       
   224         return self._validateXmlInput ("input text", xsdFile, inputTreeWrapper, xsdTreeWrapper, validateSchema)
       
   225 
       
   226 
       
   227     ########################################
       
   228     # validate XML schema separately
       
   229     #
       
   230     def validateXmlSchema (self, xsdFile, xsdTreeWrapper):
       
   231         # parse minixsv internal schema
       
   232         rulesTreeWrapper = self.parse(os.path.join (_OWNDIR, "xsStructs.xsd"))
       
   233 
       
   234         self._verbosePrint ("Validating %s..." %(xsdFile))
       
   235         xsvGivenXsdFile = XsValSchema (self.xmlIf, self.errorHandler)
       
   236         xsvGivenXsdFile.validate(xsdTreeWrapper, rulesTreeWrapper)
       
   237         self.errorHandler.flushOutput()
       
   238         return xsdTreeWrapper
       
   239 
       
   240 
       
   241     ########################################
       
   242     # validate XML input tree and xsd tree if requested
       
   243     #
       
   244     def _validateXmlInput (self, xmlInputFile, xsdFile, inputTreeWrapper, xsdTreeWrapper, validateSchema=0):
       
   245         # validate XML schema file if requested
       
   246         if validateSchema:
       
   247             self.validateXmlSchema (xsdFile, xsdTreeWrapper)
       
   248 
       
   249         try:
       
   250             xsdTreeWrapper.insertSubtree (xsdTreeWrapper.getRootNode().getFirstChild(), os.path.join (_OWNDIR, "datatypes2.xsd"), None)
       
   251         except AssertionError, errstr:
       
   252             self.errorHandler.raiseError (errstr)
       
   253 
       
   254         self._verbosePrint ("Validating %s..." %(xmlInputFile))
       
   255         xsvInputFile = XsValBase (self.xmlIf, self.errorHandler)
       
   256         xsvInputFile.validate(inputTreeWrapper, xsdTreeWrapper)
       
   257         self.errorHandler.flushOutput()
       
   258         return inputTreeWrapper
       
   259 
       
   260 
       
   261     ########################################
       
   262     # retrieve XML schema location from XML input tree
       
   263     #
       
   264     def _retrieveReferencedXsdFile (self, inputTreeWrapper):
       
   265         # a schemaLocation attribute is expected in the root tag of the XML input file
       
   266         for attributeName, attributeValue in inputTreeWrapper.getRootNode().getAttributeDict().items():
       
   267             if self.xmlIf.extractLocalName(attributeName) in ("noNamespaceSchemaLocation", "schemaLocation"):
       
   268                 xsdFile = attributeValue
       
   269                 return xsdFile
       
   270         else:
       
   271             self.errorHandler.raiseError ("No schema file specified!")
       
   272 
       
   273 
       
   274     ########################################
       
   275     # print if verbose flag is set
       
   276     #
       
   277     def _verbosePrint (self, text):
       
   278         if self.verbose:
       
   279             print text
       
   280 
       
   281 
       
   282 
       
   283