#
# minixsv, Release 0.3
# file: pyxsval.py
#
# API for XML schema validator
#
# history:
# 2004-09-09 rl   created
# 2004-09-29 rl   adapted to re-designed XML interface classes,
#                 ErrorHandler separated, URL processing added, some bugs fixed
# 2004-10-07 rl   Validator classes extracted into separate files
# 2004-10-12 rl   API re-worked, XML text processing added
#
# Copyright (c) 2004 by Roland Leuthe.  All rights reserved.
#
# --------------------------------------------------------------------
# The minixsv XML schema validator is
#
# Copyright (c) 2004 by Roland Leuthe
#
# By obtaining, using, and/or copying this software and/or its
# associated documentation, you agree that you have read, understood,
# and will comply with the following terms and conditions:
#
# Permission to use, copy, modify, and distribute this software and
# its associated documentation for any purpose and without fee is
# hereby granted, provided that the above copyright notice appears in
# all copies, and that both that copyright notice and this permission
# notice appear in supporting documentation, and that the name of
# the author not be used in advertising or publicity
# pertaining to distribution of the software without specific, written
# prior permission.
#
# THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
# ABILITY AND FITNESS.  IN NO EVENT SHALL THE AUTHOR
# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
# OF THIS SOFTWARE.
# --------------------------------------------------------------------

__all__ = [
    # public symbols
    "parseAndValidate",
    "parseAndValidateString",
    "parseAndValidateXmlInput",
    "parseAndValidateXmlInputString",
    "parseAndValidateXmlSchema",
    "parseAndValidateXmlSchemaString",
    "XsValidator",
    "XMLIF_MINIDOM",
    "XMLIF_ELEMENTTREE"
    ]


# 'os' is imported explicitly: it is needed below for _OWNDIR and must not
# depend on whatever names the star import happens to re-export.
import os

# the star import supplies ErrorHandler and IGNORE_WARNINGS used below
from xsvalErrorHandler import *
from xsvalBase import XsValBase
from xsvalSchema import XsValSchema


# selectors for the XML interface used by XsValidator (see XsValidator.__init__)
XMLIF_MINIDOM     = "XMLIF_MINIDOM"
XMLIF_ELEMENTTREE = "XMLIF_ELEMENTTREE"


# default value of the 'errorLimit' argument handed to the ErrorHandler
_XS_VAL_DEFAULT_ERROR_LIMIT = 20

# directory of this module; the bundled files "xsStructs.xsd" and
# "datatypes2.xsd" are looked up relative to it
_OWNDIR = os.path.dirname(__file__)


########################################
# convenience function for validating
# 1. XML schema file
# 2. XML input file
# If xsdFile is specified, it will be used for validation
# If xsdFile=None, the schemaLocation attribute is expected in the root tag of the XML input file
#
# Same as parseAndValidateXmlInput with validateSchema=1.
# Additional keyword arguments are passed on to the XsValidator constructor.
#
def parseAndValidate(inputFile, xsdFile=None, **kw):
    return parseAndValidateXmlInput(inputFile, xsdFile, 1, **kw)


########################################
# convenience function for validating
# 1. text string containing the XML schema
# 2. text string containing the XML input
# If xsdText is given, it will be used for validation
# If xsdText=None, the schemaLocation attribute is expected in the root tag of the XML input
#
# Same as parseAndValidateXmlInputString with validateSchema=1.
# Additional keyword arguments are passed on to the XsValidator constructor.
#
def parseAndValidateString(inputText, xsdText=None, **kw):
    return parseAndValidateXmlInputString(inputText, xsdText, 1, **kw)


########################################
# factory for validating
# 1. XML schema file (only if validateSchema=1)
# 2. XML input file
# If xsdFile is specified, it will be used for validation
# If xsdFile=None, the schemaLocation attribute is expected in the root tag of the XML input file
#
# Returns the result of XsValidator.validateXmlInput (the input tree wrapper).
# Additional keyword arguments are passed on to the XsValidator constructor.
#
def parseAndValidateXmlInput(inputFile, xsdFile=None, validateSchema=0, **kw):
    xsValidator = XsValidator(**kw)
    # parse XML input file
    inputTreeWrapper = xsValidator.parse(inputFile)
    # validate XML input file
    return xsValidator.validateXmlInput(inputFile, inputTreeWrapper, xsdFile, validateSchema)


########################################
# factory for validating
# 1. text string containing the XML schema (only if validateSchema=1)
# 2. text string containing the XML input
# If xsdText is given, it will be used for validation
# If xsdText=None, the schemaLocation attribute is expected in the root tag of the XML input
#
# Returns the result of XsValidator.validateXmlInputString (the input tree wrapper).
# Additional keyword arguments are passed on to the XsValidator constructor.
#
def parseAndValidateXmlInputString(inputText, xsdText=None, validateSchema=0, **kw):
    xsValidator = XsValidator(**kw)
    # parse XML input text string
    inputTreeWrapper = xsValidator.parseString(inputText)
    # validate XML input text string
    return xsValidator.validateXmlInputString(inputTreeWrapper, xsdText, validateSchema)


########################################
# factory for validating only given XML schema file
#
# Returns the result of XsValidator.validateXmlSchema (the schema tree wrapper).
# Additional keyword arguments are passed on to the XsValidator constructor.
#
def parseAndValidateXmlSchema(xsdFile, **kw):
    xsValidator = XsValidator(**kw)
    # parse XML schema file
    xsdTreeWrapper = xsValidator.parse(xsdFile)
    # validate XML schema file
    return xsValidator.validateXmlSchema(xsdFile, xsdTreeWrapper)


########################################
# factory for validating only given XML schema text string
#
# Returns the result of XsValidator.validateXmlSchema (the schema tree wrapper).
# Additional keyword arguments are passed on to the XsValidator constructor.
#
def parseAndValidateXmlSchemaString(xsdText, **kw):
    xsValidator = XsValidator(**kw)
    # parse XML schema
    xsdTreeWrapper = xsValidator.parseString(xsdText)
    # validate XML schema (there is no file name for a text string, hence "")
    return xsValidator.validateXmlSchema("", xsdTreeWrapper)


########################################
# XML schema validator class
#
class XsValidator:
    ########################################
    # constructor
    # xmlIfClass:  XMLIF_MINIDOM or XMLIF_ELEMENTTREE, selects the XML interface
    # warningProc, errorLimit, verbose: stored and handed to the ErrorHandler;
    #              'verbose' additionally enables the progress output of this class
    # A ValueError is raised for any other xmlIfClass value (otherwise
    # self.xmlIf would stay undefined and the first parse would fail
    # with an AttributeError).
    #
    def __init__(self, xmlIfClass=XMLIF_MINIDOM,
                 warningProc=IGNORE_WARNINGS, errorLimit=_XS_VAL_DEFAULT_ERROR_LIMIT, verbose=0):

        self.warningProc = warningProc
        self.errorLimit  = errorLimit
        self.verbose     = verbose

        # enable interface to minidom
        # (interface modules are imported on demand, only the selected one is loaded)
        if xmlIfClass == XMLIF_MINIDOM:
            import minidomif
            self.xmlIf = minidomif.MiniDomInterface(verbose)

        # enable interface to elementtree
        elif xmlIfClass == XMLIF_ELEMENTTREE:
            import elemtreeif
            self.xmlIf = elemtreeif.ElementTreeInterface(verbose)

        else:
            raise ValueError("Unknown XML interface class: %r" % (xmlIfClass,))

        # create error handler
        self.errorHandler = ErrorHandler(errorLimit, warningProc, verbose)


    ########################################
    # parse XML file
    # 'file' may be a filepath or an URI
    # 'baseUrl' is passed on unchanged to the XML interface
    # returns the tree wrapper created by the XML interface;
    # an AssertionError of the XML interface is reported via the error handler
    #
    def parse(self, file, baseUrl=""):
        try:
            self._verbosePrint("Parsing %s..." % (file))
            treeWrapper = self.xmlIf.extParse(file, baseUrl)
        except AssertionError as errstr:
            self.errorHandler.raiseError(errstr)
        return treeWrapper


    ########################################
    # parse text string containing XML
    # returns the tree wrapper created by the XML interface;
    # an AssertionError of the XML interface is reported via the error handler
    #
    def parseString(self, text):
        try:
            self._verbosePrint("Parsing XML text string...")
            treeWrapper = self.xmlIf.extParseString(text)
        except AssertionError as errstr:
            self.errorHandler.raiseError(errstr)
        return treeWrapper


    ########################################
    # validate XML input
    # xmlInputFile is only used for the verbose output,
    # inputTreeWrapper is the already parsed XML input
    # returns inputTreeWrapper
    #
    def validateXmlInput(self, xmlInputFile, inputTreeWrapper, xsdFile=None, validateSchema=0):
        # parse XML schema file
        if xsdFile is not None:
            xsdTreeWrapper = self.parse(xsdFile)
        else:
            # a schemaLocation attribute is expected in the root tag of the XML input file
            xsdFile = self._retrieveReferencedXsdFile(inputTreeWrapper)
            # the URL of the input's root node is used as base URL for the schema file
            xsdTreeWrapper = self.parse(xsdFile, inputTreeWrapper.getRootNode().getAbsUrl())

        return self._validateXmlInput(xmlInputFile, xsdFile, inputTreeWrapper, xsdTreeWrapper, validateSchema)


    ########################################
    # validate XML input given as text string
    # inputTreeWrapper is the already parsed XML input
    # returns inputTreeWrapper
    #
    def validateXmlInputString(self, inputTreeWrapper, xsdText=None, validateSchema=0):
        # parse XML schema text
        if xsdText is not None:
            # placeholder name, only used for the verbose output
            xsdFile = "schema text"
            xsdTreeWrapper = self.parseString(xsdText)
        else:
            # a schemaLocation attribute is expected in the root tag of the XML input file
            xsdFile = self._retrieveReferencedXsdFile(inputTreeWrapper)
            # the URL of the input's root node is used as base URL for the schema file
            xsdTreeWrapper = self.parse(xsdFile, inputTreeWrapper.getRootNode().getAbsUrl())

        return self._validateXmlInput("input text", xsdFile, inputTreeWrapper, xsdTreeWrapper, validateSchema)


    ########################################
    # validate XML schema separately
    # xsdFile is only used for the verbose output
    # returns xsdTreeWrapper
    #
    def validateXmlSchema(self, xsdFile, xsdTreeWrapper):
        # parse minixsv internal schema
        rulesTreeWrapper = self.parse(os.path.join(_OWNDIR, "xsStructs.xsd"))

        self._verbosePrint("Validating %s..." % (xsdFile))
        xsvGivenXsdFile = XsValSchema(self.xmlIf, self.errorHandler)
        xsvGivenXsdFile.validate(xsdTreeWrapper, rulesTreeWrapper)
        self.errorHandler.flushOutput()
        return xsdTreeWrapper


    ########################################
    # validate XML input tree and xsd tree if requested
    # returns inputTreeWrapper
    #
    def _validateXmlInput(self, xmlInputFile, xsdFile, inputTreeWrapper, xsdTreeWrapper, validateSchema=0):
        # validate XML schema file if requested
        if validateSchema:
            self.validateXmlSchema(xsdFile, xsdTreeWrapper)

        # insert the bundled "datatypes2.xsd" into the schema tree before validating
        try:
            xsdTreeWrapper.insertSubtree(xsdTreeWrapper.getRootNode().getFirstChild(),
                                         os.path.join(_OWNDIR, "datatypes2.xsd"), None)
        except AssertionError as errstr:
            self.errorHandler.raiseError(errstr)

        self._verbosePrint("Validating %s..." % (xmlInputFile))
        xsvInputFile = XsValBase(self.xmlIf, self.errorHandler)
        xsvInputFile.validate(inputTreeWrapper, xsdTreeWrapper)
        self.errorHandler.flushOutput()
        return inputTreeWrapper


    ########################################
    # retrieve XML schema location from XML input tree
    # returns the value of the first root attribute whose local name is
    # "noNamespaceSchemaLocation" or "schemaLocation"
    #
    def _retrieveReferencedXsdFile(self, inputTreeWrapper):
        # a schemaLocation attribute is expected in the root tag of the XML input file
        rootAttributes = inputTreeWrapper.getRootNode().getAttributeDict()
        for attributeName, attributeValue in rootAttributes.items():
            if self.xmlIf.extractLocalName(attributeName) in ("noNamespaceSchemaLocation", "schemaLocation"):
                # NOTE(review): the attribute value is returned unmodified, also
                # for "schemaLocation" -- confirm that callers never pass
                # "namespace location" pairs here
                return attributeValue

        # only reached if none of the root attributes matched
        self.errorHandler.raiseError("No schema file specified!")


    ########################################
    # print if verbose flag is set
    #
    def _verbosePrint(self, text):
        if self.verbose:
            # single-argument call form gives the same output as the print statement
            print(text)