xmlclass/xsdschema.py
changeset 592 89ff2738ef20
parent 565 94c11207aa6f
child 674 bbffe4110141
--- a/xmlclass/xsdschema.py	Fri Nov 18 17:40:40 2011 +0100
+++ b/xmlclass/xsdschema.py	Wed Nov 23 00:19:27 2011 +0100
@@ -22,7 +22,7 @@
 #License along with this library; if not, write to the Free Software
 #Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
-import re
+import os, re
 import datetime
 from xml.dom import minidom
 from types import *
@@ -143,19 +143,14 @@
     return union
 
 
-def ReduceSimpleType(factory, attributes, elements):
-    # Reduce all the simple type children
-    annotations, children = factory.ReduceElements(elements)
-    
-    typeinfos = children[0]
-    
+def CreateSimpleType(factory, attributes, typeinfos):
     # Initialize type informations
     facets = {}
-    simpleType = {"type": SIMPLETYPE, "final": attributes.get("final", []), "doc": annotations}
+    simpleType = {"type": SIMPLETYPE, "final": attributes.get("final", [])}
     if attributes.has_key("name"):
         simpleType["name"] = attributes["name"]
     
-    if typeinfos["type"] == "restriction":
+    if typeinfos["type"] in ["restriction", "extension"]:
         # Search for base type definition
         if isinstance(typeinfos["base"], (StringType, UnicodeType)):
             basetypeinfos = factory.FindSchemaElement(typeinfos["base"], SIMPLETYPE)
@@ -172,13 +167,13 @@
         
         # Check that derivation is allowed
         if basetypeinfos.has_key("final"):
-            if basetypeinfos["final"].has_key("#all"):
+            if "#all" in basetypeinfos["final"]:
                 raise ValueError("Base type can't be derivated!")
-            if basetypeinfos["final"].has_key("restriction"):
+            if "restriction" in basetypeinfos["final"] and typeinfos["type"] == "restriction":
                 raise ValueError("Base type can't be derivated by restriction!")
         
         # Extract simple type facets
-        for facet in typeinfos["facets"]:
+        for facet in typeinfos.get("facets", []):
             facettype = facet["type"]
             if not basetypeinfos["facets"].has_key(facettype):
                 raise ValueError("\"%s\" facet can't be defined for \"%s\" type!" % (facettype, type))
@@ -186,18 +181,20 @@
                 raise ValueError("\"%s\" facet is fixed on base type!" % facettype)
             value = facet["value"]
             basevalue = basetypeinfos["facets"][facettype][0]
-            if facettype == "enumeration":
+            if facettype in ["enumeration", "pattern"]:
                 value = basetypeinfos["extract"](value, False)
                 if len(facets) == 0:
-                    facets["enumeration"] = ([value], False)
+                    facets[facettype] = ([value], False)
                     continue
-                elif facets.keys() == ["enumeration"]:
-                    facets["enumeration"][0].append(value)
+                elif facets.keys() == [facettype]:
+                    facets[facettype][0].append(value)
                     continue
                 else:
-                    raise ValueError("\"enumeration\" facet can't be defined with another facet type!")
+                    raise ValueError("\"%s\" facet can't be defined with another facet type!" % facettype)
             elif facets.has_key("enumeration"):
                 raise ValueError("\"enumeration\" facet can't be defined with another facet type!")
+            elif facets.has_key("pattern"):
+                raise ValueError("\"pattern\" facet can't be defined with another facet type!")
             elif facets.has_key(facettype):
                 raise ValueError("\"%s\" facet can't be defined two times!" % facettype)
             elif facettype == "length":
@@ -313,10 +310,13 @@
                     elif facetname == "maxExclusive"  and value >= facetvalue:
                         raise ValueError("value must be lesser than %s" % str(facetvalue))
                     elif facetname == "pattern":
-                        model = re.compile("(?:%s)?$" % facetvalue)
+                        model = re.compile("(?:%s)?$" % "|".join(map(lambda x: "(?:%s)" % x, facetvalue)))
                         result = model.match(value)
                         if result is None:
-                            raise ValueError("value doesn't follow the pattern %s" % facetvalue)
+                            if len(facetvalue) > 1:   
+                                raise ValueError("value doesn't follow any of the patterns %s" % ",".join(facetvalue))
+                            else:
+                                raise ValueError("value doesn't follow the pattern %s" % facetvalue[0])
                     elif facetname == "whiteSpace":
                         if facetvalue == "replace":
                             value = GetNormalizedString(value, False)
@@ -344,10 +344,13 @@
                     elif facetname == "maxExclusive"  and value >= facetvalue:
                         return False
                     elif facetname == "pattern":
-                        model = re.compile("(?:%s)?$" % facetvalue)
+                        model = re.compile("(?:%s)?$" % "|".join(map(lambda x: "(?:%s)" % x, facetvalue)))
                         result = model.match(value)
                         if result is None:
-                            raise ValueError("value doesn't follow the pattern %s" % facetvalue)
+                            if len(facetvalue) > 1:   
+                                raise ValueError("value doesn't follow any of the patterns %s" % ",".join(facetvalue))
+                            else:
+                                raise ValueError("value doesn't follow the pattern %s" % facetvalue[0])
             return True
         
         def SimpleTypeInitialValue():
@@ -478,19 +481,25 @@
     simpleType["generate"] = GenerateSimpleType
     return simpleType
 
+def ReduceSimpleType(factory, attributes, elements):
+    # Reduce all the simple type children
+    annotations, children = factory.ReduceElements(elements)
+    
+    simpleType = CreateSimpleType(factory, attributes, children[0])
+    simpleType["doc"] = annotations
+    
+    return simpleType
 
 # Complex type
 
-def ExtractAttributes(factory, elements, base = None):
+def ExtractAttributes(factory, elements, base=None):
+    attrs = []
+    attrnames = {}
     if base is not None:
-        basetypeinfos = factory.FindSchemaElement(base, COMPLEXTYPE)
-        if isinstance(basetypeinfos, (UnicodeType, StringType)):
-            attrnames = {}
-        else:
+        basetypeinfos = factory.FindSchemaElement(base)
+        if not isinstance(basetypeinfos, (UnicodeType, StringType)) and basetypeinfos["type"] == COMPLEXTYPE:
             attrnames = dict(map(lambda x:(x["name"], True), basetypeinfos["attributes"]))
-    else:
-        attrnames = {}
-    attrs = []
+        
     for element in elements:
         if element["type"] == ATTRIBUTE:
             if attrnames.get(element["name"], False):
@@ -524,13 +533,15 @@
     
     while len(children) > 0 and children[0]["type"] in ALL_FACETS:
         restriction["facets"].append(children.pop(0))
-    restriction["attributes"] = ExtractAttributes(factory, children)
+    restriction["attributes"] = ExtractAttributes(factory, children, restriction["base"])
     return restriction
 
 
 def ReduceExtension(factory, attributes, elements):
     annotations, children = factory.ReduceElements(elements)
-    extension = {"type": "extension", "attributes": [], "elements": [], "base": attributes.get("base", None), "doc": annotations}
+    if not attributes.has_key("base"):
+        raise ValueError("No base type has been defined for extension!")
+    extension = {"type": "extension", "attributes": [], "elements": [], "base": attributes["base"], "doc": annotations}
     if len(children) > 0:
         if children[0]["type"] in ["group", "all", CHOICE, "sequence"]:
             group = children.pop(0)
@@ -550,13 +561,36 @@
                     content = elmtgroup.copy()
                     content["name"] = "content"
                     extension["elements"].append(content)
-        extension["attributes"] = ExtractAttributes(factory, children, extension["base"])
+        extension["attributes"] = ExtractAttributes(factory, children)
     return extension
 
 
 def ReduceSimpleContent(factory, attributes, elements):
     annotations, children = factory.ReduceElements(elements)
+    
     simpleContent = children[0].copy()
+    
+    basetypeinfos = factory.FindSchemaElement(simpleContent["base"])
+    # The lookup may not yield a type description (e.g. for a base that is
+    # circular referenced): report it instead of failing on the subscript
+    if not isinstance(basetypeinfos, DictType):
+        raise ValueError("No compatible base type defined for simpleContent!")
+    if basetypeinfos["type"] == SIMPLETYPE:
+        contenttypeinfos = simpleContent.copy()
+        simpleContent.pop("base")
+    elif basetypeinfos["type"] == COMPLEXTYPE and \
+         len(basetypeinfos["elements"]) == 1 and \
+         basetypeinfos["elements"][0]["name"] == "content" and \
+         basetypeinfos["elements"][0].has_key("elmt_type") and \
+         basetypeinfos["elements"][0]["elmt_type"]["type"] == SIMPLETYPE:
+        contenttypeinfos = simpleContent.copy()
+        contenttypeinfos["base"] = basetypeinfos["elements"][0]["elmt_type"]
+    else:
+        raise ValueError("No compatible base type defined for simpleContent!")
+    contenttypeinfos = CreateSimpleType(factory, attributes, contenttypeinfos)
+    
+    simpleContent["elements"] = [{"name": "content", "type": ELEMENT,
+                                  "elmt_type": contenttypeinfos, "doc": annotations,
+                                  "minOccurs": 1, "maxOccurs": 1}]
     simpleContent["type"] = "simpleContent"
     return simpleContent
 
@@ -570,7 +601,7 @@
 
 def ReduceComplexType(factory, attributes, elements):
     annotations, children = factory.ReduceElements(elements)
-        
+    
     if len(children) > 0:
         if children[0]["type"] in ["simpleContent", "complexContent"]:
             complexType = children[0].copy()
@@ -582,15 +613,24 @@
             complexType.update(attributes)
             group = children.pop(0)
             if group["type"] in ["all", "sequence"]:
-                if group["minOccurs"] == 0 or group["maxOccurs"] != 1:
-                    if len(group["elements"]) > 1:
-                        raise ValueError("Not supported yet!")
-                    if group["minOccurs"] == 0:
-                        group["elements"][0]["minOccurs"] = group["minOccurs"]
-                    if group["maxOccurs"] != 1:
-                        group["elements"][0]["maxOccurs"] = group["maxOccurs"]
-                complexType["elements"] = group["elements"]
-                complexType["order"] = group["order"]
+                choice_number = 0
+                for element in group["elements"]:
+                    if element["type"] == CHOICE:
+                        choice_number += 1
+                if (group["minOccurs"] == 0 or group["maxOccurs"] != 1) and len(group["elements"]) > 1 or choice_number > 1:
+                    content = {"type": CHOICE, "name": "content", "choices": [group], "minOccurs": 1, "maxOccurs": 1}
+                    complexType["elements"].append(content)
+                else:
+                    if len(group["elements"]) == 1:
+                        if group["minOccurs"] == 0:
+                            group["elements"][0]["minOccurs"] = group["minOccurs"]
+                        if group["maxOccurs"] != 1:
+                            group["elements"][0]["maxOccurs"] = group["maxOccurs"]
+                    for element in group["elements"]:
+                        if element["type"] == CHOICE:
+                            element["name"] = "content"
+                    complexType["elements"] = group["elements"]
+                    complexType["order"] = group["order"]
             elif group["type"] == CHOICE:
                 content = group.copy()
                 content["name"] = "content"
@@ -673,23 +713,32 @@
     return any
 
 def ReduceElement(factory, attributes, elements):
+    annotations, children = factory.ReduceElements(elements)
+    
+    types = []
+    constraints = []
+    for child in children:
+        if child["type"] == CONSTRAINT:
+            constraints.append(child)
+        else:
+            types.append(child)
+    
     if attributes.has_key("default") and attributes.has_key("fixed"):
         raise ValueError("\"default\" and \"fixed\" can't be defined at the same time!")
     
     if attributes.has_key("ref"):
-        annotations, children = factory.ReduceElements(elements)
-        
         for attr in ["name", "default", "fixed", "form", "block", "type"]:
             if attributes.has_key(attr):
                 raise ValueError("\"ref\" and \"%s\" can't be defined at the same time!" % attr)
         if attributes.has_key("nillable"):
             raise ValueError("\"ref\" and \"nillable\" can't be defined at the same time!")
-        if len(children) > 0:
+        if len(types) > 0:
             raise ValueError("No type and no constraints can be defined where \"ref\" is defined!")
     
         infos = factory.FindSchemaElement(attributes["ref"], ELEMENT)
         if infos is not None:
             element = infos.copy()
+            element["constraints"] = constraints
             element["minOccurs"] = attributes["minOccurs"]
             element["maxOccurs"] = attributes["maxOccurs"]
             return element
@@ -697,12 +746,10 @@
             raise ValueError("\"%s\" base type isn't defined or circular referenced!" % name)
     
     elif attributes.has_key("name"):
-        annotations, children = factory.ReduceElements(elements)
-        
-        element = {"type": ELEMENT, "elmt_type": attributes.get("type", None), "doc": annotations}
-        if len(children) > 0:
+        element = {"type": ELEMENT, "elmt_type": attributes.get("type", None), "constraints": constraints, "doc": annotations}
+        if len(types) > 0:
             if element["elmt_type"] is None:
-                element["elmt_type"] = children[0]
+                element["elmt_type"] = types[0]
             else:
                 raise ValueError("Only one type can be defined for attribute!")
         elif element["elmt_type"] is None:
@@ -738,7 +785,9 @@
         if child["type"] in [ELEMENT, ANY, TAG]:
             choices.append(child)
         elif child["type"] == "sequence":
-            raise ValueError("\"sequence\" in \"choice\" is not supported. Create instead a new complex type!")
+            child["minOccurs"] = child["maxOccurs"] = 1
+            choices.append(child)
+            #raise ValueError("\"sequence\" in \"choice\" is not supported. Create instead a new complex type!")
         elif child["type"] == CHOICE:
             choices.extend(child["choices"])
         elif child["type"] == "group":
@@ -767,12 +816,8 @@
     
     sequence = []
     for child in children:
-        if child["type"] in [ELEMENT, ANY, TAG]:
+        if child["type"] in [ELEMENT, ANY, TAG, CHOICE]:
             sequence.append(child)
-        elif child["type"] == CHOICE:
-            content = child.copy()
-            content["name"] = "content"
-            sequence.append(content)
         elif child["type"] == "sequence":
             sequence.extend(child["elements"])
         elif child["type"] == "group":
@@ -815,23 +860,38 @@
 
 def ReduceUnique(factory, attributes, elements):
     annotations, children = factory.ReduceElements(elements)
-    raise ValueError("\"unique\" element isn't supported yet!")
-
+    
+    unique = {"type": CONSTRAINT, "const_type": "unique", "selector": children[0], "fields": children[1:]}
+    unique.update(attributes)
+    return unique
+    
 def ReduceKey(factory, attributes, elements):
     annotations, children = factory.ReduceElements(elements)
-    raise ValueError("\"key\" element isn't supported yet!")
-    
+    
+    key = {"type": CONSTRAINT, "const_type": "key", "selector": children[0], "fields": children[1:]}
+    key.update(attributes)
+    return key
+
 def ReduceKeyRef(factory, attributes, elements):
     annotations, children = factory.ReduceElements(elements)
-    raise ValueError("\"keyref\" element isn't supported yet!")
+    
+    keyref = {"type": CONSTRAINT, "const_type": "keyref", "selector": children[0], "fields": children[1:]}
+    keyref.update(attributes)
+    return keyref
     
 def ReduceSelector(factory, attributes, elements):
     annotations, children = factory.ReduceElements(elements)
-    raise ValueError("\"selector\" element isn't supported yet!")
+    
+    selector = {"type": CONSTRAINT, "const_type": "selector"}
+    selector.update(attributes)
+    return selector
 
 def ReduceField(factory, attributes, elements):
     annotations, children = factory.ReduceElements(elements)
-    raise ValueError("\"field\" element isn't supported yet!")
+    
+    field = {"type": CONSTRAINT, "const_type": "field"}
+    field.update(attributes)
+    return field
     
 
 # Inclusion elements
@@ -842,7 +902,32 @@
 
 def ReduceInclude(factory, attributes, elements):
     annotations, children = factory.ReduceElements(elements)
-    raise ValueError("\"include\" element isn't supported yet!")
+    
+    # A factory without file name was built from an XSD string
+    if factory.FileName is None:
+        raise ValueError("Include in XSD string not yet supported")
+    filepath = attributes["schemaLocation"]
+    if filepath is None:
+        raise ValueError("No schemaLocation has been defined for include!")
+    if not os.path.exists(filepath):
+        filepath = os.path.join(factory.BaseFolder, filepath)
+        if not os.path.exists(filepath):
+            raise ValueError("No file '%s' found for include" % attributes["schemaLocation"])
+    xsdfile = open(filepath, 'r')
+    try:
+        # Close the file even if the included document can't be parsed
+        include_factory = XSDClassFactory(minidom.parse(xsdfile), filepath)
+    finally:
+        xsdfile.close()
+    include_factory.CreateClasses()
+    
+    # Merge the definitions of the included schema into the including factory
+    if factory.TargetNamespace == include_factory.TargetNamespace:
+        factory.Namespaces[factory.TargetNamespace].update(include_factory.Namespaces[include_factory.TargetNamespace])
+    else:
+        factory.Namespaces[include_factory.TargetNamespace] = include_factory.Namespaces[include_factory.TargetNamespace]
+    factory.ComputedClasses.update(include_factory.ComputedClasses)
+    return None
     
 def ReduceRedefine(factory, attributes, elements):
     annotations, children = factory.ReduceElements(elements)
@@ -885,7 +963,7 @@
             return False
         for name, value in schema.items():
             ref_value = reference.get(name, None)
-            if ref_value is None:
+            if ref_value is None and value is not None:
                 return False
             result = CompareSchema(value, ref_value)
             if not result:
@@ -905,8 +983,8 @@
 
 class XSDClassFactory(ClassFactory):
 
-    def __init__(self, document, debug = False):
-        ClassFactory.__init__(self, document, debug)
+    def __init__(self, document, filepath=None, debug=False):
+        ClassFactory.__init__(self, document, filepath, debug)
         self.Namespaces["xml"] = {
             "lang": {
                 "type": SYNTAXATTRIBUTE, 
@@ -943,7 +1021,12 @@
         }
         
     def ParseSchema(self):
-        schema = self.Document.childNodes[0]
+        # The root element is not always the first child of the document
+        # (other node types may precede it), so rely on the DOM accessor
+        # rather than on a position in childNodes
+        schema = self.Document.documentElement
+        if schema is None:
+            raise ValueError("No schema element found in document!")
         for qualified_name, attr in schema._attrs.items():
             value = GetAttributeValue(attr)
             if value == "http://www.w3.org/2001/XMLSchema":
@@ -957,7 +1038,7 @@
         self.Schema = XSD_NAMESPACE["schema"]["extract"]["default"](self, schema)
         ReduceSchema(self, self.Schema[1], self.Schema[2])
 
-    def FindSchemaElement(self, element_name, element_type):
+    def FindSchemaElement(self, element_name, element_type=None):
         namespace, name = DecomposeQualifiedName(element_name)
         element = self.GetQualifiedNameInfos(name, namespace, True)
         if element is None and namespace == self.TargetNamespace and name not in self.CurrentCompilations:
@@ -970,29 +1051,32 @@
             if name in self.CurrentCompilations:
                 if self.Debug:
                     print "Warning : \"%s\" is circular referenced!" % element_name
+                # Element is still being compiled: hand back its name so that
+                # callers can detect the circular reference (they test for a
+                # string result) instead of subscripting a None element below
                 return element_name
             else:
                 raise ValueError("\"%s\" isn't defined!" % element_name)
-        if element["type"] != element_type:
-            raise ValueError("\"%s\" isn't a group!" % element_name)
+        if element_type is not None and element["type"] != element_type:
+            raise ValueError("\"%s\" isn't of the expected type!" % element_name)
         return element
     
     def CreateSchemaElement(self, element_name, element_type):
         for type, attributes, elements in self.Schema[2]:
             namespace, name = DecomposeQualifiedName(type)
-            if attributes.has_key("name") and attributes["name"] == element_name:
+            if attributes.get("name", None) == element_name:
                 element_infos = None
-                if element_type == ATTRIBUTE and name == "attribute":
+                if element_type in (ATTRIBUTE, None) and name == "attribute":
                     element_infos = ReduceAttribute(self, attributes, elements)
-                elif element_type == ELEMENT and name == "element":
+                elif element_type in (ELEMENT, None) and name == "element":
                     element_infos = ReduceElement(self, attributes, elements)
-                elif element_type == ATTRIBUTESGROUP and name == "attributeGroup":
+                elif element_type in (ATTRIBUTESGROUP, None) and name == "attributeGroup":
                     element_infos = ReduceAttributeGroup(self, attributes, elements)
-                elif element_type == ELEMENTSGROUP and name == "group":
+                elif element_type in (ELEMENTSGROUP, None) and name == "group":
                     element_infos = ReduceGroup(self, attributes, elements)
-                elif element_type == SIMPLETYPE and name == "simpleType":
+                elif element_type in (SIMPLETYPE, None) and name == "simpleType":
                     element_infos = ReduceSimpleType(self, attributes, elements)
-                elif element_type == COMPLEXTYPE and name == "complexType":
+                elif element_type in (COMPLEXTYPE, None) and name == "complexType":
                     element_infos = ReduceComplexType(self, attributes, elements)
                 if element_infos is not None:
                     self.Namespaces[self.TargetNamespace][element_name] = element_infos
@@ -1002,20 +1082,20 @@
 """
 This function opens the xsd file and generate the classes from the xml tree
 """
-def GenerateClassesFromXSD(filename, declare = False):
-    xsdfile = open(filename, 'r')
-    factory = XSDClassFactory(minidom.parse(xsdfile))
-    xsdfile.close()
-    factory.ParseSchema()
+def GenerateClassesFromXSD(filepath, declare=False):
+    xsdfile = open(filepath, 'r')
+    # Release the file handle even if the document can't be parsed
+    try:
+        factory = XSDClassFactory(minidom.parse(xsdfile), filepath)
+    finally:
+        xsdfile.close()
     return GenerateClasses(factory, declare)
 
 """
 This function generate the classes from the xsd given as a string
 """
-def GenerateClassesFromXSDstring(xsdstring, declare = False):
-    factory = XSDClassFactory(minidom.parseString(xsdstring))
-    factory.ParseSchema()
-    return GenerateClasses(factory, declare)
+def GenerateClassesFromXSDstring(xsdstring, declare=False):
+    return GenerateClasses(XSDClassFactory(minidom.parseString(xsdstring)), declare)
 
 
 #-------------------------------------------------------------------------------
@@ -1371,7 +1448,7 @@
         "type": SYNTAXELEMENT, 
         "extract": {
             "default": GenerateElement("key", ["id", "name"], 
-                re.compile("((?:annotation )?(?:selector |(?:field )+))"))
+                re.compile("((?:annotation )?(?:selector (?:field )+))"))
         },
         "reduce": ReduceKey
     },
@@ -1387,7 +1464,7 @@
         "type": SYNTAXELEMENT, 
         "extract": {
             "default": GenerateElement("keyref", ["id", "name", "refer"], 
-                re.compile("((?:annotation )?(?:selector |(?:field )+))"))
+                re.compile("((?:annotation )?(?:selector (?:field )+))"))
         },
         "reduce": ReduceKeyRef
     },
@@ -1986,7 +2063,8 @@
     "xpath": {
         "type": SYNTAXATTRIBUTE, 
         "extract": {
-            "default": NotSupportedYet("xpath")
+#            "default": NotSupportedYet("xpath")
+            "default": GetAttributeValue
         }
     },
     
@@ -2031,7 +2109,7 @@
         "facets": STRING_FACETS,
         "generate": GenerateSimpleTypeXMLText(str),
         "initial": lambda: 0,
-        "check": lambda x: isinstance(x, IntType)
+        "check": lambda x: isinstance(x, (IntType, LongType))
     },
     
     "hexBinary": {
@@ -2041,7 +2119,7 @@
         "facets": STRING_FACETS,
         "generate": GenerateSimpleTypeXMLText(lambda x: ("%."+str(int(round(len("%X"%x)/2.)*2))+"X")%x),
         "initial": lambda: 0,
-        "check": lambda x: isinstance(x, IntType)
+        "check": lambda x: isinstance(x, (IntType, LongType))
     },
 
     "integer": {