xmlclass/xmlclass.py
branchpython3
changeset 3750 f62625418bff
parent 3589 a0b645a934c9
child 3752 9f6f46dbe3ae
equal deleted inserted replaced
3749:fda6c1a37662 3750:f62625418bff
    21 # You should have received a copy of the GNU General Public License
    21 # You should have received a copy of the GNU General Public License
    22 # along with this program; if not, write to the Free Software
    22 # along with this program; if not, write to the Free Software
    23 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
    23 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
    24 
    24 
    25 
    25 
    26 from __future__ import absolute_import
    26 
    27 from __future__ import print_function
    27 
    28 import os
    28 import os
    29 import re
    29 import re
    30 import datetime
    30 import datetime
    31 from functools import reduce
    31 from functools import reduce
    32 from xml.dom import minidom
    32 from xml.dom import minidom
   105 
   105 
   106 
   106 
   107 [
   107 [
   108     SYNTAXELEMENT, SYNTAXATTRIBUTE, SIMPLETYPE, COMPLEXTYPE, COMPILEDCOMPLEXTYPE,
   108     SYNTAXELEMENT, SYNTAXATTRIBUTE, SIMPLETYPE, COMPLEXTYPE, COMPILEDCOMPLEXTYPE,
   109     ATTRIBUTESGROUP, ELEMENTSGROUP, ATTRIBUTE, ELEMENT, CHOICE, ANY, TAG, CONSTRAINT,
   109     ATTRIBUTESGROUP, ELEMENTSGROUP, ATTRIBUTE, ELEMENT, CHOICE, ANY, TAG, CONSTRAINT,
   110 ] = range(13)
   110 ] = list(range(13))
   111 
   111 
   112 
   112 
   113 def NotSupportedYet(type):
   113 def NotSupportedYet(type):
   114     """
   114     """
   115     Function that generates a function that point out to user that datatype
   115     Function that generates a function that point out to user that datatype
   126     """
   126     """
   127     This function calculates the number of whitespace for indentation
   127     This function calculates the number of whitespace for indentation
   128     """
   128     """
   129     first = indent * 2
   129     first = indent * 2
   130     second = first + len(balise) + 1
   130     second = first + len(balise) + 1
   131     return u'\t'.expandtabs(first), u'\t'.expandtabs(second)
   131     return '\t'.expandtabs(first), '\t'.expandtabs(second)
   132 
   132 
   133 
   133 
   134 def GetAttributeValue(attr, extract=True):
   134 def GetAttributeValue(attr, extract=True):
   135     """
   135     """
   136     Function that extracts data from a tree node
   136     Function that extracts data from a tree node
   142         return attr
   142         return attr
   143     if len(attr.childNodes) == 1:
   143     if len(attr.childNodes) == 1:
   144         return text(unescape(attr.childNodes[0].data))
   144         return text(unescape(attr.childNodes[0].data))
   145     else:
   145     else:
   146         # content is a CDATA
   146         # content is a CDATA
   147         txt = u''
   147         txt = ''
   148         for node in attr.childNodes:
   148         for node in attr.childNodes:
   149             if not (node.nodeName == "#text" and node.data.strip() == u''):
   149             if not (node.nodeName == "#text" and node.data.strip() == ''):
   150                 txt += text(unescape(node.data))
   150                 txt += text(unescape(node.data))
   151         return text
   151         return text
   152 
   152 
   153 
   153 
   154 def GetNormalizedString(attr, extract=True):
   154 def GetNormalizedString(attr, extract=True):
   632             def initial_value():
   632             def initial_value():
   633                 value = infos["elmt_type"]["initial"]()
   633                 value = infos["elmt_type"]["initial"]()
   634                 if infos["type"] != ANY:
   634                 if infos["type"] != ANY:
   635                     DefaultElementClass.__setattr__(value, "tag", element_name)
   635                     DefaultElementClass.__setattr__(value, "tag", element_name)
   636                 return value
   636                 return value
   637         return [initial_value() for dummy in xrange(infos["minOccurs"])]
   637         return [initial_value() for dummy in range(infos["minOccurs"])]
   638     else:
   638     else:
   639         return []
   639         return []
   640 
   640 
   641 
   641 
   642 def GetContentInfos(name, choices):
   642 def GetContentInfos(name, choices):
   689             if choice_name in choices_dict:
   689             if choice_name in choices_dict:
   690                 raise ValueError("'%s' element defined two times in choice" % choice_name)
   690                 raise ValueError("'%s' element defined two times in choice" % choice_name)
   691             choices_dict[choice_name] = infos
   691             choices_dict[choice_name] = infos
   692     prefix = ("%s:" % factory.TargetNamespace
   692     prefix = ("%s:" % factory.TargetNamespace
   693               if factory.TargetNamespace is not None else "")
   693               if factory.TargetNamespace is not None else "")
   694     choices_xpath = "|".join(map(lambda x: prefix + x, choices_dict.keys()))
   694     choices_xpath = "|".join([prefix + x for x in list(choices_dict.keys())])
   695 
   695 
   696     def GetContentInitial():
   696     def GetContentInitial():
   697         content_name, infos = choices[0]
   697         content_name, infos = choices[0]
   698         if content_name == "sequence":
   698         if content_name == "sequence":
   699             content_value = []
   699             content_value = []
   700             for dummy in xrange(infos["minOccurs"]):
   700             for dummy in range(infos["minOccurs"]):
   701                 for element_infos in infos["elements"]:
   701                 for element_infos in infos["elements"]:
   702                     content_value.extend(GetElementInitialValue(factory, element_infos))
   702                     content_value.extend(GetElementInitialValue(factory, element_infos))
   703         else:
   703         else:
   704             content_value = GetElementInitialValue(factory, infos)
   704             content_value = GetElementInitialValue(factory, infos)
   705         return content_value
   705         return content_value
   799 
   799 
   800     def GetQualifiedNameInfos(self, name, namespace=None, canbenone=False):
   800     def GetQualifiedNameInfos(self, name, namespace=None, canbenone=False):
   801         if namespace is None:
   801         if namespace is None:
   802             if name in self.Namespaces[self.SchemaNamespace]:
   802             if name in self.Namespaces[self.SchemaNamespace]:
   803                 return self.Namespaces[self.SchemaNamespace][name]
   803                 return self.Namespaces[self.SchemaNamespace][name]
   804             for space, elements in self.Namespaces.iteritems():
   804             for space, elements in self.Namespaces.items():
   805                 if space != self.SchemaNamespace and name in elements:
   805                 if space != self.SchemaNamespace and name in elements:
   806                     return elements[name]
   806                     return elements[name]
   807             parts = name.split("_", 1)
   807             parts = name.split("_", 1)
   808             if len(parts) > 1:
   808             if len(parts) > 1:
   809                 group = self.GetQualifiedNameInfos(parts[0], namespace)
   809                 group = self.GetQualifiedNameInfos(parts[0], namespace)
   841 
   841 
   842     def SplitQualifiedName(self, name, namespace=None, canbenone=False):
   842     def SplitQualifiedName(self, name, namespace=None, canbenone=False):
   843         if namespace is None:
   843         if namespace is None:
   844             if name in self.Namespaces[self.SchemaNamespace]:
   844             if name in self.Namespaces[self.SchemaNamespace]:
   845                 return name, None
   845                 return name, None
   846             for space, elements in self.Namespaces.items():
   846             for space, elements in list(self.Namespaces.items()):
   847                 if space != self.SchemaNamespace and name in elements:
   847                 if space != self.SchemaNamespace and name in elements:
   848                     return name, None
   848                     return name, None
   849             parts = name.split("_", 1)
   849             parts = name.split("_", 1)
   850             if len(parts) > 1:
   850             if len(parts) > 1:
   851                 group = self.GetQualifiedNameInfos(parts[0], namespace)
   851                 group = self.GetQualifiedNameInfos(parts[0], namespace)
   881             raise ValueError("Unknown namespace \"%s\"!" % namespace)
   881             raise ValueError("Unknown namespace \"%s\"!" % namespace)
   882         return None, None
   882         return None, None
   883 
   883 
   884     def ExtractNodeAttrs(self, element_name, node, valid_attrs):
   884     def ExtractNodeAttrs(self, element_name, node, valid_attrs):
   885         attrs = {}
   885         attrs = {}
   886         for qualified_name, attr in node._attrs.items():
   886         for qualified_name, attr in list(node._attrs.items()):
   887             namespace, name = DecomposeQualifiedName(qualified_name)
   887             namespace, name = DecomposeQualifiedName(qualified_name)
   888             if name in valid_attrs:
   888             if name in valid_attrs:
   889                 infos = self.GetQualifiedNameInfos(name, namespace)
   889                 infos = self.GetQualifiedNameInfos(name, namespace)
   890                 if infos["type"] != SYNTAXATTRIBUTE:
   890                 if infos["type"] != SYNTAXATTRIBUTE:
   891                     raise ValueError("\"%s\" can't be a member attribute!" % name)
   891                     raise ValueError("\"%s\" can't be a member attribute!" % name)
  1014             return typeinfos
  1014             return typeinfos
  1015 
  1015 
  1016     def GetEquivalentParents(self, parent):
  1016     def GetEquivalentParents(self, parent):
  1017         return reduce(lambda x, y: x + y,
  1017         return reduce(lambda x, y: x + y,
  1018                       [[p] + self.GetEquivalentParents(p)
  1018                       [[p] + self.GetEquivalentParents(p)
  1019                        for p in self.EquivalentClassesParent.get(parent, {}).keys()], [])
  1019                        for p in list(self.EquivalentClassesParent.get(parent, {}).keys())], [])
  1020 
  1020 
  1021     def CreateClasses(self):
  1021     def CreateClasses(self):
  1022         """
  1022         """
  1023         Method that generates the classes
  1023         Method that generates the classes
  1024         """
  1024         """
  1025         self.ParseSchema()
  1025         self.ParseSchema()
  1026         for name, infos in self.Namespaces[self.TargetNamespace].items():
  1026         for name, infos in list(self.Namespaces[self.TargetNamespace].items()):
  1027             if infos["type"] == ELEMENT:
  1027             if infos["type"] == ELEMENT:
  1028                 if not isinstance(infos["elmt_type"], string_types) and \
  1028                 if not isinstance(infos["elmt_type"], string_types) and \
  1029                    infos["elmt_type"]["type"] == COMPLEXTYPE:
  1029                    infos["elmt_type"]["type"] == COMPLEXTYPE:
  1030                     self.ComputeAfter.append((name, None, infos["elmt_type"], True))
  1030                     self.ComputeAfter.append((name, None, infos["elmt_type"], True))
  1031                     while len(self.ComputeAfter) > 0:
  1031                     while len(self.ComputeAfter) > 0:
  1053                             result = self.CreateClass(*self.ComputeAfter.pop(0))
  1053                             result = self.CreateClass(*self.ComputeAfter.pop(0))
  1054                             if result is not None and \
  1054                             if result is not None and \
  1055                                not isinstance(result, string_types):
  1055                                not isinstance(result, string_types):
  1056                                 self.Namespaces[self.TargetNamespace][result["name"]] = result
  1056                                 self.Namespaces[self.TargetNamespace][result["name"]] = result
  1057 
  1057 
  1058         for name, parents in self.ComputedClassesLookUp.iteritems():
  1058         for name, parents in self.ComputedClassesLookUp.items():
  1059             if isinstance(parents, dict):
  1059             if isinstance(parents, dict):
  1060                 computed_classes = parents.items()
  1060                 computed_classes = list(parents.items())
  1061             elif parents[1] is not None:
  1061             elif parents[1] is not None:
  1062                 computed_classes = [(self.etreeNamespaceFormat % parents[1], parents[0])]
  1062                 computed_classes = [(self.etreeNamespaceFormat % parents[1], parents[0])]
  1063             else:
  1063             else:
  1064                 computed_classes = []
  1064                 computed_classes = []
  1065             for parent, computed_class in computed_classes:
  1065             for parent, computed_class in computed_classes:
  1099                         return classname
  1099                         return classname
  1100             elif result is not None:
  1100             elif result is not None:
  1101                 if self.FileName is not None:
  1101                 if self.FileName is not None:
  1102                     classinfos["base"] = self.ComputedClasses[self.FileName].get(result["name"], None)
  1102                     classinfos["base"] = self.ComputedClasses[self.FileName].get(result["name"], None)
  1103                     if classinfos["base"] is None:
  1103                     if classinfos["base"] is None:
  1104                         for filename, classes in self.ComputedClasses.iteritems():
  1104                         for filename, classes in self.ComputedClasses.items():
  1105                             if filename != self.FileName:
  1105                             if filename != self.FileName:
  1106                                 classinfos["base"] = classes.get(result["name"], None)
  1106                                 classinfos["base"] = classes.get(result["name"], None)
  1107                                 if classinfos["base"] is not None:
  1107                                 if classinfos["base"] is not None:
  1108                                     break
  1108                                     break
  1109                 else:
  1109                 else:
  1195 
  1195 
  1196     def PrintClasses(self):
  1196     def PrintClasses(self):
  1197         """
  1197         """
  1198         Method that print the classes generated
  1198         Method that print the classes generated
  1199         """
  1199         """
  1200         items = self.ComputedClasses.items()
  1200         items = list(self.ComputedClasses.items())
  1201         items.sort()
  1201         items.sort()
  1202         if self.FileName is not None:
  1202         if self.FileName is not None:
  1203             for filename, classes in items:
  1203             for filename, classes in items:
  1204                 print("File '%s':" % filename)
  1204                 print("File '%s':" % filename)
  1205                 class_items = classes.items()
  1205                 class_items = list(classes.items())
  1206                 class_items.sort()
  1206                 class_items.sort()
  1207                 for classname, xmlclass in class_items:
  1207                 for classname, xmlclass in class_items:
  1208                     print("%s: %s" % (classname, str(xmlclass)))
  1208                     print("%s: %s" % (classname, str(xmlclass)))
  1209         else:
  1209         else:
  1210             for classname, xmlclass in items:
  1210             for classname, xmlclass in items:
  1211                 print("%s: %s" % (classname, str(xmlclass)))
  1211                 print("%s: %s" % (classname, str(xmlclass)))
  1212 
  1212 
  1213     def PrintClassNames(self):
  1213     def PrintClassNames(self):
  1214         classnames = self.XMLClassDefinitions.keys()
  1214         classnames = list(self.XMLClassDefinitions.keys())
  1215         classnames.sort()
  1215         classnames.sort()
  1216         for classname in classnames:
  1216         for classname in classnames:
  1217             print(classname)
  1217             print(classname)
  1218 
  1218 
  1219 
  1219 
  1317             else:
  1317             else:
  1318                 element_name = factory.etreeNamespaceFormat % name
  1318                 element_name = factory.etreeNamespaceFormat % name
  1319                 if element_infos["maxOccurs"] == "unbounded" or element_infos["maxOccurs"] > 1:
  1319                 if element_infos["maxOccurs"] == "unbounded" or element_infos["maxOccurs"] > 1:
  1320                     values = self.findall(element_name)
  1320                     values = self.findall(element_name)
  1321                     if element_infos["elmt_type"]["type"] == SIMPLETYPE:
  1321                     if element_infos["elmt_type"]["type"] == SIMPLETYPE:
  1322                         return map(lambda value:
  1322                         return [element_infos["elmt_type"]["extract"](value.text, extract=False) for value in values]
  1323                                    element_infos["elmt_type"]["extract"](value.text, extract=False),
       
  1324                                    values)
       
  1325                     return values
  1323                     return values
  1326                 else:
  1324                 else:
  1327                     value = self.find(element_name)
  1325                     value = self.find(element_name)
  1328                     if element_infos["elmt_type"]["type"] == SIMPLETYPE:
  1326                     if element_infos["elmt_type"]["type"] == SIMPLETYPE:
  1329                         return element_infos["elmt_type"]["extract"](value.text, extract=False)
  1327                         return element_infos["elmt_type"]["extract"](value.text, extract=False)
  1373 
  1371 
  1374                 for element in self.xpath(element_xpath, namespaces=factory.NSMAP):
  1372                 for element in self.xpath(element_xpath, namespaces=factory.NSMAP):
  1375                     self.remove(element)
  1373                     self.remove(element)
  1376 
  1374 
  1377                 if value is not None:
  1375                 if value is not None:
  1378                     element_idx = elements.keys().index(name)
  1376                     element_idx = list(elements.keys()).index(name)
  1379                     if element_idx > 0:
  1377                     if element_idx > 0:
  1380                         previous_elements_xpath = "|".join(map(
  1378                         previous_elements_xpath = "|".join([prefix + x
  1381                             lambda x: prefix + x
       
  1382                             if x != "content"
  1379                             if x != "content"
  1383                             else elements["content"]["elmt_type"]["choices_xpath"].path,
  1380                             else elements["content"]["elmt_type"]["choices_xpath"].path for x in list(elements.keys())[:element_idx]])
  1384                             elements.keys()[:element_idx]))
       
  1385 
  1381 
  1386                         insertion_point = len(self.xpath(previous_elements_xpath, namespaces=factory.NSMAP))
  1382                         insertion_point = len(self.xpath(previous_elements_xpath, namespaces=factory.NSMAP))
  1387                     else:
  1383                     else:
  1388                         insertion_point = 0
  1384                         insertion_point = 0
  1389 
  1385 
  1485         else:
  1481         else:
  1486             if not derived:
  1482             if not derived:
  1487                 children.extend(self.getElementAttributes())
  1483                 children.extend(self.getElementAttributes())
  1488             if "base" in classinfos:
  1484             if "base" in classinfos:
  1489                 children.extend(classinfos["base"].getElementInfos(self, name, derived=True)["children"])
  1485                 children.extend(classinfos["base"].getElementInfos(self, name, derived=True)["children"])
  1490             for element_name, element in elements.items():
  1486             for element_name, element in list(elements.items()):
  1491                 if element["minOccurs"] == 0:
  1487                 if element["minOccurs"] == 0:
  1492                     use = "optional"
  1488                     use = "optional"
  1493                 if element_name == "content" and element["type"] == CHOICE:
  1489                 if element_name == "content" and element["type"] == CHOICE:
  1494                     attr_type = [(choice["name"], None) for choice in element["choices"]]
  1490                     attr_type = [(choice["name"], None) for choice in element["choices"]]
  1495                     if self.content is None:
  1491                     if self.content is None:
  1585                 self.set(attribute["name"], attribute["attr_type"]["generate"](attribute["attr_type"]["initial"]()))
  1581                 self.set(attribute["name"], attribute["attr_type"]["generate"](attribute["attr_type"]["initial"]()))
  1586         for element in classinfos["elements"]:
  1582         for element in classinfos["elements"]:
  1587             if element["type"] != CHOICE:
  1583             if element["type"] != CHOICE:
  1588                 initial = GetElementInitialValue(factory, element)
  1584                 initial = GetElementInitialValue(factory, element)
  1589                 if initial is not None:
  1585                 if initial is not None:
  1590                     map(self.append, initial)
  1586                     list(map(self.append, initial))
  1591     return initMethod
  1587     return initMethod
  1592 
  1588 
  1593 
  1589 
  1594 def generateSetMethod(attr):
  1590 def generateSetMethod(attr):
  1595     def setMethod(self, value):
  1591     def setMethod(self, value):
  1840         self.NSMAP = namespaces
  1836         self.NSMAP = namespaces
  1841         targetNamespace = etree.QName(default_namespace_format % "d").namespace
  1837         targetNamespace = etree.QName(default_namespace_format % "d").namespace
  1842         if targetNamespace is not None:
  1838         if targetNamespace is not None:
  1843             self.RootNSMAP = {
  1839             self.RootNSMAP = {
  1844                 name if targetNamespace != uri else None: uri
  1840                 name if targetNamespace != uri else None: uri
  1845                 for name, uri in namespaces.iteritems()}
  1841                 for name, uri in namespaces.items()}
  1846         else:
  1842         else:
  1847             self.RootNSMAP = namespaces
  1843             self.RootNSMAP = namespaces
  1848         self.BaseClass = base_class
  1844         self.BaseClass = base_class
  1849         self.XSDSchema = xsd_schema
  1845         self.XSDSchema = xsd_schema
  1850 
  1846 
  1943     factory.Parser = parser
  1939     factory.Parser = parser
  1944 
  1940 
  1945     ComputedClasses = factory.CreateClasses()
  1941     ComputedClasses = factory.CreateClasses()
  1946     if factory.FileName is not None:
  1942     if factory.FileName is not None:
  1947         ComputedClasses = ComputedClasses[factory.FileName]
  1943         ComputedClasses = ComputedClasses[factory.FileName]
  1948     BaseClass = [(name, XSDclass) for name, XSDclass in ComputedClasses.items() if XSDclass.IsBaseClass]
  1944     BaseClass = [(name, XSDclass) for name, XSDclass in list(ComputedClasses.items()) if XSDclass.IsBaseClass]
  1949 
  1945 
  1950     parser.initMembers(
  1946     parser.initMembers(
  1951         factory.NSMAP,
  1947         factory.NSMAP,
  1952         factory.etreeNamespaceFormat,
  1948         factory.etreeNamespaceFormat,
  1953         BaseClass[0] if len(BaseClass) == 1 else None,
  1949         BaseClass[0] if len(BaseClass) == 1 else None,