xmlclass/xmlclass.py
branch python3
changeset 3752 9f6f46dbe3ae
parent 3750 f62625418bff
child 3755 ca814b175391
equal deleted inserted replaced
3751:a80a66ba52d6 3752:9f6f46dbe3ae
    21 # You should have received a copy of the GNU General Public License
    21 # You should have received a copy of the GNU General Public License
    22 # along with this program; if not, write to the Free Software
    22 # along with this program; if not, write to the Free Software
    23 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
    23 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
    24 
    24 
    25 
    25 
    26 
       
    27 
       
    28 import os
    26 import os
    29 import re
    27 import re
    30 import datetime
    28 import datetime
    31 from functools import reduce
    29 from functools import reduce
    32 from xml.dom import minidom
    30 from xml.dom import minidom
    33 from xml.sax.saxutils import unescape
    31 from xml.sax.saxutils import unescape
    34 from collections import OrderedDict
    32 from collections import OrderedDict
    35 from builtins import str as text
    33 
    36 
       
    37 from six import string_types
       
    38 from six.moves import xrange
       
    39 from lxml import etree
    34 from lxml import etree
    40 
    35 
    41 
    36 
    42 def CreateNode(name):
    37 def CreateNode(name):
    43     node = minidom.Node()
    38     node = minidom.Node()
   139     @return: data extracted as string
   134     @return: data extracted as string
   140     """
   135     """
   141     if not extract:
   136     if not extract:
   142         return attr
   137         return attr
   143     if len(attr.childNodes) == 1:
   138     if len(attr.childNodes) == 1:
   144         return text(unescape(attr.childNodes[0].data))
   139         return str(unescape(attr.childNodes[0].data))
   145     else:
   140     else:
   146         # content is a CDATA
   141         # content is a CDATA
   147         txt = ''
   142         txt = ''
   148         for node in attr.childNodes:
   143         for node in attr.childNodes:
   149             if not (node.nodeName == "#text" and node.data.strip() == ''):
   144             if not (node.nodeName == "#text" and node.data.strip() == ''):
   150                 txt += text(unescape(node.data))
   145                 txt += str(unescape(node.data))
   151         return text
   146         return txt
   152 
   147 
   153 
   148 
   154 def GetNormalizedString(attr, extract=True):
   149 def GetNormalizedString(attr, extract=True):
   155     """
   150     """
   156     Function that normalizes a string according to XML 1.0. Replace
   151     Function that normalizes a string according to XML 1.0. Replace
   574     return {
   569     return {
   575         "type": COMPLEXTYPE,
   570         "type": COMPLEXTYPE,
   576         "extract": ExtractAny,
   571         "extract": ExtractAny,
   577         "generate": GenerateAny,
   572         "generate": GenerateAny,
   578         "initial": InitialAny,
   573         "initial": InitialAny,
   579         "check": lambda x: isinstance(x, (string_types, etree.ElementBase))
   574         "check": lambda x: isinstance(x, (str, etree.ElementBase))
   580     }
   575     }
   581 
   576 
   582 
   577 
   583 def GenerateTagInfos(infos):
   578 def GenerateTagInfos(infos):
   584     def ExtractTag(tree):
   579     def ExtractTag(tree):
   610         "check": lambda x: x is None or infos["minOccurs"] == 0 and x
   605         "check": lambda x: x is None or infos["minOccurs"] == 0 and x
   611     }
   606     }
   612 
   607 
   613 
   608 
   614 def FindTypeInfos(factory, infos):
   609 def FindTypeInfos(factory, infos):
   615     if isinstance(infos, string_types):
   610     if isinstance(infos, str):
   616         namespace, name = DecomposeQualifiedName(infos)
   611         namespace, name = DecomposeQualifiedName(infos)
   617         return factory.GetQualifiedNameInfos(name, namespace)
   612         return factory.GetQualifiedNameInfos(name, namespace)
   618     return infos
   613     return infos
   619 
   614 
   620 
   615 
   966         else:
   961         else:
   967             lookup_classes[parent] = typeinfos
   962             lookup_classes[parent] = typeinfos
   968 
   963 
   969     def AddToLookupClass(self, name, parent, typeinfos):
   964     def AddToLookupClass(self, name, parent, typeinfos):
   970         lookup_name = self.etreeNamespaceFormat % name
   965         lookup_name = self.etreeNamespaceFormat % name
   971         if isinstance(typeinfos, string_types):
   966         if isinstance(typeinfos, str):
   972             self.AddEquivalentClass(name, typeinfos)
   967             self.AddEquivalentClass(name, typeinfos)
   973             typeinfos = self.etreeNamespaceFormat % typeinfos
   968             typeinfos = self.etreeNamespaceFormat % typeinfos
   974         lookup_classes = self.ComputedClassesLookUp.get(lookup_name)
   969         lookup_classes = self.ComputedClassesLookUp.get(lookup_name)
   975         if lookup_classes is None:
   970         if lookup_classes is None:
   976             self.ComputedClassesLookUp[lookup_name] = (typeinfos, parent)
   971             self.ComputedClassesLookUp[lookup_name] = (typeinfos, parent)
   984             self.AddDistinctionBetweenParentsInLookupClass(
   979             self.AddDistinctionBetweenParentsInLookupClass(
   985                 lookup_classes, parent, typeinfos)
   980                 lookup_classes, parent, typeinfos)
   986             self.ComputedClassesLookUp[lookup_name] = lookup_classes
   981             self.ComputedClassesLookUp[lookup_name] = lookup_classes
   987 
   982 
   988     def ExtractTypeInfos(self, name, parent, typeinfos):
   983     def ExtractTypeInfos(self, name, parent, typeinfos):
   989         if isinstance(typeinfos, string_types):
   984         if isinstance(typeinfos, str):
   990             namespace, type_name = DecomposeQualifiedName(typeinfos)
   985             namespace, type_name = DecomposeQualifiedName(typeinfos)
   991             infos = self.GetQualifiedNameInfos(type_name, namespace)
   986             infos = self.GetQualifiedNameInfos(type_name, namespace)
   992             if name != "base":
   987             if name != "base":
   993                 if infos["type"] == SIMPLETYPE:
   988                 if infos["type"] == SIMPLETYPE:
   994                     self.AddToLookupClass(name, parent, DefaultElementClass)
   989                     self.AddToLookupClass(name, parent, DefaultElementClass)
   995                 elif namespace == self.TargetNamespace:
   990                 elif namespace == self.TargetNamespace:
   996                     self.AddToLookupClass(name, parent, type_name)
   991                     self.AddToLookupClass(name, parent, type_name)
   997             if infos["type"] == COMPLEXTYPE:
   992             if infos["type"] == COMPLEXTYPE:
   998                 type_name, parent = self.SplitQualifiedName(type_name, namespace)
   993                 type_name, parent = self.SplitQualifiedName(type_name, namespace)
   999                 result = self.CreateClass(type_name, parent, infos)
   994                 result = self.CreateClass(type_name, parent, infos)
  1000                 if result is not None and not isinstance(result, string_types):
   995                 if result is not None and not isinstance(result, str):
  1001                     self.Namespaces[self.TargetNamespace][result["name"]] = result
   996                     self.Namespaces[self.TargetNamespace][result["name"]] = result
  1002                 return result
   997                 return result
  1003             elif infos["type"] == ELEMENT and infos["elmt_type"]["type"] == COMPLEXTYPE:
   998             elif infos["type"] == ELEMENT and infos["elmt_type"]["type"] == COMPLEXTYPE:
  1004                 type_name, parent = self.SplitQualifiedName(type_name, namespace)
   999                 type_name, parent = self.SplitQualifiedName(type_name, namespace)
  1005                 result = self.CreateClass(type_name, parent, infos["elmt_type"])
  1000                 result = self.CreateClass(type_name, parent, infos["elmt_type"])
  1006                 if result is not None and not isinstance(result, string_types):
  1001                 if result is not None and not isinstance(result, str):
  1007                     self.Namespaces[self.TargetNamespace][result["name"]] = result
  1002                     self.Namespaces[self.TargetNamespace][result["name"]] = result
  1008                 return result
  1003                 return result
  1009             else:
  1004             else:
  1010                 return infos
  1005                 return infos
  1011         elif typeinfos["type"] == COMPLEXTYPE:
  1006         elif typeinfos["type"] == COMPLEXTYPE:
  1023         Method that generates the classes
  1018         Method that generates the classes
  1024         """
  1019         """
  1025         self.ParseSchema()
  1020         self.ParseSchema()
  1026         for name, infos in list(self.Namespaces[self.TargetNamespace].items()):
  1021         for name, infos in list(self.Namespaces[self.TargetNamespace].items()):
  1027             if infos["type"] == ELEMENT:
  1022             if infos["type"] == ELEMENT:
  1028                 if not isinstance(infos["elmt_type"], string_types) and \
  1023                 if not isinstance(infos["elmt_type"], str) and \
  1029                    infos["elmt_type"]["type"] == COMPLEXTYPE:
  1024                    infos["elmt_type"]["type"] == COMPLEXTYPE:
  1030                     self.ComputeAfter.append((name, None, infos["elmt_type"], True))
  1025                     self.ComputeAfter.append((name, None, infos["elmt_type"], True))
  1031                     while len(self.ComputeAfter) > 0:
  1026                     while len(self.ComputeAfter) > 0:
  1032                         result = self.CreateClass(*self.ComputeAfter.pop(0))
  1027                         result = self.CreateClass(*self.ComputeAfter.pop(0))
  1033                         if result is not None and not isinstance(result, string_types):
  1028                         if result is not None and not isinstance(result, str):
  1034                             self.Namespaces[self.TargetNamespace][result["name"]] = result
  1029                             self.Namespaces[self.TargetNamespace][result["name"]] = result
  1035             elif infos["type"] == COMPLEXTYPE:
  1030             elif infos["type"] == COMPLEXTYPE:
  1036                 self.ComputeAfter.append((name, None, infos))
  1031                 self.ComputeAfter.append((name, None, infos))
  1037                 while len(self.ComputeAfter) > 0:
  1032                 while len(self.ComputeAfter) > 0:
  1038                     result = self.CreateClass(*self.ComputeAfter.pop(0))
  1033                     result = self.CreateClass(*self.ComputeAfter.pop(0))
  1039                     if result is not None and \
  1034                     if result is not None and \
  1040                        not isinstance(result, string_types):
  1035                        not isinstance(result, str):
  1041                         self.Namespaces[self.TargetNamespace][result["name"]] = result
  1036                         self.Namespaces[self.TargetNamespace][result["name"]] = result
  1042             elif infos["type"] == ELEMENTSGROUP:
  1037             elif infos["type"] == ELEMENTSGROUP:
  1043                 elements = []
  1038                 elements = []
  1044                 if "elements" in infos:
  1039                 if "elements" in infos:
  1045                     elements = infos["elements"]
  1040                     elements = infos["elements"]
  1046                 elif "choices" in infos:
  1041                 elif "choices" in infos:
  1047                     elements = infos["choices"]
  1042                     elements = infos["choices"]
  1048                 for element in elements:
  1043                 for element in elements:
  1049                     if not isinstance(element["elmt_type"], string_types) and \
  1044                     if not isinstance(element["elmt_type"], str) and \
  1050                        element["elmt_type"]["type"] == COMPLEXTYPE:
  1045                        element["elmt_type"]["type"] == COMPLEXTYPE:
  1051                         self.ComputeAfter.append((element["name"], infos["name"], element["elmt_type"]))
  1046                         self.ComputeAfter.append((element["name"], infos["name"], element["elmt_type"]))
  1052                         while len(self.ComputeAfter) > 0:
  1047                         while len(self.ComputeAfter) > 0:
  1053                             result = self.CreateClass(*self.ComputeAfter.pop(0))
  1048                             result = self.CreateClass(*self.ComputeAfter.pop(0))
  1054                             if result is not None and \
  1049                             if result is not None and \
  1055                                not isinstance(result, string_types):
  1050                                not isinstance(result, str):
  1056                                 self.Namespaces[self.TargetNamespace][result["name"]] = result
  1051                                 self.Namespaces[self.TargetNamespace][result["name"]] = result
  1057 
  1052 
  1058         for name, parents in self.ComputedClassesLookUp.items():
  1053         for name, parents in self.ComputedClassesLookUp.items():
  1059             if isinstance(parents, dict):
  1054             if isinstance(parents, dict):
  1060                 computed_classes = list(parents.items())
  1055                 computed_classes = list(parents.items())
  1750         self.ElementClass = None
  1745         self.ElementClass = None
  1751 
  1746 
  1752     def GetElementClass(self, element_tag, parent_tag=None, default=DefaultElementClass):
  1747     def GetElementClass(self, element_tag, parent_tag=None, default=DefaultElementClass):
  1753         element_class = self.LookUpClasses.get(element_tag, (default, None))
  1748         element_class = self.LookUpClasses.get(element_tag, (default, None))
  1754         if not isinstance(element_class, dict):
  1749         if not isinstance(element_class, dict):
  1755             if isinstance(element_class[0], string_types):
  1750             if isinstance(element_class[0], str):
  1756                 return self.GetElementClass(element_class[0], default=default)
  1751                 return self.GetElementClass(element_class[0], default=default)
  1757             return element_class[0]
  1752             return element_class[0]
  1758 
  1753 
  1759         element_with_parent_class = element_class.get(parent_tag, default)
  1754         element_with_parent_class = element_class.get(parent_tag, default)
  1760         if isinstance(element_with_parent_class, string_types):
  1755         if isinstance(element_with_parent_class, str):
  1761             return self.GetElementClass(element_with_parent_class, default=default)
  1756             return self.GetElementClass(element_with_parent_class, default=default)
  1762         return element_with_parent_class
  1757         return element_with_parent_class
  1763 
  1758 
  1764     def SetLookupResult(self, element, element_class):
  1759     def SetLookupResult(self, element, element_class):
  1765         """
  1760         """
  1817         if isinstance(element_class, list):
  1812         if isinstance(element_class, list):
  1818             children = "".join([
  1813             children = "".join([
  1819                 "%s " % etree.QName(child.tag).localname
  1814                 "%s " % etree.QName(child.tag).localname
  1820                 for child in element])
  1815                 for child in element])
  1821             for possible_class in element_class:
  1816             for possible_class in element_class:
  1822                 if isinstance(possible_class, string_types):
  1817                 if isinstance(possible_class, str):
  1823                     possible_class = self.GetElementClass(possible_class)
  1818                     possible_class = self.GetElementClass(possible_class)
  1824                 if possible_class.StructurePattern.match(children) is not None:
  1819                 if possible_class.StructurePattern.match(children) is not None:
  1825                     return possible_class
  1820                     return possible_class
  1826             return element_class[0]
  1821             return element_class[0]
  1827         return element_class
  1822         return element_class