Ticket #10295: xml_serializer.py

File xml_serializer.py, 8.6 KB (added by kausikram, 7 years ago)

xml_serializer.py from serializer module

Line 
1"""
2XML serializer.
3"""
4
5from django.conf import settings
6from django.core.serializers import base
7from django.db import models
8from django.utils.xmlutils import SimplerXMLGenerator
9from django.utils.encoding import smart_unicode
10from xml.dom import pulldom
11
class Serializer(base.Serializer):
    """
    Serializes a QuerySet to XML.

    Output is written through ``self.xml``, a ``SimplerXMLGenerator`` that is
    created in ``start_serialization`` and wraps ``self.stream``.
    """

    def indent(self, level):
        """
        Write a newline plus ``level`` indentation steps as ignorable
        whitespace. Does nothing unless the ``indent`` option was given.
        """
        width = self.options.get('indent', None)
        if width is None:
            return
        self.xml.ignorableWhitespace('\n' + ' ' * width * level)

    def start_serialization(self):
        """
        Start serialization -- create the XML generator, open the document
        and open the root ``<django-objects>`` element.
        """
        # Fall back to the project-wide charset when no encoding was requested.
        encoding = self.options.get("encoding", settings.DEFAULT_CHARSET)
        self.xml = SimplerXMLGenerator(self.stream, encoding)
        self.xml.startDocument()
        self.xml.startElement("django-objects", {"version" : "1.0"})

    def end_serialization(self):
        """
        End serialization -- close the root element and end the document.
        """
        self.indent(0)
        self.xml.endElement("django-objects")
        self.xml.endDocument()

    def start_object(self, obj):
        """
        Called as each object is handled; opens its ``<object>`` element.

        Raises SerializationError if ``obj`` has no ``_meta`` attribute.
        """
        if not hasattr(obj, "_meta"):
            raise base.SerializationError("Non-model object (%s) encountered during serialization" % type(obj))

        self.indent(1)
        meta = obj._meta
        # "name" carries the name of the primary key field alongside its value.
        object_attrs = {
            "pk"    : smart_unicode(obj._get_pk_val()),
            "name"  : smart_unicode(meta.pk.name),
            "model" : smart_unicode(meta),
        }
        self.xml.startElement("object", object_attrs)

    def end_object(self, obj):
        """
        Called after handling all fields for an object; closes ``<object>``.
        """
        self.indent(1)
        self.xml.endElement("object")

    def handle_field(self, obj, field):
        """
        Called to handle each field on an object (except for ForeignKeys and
        ManyToManyFields).

        A value of None is written as an empty ``<None>`` child element; any
        other value is written as character data.
        """
        self.indent(2)
        field_attrs = {
            "name" : field.name,
            "type" : field.get_internal_type()
        }
        self.xml.startElement("field", field_attrs)

        if getattr(obj, field.name) is None:
            self.xml.addQuickElement("None")
        else:
            # The "string version" of the data comes from the serializer
            # base class.
            text = self.get_string_value(obj, field)
            self.xml.characters(smart_unicode(text))

        self.xml.endElement("field")

    def handle_fk_field(self, obj, field):
        """
        Called to handle a ForeignKey (we need to treat them slightly
        differently from regular fields).

        Writes the value of the field the relation points at on the related
        object, or a ``<None>`` element when there is no related object.
        """
        self._start_relational_field(field)
        related = getattr(obj, field.name)
        if related is None:
            self.xml.addQuickElement("None")
        else:
            target_name = field.rel.field_name
            if target_name == related._meta.pk.name:
                # Related to remote object via primary key
                key = related._get_pk_val()
            else:
                # Related to remote object via other field
                key = getattr(related, target_name)
            self.xml.characters(smart_unicode(key))
        self.xml.endElement("field")

    def handle_m2m_field(self, obj, field):
        """
        Called to handle a ManyToManyField. Related objects are only
        serialized as references to the object's PK (i.e. the related *data*
        is not dumped, just the relation).

        Fields whose ``creates_table`` is false are skipped entirely.
        """
        if not field.creates_table:
            return
        self._start_relational_field(field)
        for relobj in getattr(obj, field.name).iterator():
            self.xml.addQuickElement("object", attrs={"pk" : smart_unicode(relobj._get_pk_val())})
        self.xml.endElement("field")

    def _start_relational_field(self, field):
        """
        Helper to open the <field> element for relational fields. The caller
        is responsible for closing it.
        """
        self.indent(2)
        rel = field.rel
        relation_attrs = {
            "name" : field.name,
            "rel"  : rel.__class__.__name__,
            "to"   : smart_unicode(rel.to._meta),
        }
        self.xml.startElement("field", relation_attrs)
120
class Deserializer(base.Deserializer):
    """
    Deserialize XML.

    Iterating pulls ``<object>`` elements off a pulldom event stream one at a
    time and turns each into a DeserializedObject.
    """

    def __init__(self, stream_or_string, **options):
        """
        Set up the base deserializer, then start a pulldom parse of
        ``self.stream``.
        """
        super(Deserializer, self).__init__(stream_or_string, **options)
        self.event_stream = pulldom.parse(self.stream)

    def next(self):
        """
        Return the DeserializedObject for the next <object> element in the
        stream; raise StopIteration once the stream is exhausted.
        """
        for event, node in self.event_stream:
            if event != "START_ELEMENT" or node.nodeName != "object":
                continue
            # Pull the whole subtree into the node so it can be walked as DOM.
            self.event_stream.expandNode(node)
            return self._handle_object(node)
        raise StopIteration

    def _handle_object(self, node):
        """
        Convert an <object> node to a DeserializedObject.

        Raises DeserializationError when the node lacks a 'pk' attribute or
        one of its <field> children lacks a 'name' attribute.
        """
        # Look up the model using the model loading mechanism. If this fails,
        # bail.
        Model = self._get_model_from_node(node, "model")

        # The primary key seeds the data dictionary. If the node is missing
        # the pk attribute, bail.
        pk = node.getAttribute("pk")
        if not pk:
            raise base.DeserializationError("<object> node is missing the 'pk' attribute")

        pk_field = Model._meta.pk
        data = {pk_field.attname : pk_field.to_python(pk)}

        # m2m data is collected separately (this is saved as
        # {m2m_accessor_attribute : [list_of_related_objects]})
        m2m_data = {}

        # Deserialize each field.
        for field_node in node.getElementsByTagName("field"):
            # If the field is missing the name attribute, bail.
            field_name = field_node.getAttribute("name")
            if not field_name:
                raise base.DeserializationError("<field> node is missing the 'name' attribute")

            # A lookup failure here is not caught, so whatever get_field
            # raises for an unknown name propagates to the caller.
            field = Model._meta.get_field(field_name)

            # As is usually the case, relation fields get the special treatment.
            rel = field.rel
            if rel and isinstance(rel, models.ManyToManyRel):
                m2m_data[field.name] = self._handle_m2m_field_node(field_node, field)
            elif rel and isinstance(rel, models.ManyToOneRel):
                data[field.attname] = self._handle_fk_field_node(field_node, field)
            elif field_node.getElementsByTagName('None'):
                # A <None> child element marks a null value.
                data[field.name] = None
            else:
                data[field.name] = field.to_python(getInnerText(field_node).strip())

        # Return a DeserializedObject so that the m2m data has a place to live.
        return base.DeserializedObject(Model(**data), m2m_data)

    def _handle_fk_field_node(self, node, field):
        """
        Handle a <field> node for a ForeignKey.

        Returns None when the node contains a <None> element; otherwise the
        node's stripped inner text converted by the related model's target
        field.
        """
        if node.getElementsByTagName('None'):
            return None
        target_field = field.rel.to._meta.get_field(field.rel.field_name)
        return target_field.to_python(getInnerText(node).strip())

    def _handle_m2m_field_node(self, node, field):
        """
        Handle a <field> node for a ManyToManyField.

        Returns a list with one converted primary key per nested <object>
        element.
        """
        related_pks = []
        for c in node.getElementsByTagName("object"):
            related_pks.append(
                field.rel.to._meta.pk.to_python(c.getAttribute("pk")))
        return related_pks

    def _get_model_from_node(self, node, attr):
        """
        Helper to look up a model from a <object model=...> or a <field
        rel=... to=...> node.

        Raises DeserializationError when the attribute is missing or does not
        resolve to a model.
        """
        model_identifier = node.getAttribute(attr)
        if not model_identifier:
            raise base.DeserializationError(
                "<%s> node is missing the required '%s' attribute" \
                    % (node.nodeName, attr))

        # A TypeError from get_model (e.g. the identifier split into the
        # wrong number of parts) is treated the same as "model not found".
        identifier_parts = model_identifier.split(".")
        try:
            Model = models.get_model(*identifier_parts)
        except TypeError:
            Model = None

        if Model is not None:
            return Model
        raise base.DeserializationError(
            "<%s> node has invalid model identifier: '%s'" % \
                (node.nodeName, model_identifier))
223
224
def getInnerText(node):
    """
    Get all the inner text of a DOM node (recursively).

    Text and CDATA children contribute their data, element children
    contribute their own inner text, and every other node type is ignored.
    Pieces are concatenated in document order and returned as one string.
    """
    # inspired by http://mail.python.org/pipermail/xml-sig/2005-March/011022.html
    inner_text = []
    for child in node.childNodes:
        if child.nodeType in (child.TEXT_NODE, child.CDATA_SECTION_NODE):
            inner_text.append(child.data)
        elif child.nodeType == child.ELEMENT_NODE:
            # The recursive call returns a string; append it whole rather
            # than extend(), which would split it into single characters.
            inner_text.append(getInnerText(child))
    return u"".join(inner_text)
239
Back to Top