Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Convert XML string back to Python object given type attributes #74

Open
wants to merge 13 commits into
base: master
Choose a base branch
from
138 changes: 138 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
# Visual Studio Code files
.vscode
.vscode/*
!.vscode/settings.json
!.vscode/tasks.json
!.vscode/launch.json
!.vscode/extensions.json
*.code-workspace

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/
21 changes: 21 additions & 0 deletions README.markdown
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ Converts a Python dictionary or other native data type into a valid XML string.
Details
=======

DicttoXML
---------

Supports item (`int`, `float`, `long`, `decimal.Decimal`, `bool`, `str`, `unicode`, `datetime`, `none` and other number-like objects) and collection (`list`, `set`, `tuple` and `dict`, as well as iterable and dict-like objects) data types, with arbitrary nesting for the collections. Items with a `datetime` type are converted to ISO format strings. Items with a `None` type become empty XML elements.

The root object passed into the `dicttoxml` method can be any of the supported data types.
Expand Down Expand Up @@ -37,6 +40,24 @@ Elements with an unsupported data type raise a TypeError exception.

If an element name is invalid XML, it is rendered with the name "key" and the invalid name is included as a `name` attribute. E.g. `{ "^.{0,256}$": "foo" }` would be rendered `<key name="^.{0,256}$">foo</key>`. An exception is element names with spaces, which are converted to underscores.

XMLtoDict
---------

Supports item (`int`, `float`, `bool`, `str`, `none`) and collection (`list` and `dict`) data types, with arbitrary nesting for the collections.

XML -> Python
int int
float float
str str
null None
bool bool
list list
dict dict

Data types are parsed from the `type` attribute of each element in the XML string. Therefore, type attributes must be enabled (`attr_type=True`) when creating the XML with DicttoXML.

Tip: Since DicttoXML converts the empty string ("") to "none", XMLtoDict will convert "none" to an empty string "". Bear this in mind when using this function to parse XML.

**This module should work in Python 2.6+ and Python 3.**

Installation
Expand Down
53 changes: 50 additions & 3 deletions dicttoxml.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import numbers
import logging
from xml.dom.minidom import parseString
from xml.etree import ElementTree


LOG = logging.getLogger("dicttoxml")
Expand Down Expand Up @@ -202,8 +203,6 @@ def convert_dict(obj, ids, parent, attr_type, item_func, cdata):
output = []
addline = output.append

item_name = item_func(parent)

for key, val in obj.items():
LOG.info('Looping inside convert_dict(): key="%s", val="%s", type(val)="%s"' % (
unicode_me(key), unicode_me(val), type(val).__name__)
Expand Down Expand Up @@ -330,7 +329,10 @@ def convert_kv(key, val, attr_type, attr={}, cdata=False):
key, attr = make_valid_xml_name(key, attr)

if attr_type:
attr['type'] = get_xml_type(val)
xml_type = get_xml_type(val)
attr['type'] = xml_type
if xml_type == 'str' and val != None:
attr['empty'] = not bool(val)
attrstring = make_attrstring(attr)
return '<%s%s>%s</%s>' % (
key, attrstring,
Expand Down Expand Up @@ -398,3 +400,48 @@ def dicttoxml(obj, root=True, custom_root='root', ids=False, attr_type=True,
addline(convert(obj, ids, attr_type, item_func, cdata, parent=''))
return ''.join(output).encode('utf-8')

def cast_from_attribute(text, attr):
    """Convert the text of an XML element into a Python value.

    The target type is read from ``attr["type"]``.

    Args:
        text: Text content of the element, or ``None`` for an empty element.
        attr: Mapping of the element's XML attributes. It must contain
            ``"type"``; string elements may also carry an ``"empty"`` flag
            (``"true"``/``"false"``, case-insensitive).

    Returns:
        ``str``, ``int``, ``float`` or ``bool`` built from ``text``; a new
        empty ``list`` or ``dict`` for the collection types (the caller is
        expected to fill it); or ``None`` for the ``null`` type.

    Raises:
        KeyError: if ``attr`` has no ``"type"`` entry.
        ValueError: if a ``bool`` element is neither ``true`` nor ``false``,
            or if ``int``/``float`` cannot parse ``text``.
        TypeError: if the type name is not one of the supported ones.
    """
    xml_type = attr["type"]
    if xml_type == "str":
        # The "empty" flag is optional: XML that was not produced with it
        # must still be readable, so fall back to "not flagged" instead of
        # failing with KeyError.
        flagged_empty = str(attr.get("empty", "")).lower() == "true"
        # An element without text has text=None; str(None) would yield the
        # literal "None", so map it to the empty string instead.
        if flagged_empty or text is None:
            return ""
        return str(text)
    elif xml_type == "int":
        return int(text)
    elif xml_type == "float":
        return float(text)
    elif xml_type == "bool":
        lowered = str(text).lower()
        if lowered == "true":
            return True
        elif lowered == "false":
            return False
        else:
            raise ValueError("bool attribute expected 'true' or 'false'")
    elif xml_type == "list":
        return []
    elif xml_type == "dict":
        return {}
    elif xml_type.lower() == "null":
        # Only the null type name is matched case-insensitively.
        return None
    else:
        raise TypeError("unsupported type: only 'str', 'int', 'float', 'bool', 'list', 'dict', and 'None' supported")

def xmltodict(obj):
    """Convert an XML string into nested Python objects using type attributes.

    Every element below the root must carry a ``type`` attribute, which
    decides how the element is converted. The root element itself is always
    treated as a dict; its own attributes are not inspected.

    Args:
        obj: XML document as ``str`` or ``bytes``.

    Returns:
        A one-entry dict ``{root_tag: contents}`` where ``contents`` is the
        dict built from the root's children.

    Raises:
        ValueError: if an element that is processed has no ``type`` attribute.
        xml.etree.ElementTree.ParseError: if ``obj`` is not well-formed XML.
    """
    # NOTE(security): xml.etree.ElementTree is not hardened against
    # maliciously constructed documents (e.g. entity-expansion attacks).
    # Only feed it XML from trusted sources, or pre-validate the input.

    def add_to_output(container, element):
        attributes = element.attrib
        if "type" not in attributes:
            raise ValueError("XML must contain type attributes for each tag")
        if not isinstance(container, (dict, list)):
            # Scalar parents cannot hold children; nested elements are ignored.
            return
        value = cast_from_attribute(element.text, attributes)
        if isinstance(container, dict):
            key = element.tag
            # Keys that are not valid XML names are serialized as
            # <key name="original">; restore the original so several such
            # entries do not overwrite each other under the tag "key".
            if key == "key" and "name" in attributes:
                key = attributes["name"]
            container[key] = value
        else:
            container.append(value)
        # For list/dict elements ``value`` is the fresh container to fill.
        for sub in element:
            add_to_output(value, sub)

    root = ElementTree.fromstring(obj)
    output = {}
    for child in root:
        add_to_output(output, child)
    return {root.tag: output}
29 changes: 29 additions & 0 deletions test/unit_tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#!/usr/bin/env python

import os, sys
import unittest
import dicttoxml

class UnitTests(unittest.TestCase):

def test_xmltodict(self):
input = {
'string' : "This is a string with special characters",
'empty_string' : '',
'int' : 1002,
'float' : 12.56,
'other_float' : float(80),
'boolean' : False,
'none_type' : None,
'list' : [99, 'sheep', 'dog'],
'empty_list' : [],
'list_of_dicts' : [{}, {'hi_there': 7, 'owl': 'exterminator'}, {'foo': 56.2, 'ok': True}],
'dict_of_lists' : {'list1': [3, 6, 'dog', 'cat', False], 'empty_list': []},
'nested_lists' : [[4, 5, 6, 7], [1, 2, 3, 4, [5, 6, 7, 8]]]
}
xml = dicttoxml.dicttoxml(input)
output = dicttoxml.xmltodict(xml)
self.assertEqual({'root': input}, output)

if __name__ == "__main__":
    # Executed as a script: hand control to unittest's command-line runner.
    unittest.main()