Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/master' into rgiliam/build-rework
Browse files Browse the repository at this point in the history
  • Loading branch information
nirosys committed Oct 21, 2024
2 parents de83c92 + 479ed74 commit a0744ba
Show file tree
Hide file tree
Showing 7 changed files with 134 additions and 54 deletions.
24 changes: 16 additions & 8 deletions src-python/amazon/ion/simpleion.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@


def dump(obj, fp, imports=None, binary=True, sequence_as_stream=False, indent=None,
tuple_as_sexp=False, omit_version_marker=False):
tuple_as_sexp=False, omit_version_marker=False, trailing_commas=False):
"""Serialize ``obj`` as an Ion formatted stream and write it to fp.
The python object hierarchy is mapped to the Ion data model as described in the module pydoc.
Expand All @@ -121,11 +121,15 @@ def dump(obj, fp, imports=None, binary=True, sequence_as_stream=False, indent=No
indent (Str): If binary is False and indent is a string, then members of containers will be pretty-printed with
a newline followed by that string repeated for each level of nesting. None (the default) selects the most
compact representation without any newlines. Example: to indent with four spaces per level of nesting,
use ``' '``.
use ``' '``. Supported only in the pure Python implementation (because pretty printing is not yet
supported in the C implementation).
tuple_as_sexp (Optional[True|False]): When True, all tuple values will be written as Ion s-expressions.
When False, all tuple values will be written as Ion lists. Default: False.
omit_version_marker (Optional[True|False]): If binary is False and omit_version_marker is True, omits the
Ion Version Marker ($ion_1_0) from the output. Default: False.
trailing_commas (Optional[True|False]): If binary is False and pretty printing (indent is not None), includes
trailing commas in containers. Default: False. Supported only in the pure Python implementation (because
pretty printing is not yet supported in the C implementation).
Returns None.
"""
Expand All @@ -135,11 +139,12 @@ def dump(obj, fp, imports=None, binary=True, sequence_as_stream=False, indent=No
else:
return dump_python(obj, fp, imports=imports, binary=binary, sequence_as_stream=sequence_as_stream,
indent=indent,
tuple_as_sexp=tuple_as_sexp, omit_version_marker=omit_version_marker)
tuple_as_sexp=tuple_as_sexp, omit_version_marker=omit_version_marker,
trailing_commas=trailing_commas)


def dumps(obj, imports=None, binary=True, sequence_as_stream=False,
indent=None, tuple_as_sexp=False, omit_version_marker=False):
indent=None, tuple_as_sexp=False, omit_version_marker=False, trailing_commas=False):
"""Serialize obj as described by dump, return the serialized data as bytes or unicode.
Returns:
Expand All @@ -149,7 +154,8 @@ def dumps(obj, imports=None, binary=True, sequence_as_stream=False,
ion_buffer = io.BytesIO()

dump(obj, ion_buffer, imports=imports, sequence_as_stream=sequence_as_stream, binary=binary,
indent=indent, tuple_as_sexp=tuple_as_sexp, omit_version_marker=omit_version_marker)
indent=indent, tuple_as_sexp=tuple_as_sexp, omit_version_marker=omit_version_marker,
trailing_commas=trailing_commas)

ret_val = ion_buffer.getvalue()
ion_buffer.close()
Expand Down Expand Up @@ -292,9 +298,10 @@ def loads(ion_str: Union[bytes, str], catalog=None, single_value=True, parse_eag


def dump_python(obj, fp, imports=None, binary=True, sequence_as_stream=False,
indent=None, tuple_as_sexp=False, omit_version_marker=False):
indent=None, tuple_as_sexp=False, omit_version_marker=False,
trailing_commas=False):
"""'pure' Python implementation. Users should prefer to call ``dump``."""
raw_writer = binary_writer(imports) if binary else text_writer(indent=indent)
raw_writer = binary_writer(imports) if binary else text_writer(indent=indent, trailing_commas=trailing_commas)
writer = blocking_writer(raw_writer, fp)
from_type = _FROM_TYPE_TUPLE_AS_SEXP if tuple_as_sexp else _FROM_TYPE
if binary or not omit_version_marker:
Expand Down Expand Up @@ -475,7 +482,8 @@ def dump_extension(obj, fp, binary=True, sequence_as_stream=False, tuple_as_sexp

res = ionc.ionc_write(obj, binary, sequence_as_stream, tuple_as_sexp)

# TODO support "omit_version_marker" rather than hacking.
# TODO: support "omit_version_marker" rather than hacking.
# TODO: support "trailing_commas" (support is not included in the C code).
if not binary and not omit_version_marker:
res = b'$ion_1_0 ' + res
fp.write(res)
Expand Down
11 changes: 6 additions & 5 deletions src-python/amazon/ion/writer_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -359,7 +359,7 @@ def serialize(ion_event):


@coroutine
def _raw_writer_coroutine(depth=0, container_event=None, whence=None, indent=None):
def _raw_writer_coroutine(depth=0, container_event=None, whence=None, indent=None, trailing_commas=False):
pretty = indent is not None
serialize_container_delimiter = \
_serialize_container_delimiter_pretty if pretty else _serialize_container_delimiter_normal
Expand All @@ -369,7 +369,7 @@ def _raw_writer_coroutine(depth=0, container_event=None, whence=None, indent=Non
ion_event, self = (yield transition)
delegate = self

if has_written_values and not ion_event.event_type.ends_container:
if has_written_values and ((indent and trailing_commas) or not ion_event.event_type.ends_container):
# TODO This will always emit a delimiter for containers--should make it not do that.
# Write the delimiter for the next value.
if depth == 0:
Expand Down Expand Up @@ -402,7 +402,8 @@ def _raw_writer_coroutine(depth=0, container_event=None, whence=None, indent=Non

if ion_event.event_type is IonEventType.CONTAINER_START:
writer_event = DataEvent(WriteEventType.NEEDS_INPUT, _serialize_container_start(ion_event))
delegate = _raw_writer_coroutine(depth + 1, ion_event, self, indent=indent)
delegate = _raw_writer_coroutine(depth + 1, ion_event, self, indent=indent,
trailing_commas=trailing_commas)
elif depth == 0:
# Serialize at the top-level.
if ion_event.event_type is IonEventType.STREAM_END:
Expand All @@ -429,7 +430,7 @@ def _raw_writer_coroutine(depth=0, container_event=None, whence=None, indent=Non


# TODO Add options for text formatting.
def raw_writer(indent=None):
def raw_writer(indent=None, trailing_commas=False):
"""Returns a raw text writer co-routine.
Yields:
Expand All @@ -447,7 +448,7 @@ def raw_writer(indent=None):
# of writing (Dec 2022).
indent_bytes = indent.encode("UTF-8") if isinstance(indent, str) else indent

return writer_trampoline(_raw_writer_coroutine(indent=indent_bytes))
return writer_trampoline(_raw_writer_coroutine(indent=indent_bytes, trailing_commas=trailing_commas))

# TODO Determine if we need to do anything special for non-raw writer. Validation?
text_writer = raw_writer
12 changes: 6 additions & 6 deletions src/ioncmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -776,8 +776,8 @@ static iERR _ionc_write(hWRITER writer, PyObject* objs, PyObject* tuple_as_sexp,
*/
static PyObject* ionc_write(PyObject *self, PyObject *args, PyObject *kwds) {
iENTER;
PyObject *obj, *binary, *sequence_as_stream, *tuple_as_sexp;
ION_STREAM *ion_stream = NULL;
PyObject *obj=NULL, *binary=NULL, *sequence_as_stream=NULL, *tuple_as_sexp=NULL;
ION_STREAM *ion_stream = NULL;
BYTE* buf = NULL;
hWRITER writer = NULL;
static char *kwlist[] = {"obj", "binary", "sequence_as_stream", "tuple_as_sexp", NULL};
Expand Down Expand Up @@ -858,10 +858,10 @@ static PyObject* ionc_write(PyObject *self, PyObject *args, PyObject *kwds) {
ion_stream_close(ion_stream);
}
PyMem_Free(buf);
Py_DECREF(obj);
Py_DECREF(binary);
Py_DECREF(sequence_as_stream);
Py_DECREF(tuple_as_sexp);
Py_XDECREF(obj);
Py_XDECREF(binary);
Py_XDECREF(sequence_as_stream);
Py_XDECREF(tuple_as_sexp);

PyObject* exception = NULL;
if (err == IERR_INVALID_STATE) {
Expand Down
15 changes: 15 additions & 0 deletions tests/test_reader_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,3 +238,18 @@ def test_blocking_reader(p):
else:
actual = reader.send(input)
assert expected == actual

# Simple test to make sure that our C extension returns a proper exception if we give it unexpected parameters.
def test_ionc_reader_fails_graceful():
from amazon.ion.simpleion import c_ext
from amazon.ion.exceptions import IonException
from pytest import raises

try:
from amazon.ion import ionc
with raises(IonException) as e_info:
ionc.ionc_read(None, 0, None, None) # Invalid argument
# Exceptions don't compare eq..
assert str(e_info.value) == str(IonException('IERR_INVALID_ARG ')) # Space is added when the error is thrown.
except ImportError:
pass
93 changes: 58 additions & 35 deletions tests/test_simpleion.py
Original file line number Diff line number Diff line change
Expand Up @@ -556,34 +556,35 @@ def dump_func(*args, **kw):
def _generate_roundtrips(roundtrips):
for is_binary in (True, False):
for indent in ('not used',) if is_binary else (None, '', ' ', ' ', '\t', '\n\t\n '):
def _adjust_sids(annotations=()):
if is_binary and isinstance(obj, SymbolToken):
return SymbolToken(obj.text, 10 + len(annotations))
return obj

def _to_obj(to_type=None, annotations=(), tuple_as_sexp=False):
if to_type is None:
to_type = ion_type
obj_out = _adjust_sids(annotations)
return _FROM_ION_TYPE[ion_type].from_value(to_type, obj_out, annotations=annotations), is_binary, indent, tuple_as_sexp

for obj in roundtrips:
obj = _adjust_sids()
yield obj, is_binary, indent, False
if not obj_has_ion_type_and_annotation(obj):
ion_type = _ion_type(obj, _FROM_TYPE)
yield _to_obj()
else:
ion_type = obj.ion_type
if isinstance(obj, IonPyNull):
obj = None
yield _to_obj(annotations=(u'annot1', u'annot2'))
if isinstance(obj, list):
yield _to_obj(IonType.SEXP)
yield _to_obj(IonType.SEXP, annotations=(u'annot1', u'annot2'))
if isinstance(obj, tuple) and not isinstance(obj, SymbolToken):
yield _to_obj(IonType.SEXP, tuple_as_sexp=True)
yield _to_obj(IonType.SEXP, annotations=(u'annot1', u'annot2'), tuple_as_sexp=True)
for trailing_commas in (True, False):
def _adjust_sids(annotations=()):
if is_binary and isinstance(obj, SymbolToken):
return SymbolToken(obj.text, 10 + len(annotations))
return obj

def _to_obj(to_type=None, annotations=(), tuple_as_sexp=False):
if to_type is None:
to_type = ion_type
obj_out = _adjust_sids(annotations)
return _FROM_ION_TYPE[ion_type].from_value(to_type, obj_out, annotations=annotations), is_binary, indent, tuple_as_sexp, trailing_commas

for obj in roundtrips:
obj = _adjust_sids()
yield obj, is_binary, indent, False, trailing_commas
if not obj_has_ion_type_and_annotation(obj):
ion_type = _ion_type(obj, _FROM_TYPE)
yield _to_obj()
else:
ion_type = obj.ion_type
if isinstance(obj, IonPyNull):
obj = None
yield _to_obj(annotations=(u'annot1', u'annot2'))
if isinstance(obj, list):
yield _to_obj(IonType.SEXP)
yield _to_obj(IonType.SEXP, annotations=(u'annot1', u'annot2'))
if isinstance(obj, tuple) and not isinstance(obj, SymbolToken):
yield _to_obj(IonType.SEXP, tuple_as_sexp=True)
yield _to_obj(IonType.SEXP, annotations=(u'annot1', u'annot2'), tuple_as_sexp=True)


def _assert_roundtrip(before, after, tuple_as_sexp):
Expand All @@ -594,9 +595,9 @@ def _assert_roundtrip(before, after, tuple_as_sexp):
*tuple(_generate_roundtrips(_ROUNDTRIPS))
)
def test_roundtrip(p):
obj, is_binary, indent, tuple_as_sexp = p
obj, is_binary, indent, tuple_as_sexp, trailing_commas = p
out = BytesIO()
dump(obj, out, binary=is_binary, indent=indent, tuple_as_sexp=tuple_as_sexp)
dump(obj, out, binary=is_binary, indent=indent, tuple_as_sexp=tuple_as_sexp, trailing_commas=trailing_commas)
out.seek(0)
res = load(out)
_assert_roundtrip(obj, res, tuple_as_sexp)
Expand All @@ -606,10 +607,10 @@ def test_roundtrip(p):
*tuple(_generate_roundtrips(_ROUNDTRIPS))
)
def test_roundtrip_ion_stream(p):
obj, is_binary, indent, tuple_as_sexp = p
obj, is_binary, indent, tuple_as_sexp, trailing_commas = p
expected = [obj]
out = BytesIO()
dump(obj, out, binary=is_binary, indent=indent, tuple_as_sexp=tuple_as_sexp)
dump(obj, out, binary=is_binary, indent=indent, tuple_as_sexp=tuple_as_sexp, trailing_commas=trailing_commas)
out.seek(0)
res = load(out, single_value=False, parse_eagerly=True)
_assert_roundtrip(expected, res, tuple_as_sexp)
Expand Down Expand Up @@ -643,6 +644,7 @@ class PrettyPrintParams(NamedTuple):
indent: str
exact_text: Optional[str] = None
regexes: Sequence = []
trailing_commas: bool = False

@parametrize(
PrettyPrintParams(ion_text='a', indent=' ', exact_text="$ion_1_0\na"),
Expand All @@ -659,15 +661,36 @@ class PrettyPrintParams(NamedTuple):
exact_text="$ion_1_0\n[\n\tapple,\n\t\"banana\",\n\t{\n\t\troof: false\n\t}\n]"),
PrettyPrintParams(ion_text='[apple, {roof: false, walls:4, door: wood::large::true}]', indent='\t',
regexes=["\\A\\$ion_1_0\n\\[\n\tapple,\n\t\\{", "\n\t\tdoor: wood::large::true,?\n",
"\n\t\troof: false,?\n", "\n\t\twalls: 4,?\n", "\n\t\\}\n\\]\\Z"])
"\n\t\troof: false,?\n", "\n\t\twalls: 4,?\n", "\n\t\\}\n\\]\\Z"]),
PrettyPrintParams(ion_text='a', indent=' ', trailing_commas=True, exact_text="$ion_1_0\na"),
PrettyPrintParams(ion_text='"a"', indent=' ', trailing_commas=True, exact_text="$ion_1_0\n\"a\""),
PrettyPrintParams(ion_text='\'$a__9\'', indent=' ', trailing_commas=True,
exact_text="$ion_1_0\n$a__9"),
PrettyPrintParams(ion_text='\'$a_\\\'_9\'', indent=' ', trailing_commas=True,
exact_text="$ion_1_0\n\'$a_\\\'_9\'"),
PrettyPrintParams(ion_text='[a, b, chair::2008-08-08T]', indent=' ', trailing_commas=True,
exact_text="$ion_1_0\n[\n a,\n b,\n chair::2008-08-08T,\n]"),
PrettyPrintParams(ion_text='[a, b, chair::2008-08-08T]',
indent=None, trailing_commas=True, # not pretty print
exact_text="$ion_1_0 [a,b,chair::2008-08-08T]"),
PrettyPrintParams(ion_text='[apple, {roof: false}]', indent='\t', trailing_commas=True,
exact_text="$ion_1_0\n[\n\tapple,\n\t{\n\t\troof: false,\n\t},\n]"),
PrettyPrintParams(ion_text='[apple, "banana", {roof: false}]', indent='\t', trailing_commas=True,
exact_text="$ion_1_0\n[\n\tapple,\n\t\"banana\",\n\t{\n\t\troof: false,\n\t},\n]"),
PrettyPrintParams(ion_text='[apple, {roof: false, walls:4, door: wood::large::true}]', indent='\t',
trailing_commas=True,
regexes=["\\A\\$ion_1_0\n\\[\n\tapple,\n\t\\{", "\n\t\tdoor: wood::large::true,\n",
"\n\t\troof: false,\n", "\n\t\twalls: 4,\n", "\n\t\\},\n\\]\\Z"])
)
def test_pretty_print(p):
if c_ext:
# TODO support pretty print for C extension.
return
ion_text, indent, exact_text, regexes = p
ion_text, indent, exact_text, regexes, trailing_commas = p
ion_value = loads(ion_text)
actual_pretty_ion_text = dumps(ion_value, binary=False, indent=indent)
actual_pretty_ion_text = dumps(ion_value, binary=False, indent=indent,
trailing_commas=trailing_commas)
if exact_text is not None:
assert actual_pretty_ion_text == exact_text
for regex_str in regexes:
Expand Down
15 changes: 15 additions & 0 deletions tests/test_writer_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,3 +134,18 @@ def test_blocking_writer(p):
result_type = writer.send(None)
assert isinstance(result_type, WriteEventType) and result_type is not WriteEventType.HAS_PENDING
assert p.expected == buf.getvalue()


# Simple test to make sure that our C extension returns a proper exception if we give it unexpected parameters.
def test_ionc_writer_fails_graceful():
from amazon.ion.exceptions import IonException
from pytest import raises

try:
from amazon.ion import ionc
with raises(IonException) as e_info:
ionc.ionc_write(None, True, False, False, None) # Invalid argument
# Exceptions don't compare eq..
assert str(e_info.value) == str(IonException('IERR_INVALID_ARG ')) # Space is added when the error is thrown.
except ImportError:
pass
18 changes: 18 additions & 0 deletions tests/test_writer_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,7 @@ def new_writer():
def test_raw_writer(p):
assert_writer_events(p, new_writer)


@parametrize(
(None, True),
('', True),
Expand Down Expand Up @@ -250,6 +251,7 @@ class _P_Q(NamedTuple):

pass


@parametrize(
_P_Q('a', False),
_P_Q('a0', False),
Expand Down Expand Up @@ -296,3 +298,19 @@ def test_quote_symbols(p):
assert needs_quotes == quoted
assert len(ion_value.ion_annotations) == 1
assert ion_value.ion_annotations[0].text == symbol_text


@parametrize(
(' ', True),
(' ', False),
(None, True),
(None, False))
def test_trailing_commas(p):
indent, trailing_commas = p

# Ensure raw_writer can handle trailing_commas value with indent value for all value combinations.
raw_writer(indent, trailing_commas)

# Ensure that a value dumped using the indent and trailing_commas values can be successfully loaded.
ion_val = loads('[a, {x:2, y: height::17}]')
assert ion_val == loads(dumps(ion_val, binary=False, indent=indent, trailing_commas=trailing_commas))

0 comments on commit a0744ba

Please sign in to comment.