From eac299b6f1269656388c780d2b0c1d7bd7527d6e Mon Sep 17 00:00:00 2001 From: Richard Giliam Date: Fri, 18 Oct 2024 12:42:34 -0700 Subject: [PATCH 1/2] Fix invalid memory access in ionc_write (#376) * Fix invalid memory access when more arguments are passed to ionc_write than expected --- amazon/ion/ioncmodule.c | 12 ++++++------ tests/test_reader_base.py | 15 +++++++++++++++ tests/test_writer_base.py | 15 +++++++++++++++ 3 files changed, 36 insertions(+), 6 deletions(-) diff --git a/amazon/ion/ioncmodule.c b/amazon/ion/ioncmodule.c index 7f2b220d5..e66e0f7d1 100644 --- a/amazon/ion/ioncmodule.c +++ b/amazon/ion/ioncmodule.c @@ -776,8 +776,8 @@ static iERR _ionc_write(hWRITER writer, PyObject* objs, PyObject* tuple_as_sexp, */ static PyObject* ionc_write(PyObject *self, PyObject *args, PyObject *kwds) { iENTER; - PyObject *obj, *binary, *sequence_as_stream, *tuple_as_sexp; - ION_STREAM *ion_stream = NULL; + PyObject *obj=NULL, *binary=NULL, *sequence_as_stream=NULL, *tuple_as_sexp=NULL; + ION_STREAM *ion_stream = NULL; BYTE* buf = NULL; hWRITER writer = NULL; static char *kwlist[] = {"obj", "binary", "sequence_as_stream", "tuple_as_sexp", NULL}; @@ -858,10 +858,10 @@ static PyObject* ionc_write(PyObject *self, PyObject *args, PyObject *kwds) { ion_stream_close(ion_stream); } PyMem_Free(buf); - Py_DECREF(obj); - Py_DECREF(binary); - Py_DECREF(sequence_as_stream); - Py_DECREF(tuple_as_sexp); + Py_XDECREF(obj); + Py_XDECREF(binary); + Py_XDECREF(sequence_as_stream); + Py_XDECREF(tuple_as_sexp); PyObject* exception = NULL; if (err == IERR_INVALID_STATE) { diff --git a/tests/test_reader_base.py b/tests/test_reader_base.py index b51a9d414..1b075c459 100644 --- a/tests/test_reader_base.py +++ b/tests/test_reader_base.py @@ -238,3 +238,18 @@ def test_blocking_reader(p): else: actual = reader.send(input) assert expected == actual + +# Simple test to make sure that our C extension returns a proper exception if we give it unexpected parameters. +def test_ionc_reader_fails_graceful(): + from amazon.ion.simpleion import c_ext + from amazon.ion.exceptions import IonException + from pytest import raises + + try: + from amazon.ion import ionc + with raises(IonException) as e_info: + ionc.ionc_read(None, 0, None, None) # Invalid argument + # Exceptions don't compare eq.. + assert str(e_info.value) == str(IonException('IERR_INVALID_ARG ')) # Space is added when the error is thrown. + except ImportError: + pass diff --git a/tests/test_writer_base.py b/tests/test_writer_base.py index c2815752c..24dab8c8b 100644 --- a/tests/test_writer_base.py +++ b/tests/test_writer_base.py @@ -134,3 +134,18 @@ def test_blocking_writer(p): result_type = writer.send(None) assert isinstance(result_type, WriteEventType) and result_type is not WriteEventType.HAS_PENDING assert p.expected == buf.getvalue() + + +# Simple test to make sure that our C extension returns a proper exception if we give it unexpected parameters. +def test_ionc_writer_fails_graceful(): + from amazon.ion.exceptions import IonException + from pytest import raises + + try: + from amazon.ion import ionc + with raises(IonException) as e_info: + ionc.ionc_write(None, True, False, False, None) # Invalid argument + # Exceptions don't compare eq.. + assert str(e_info.value) == str(IonException('IERR_INVALID_ARG ')) # Space is added when the error is thrown. + except ImportError: + pass From 479ed74b7ac39ca81e77d6496377c1232eebe965 Mon Sep 17 00:00:00 2001 From: mavjop Date: Mon, 21 Oct 2024 00:33:04 -0700 Subject: [PATCH 2/2] feat: Add `trailing_commas` to `dump`/`dumps` (#372) * feat: Add `trailing_commas` to `dump`/`dumps` Add an optional boolean argument `trailing_commas` to the `dump` and `dumps` methods in `simpleion`. Default: False. When `trailing_commas=True`, and when `indent` is not `None`, this causes `dump[s]` to include a comma on the last item in a container. **Note:** Since the C module already **does not support pretty printing**, this flag does nothing when using the C module. * fix: Fix test errors for trailing_commas test additions * fix: trailing_commas: fix tests & only when indent set Make the `trailing_commas` flag do what it says: Only apply when pretty printing (`indent is not None`). Fix all remaining `test_simpleion.py` failures. * fix: Remove accidentally left-in debugging-aid comment * fix: Remove accidental trailing_commas inclusion in dump_extension() --------- Co-authored-by: Stephen Jacob --- amazon/ion/simpleion.py | 24 ++++++---- amazon/ion/writer_text.py | 11 ++--- tests/test_simpleion.py | 93 ++++++++++++++++++++++++--------------- tests/test_writer_text.py | 18 ++++++++ 4 files changed, 98 insertions(+), 48 deletions(-) diff --git a/amazon/ion/simpleion.py b/amazon/ion/simpleion.py index 4e29a568c..709e072bc 100644 --- a/amazon/ion/simpleion.py +++ b/amazon/ion/simpleion.py @@ -95,7 +95,7 @@ def dump(obj, fp, imports=None, binary=True, sequence_as_stream=False, indent=None, - tuple_as_sexp=False, omit_version_marker=False): + tuple_as_sexp=False, omit_version_marker=False, trailing_commas=False): """Serialize ``obj`` as an Ion formatted stream and write it to fp. The python object hierarchy is mapped to the Ion data model as described in the module pydoc. @@ -121,11 +121,15 @@ def dump(obj, fp, imports=None, binary=True, sequence_as_stream=False, indent=No indent (Str): If binary is False and indent is a string, then members of containers will be pretty-printed with a newline followed by that string repeated for each level of nesting. None (the default) selects the most compact representation without any newlines. Example: to indent with four spaces per level of nesting, - use ``' '``. + use ``' '``. Supported only in the pure Python implementation (because pretty printing is not yet + supported in the C implementation). tuple_as_sexp (Optional[True|False]): When True, all tuple values will be written as Ion s-expressions. When False, all tuple values will be written as Ion lists. Default: False. omit_version_marker (Optional[True|False]): If binary is False and omit_version_marker is True, omits the Ion Version Marker ($ion_1_0) from the output. Default: False. + trailing_commas (Optional[True|False]): If binary is False and pretty printing (indent is not None), includes + trailing commas in containers. Default: False. Supported only in the pure Python implementation (because + pretty printing is not yet supported in the C implementation). Returns None. """ @@ -135,11 +139,12 @@ def dump(obj, fp, imports=None, binary=True, sequence_as_stream=False, indent=No else: return dump_python(obj, fp, imports=imports, binary=binary, sequence_as_stream=sequence_as_stream, indent=indent, - tuple_as_sexp=tuple_as_sexp, omit_version_marker=omit_version_marker) + tuple_as_sexp=tuple_as_sexp, omit_version_marker=omit_version_marker, + trailing_commas=trailing_commas) def dumps(obj, imports=None, binary=True, sequence_as_stream=False, - indent=None, tuple_as_sexp=False, omit_version_marker=False): + indent=None, tuple_as_sexp=False, omit_version_marker=False, trailing_commas=False): """Serialize obj as described by dump, return the serialized data as bytes or unicode. Returns: @@ -149,7 +154,8 @@ def dumps(obj, imports=None, binary=True, sequence_as_stream=False, ion_buffer = io.BytesIO() dump(obj, ion_buffer, imports=imports, sequence_as_stream=sequence_as_stream, binary=binary, - indent=indent, tuple_as_sexp=tuple_as_sexp, omit_version_marker=omit_version_marker) + indent=indent, tuple_as_sexp=tuple_as_sexp, omit_version_marker=omit_version_marker, + trailing_commas=trailing_commas) ret_val = ion_buffer.getvalue() ion_buffer.close() @@ -292,9 +298,10 @@ def loads(ion_str: Union[bytes, str], catalog=None, single_value=True, parse_eag def dump_python(obj, fp, imports=None, binary=True, sequence_as_stream=False, - indent=None, tuple_as_sexp=False, omit_version_marker=False): + indent=None, tuple_as_sexp=False, omit_version_marker=False, + trailing_commas=False): """'pure' Python implementation. Users should prefer to call ``dump``.""" - raw_writer = binary_writer(imports) if binary else text_writer(indent=indent) + raw_writer = binary_writer(imports) if binary else text_writer(indent=indent, trailing_commas=trailing_commas) writer = blocking_writer(raw_writer, fp) from_type = _FROM_TYPE_TUPLE_AS_SEXP if tuple_as_sexp else _FROM_TYPE if binary or not omit_version_marker: @@ -475,7 +482,8 @@ def dump_extension(obj, fp, binary=True, sequence_as_stream=False, tuple_as_sexp res = ionc.ionc_write(obj, binary, sequence_as_stream, tuple_as_sexp) - # TODO support "omit_version_marker" rather than hacking. + # TODO: support "omit_version_marker" rather than hacking. + # TODO: support "trailing_commas" (support is not included in the C code). if not binary and not omit_version_marker: res = b'$ion_1_0 ' + res fp.write(res) diff --git a/amazon/ion/writer_text.py b/amazon/ion/writer_text.py index 49d22fbd5..4d19856ae 100644 --- a/amazon/ion/writer_text.py +++ b/amazon/ion/writer_text.py @@ -359,7 +359,7 @@ def serialize(ion_event): @coroutine -def _raw_writer_coroutine(depth=0, container_event=None, whence=None, indent=None): +def _raw_writer_coroutine(depth=0, container_event=None, whence=None, indent=None, trailing_commas=False): pretty = indent is not None serialize_container_delimiter = \ _serialize_container_delimiter_pretty if pretty else _serialize_container_delimiter_normal @@ -369,7 +369,7 @@ def _raw_writer_coroutine(depth=0, container_event=None, whence=None, indent=Non ion_event, self = (yield transition) delegate = self - if has_written_values and not ion_event.event_type.ends_container: + if has_written_values and ((indent and trailing_commas) or not ion_event.event_type.ends_container): # TODO This will always emit a delimiter for containers--should make it not do that. # Write the delimiter for the next value. if depth == 0: @@ -402,7 +402,8 @@ def _raw_writer_coroutine(depth=0, container_event=None, whence=None, indent=Non if ion_event.event_type is IonEventType.CONTAINER_START: writer_event = DataEvent(WriteEventType.NEEDS_INPUT, _serialize_container_start(ion_event)) - delegate = _raw_writer_coroutine(depth + 1, ion_event, self, indent=indent) + delegate = _raw_writer_coroutine(depth + 1, ion_event, self, indent=indent, + trailing_commas=trailing_commas) elif depth == 0: # Serialize at the top-level. if ion_event.event_type is IonEventType.STREAM_END: @@ -429,7 +430,7 @@ def _raw_writer_coroutine(depth=0, container_event=None, whence=None, indent=Non # TODO Add options for text formatting. -def raw_writer(indent=None): +def raw_writer(indent=None, trailing_commas=False): """Returns a raw text writer co-routine. Yields: @@ -447,7 +448,7 @@ def raw_writer(indent=None): # of writing (Dec 2022). indent_bytes = indent.encode("UTF-8") if isinstance(indent, str) else indent - return writer_trampoline(_raw_writer_coroutine(indent=indent_bytes)) + return writer_trampoline(_raw_writer_coroutine(indent=indent_bytes, trailing_commas=trailing_commas)) # TODO Determine if we need to do anything special for non-raw writer. Validation? text_writer = raw_writer diff --git a/tests/test_simpleion.py b/tests/test_simpleion.py index 06f233c3b..972c3c8c0 100644 --- a/tests/test_simpleion.py +++ b/tests/test_simpleion.py @@ -556,34 +556,35 @@ def dump_func(*args, **kw): def _generate_roundtrips(roundtrips): for is_binary in (True, False): for indent in ('not used',) if is_binary else (None, '', ' ', ' ', '\t', '\n\t\n '): - def _adjust_sids(annotations=()): - if is_binary and isinstance(obj, SymbolToken): - return SymbolToken(obj.text, 10 + len(annotations)) - return obj - - def _to_obj(to_type=None, annotations=(), tuple_as_sexp=False): - if to_type is None: - to_type = ion_type - obj_out = _adjust_sids(annotations) - return _FROM_ION_TYPE[ion_type].from_value(to_type, obj_out, annotations=annotations), is_binary, indent, tuple_as_sexp - - for obj in roundtrips: - obj = _adjust_sids() - yield obj, is_binary, indent, False - if not obj_has_ion_type_and_annotation(obj): - ion_type = _ion_type(obj, _FROM_TYPE) - yield _to_obj() - else: - ion_type = obj.ion_type - if isinstance(obj, IonPyNull): - obj = None - yield _to_obj(annotations=(u'annot1', u'annot2')) - if isinstance(obj, list): - yield _to_obj(IonType.SEXP) - yield _to_obj(IonType.SEXP, annotations=(u'annot1', u'annot2')) - if isinstance(obj, tuple) and not isinstance(obj, SymbolToken): - yield _to_obj(IonType.SEXP, tuple_as_sexp=True) - yield _to_obj(IonType.SEXP, annotations=(u'annot1', u'annot2'), tuple_as_sexp=True) + for trailing_commas in (True, False): + def _adjust_sids(annotations=()): + if is_binary and isinstance(obj, SymbolToken): + return SymbolToken(obj.text, 10 + len(annotations)) + return obj + + def _to_obj(to_type=None, annotations=(), tuple_as_sexp=False): + if to_type is None: + to_type = ion_type + obj_out = _adjust_sids(annotations) + return _FROM_ION_TYPE[ion_type].from_value(to_type, obj_out, annotations=annotations), is_binary, indent, tuple_as_sexp, trailing_commas + + for obj in roundtrips: + obj = _adjust_sids() + yield obj, is_binary, indent, False, trailing_commas + if not obj_has_ion_type_and_annotation(obj): + ion_type = _ion_type(obj, _FROM_TYPE) + yield _to_obj() + else: + ion_type = obj.ion_type + if isinstance(obj, IonPyNull): + obj = None + yield _to_obj(annotations=(u'annot1', u'annot2')) + if isinstance(obj, list): + yield _to_obj(IonType.SEXP) + yield _to_obj(IonType.SEXP, annotations=(u'annot1', u'annot2')) + if isinstance(obj, tuple) and not isinstance(obj, SymbolToken): + yield _to_obj(IonType.SEXP, tuple_as_sexp=True) + yield _to_obj(IonType.SEXP, annotations=(u'annot1', u'annot2'), tuple_as_sexp=True) def _assert_roundtrip(before, after, tuple_as_sexp): @@ -594,9 +595,9 @@ def _assert_roundtrip(before, after, tuple_as_sexp): *tuple(_generate_roundtrips(_ROUNDTRIPS)) ) def test_roundtrip(p): - obj, is_binary, indent, tuple_as_sexp = p + obj, is_binary, indent, tuple_as_sexp, trailing_commas = p out = BytesIO() - dump(obj, out, binary=is_binary, indent=indent, tuple_as_sexp=tuple_as_sexp) + dump(obj, out, binary=is_binary, indent=indent, tuple_as_sexp=tuple_as_sexp, trailing_commas=trailing_commas) out.seek(0) res = load(out) _assert_roundtrip(obj, res, tuple_as_sexp) @@ -606,10 +607,10 @@ def test_roundtrip(p): *tuple(_generate_roundtrips(_ROUNDTRIPS)) ) def test_roundtrip_ion_stream(p): - obj, is_binary, indent, tuple_as_sexp = p + obj, is_binary, indent, tuple_as_sexp, trailing_commas = p expected = [obj] out = BytesIO() - dump(obj, out, binary=is_binary, indent=indent, tuple_as_sexp=tuple_as_sexp) + dump(obj, out, binary=is_binary, indent=indent, tuple_as_sexp=tuple_as_sexp, trailing_commas=trailing_commas) out.seek(0) res = load(out, single_value=False, parse_eagerly=True) _assert_roundtrip(expected, res, tuple_as_sexp) @@ -643,6 +644,7 @@ class PrettyPrintParams(NamedTuple): indent: str exact_text: Optional[str] = None regexes: Sequence = [] + trailing_commas: bool = False @parametrize( PrettyPrintParams(ion_text='a', indent=' ', exact_text="$ion_1_0\na"), @@ -659,15 +661,36 @@ class PrettyPrintParams(NamedTuple): exact_text="$ion_1_0\n[\n\tapple,\n\t\"banana\",\n\t{\n\t\troof: false\n\t}\n]"), PrettyPrintParams(ion_text='[apple, {roof: false, walls:4, door: wood::large::true}]', indent='\t', regexes=["\\A\\$ion_1_0\n\\[\n\tapple,\n\t\\{", "\n\t\tdoor: wood::large::true,?\n", - "\n\t\troof: false,?\n", "\n\t\twalls: 4,?\n", "\n\t\\}\n\\]\\Z"]) + "\n\t\troof: false,?\n", "\n\t\twalls: 4,?\n", "\n\t\\}\n\\]\\Z"]), + + PrettyPrintParams(ion_text='a', indent=' ', trailing_commas=True, exact_text="$ion_1_0\na"), + PrettyPrintParams(ion_text='"a"', indent=' ', trailing_commas=True, exact_text="$ion_1_0\n\"a\""), + PrettyPrintParams(ion_text='\'$a__9\'', indent=' ', trailing_commas=True, + exact_text="$ion_1_0\n$a__9"), + PrettyPrintParams(ion_text='\'$a_\\\'_9\'', indent=' ', trailing_commas=True, + exact_text="$ion_1_0\n\'$a_\\\'_9\'"), + PrettyPrintParams(ion_text='[a, b, chair::2008-08-08T]', indent=' ', trailing_commas=True, + exact_text="$ion_1_0\n[\n a,\n b,\n chair::2008-08-08T,\n]"), + PrettyPrintParams(ion_text='[a, b, chair::2008-08-08T]', + indent=None, trailing_commas=True, # not pretty print + exact_text="$ion_1_0 [a,b,chair::2008-08-08T]"), + PrettyPrintParams(ion_text='[apple, {roof: false}]', indent='\t', trailing_commas=True, + exact_text="$ion_1_0\n[\n\tapple,\n\t{\n\t\troof: false,\n\t},\n]"), + PrettyPrintParams(ion_text='[apple, "banana", {roof: false}]', indent='\t', trailing_commas=True, + exact_text="$ion_1_0\n[\n\tapple,\n\t\"banana\",\n\t{\n\t\troof: false,\n\t},\n]"), + PrettyPrintParams(ion_text='[apple, {roof: false, walls:4, door: wood::large::true}]', indent='\t', + trailing_commas=True, + regexes=["\\A\\$ion_1_0\n\\[\n\tapple,\n\t\\{", "\n\t\tdoor: wood::large::true,\n", + "\n\t\troof: false,\n", "\n\t\twalls: 4,\n", "\n\t\\},\n\\]\\Z"]) ) def test_pretty_print(p): if c_ext: # TODO support pretty print for C extension. return - ion_text, indent, exact_text, regexes = p + ion_text, indent, exact_text, regexes, trailing_commas = p ion_value = loads(ion_text) - actual_pretty_ion_text = dumps(ion_value, binary=False, indent=indent) + actual_pretty_ion_text = dumps(ion_value, binary=False, indent=indent, + trailing_commas=trailing_commas) if exact_text is not None: assert actual_pretty_ion_text == exact_text for regex_str in regexes: diff --git a/tests/test_writer_text.py b/tests/test_writer_text.py index e96bf08b8..da0384699 100644 --- a/tests/test_writer_text.py +++ b/tests/test_writer_text.py @@ -217,6 +217,7 @@ def new_writer(): def test_raw_writer(p): assert_writer_events(p, new_writer) + @parametrize( (None, True), ('', True), @@ -250,6 +251,7 @@ class _P_Q(NamedTuple): pass + @parametrize( _P_Q('a', False), _P_Q('a0', False), @@ -296,3 +298,19 @@ def test_quote_symbols(p): assert needs_quotes == quoted assert len(ion_value.ion_annotations) == 1 assert ion_value.ion_annotations[0].text == symbol_text + + +@parametrize( + (' ', True), + (' ', False), + (None, True), + (None, False)) +def test_trailing_commas(p): + indent, trailing_commas = p + + # Ensure raw_writer can handle trailing_commas value with indent value for all value combinations. + raw_writer(indent, trailing_commas) + + # Ensure that a value dumped using the indent and trailing_commas values can be successfully loaded. + ion_val = loads('[a, {x:2, y: height::17}]') + assert ion_val == loads(dumps(ion_val, binary=False, indent=indent, trailing_commas=trailing_commas))