-
Notifications
You must be signed in to change notification settings - Fork 51
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: Add `trailing_commas` to `dump`/`dumps` #372
Changes from 4 commits
11e449b
d753ba1
b124edc
1d2f00d
4a88648
b2f91a9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -95,7 +95,7 @@ | |
|
||
|
||
def dump(obj, fp, imports=None, binary=True, sequence_as_stream=False, indent=None, | ||
tuple_as_sexp=False, omit_version_marker=False): | ||
tuple_as_sexp=False, omit_version_marker=False, trailing_commas=False): | ||
"""Serialize ``obj`` as an Ion formatted stream and write it to fp. | ||
|
||
The python object hierarchy is mapped to the Ion data model as described in the module pydoc. | ||
|
@@ -121,11 +121,15 @@ def dump(obj, fp, imports=None, binary=True, sequence_as_stream=False, indent=No | |
indent (Str): If binary is False and indent is a string, then members of containers will be pretty-printed with | ||
a newline followed by that string repeated for each level of nesting. None (the default) selects the most | ||
compact representation without any newlines. Example: to indent with four spaces per level of nesting, | ||
use ``' '``. | ||
use ``' '``. Supported only in the pure Python implementation (because pretty printing is not yet | ||
supported in the C implementation). | ||
tuple_as_sexp (Optional[True|False]): When True, all tuple values will be written as Ion s-expressions. | ||
When False, all tuple values will be written as Ion lists. Default: False. | ||
omit_version_marker (Optional[True|False]): If binary is False and omit_version_marker is True, omits the | ||
Ion Version Marker ($ion_1_0) from the output. Default: False. | ||
trailing_commas (Optional[True|False]): If binary is False and pretty printing (indent is not None), includes | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Suggestion: it may actually be clearer to a user to make it independent of [`indent`] […] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. That's a thought. I actually had it accidentally including trailing commas in non-pretty-printed output for the first revision, so it would be easy to put back to that (except for the problem explained in the next paragraph). I guess if you say you don't want indent but you do want trailing commas, there's no strong reason not to. However ... there is one reason not to. This is a reason why I'd rather that be a feature we could consider but not try to include here. That reason is that the Python C module implementation already doesn't support pretty-printing, so I was able to leave it out of scope. If we make […] I'd rather not hold this PR up to implement non-pretty-printed trailing-comma output in the C module; especially since I'm not sure we're likely to see any strong demand for trailing commas in non-pretty-printed output. I guess we could make it not depend on [`indent`] […] |
||
trailing commas in containers. Default: False. Supported only in the pure Python implementation (because | ||
pretty printing is not yet supported in the C implementation). | ||
|
||
Returns None. | ||
""" | ||
|
@@ -135,11 +139,12 @@ def dump(obj, fp, imports=None, binary=True, sequence_as_stream=False, indent=No | |
else: | ||
return dump_python(obj, fp, imports=imports, binary=binary, sequence_as_stream=sequence_as_stream, | ||
indent=indent, | ||
tuple_as_sexp=tuple_as_sexp, omit_version_marker=omit_version_marker) | ||
tuple_as_sexp=tuple_as_sexp, omit_version_marker=omit_version_marker, | ||
trailing_commas=trailing_commas) | ||
|
||
|
||
def dumps(obj, imports=None, binary=True, sequence_as_stream=False, | ||
indent=None, tuple_as_sexp=False, omit_version_marker=False): | ||
indent=None, tuple_as_sexp=False, omit_version_marker=False, trailing_commas=False): | ||
"""Serialize obj as described by dump, return the serialized data as bytes or unicode. | ||
|
||
Returns: | ||
|
@@ -149,7 +154,8 @@ def dumps(obj, imports=None, binary=True, sequence_as_stream=False, | |
ion_buffer = io.BytesIO() | ||
|
||
dump(obj, ion_buffer, imports=imports, sequence_as_stream=sequence_as_stream, binary=binary, | ||
indent=indent, tuple_as_sexp=tuple_as_sexp, omit_version_marker=omit_version_marker) | ||
indent=indent, tuple_as_sexp=tuple_as_sexp, omit_version_marker=omit_version_marker, | ||
trailing_commas=trailing_commas) | ||
|
||
ret_val = ion_buffer.getvalue() | ||
ion_buffer.close() | ||
|
@@ -292,9 +298,10 @@ def loads(ion_str: Union[bytes, str], catalog=None, single_value=True, parse_eag | |
|
||
|
||
def dump_python(obj, fp, imports=None, binary=True, sequence_as_stream=False, | ||
indent=None, tuple_as_sexp=False, omit_version_marker=False): | ||
indent=None, tuple_as_sexp=False, omit_version_marker=False, | ||
trailing_commas=False): | ||
"""'pure' Python implementation. Users should prefer to call ``dump``.""" | ||
raw_writer = binary_writer(imports) if binary else text_writer(indent=indent) | ||
raw_writer = binary_writer(imports) if binary else text_writer(indent=indent, trailing_commas=trailing_commas) | ||
writer = blocking_writer(raw_writer, fp) | ||
from_type = _FROM_TYPE_TUPLE_AS_SEXP if tuple_as_sexp else _FROM_TYPE | ||
if binary or not omit_version_marker: | ||
|
@@ -470,10 +477,11 @@ def add(obj): | |
event = reader.send(NEXT_EVENT) | ||
|
||
|
||
def dump_extension(obj, fp, binary=True, sequence_as_stream=False, tuple_as_sexp=False, omit_version_marker=False): | ||
def dump_extension(obj, fp, binary=True, sequence_as_stream=False, tuple_as_sexp=False, omit_version_marker=False, | ||
trailing_commas=False): | ||
"""C-extension implementation. Users should prefer to call ``dump``.""" | ||
|
||
res = ionc.ionc_write(obj, binary, sequence_as_stream, tuple_as_sexp) | ||
res = ionc.ionc_write(obj, binary, sequence_as_stream, tuple_as_sexp, trailing_commas) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This call is what is causing the segfault in the unit tests. If you want to put in a […] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Whoops! I meant to omit the argument from the call to ionc... I'll fix that and post a new rev. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Wait, I must be confused. I'm not passing the arg to the C code, as far as I can see?
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Oh, oops, in […] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Done. (Removed […]) |
||
|
||
# TODO support "omit_version_marker" rather than hacking. | ||
if not binary and not omit_version_marker: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -359,7 +359,7 @@ def serialize(ion_event): | |
|
||
|
||
@coroutine | ||
def _raw_writer_coroutine(depth=0, container_event=None, whence=None, indent=None): | ||
def _raw_writer_coroutine(depth=0, container_event=None, whence=None, indent=None, trailing_commas=False): | ||
pretty = indent is not None | ||
serialize_container_delimiter = \ | ||
_serialize_container_delimiter_pretty if pretty else _serialize_container_delimiter_normal | ||
|
@@ -369,7 +369,7 @@ def _raw_writer_coroutine(depth=0, container_event=None, whence=None, indent=Non | |
ion_event, self = (yield transition) | ||
delegate = self | ||
|
||
if has_written_values and not ion_event.event_type.ends_container: | ||
if has_written_values and ((indent and trailing_commas) or not ion_event.event_type.ends_container): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I may be wrong but I think this will also add a space after the last item in an s-exp: […] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Hm, I did think you were right, but now I actually think not, because there's a separate set of delimiters defined for use when pretty printing: […]
which uses an empty string as the delimiter for s-expressions, since they add newlines. I could test this to make sure, but I'm pretty sure. If we do want to apply this to non-pretty-printing, then we could do that, but then your concern expressed here would need an additional work-around, or there would indeed be an extra space. I think the solution, if needed, would be to fork: […]
into: […]
and use […] |
||
# TODO This will always emit a delimiter for containers--should make it not do that. | ||
# Write the delimiter for the next value. | ||
if depth == 0: | ||
|
@@ -402,7 +402,8 @@ def _raw_writer_coroutine(depth=0, container_event=None, whence=None, indent=Non | |
|
||
if ion_event.event_type is IonEventType.CONTAINER_START: | ||
writer_event = DataEvent(WriteEventType.NEEDS_INPUT, _serialize_container_start(ion_event)) | ||
delegate = _raw_writer_coroutine(depth + 1, ion_event, self, indent=indent) | ||
delegate = _raw_writer_coroutine(depth + 1, ion_event, self, indent=indent, | ||
trailing_commas=trailing_commas) | ||
elif depth == 0: | ||
# Serialize at the top-level. | ||
if ion_event.event_type is IonEventType.STREAM_END: | ||
|
@@ -429,7 +430,7 @@ def _raw_writer_coroutine(depth=0, container_event=None, whence=None, indent=Non | |
|
||
|
||
# TODO Add options for text formatting. | ||
def raw_writer(indent=None): | ||
def raw_writer(indent=None, trailing_commas=False): | ||
"""Returns a raw text writer co-routine. | ||
|
||
Yields: | ||
|
@@ -447,7 +448,7 @@ def raw_writer(indent=None): | |
# of writing (Dec 2022). | ||
indent_bytes = indent.encode("UTF-8") if isinstance(indent, str) else indent | ||
|
||
return writer_trampoline(_raw_writer_coroutine(indent=indent_bytes)) | ||
return writer_trampoline(_raw_writer_coroutine(indent=indent_bytes, trailing_commas=trailing_commas)) | ||
|
||
# TODO Determine if we need to do anything special for non-raw writer. Validation? | ||
text_writer = raw_writer |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
np!