Skip to content

Commit

Permalink
refactor!: put all format options in classes
Browse files Browse the repository at this point in the history
This removes the formatting arguments to `remarshal.remarshal`
and makes it clear what format uses what options.

Customizing the output is one extra function call.
Conversions with the default formatting are the same as before.

I am not very concerned about breaking the Python API
because I have never heard from API users
and have found none on GitHub.
Remarshal users use it through the CLI.
  • Loading branch information
dbohdan committed Nov 14, 2024
1 parent 6bd8273 commit 63bc9e4
Show file tree
Hide file tree
Showing 2 changed files with 184 additions and 86 deletions.
223 changes: 163 additions & 60 deletions src/remarshal/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,35 +46,73 @@
if TYPE_CHECKING:
from rich.style import StyleType

YAMLStyle = Literal["", "'", '"', "|", ">"]


class CLIDefaults:
    """Default values for command-line options."""

    INDENT = None
    SORT_KEYS = False
    STRINGIFY = False
    WIDTH = 80


class Defaults:
    """Default values used by the format option classes and by `remarshal`."""

    JSON_INDENT = 4
    MAX_VALUES = 1000000
    YAML_INDENT = 2
    YAML_STYLE = ""
    WIDTH = 80

    # The per-format widths are aliases of the shared default,
    # so changing WIDTH above changes them too.
    PYTHON_WIDTH = WIDTH
    YAML_WIDTH = WIDTH


YAMLStyle = Literal["", "'", '"', "|", ">"]


@dataclass(frozen=True)
class FormatOptions:
    """Base class for the per-format output option classes.

    It has no fields of its own; `encode` checks the concrete subclass
    against the requested output format.
    """


@dataclass(frozen=True)
class CBOROptions(FormatOptions):
    """Output options for CBOR. There are currently no settings."""


@dataclass(frozen=True)
class JSONOptions(FormatOptions):
    """Output options for JSON.

    `encode` forwards all three fields unchanged to `_encode_json`.
    """

    indent: int | None = Defaults.JSON_INDENT
    sort_keys: bool = True
    stringify: bool = False


@dataclass(frozen=True)
class MsgPackOptions(FormatOptions):
    """Output options for MessagePack. There are currently no settings."""


@dataclass(frozen=True)
class PythonOptions(FormatOptions):
    """Output options for Python-literal output.

    `encode` forwards the fields to `_encode_python`, which passes them
    to `pprint.pformat`.
    """

    # `None` selects compact output with zero indentation.
    indent: int | None = None
    # Passed to `pprint.pformat` as `sort_dicts`.
    sort_keys: bool = True
    width: int = Defaults.PYTHON_WIDTH


@dataclass(frozen=True)
class TOMLOptions(FormatOptions):
    """Output options for TOML.

    `encode` forwards `sort_keys` and `stringify` to `_encode_toml`.
    """

    # NOTE(review): the TOML branch of `encode` does not read `indent`
    # in the code visible here; confirm whether it is used elsewhere.
    indent: int | None = None
    sort_keys: bool = True
    stringify: bool = False


@dataclass(frozen=True)
class YAMLOptions(FormatOptions):
    """Output options for YAML.

    `encode` forwards the fields to `_encode_yaml`, which assigns them to
    the `indent`, `default_style` and `width` attributes of the
    `ruamel.yaml.YAML` instance.
    """

    indent: int = Defaults.YAML_INDENT
    style: YAMLStyle = Defaults.YAML_STYLE
    width: int = Defaults.YAML_WIDTH


__all__ = [
"INPUT_FORMATS",
"OPTIONS_CLASSES",
"OUTPUT_FORMATS",
"RICH_ARGPARSE_STYLES",
"CLIDefaults",
Expand All @@ -85,6 +123,7 @@ class YAMLOptions(FormatOptions):
"YAMLOptions",
"decode",
"encode",
"format_options",
"identity",
"main",
"remarshal",
Expand All @@ -94,6 +133,14 @@ class YAMLOptions(FormatOptions):

# Format names for the input side.
INPUT_FORMATS = ["cbor", "json", "msgpack", "toml", "yaml"]
# Format names for the output side; "python" appears only here.
OUTPUT_FORMATS = ["cbor", "json", "msgpack", "python", "toml", "yaml"]
# Maps each output format name to its `FormatOptions` subclass.
OPTIONS_CLASSES = {
"cbor": CBOROptions,
"json": JSONOptions,
"msgpack": MsgPackOptions,
"python": PythonOptions,
"toml": TOMLOptions,
"yaml": YAMLOptions,
}
# Encoding name used when text output is converted to bytes in `encode`.
UTF_8 = "utf-8"

RICH_ARGPARSE_STYLES: dict[str, StyleType] = {
Expand Down Expand Up @@ -289,7 +336,7 @@ def output_width(value: str) -> int:

parser.add_argument(
"--width",
default=CLIDefaults.WIDTH,
default=Defaults.WIDTH,
metavar="<n>",
type=output_width, # Allow "inf".
help=(
Expand Down Expand Up @@ -351,13 +398,6 @@ def output_width(value: str) -> int:
if args.output_format == "":
parser.error("Need an explicit output format")

# Replace `yaml_*` options with `YAMLOptions`.
vars(args)["yaml_options"] = YAMLOptions(
style=args.yaml_style,
)

del vars(args)["yaml_style"]

return args


Expand Down Expand Up @@ -622,22 +662,21 @@ def _encode_python(
indent: int | None,
sort_keys: bool,
width: int,
) -> bytes:
) -> str:
compact = False
if indent is None:
compact = True
indent = 0

return bytes(
return (
pprint.pformat(
data,
compact=compact,
indent=indent,
sort_dicts=sort_keys,
width=width,
)
+ "\n",
UTF_8,
+ "\n"
)


Expand Down Expand Up @@ -695,20 +734,13 @@ def _encode_yaml(
data: Document,
*,
indent: int | None,
options: FormatOptions | None,
style: YAMLStyle,
width: int,
) -> str:
if options is None:
options = YAMLOptions()

if not isinstance(options, YAMLOptions):
msg = "'options' not of type 'YAMLOptions'"
raise TypeError(msg)

yaml = ruamel.yaml.YAML()
yaml.default_flow_style = False

yaml.default_style = options.style # type: ignore
yaml.default_style = style # type: ignore
yaml.indent = indent
yaml.width = width

Expand All @@ -728,53 +760,123 @@ def _encode_yaml(
raise ValueError(msg)


def format_options(
    output_format: str,
    *,
    indent: int | None = None,
    sort_keys: bool = False,
    stringify: bool = False,
    width: int = Defaults.WIDTH,
    yaml_style: YAMLStyle = Defaults.YAML_STYLE,
) -> FormatOptions:
    """Build the options object for `output_format` from generic settings.

    Each format takes only the settings it has a field for; the remaining
    arguments are ignored for that format.

    Args:
        output_format: One of "cbor", "json", "msgpack", "python", "toml",
            or "yaml".
        indent: Indentation for JSON, Python, and YAML output. For YAML,
            `None` is replaced with `Defaults.YAML_INDENT`.
        sort_keys: Used for JSON, Python, and TOML output.
        stringify: Used for JSON and TOML output.
        width: Used for Python and YAML output.
        yaml_style: Used for YAML output only.

    Returns:
        An instance of the `FormatOptions` subclass for `output_format`.

    Raises:
        ValueError: If `output_format` is not a known output format.
    """
    if output_format == "cbor":
        return CBOROptions()

    if output_format == "json":
        return JSONOptions(
            indent=indent,
            sort_keys=sort_keys,
            stringify=stringify,
        )

    if output_format == "msgpack":
        return MsgPackOptions()

    if output_format == "python":
        return PythonOptions(
            indent=indent,
            sort_keys=sort_keys,
            width=width,
        )

    if output_format == "toml":
        # `TOMLOptions.indent` is deliberately left at its default here,
        # as in the original code.
        return TOMLOptions(
            sort_keys=sort_keys,
            stringify=stringify,
        )

    if output_format == "yaml":
        return YAMLOptions(
            # `YAMLOptions.indent` is a plain `int`, so `None` has to be
            # replaced with the YAML default.
            indent=Defaults.YAML_INDENT if indent is None else indent,
            style=yaml_style,
            width=width,
        )

    msg = f"Unknown output format: {output_format}"
    raise ValueError(msg)


def encode(
output_format: str,
data: Document,
*,
indent: int | None,
options: FormatOptions | None,
sort_keys: bool,
stringify: bool,
width: int,
) -> bytes:
if output_format == "json":
if output_format == "cbor":
if not isinstance(options, CBOROptions):
msg = "expected 'options' argument to have class 'CBOROptions'"
raise TypeError(msg)

encoded = _encode_cbor(data)

elif output_format == "json":
if not isinstance(options, JSONOptions):
msg = "expected 'options' argument to have class 'JSONOptions'"
raise TypeError(msg)

encoded = _encode_json(
data,
indent=indent,
sort_keys=sort_keys,
stringify=stringify,
indent=options.indent,
sort_keys=options.sort_keys,
stringify=options.stringify,
).encode(UTF_8)

elif output_format == "msgpack":
if not isinstance(options, MsgPackOptions):
msg = "expected 'options' argument to have class 'MsgPackOptions'"
raise TypeError(msg)
encoded = _encode_msgpack(data)

elif output_format == "python":
if not isinstance(options, PythonOptions):
msg = "expected 'options' argument to have class 'PythonOptions'"
raise TypeError(msg)
encoded = _encode_python(
data,
indent=indent,
sort_keys=sort_keys,
width=width,
)
indent=options.indent,
sort_keys=options.sort_keys,
width=options.width,
).encode(UTF_8)

elif output_format == "toml":
if not isinstance(options, TOMLOptions):
msg = "expected 'options' argument to have class 'TOMLOptions'"
raise TypeError(msg)

if not isinstance(data, Mapping):
msg = (
f"Top-level value of type '{type(data).__name__}' cannot "
"be encoded as TOML"
)
raise TypeError(msg)
encoded = _encode_toml(data, sort_keys=sort_keys, stringify=stringify).encode(
UTF_8
)
encoded = _encode_toml(
data,
sort_keys=options.sort_keys,
stringify=options.stringify,
).encode(UTF_8)

elif output_format == "yaml":
if not isinstance(options, YAMLOptions):
msg = "expected 'options' argument to have class 'YAMLOptions'"
raise TypeError(msg)

encoded = _encode_yaml(
data,
indent=indent,
options=options,
width=width,
indent=options.indent,
style=options.style,
width=options.width,
).encode(UTF_8)
elif output_format == "msgpack":
encoded = _encode_msgpack(data)
elif output_format == "cbor":
encoded = _encode_cbor(data)

else:
msg = f"Unknown output format: {output_format}"
raise ValueError(msg)
Expand All @@ -785,32 +887,31 @@ def encode(
# === Main ===


def remarshal( # noqa: PLR0913
def remarshal(
input_format: str,
output_format: str,
input: Path | str,
output: Path | str,
*,
indent: int | None = None,
max_values: int = Defaults.MAX_VALUES,
options: FormatOptions | None = None,
sort_keys: bool = True,
stringify: bool = False,
transform: Callable[[Document], Document] | None = None,
unwrap: str | None = None,
width: int = CLIDefaults.WIDTH,
wrap: str | None = None,
) -> None:
input_file = None
output_file = None

if options is None:
options = format_options(output_format)

try:
input_file = sys.stdin.buffer if input == "-" else Path(input).open("rb")
output_file = sys.stdout.buffer if output == "-" else Path(output).open("wb")

input_data = input_file.read()
if not isinstance(input_data, bytes):
msg = "input_data must be bytes"
msg = "'input_data' must be 'bytes'"
raise TypeError(msg)

parsed = decode(input_format, input_data)
Expand All @@ -836,11 +937,7 @@ def remarshal( # noqa: PLR0913
encoded = encode(
output_format,
parsed,
indent=indent,
options=options,
sort_keys=sort_keys,
stringify=stringify,
width=width,
)

output_file.write(encoded)
Expand All @@ -855,16 +952,22 @@ def main() -> None:
args = _parse_command_line(sys.argv)

try:
options = format_options(
args.output_format,
indent=args.indent,
sort_keys=args.sort_keys,
stringify=args.stringify,
width=args.width,
yaml_style=args.yaml_style,
)

remarshal(
args.input_format,
args.output_format,
args.input,
args.output,
indent=args.indent,
max_values=args.max_values,
options=args.yaml_options,
sort_keys=args.sort_keys,
stringify=args.stringify,
options=options,
unwrap=args.unwrap,
wrap=args.wrap,
)
Expand Down
Loading

0 comments on commit 63bc9e4

Please sign in to comment.