diff --git a/partiql-spi/src/main/java/org/partiql/spi/value/DatumReader.java b/partiql-spi/src/main/java/org/partiql/spi/value/DatumReader.java
new file mode 100644
index 000000000..3dc992178
--- /dev/null
+++ b/partiql-spi/src/main/java/org/partiql/spi/value/DatumReader.java
@@ -0,0 +1,23 @@
+package org.partiql.spi.value;
+
+import org.jetbrains.annotations.Nullable;
+
+/**
+ * The {@link DatumReader} interface is a low-level reader interface for reading streams of PartiQL data.
+ * <p>
+ * Modeled after {@link java.io.Reader}.
+ * <p>
+ * TODO
+ * - public void reset();
+ * - public void skip(long n);
+ */
+public interface DatumReader extends AutoCloseable {
+
+    /**
+     * Reads the next {@link Datum} from the stream.
+     *
+     * @return next Datum or null.
+     */
+    @Nullable
+    public Datum read();
+}
diff --git a/partiql-spi/src/main/java/org/partiql/spi/value/DatumWriter.java b/partiql-spi/src/main/java/org/partiql/spi/value/DatumWriter.java
new file mode 100644
index 000000000..bf6ea2328
--- /dev/null
+++ b/partiql-spi/src/main/java/org/partiql/spi/value/DatumWriter.java
@@ -0,0 +1,17 @@
+package org.partiql.spi.value;
+
+/**
+ * The {@link DatumWriter} interface is a low-level writer interface for writing streams of PartiQL data.
+ * <p>
+ * Modeled after {@link java.io.Writer}.
+ */
+public interface DatumWriter extends AutoCloseable {
+
+    /**
+     * Like java.io.Writer with combined `append` and `write` since this does not implement Appendable.
+     *
+     * @param datum to write.
+     * @return a DatumWriter on which further writes can be chained.
+     */
+    public DatumWriter write(Datum datum);
+}
diff --git a/partiql-spi/src/main/java/org/partiql/spi/value/Variant.java b/partiql-spi/src/main/java/org/partiql/spi/value/Variant.java
new file mode 100644
index 000000000..1fba149a0
--- /dev/null
+++ b/partiql-spi/src/main/java/org/partiql/spi/value/Variant.java
@@ -0,0 +1,45 @@
+package org.partiql.spi.value;
+
+import org.jetbrains.annotations.NotNull;
+
+import java.nio.charset.Charset;
+
+/**
+ * Variant is a {@link Datum} with the ability to pack and unpack a value.
+ *
+ * @param <T> type of the inner value returned by {@link #unpack()}.
+ */
+public interface Variant<T> extends Datum {
+
+    /**
+     * Unpack the inner variant value.
+     *
+     * @return T
+     */
+    T unpack();
+
+    /**
+     * TODO move to writer?
+     * <p>
+     * Pack the variant into a byte array.
+     *
+     * @return byte[]
+     * @throws UnsupportedOperationException by default; variants with a byte[] encoding override this.
+     */
+    default byte[] pack() {
+        throw new UnsupportedOperationException("variant does not have a byte[] encoding.");
+    }
+
+    /**
+     * TODO move to writer?
+     * <p>
+     * Pack the variant into a byte array with the given charset.
+     *
+     * @param charset charset
+     * @return byte[]
+     * @throws UnsupportedOperationException by default; variants with a textual encoding override this.
+     */
+    default byte[] pack(@NotNull Charset charset) {
+        throw new UnsupportedOperationException("variant does not have an encoding for charset: " + charset.name());
+    }
+}
diff --git a/partiql-spi/src/main/kotlin/org/partiql/spi/value/ion/IonDatum.kt b/partiql-spi/src/main/kotlin/org/partiql/spi/value/ion/IonDatum.kt
index 201fa2d64..9cb8a0824 100644
--- a/partiql-spi/src/main/kotlin/org/partiql/spi/value/ion/IonDatum.kt
+++ b/partiql-spi/src/main/kotlin/org/partiql/spi/value/ion/IonDatum.kt
@@ -1,161 +1,100 @@
 package org.partiql.spi.value.ion
 
+import com.amazon.ion.system.IonBinaryWriterBuilder
+import com.amazon.ion.system.IonTextWriterBuilder
 import com.amazon.ionelement.api.AnyElement
-import com.amazon.ionelement.api.ElementType.BLOB
 import com.amazon.ionelement.api.ElementType.BOOL
-import com.amazon.ionelement.api.ElementType.CLOB
 import com.amazon.ionelement.api.ElementType.DECIMAL
 import com.amazon.ionelement.api.ElementType.FLOAT
 import com.amazon.ionelement.api.ElementType.INT
 import com.amazon.ionelement.api.ElementType.LIST
-import com.amazon.ionelement.api.ElementType.NULL
 import com.amazon.ionelement.api.ElementType.SEXP
 import com.amazon.ionelement.api.ElementType.STRING
 import com.amazon.ionelement.api.ElementType.STRUCT
 import com.amazon.ionelement.api.ElementType.SYMBOL
 import com.amazon.ionelement.api.ElementType.TIMESTAMP
+import com.amazon.ionelement.api.IonElement
 import org.partiql.spi.value.Datum
 import org.partiql.spi.value.Field
+import org.partiql.spi.value.Variant
 import org.partiql.types.PType
 import org.partiql.value.datetime.Date
 import org.partiql.value.datetime.DateTimeValue
 import org.partiql.value.datetime.Time
 import org.partiql.value.datetime.TimeZone
 import org.partiql.value.datetime.Timestamp
+import java.io.ByteArrayOutputStream
 import java.math.BigDecimal
 import java.math.BigInteger
+import java.nio.charset.Charset
+import java.nio.charset.StandardCharsets
 
 /**
  * A [Datum] implemented over Ion's [AnyElement].
  */
-public class IonDatum private constructor(value: AnyElement, type: PType) :
-    Datum {
-
-    // DO NOT USE FINAL
-    private var _value = value
-    private var _type = type
-    private var _kind = value.type
+public class IonDatum(private var value: AnyElement) : Variant<IonElement> {
 
     /**
-     * Some encoding of PartiQL values as Ion.
+     * TODO replace with PType.variant("ion")
      */
-    private enum class Annotation(val symbol: String) {
-        MISSING("\$missing"),
-        BAG("\$bag"),
-        DATE("\$date"),
-        TIME("\$time"),
-        TIMESTAMP("\$timestamp"),
-        GRAPH("\$graph");
-
-        override fun toString(): String = symbol
+    private var type = PType.unknown()
 
-        companion object {
+    /**
+     * Unpack the inner Ion value.
+     *
+     * @return IonElement
+     */
+    override fun unpack(): IonElement = value
 
-            @JvmStatic
-            fun of(value: AnyElement): Annotation? = value.annotations.lastOrNull()?.let {
-                Annotation.values().find { a -> a.symbol == it }
-            }
-        }
+    /**
+     * Pack an IonDatum into byte[] using the binary Ion encoding.
+     *
+     * @return byte[]
+     */
+    override fun pack(): ByteArray {
+        val buffer = ByteArrayOutputStream()
+        // the binary writer buffers its output; closing it finishes the stream into the buffer
+        IonBinaryWriterBuilder.standard().build(buffer).use { value.writeTo(it) }
+        return buffer.toByteArray()
     }
 
-    public companion object {
-
-        /**
-         * TODO reader/writer ?? or check annotations
-         *
-         * @param value
-         * @return
-         */
-        @JvmStatic
-        public fun of(value: AnyElement): Datum {
-            val tag = Annotation.of(value)
-            val type = when (value.type) {
-                NULL -> return when (tag) {
-                    Annotation.MISSING -> Datum.missing()
-                    Annotation.BAG -> Datum.nullValue(PType.bag())
-                    Annotation.DATE -> Datum.nullValue(PType.date())
-                    Annotation.TIME -> Datum.nullValue(PType.time(6))
-                    Annotation.TIMESTAMP -> Datum.nullValue(PType.time(6))
-                    Annotation.GRAPH -> error("Datum does not support GRAPH type.")
-                    null -> Datum.nullValue()
-                }
-                BOOL -> when (tag) {
-                    null -> PType.bool()
-                    else -> error("Unexpected type annotation for Ion BOOL: $tag")
-                }
-                INT -> when (tag) {
-                    null -> PType.numeric()
-                    else -> error("Unexpected type annotation for Ion INT: $tag")
-                }
-                FLOAT -> when (tag) {
-                    null -> PType.doublePrecision()
-                    else -> error("Unexpected type annotation for Ion FLOAT: $tag")
-                }
-                DECIMAL -> when (tag) {
-                    null -> PType.decimal()
-                    else -> error("Unexpected type annotation for Ion DECIMAL: $tag")
-                }
-                STRING -> when (tag) {
-                    null -> PType.string()
-                    else -> error("Unexpected type annotation for Ion STRING: $tag")
-                }
-                CLOB -> when (tag) {
-                    null -> PType.clob(Int.MAX_VALUE)
-                    else -> error("Unexpected type annotation for Ion CLOB: $tag")
-                }
-                BLOB -> when (tag) {
-                    null -> PType.blob(Int.MAX_VALUE)
-                    else -> error("Unexpected type annotation for Ion BLOB: $tag")
-                }
-                LIST -> when (tag) {
-                    Annotation.BAG -> PType.bag()
-                    null -> PType.array()
-                    else -> error("Unexpected type annotation for Ion LIST: $tag")
-                }
-                STRUCT -> when (tag) {
-                    null -> PType.struct()
-                    Annotation.DATE -> TODO("IonDatum for DATE not supported")
-                    Annotation.TIME -> TODO("IonDatum for TIME not supported")
-                    Annotation.TIMESTAMP -> TODO("IonDatum for TIMESTAMP not supported")
-                    else -> error("Unexpected type annotation for Ion STRUCT: $tag")
-                }
-                SEXP -> when (tag) {
-                    null -> PType.sexp()
-                    else -> error("Unexpected type annotation for Ion SEXP: $tag")
-                }
-                SYMBOL -> when (tag) {
-                    null -> PType.symbol()
-                    else -> error("Unexpected type annotation for Ion SYMBOL: $tag")
-                }
-                TIMESTAMP -> when (tag) {
-                    null -> PType.timestamp(6)
-                    else -> error("Unexpected type annotation for Ion TIMESTAMP: $tag")
-                }
-            }
-            return IonDatum(value, type)
+    /**
+     * Pack an IonDatum into byte[] using the textual Ion encoding in the given charset.
+     *
+     * @param charset UTF-8 or US-ASCII; any other charset is delegated to the unsupported default.
+     * @return byte[]
+     */
+    override fun pack(charset: Charset): ByteArray {
+        if (charset != StandardCharsets.UTF_8 && charset != StandardCharsets.US_ASCII) {
+            // unsupported
+            return super.pack(charset)
         }
+        val buffer = ByteArrayOutputStream()
+        // withCharset makes the writer honor the requested charset; close() flushes it into the buffer
+        IonTextWriterBuilder.standard().withCharset(charset).build(buffer).use { value.writeTo(it) }
+        return buffer.toByteArray()
     }
 
-    override fun getType(): PType = _type
+    override fun getType(): PType = type
 
-    override fun isNull(): Boolean = _value.isNull
+    override fun isNull(): Boolean = value.isNull
 
     override fun isMissing(): Boolean = false
 
-    override fun getString(): String = when (_kind) {
-        SYMBOL -> _value.stringValue
-        STRING -> _value.stringValue
+    override fun getString(): String = when (value.type) {
+        SYMBOL -> value.stringValue
+        STRING -> value.stringValue
         else -> super.getString()
     }
 
-    override fun getBoolean(): Boolean = when (_kind) {
-        BOOL -> _value.booleanValue
+    override fun getBoolean(): Boolean = when (value.type) {
+        BOOL -> value.booleanValue
         else -> super.getBoolean()
     }
 
-    // override fun getBytes(): ByteArray = when (_kind) {
-    //     CLOB -> _value.clobValue.copyOfBytes()
-    //     BLOB -> _value.blobValue.copyOfBytes()
+    // override fun getBytes(): ByteArray = when (value.type) {
+    //     CLOB -> value.clobValue.copyOfBytes()
+    //     BLOB -> value.blobValue.copyOfBytes()
     //     else -> super.getBytes()
     // }
     //
@@ -164,9 +103,9 @@ public class IonDatum private constructor(value: AnyElement, type: PType) :
     // }
 
     override fun getDate(): Date {
-        return when (_kind) {
+        return when (value.type) {
             TIMESTAMP -> {
-                val ts = _value.timestampValue
+                val ts = value.timestampValue
                 DateTimeValue.date(ts.year, ts.month, ts.day)
             }
             else -> super.getDate()
@@ -174,9 +113,9 @@ public class IonDatum private constructor(value: AnyElement, type: PType) :
     }
 
     override fun getTime(): Time {
-        return when (_kind) {
+        return when (value.type) {
             TIMESTAMP -> {
-                val ts = _value.timestampValue
+                val ts = value.timestampValue
                 val tz = when (ts.localOffset) {
                     null -> TimeZone.UnknownTimeZone
                     else -> TimeZone.UtcOffset.of(ts.zHour, ts.zMinute)
@@ -189,65 +128,65 @@ public class IonDatum private constructor(value: AnyElement, type: PType) :
 
     // TODO: Handle struct notation
     override fun getTimestamp(): Timestamp {
-        return when (_kind) {
-            TIMESTAMP -> DateTimeValue.timestamp(_value.timestampValue)
+        return when (value.type) {
+            TIMESTAMP -> DateTimeValue.timestamp(value.timestampValue)
             else -> super.getTimestamp()
         }
     }
 
-    override fun getBigInteger(): BigInteger = when (_kind) {
-        INT -> _value.bigIntegerValue
+    override fun getBigInteger(): BigInteger = when (value.type) {
+        INT -> value.bigIntegerValue
         else -> super.getBigInteger()
     }
 
-    override fun getDouble(): Double = when (_kind) {
-        FLOAT -> _value.doubleValue
+    override fun getDouble(): Double = when (value.type) {
+        FLOAT -> value.doubleValue
         else -> super.getDouble()
     }
 
-    override fun getBigDecimal(): BigDecimal = when (_kind) {
-        DECIMAL -> _value.decimalValue.bigDecimalValue()
+    override fun getBigDecimal(): BigDecimal = when (value.type) {
+        DECIMAL -> value.decimalValue.bigDecimalValue()
         else -> super.getBigDecimal()
     }
 
-    override fun iterator(): MutableIterator<Datum> = when (_kind) {
-        LIST -> _value.listValues.map { of(it) }.toMutableList().iterator()
-        SEXP -> _value.sexpValues.map { of(it) }.toMutableList().iterator()
+    override fun iterator(): MutableIterator<Datum> = when (value.type) {
+        LIST -> value.listValues.map { IonDatum(it) }.toMutableList().iterator()
+        SEXP -> value.sexpValues.map { IonDatum(it) }.toMutableList().iterator()
         else -> super.iterator()
     }
 
     override fun getFields(): MutableIterator<Field> {
-        if (_kind != STRUCT) {
+        if (value.type != STRUCT) {
             return super.getFields()
         }
-        return _value.structFields
-            .map { Field.of(it.name, of(it.value)) }
+        return value.structFields
+            .map { Field.of(it.name, IonDatum(it.value)) }
             .toMutableList()
             .iterator()
     }
 
     override fun get(name: String): Datum {
-        if (_kind != STRUCT) {
+        if (value.type != STRUCT) {
             return super.get(name)
         }
         // TODO handle multiple/ambiguous field names?
-        val v = _value.asStruct().getOptional(name)
+        val v = value.asStruct().getOptional(name)
         return if (v == null) {
             Datum.missing()
         } else {
-            of(v)
+            IonDatum(v)
         }
     }
 
     override fun getInsensitive(name: String): Datum {
-        if (_kind != STRUCT) {
-            return super.get(name)
+        if (value.type != STRUCT) {
+            return super.getInsensitive(name)
         }
         // TODO handle multiple/ambiguous field names?
-        val struct = _value.asStruct()
+        val struct = value.asStruct()
         for (field in struct.fields) {
             if (field.name.equals(name, ignoreCase = true)) {
-                return of(field.value)
+                return IonDatum(field.value)
             }
         }
         return Datum.missing()
diff --git a/partiql-spi/src/main/kotlin/org/partiql/spi/value/ion/IonDatumException.kt b/partiql-spi/src/main/kotlin/org/partiql/spi/value/ion/IonDatumException.kt
new file mode 100644
index 000000000..1fdde0ee8
--- /dev/null
+++ b/partiql-spi/src/main/kotlin/org/partiql/spi/value/ion/IonDatumException.kt
@@ -0,0 +1,18 @@
+package org.partiql.spi.value.ion
+
+import com.amazon.ion.Span
+
+/**
+ * These are errors specific to reading Ion data.
+ *
+ * TODO add DATA to PError kind.
+ *
+ * @property message description of the error.
+ * @property cause underlying exception, or null when there is none.
+ * @property span reader span supplied by the code raising the error.
+ */
+public class IonDatumException internal constructor(
+    public override val message: String,
+    public override val cause: Throwable?,
+    public val span: Span,
+) : RuntimeException()
diff --git a/partiql-spi/src/main/kotlin/org/partiql/spi/value/ion/IonDatumReader.kt b/partiql-spi/src/main/kotlin/org/partiql/spi/value/ion/IonDatumReader.kt
new file mode 100644
index 000000000..1b0cf0c18
--- /dev/null
+++ b/partiql-spi/src/main/kotlin/org/partiql/spi/value/ion/IonDatumReader.kt
@@ -0,0 +1,227 @@
+package org.partiql.spi.value.ion
+
+import com.amazon.ion.IonException
+import com.amazon.ion.IonReader
+import com.amazon.ion.IonType
+import com.amazon.ion.Span
+import com.amazon.ion.SpanProvider
+import com.amazon.ion.system.IonReaderBuilder
+import com.amazon.ionelement.api.loadCurrentElement
+import org.partiql.spi.value.Datum
+import org.partiql.spi.value.DatumReader
+import java.io.IOException
+import java.io.InputStream
+
+/**
+ * A [DatumReader] implementation for Ion encoded PartiQL data.
+ *
+ * TODO this will need to be re-worked for structural/collection types.
+ */
+public class IonDatumReader private constructor(
+    private val reader: IonReader,
+    private val others: Map<String, DatumReader>,
+) : DatumReader {
+
+    /**
+     * Create a reader from the [IonReader].
+     */
+    public constructor(reader: IonReader) : this(reader, emptyMap())
+
+    /**
+     * Create a reader from the [InputStream] using the standard [IonReader].
+     */
+    public constructor(input: InputStream) : this(IonReaderBuilder.standard().build(input))
+
+    /**
+     * Helper for current span on errors.
+     */
+    private fun span(): Span = reader.asFacet(SpanProvider::class.java).currentSpan()
+
+    /**
+     * From AutoCloseable.
+     */
+    override fun close() {
+        reader.close()
+    }
+
+    /**
+     * Read next Datum or null.
+     *
+     * A value with no annotation is read by its Ion type; a single annotation selects the
+     * PartiQL type by name; more than one annotation is an error.
+     *
+     * @return the next Datum, or null when the underlying reader has no next value.
+     * @throws IonDatumException when the value is malformed or carries more than one annotation.
+     */
+    @Throws(IOException::class, IonDatumException::class)
+    override fun read(): Datum? {
+        return try {
+            val type = reader.next() ?: return null
+            val anno = reader.typeAnnotations
+            when (anno.size) {
+                0 -> read0(type)
+                1 -> read1(anno[0])
+                else -> throw IonDatumException("expected 0 or 1 annotations", null, span())
+            }
+        } catch (ex: IonException) {
+            throw IonDatumException("data exception", ex, span())
+        }
+    }
+
+    /**
+     * Read an un-annotated value, choosing the Datum by its Ion type.
+     */
+    private fun read0(type: IonType): Datum = when (type) {
+        IonType.NULL -> Datum.nullValue()
+        IonType.BOOL -> bool()
+        IonType.INT -> bigint()
+        IonType.FLOAT -> double()
+        IonType.DECIMAL -> decimal()
+        IonType.TIMESTAMP -> TODO("timestamp")
+        IonType.STRING -> varchar()
+        IonType.CLOB -> clob()
+        IonType.BLOB -> blob()
+        IonType.LIST -> array()
+        IonType.SEXP -> TODO()
+        IonType.STRUCT -> struct()
+        IonType.SYMBOL -> missing()
+        IonType.DATAGRAM -> throw IonDatumException("encountered datagram", null, span())
+    }
+
+    /**
+     * Read a value with exactly one annotation, choosing the Datum by the annotation text.
+     *
+     * Would be nice to NOT do string comparisons.
+     */
+    private fun read1(annotation: String): Datum = when (annotation) {
+        "bool" -> bool()
+        "tinyint" -> tinyint()
+        "smallint" -> smallint()
+        "int" -> int()
+        "bigint" -> bigint()
+        "real" -> real()
+        "double" -> double()
+        "varchar" -> varchar()
+        "array" -> array()
+        "bag" -> bag()
+        "struct" -> struct()
+        "ion" -> IonDatum(loadCurrentElement(reader))
+        else -> TODO("check others")
+    }
+
+    /**
+     * The only un-annotated symbol accepted is `missing`.
+     */
+    private fun missing(): Datum = when (reader.symbolValue().text) {
+        "missing" -> Datum.missing()
+        else -> throw IonDatumException("expected symbol `missing`", null, span())
+    }
+
+    private fun bool(): Datum = Datum.bool(reader.booleanValue())
+
+    private fun tinyint(): Datum {
+        val v = reader.longValue()
+        if (v < Byte.MIN_VALUE || v > Byte.MAX_VALUE) {
+            throw IonDatumException("tinyint out of range", null, span())
+        }
+        return Datum.tinyint(v.toByte())
+    }
+
+    private fun smallint(): Datum {
+        val v = reader.longValue()
+        if (v < Short.MIN_VALUE || v > Short.MAX_VALUE) {
+            throw IonDatumException("smallint out of range", null, span())
+        }
+        return Datum.smallint(v.toShort())
+    }
+
+    /**
+     * As far as I can tell, IonReader impls do `(int) longValue()`.
+     */
+    private fun int(): Datum {
+        val v = reader.longValue()
+        if (v < Int.MIN_VALUE || v > Int.MAX_VALUE) {
+            throw IonDatumException("int out of range", null, span())
+        }
+        return Datum.integer(v.toInt())
+    }
+
+    private fun bigint(): Datum = Datum.bigint(reader.longValue())
+
+    private fun decimal(): Datum = TODO()
+
+    private fun decimal(precision: Int): Datum = TODO()
+
+    private fun decimal(precision: Int, scale: Int): Datum = TODO()
+
+    private fun float(precision: Int): Datum = TODO("float")
+
+    /**
+     * Bounds are +/-Float.MAX_VALUE; Float.MIN_VALUE is the smallest positive float, not the most negative.
+     */
+    private fun real(): Datum {
+        val v = reader.doubleValue()
+        if (v > Float.MAX_VALUE || v < -Float.MAX_VALUE) {
+            throw IonDatumException("real out of range", null, span())
+        }
+        return Datum.real(v.toFloat())
+    }
+
+    private fun double(): Datum = Datum.doublePrecision(reader.doubleValue())
+
+    private fun char(length: Int): Datum = TODO()
+
+    private fun varchar(): Datum = Datum.string(reader.stringValue())
+
+    private fun varchar(length: Int): Datum = TODO()
+
+    private fun clob(): Datum = TODO()
+
+    private fun clob(length: Int): Datum = TODO()
+
+    private fun blob(): Datum = TODO()
+
+    private fun blob(length: Int): Datum = TODO()
+
+    private fun date(): Datum = TODO()
+
+    private fun time(precision: Int): Datum = TODO()
+
+    private fun timez(precision: Int): Datum = TODO()
+
+    private fun timestamp(precision: Int): Datum = TODO()
+
+    private fun timestampz(precision: Int): Datum = TODO()
+
+    private fun array(): Datum = TODO()
+
+    private fun bag(): Datum = TODO()
+
+    private fun struct(): Datum = TODO()
+
+    public companion object {
+
+        @JvmStatic
+        public fun build(): Builder = Builder()
+    }
+
+    /**
+     * IonDatumReader.Builder can be re-used; each built reader gets its own copy of the registrations.
+     */
+    public class Builder {
+
+        private val others = mutableMapOf<String, DatumReader>()
+
+        public fun register(encoding: String, reader: DatumReader): Builder {
+            others[encoding] = reader
+            return this
+        }
+
+        public fun build(reader: IonReader): IonDatumReader = IonDatumReader(reader, others.toMap())
+
+        public fun build(input: InputStream): IonDatumReader = IonDatumReader(
+            reader = IonReaderBuilder.standard().build(input),
+            others = others.toMap(),
+        )
+    }
+}
diff --git a/partiql-spi/src/main/kotlin/org/partiql/spi/value/ion/IonDatumWriter.kt b/partiql-spi/src/main/kotlin/org/partiql/spi/value/ion/IonDatumWriter.kt
new file mode 100644
index 000000000..2e656e298
--- /dev/null
+++ b/partiql-spi/src/main/kotlin/org/partiql/spi/value/ion/IonDatumWriter.kt
@@ -0,0 +1,18 @@
+package org.partiql.spi.value.ion
+
+import org.partiql.spi.value.Datum
+import org.partiql.spi.value.DatumWriter
+
+/**
+ * A [DatumWriter] for Ion encoded PartiQL data; not yet implemented, every method is a TODO.
+ */
+public class IonDatumWriter : DatumWriter {
+
+    override fun close() {
+        TODO("Not yet implemented")
+    }
+
+    override fun write(datum: Datum?): DatumWriter {
+        TODO("Not yet implemented")
+    }
+}
diff --git a/partiql-types/src/main/java/org/partiql/types/PType.java b/partiql-types/src/main/java/org/partiql/types/PType.java
index 600e89f56..cbe5e7d50 100644
--- a/partiql-types/src/main/java/org/partiql/types/PType.java
+++ b/partiql-types/src/main/java/org/partiql/types/PType.java
@@ -556,11 +556,13 @@ static PType character(int length) {
      */
     @NotNull
     static PType varchar(int length) {
-        return new PTypeWithMaxLength(Kind.CHAR, length);
+        return new PTypeWithMaxLength(Kind.VARCHAR, length);
     }
 
     /**
+     * TODO remove in favor of non-parameterized VARCHAR
+     *
      * @return a PartiQL string type
      */
     @NotNull
     static PType string() {