From f094eaca6c1a56652bf70ed29b368ebe5dcf856f Mon Sep 17 00:00:00 2001 From: Olga Ovcharenko Date: Mon, 21 Aug 2023 17:07:28 +0200 Subject: [PATCH] [SYSTEMDS-3589] Frame single column ragged array This commit contains code to add a simple ragged array, that allows us to allocate columns in frames with a lower number of contained materialized values. Closes #1857 Closes #1884 --- .../sysds/runtime/frame/data/FrameBlock.java | 2 +- .../runtime/frame/data/columns/Array.java | 22 +- .../frame/data/columns/ArrayFactory.java | 8 +- .../runtime/frame/data/columns/CharArray.java | 2 +- .../frame/data/columns/OptionalArray.java | 4 +- .../frame/data/columns/RaggedArray.java | 238 ++++++++++++++---- .../frame/data/columns/StringArray.java | 5 +- .../frame/array/FrameArrayTests.java | 185 +++++++++----- 8 files changed, 346 insertions(+), 120 deletions(-) diff --git a/src/main/java/org/apache/sysds/runtime/frame/data/FrameBlock.java b/src/main/java/org/apache/sysds/runtime/frame/data/FrameBlock.java index b9c748481a8..513b788b605 100644 --- a/src/main/java/org/apache/sysds/runtime/frame/data/FrameBlock.java +++ b/src/main/java/org/apache/sysds/runtime/frame/data/FrameBlock.java @@ -861,7 +861,7 @@ private double arraysSizeInMemory() { try { size += pool.submit(() -> { return Arrays.stream(_coldata).parallel() // parallel columns - .map(x -> x.getInMemorySize()).reduce(0L, Long::sum); + .map(x ->x.getInMemorySize()).reduce(0L, (a,x) -> a + x); }).get(); } catch(InterruptedException | ExecutionException e) { diff --git a/src/main/java/org/apache/sysds/runtime/frame/data/columns/Array.java b/src/main/java/org/apache/sysds/runtime/frame/data/columns/Array.java index f57e303a498..ff6c5d3d5f5 100644 --- a/src/main/java/org/apache/sysds/runtime/frame/data/columns/Array.java +++ b/src/main/java/org/apache/sysds/runtime/frame/data/columns/Array.java @@ -147,8 +147,8 @@ public final int size() { /** * Get the value at a given index. * - * This method returns objects that have a high overhead in allocation. Therefore it is not as efficient as using the - * vectorized operations specified in the object. + * This method returns objects that have a high overhead in allocation. Therefore it is not as efficient as using + * the vectorized operations specified in the object. * * @param index The index to query * @return The value returned as an object @@ -168,8 +168,24 @@ public final int size() { */ public abstract Object get(); + /** + * Get the index's value. + * + * returns 0 in case of Null. + * + * @param i index to get value from + * @return the value + */ public abstract double getAsDouble(int i); + /** + * Get the index's value. + * + * returns Double.NaN in case of Null. + * + * @param i index to get value from + * @return the value + */ public double getAsNaNDouble(int i) { return getAsDouble(i); } @@ -644,7 +660,7 @@ public ArrayCompressionStatistics statistics(int nSamples) { if(ddcSize < memSize) return new ArrayCompressionStatistics(memSizePerElement, // - estDistinct, true, getValueType(),FrameArrayType.DDC, memSize, ddcSize); + estDistinct, true, getValueType(), FrameArrayType.DDC, memSize, ddcSize); return null; } diff --git a/src/main/java/org/apache/sysds/runtime/frame/data/columns/ArrayFactory.java b/src/main/java/org/apache/sysds/runtime/frame/data/columns/ArrayFactory.java index 55d32f15be9..92f4ee4c313 100644 --- a/src/main/java/org/apache/sysds/runtime/frame/data/columns/ArrayFactory.java +++ b/src/main/java/org/apache/sysds/runtime/frame/data/columns/ArrayFactory.java @@ -35,7 +35,7 @@ public interface ArrayFactory { public final static int bitSetSwitchPoint = 64; public enum FrameArrayType { - STRING, BOOLEAN, BITSET, INT32, INT64, FP32, FP64, CHARACTER, OPTIONAL, DDC; + STRING, BOOLEAN, BITSET, INT32, INT64, FP32, FP64, CHARACTER, RAGGED, OPTIONAL, DDC; } public static StringArray create(String[] col) { @@ -74,6 +74,10 @@ public static OptionalArray create(T[] col) { return new OptionalArray<>(col); } + public static RaggedArray create(T[] col, int m) { + return new RaggedArray(col, m); + } + public static long getInMemorySize(ValueType type, int _numRows, boolean containsNull) { if(containsNull) { switch(type) { @@ -216,6 +220,8 @@ public static Array read(DataInput in, int nRow) throws IOException { case CHARACTER: arr = new CharArray(new char[nRow]); break; + case RAGGED: + return RaggedArray.readRagged(in, nRow); case OPTIONAL: return OptionalArray.readOpt(in, nRow); case DDC: diff --git a/src/main/java/org/apache/sysds/runtime/frame/data/columns/CharArray.java b/src/main/java/org/apache/sysds/runtime/frame/data/columns/CharArray.java index be7044e9071..d87cf39666e 100644 --- a/src/main/java/org/apache/sysds/runtime/frame/data/columns/CharArray.java +++ b/src/main/java/org/apache/sysds/runtime/frame/data/columns/CharArray.java @@ -185,7 +185,7 @@ public FrameArrayType getFrameArrayType() { @Override public long getExactSerializedSize() { - return 1 + 2 * _data.length; + return 1L + 2L * _data.length; } @Override diff --git a/src/main/java/org/apache/sysds/runtime/frame/data/columns/OptionalArray.java b/src/main/java/org/apache/sysds/runtime/frame/data/columns/OptionalArray.java index 2405c227569..772b07af8b0 100644 --- a/src/main/java/org/apache/sysds/runtime/frame/data/columns/OptionalArray.java +++ b/src/main/java/org/apache/sysds/runtime/frame/data/columns/OptionalArray.java @@ -459,9 +459,9 @@ public boolean equals(Array other) { @Override public String toString() { StringBuilder sb = new StringBuilder(_size + 2); - sb.append(super.toString() + "<" + _a.getValueType() + ">:["); + sb.append(super.toString()).append("<").append(_a.getClass().getSimpleName()).append(">:["); for(int i = 0; i < _size - 1; i++) - sb.append(get(i) + ","); + sb.append(get(i)).append(","); sb.append(get(_size - 1)); sb.append("]"); return sb.toString(); diff --git a/src/main/java/org/apache/sysds/runtime/frame/data/columns/RaggedArray.java b/src/main/java/org/apache/sysds/runtime/frame/data/columns/RaggedArray.java index b8f49679b2c..a2745df32a9 100644 --- a/src/main/java/org/apache/sysds/runtime/frame/data/columns/RaggedArray.java +++ b/src/main/java/org/apache/sysds/runtime/frame/data/columns/RaggedArray.java @@ -29,12 +29,11 @@ import org.apache.sysds.runtime.matrix.data.Pair; /** - * A Ragged array for the columns contains a smaller array, only containing the values of the top most part of the - * column. + * A Ragged array for a single column contains a smaller array, only containing the values of the top most part of the * - * This makes the allocation much better in cases where only the top n rows of a m row frame is used for the specific + * This makes the allocation much better in cases where only the top n rows of a m row frame are used for the specific * column. It is typically used for instances of transform encode, where the transform encode return a metadata frame to - * enable encoding and decoding the matrix + * enable encoding and decoding the matrix. */ public class RaggedArray extends Array { @@ -49,97 +48,203 @@ public class RaggedArray extends Array { */ public RaggedArray(T[] a, int m) { super(m); - throw new NotImplementedException(); + this._a = ArrayFactory.create(a); + } + + /** + * The allocation where, a's length is shorter than m, and we handle all accesses above len(a) as null. + * + * @param a The underlying array that is shorter than length m + * @param m The overall supported length m + */ + public RaggedArray(Array a, int m) { + super(m); + this._a = a; + } + + protected Array getInnerArray() { + return _a; } @Override public void write(DataOutput out) throws IOException { - throw new NotImplementedException("Unimplemented method 'write'"); + out.writeByte(FrameArrayType.RAGGED.ordinal()); + out.writeInt(_size); + out.writeInt(_a.size()); + _a.write(out); } @Override + @SuppressWarnings("unchecked") public void readFields(DataInput in) throws IOException { - throw new NotImplementedException("Unimplemented method 'readFields'"); + _size = in.readInt(); + _a = (Array) ArrayFactory.read(in, in.readInt()); + } + + protected static RaggedArray readRagged(DataInput in, int nRow) throws IOException { + + int m = in.readInt(); + final Array a = ArrayFactory.read(in, in.readInt()); + return new RaggedArray<>(a, m); + } @Override public T get(int index) { - throw new NotImplementedException("Unimplemented method 'get'"); + if(index > _size || index < 0) + throw new ArrayIndexOutOfBoundsException("Index " + index + " out of bounds " + _size); + return index < _a._size ? _a.get(index) : null; } @Override public Object get() { - throw new NotImplementedException("Unimplemented method 'get'"); + throw new NotImplementedException("Should not be called"); } @Override public double getAsDouble(int i) { - throw new NotImplementedException("Unimplemented method 'getAsDouble'"); + return i < _a._size ? _a.getAsDouble(i) : 0; + } + + @Override + public double getAsNaNDouble(int i) { + return i < _a._size ? _a.getAsNaNDouble(i) : Double.NaN; } @Override public void set(int index, T value) { - throw new NotImplementedException("Unimplemented method 'set'"); + if(index < _a._size) + _a.set(index, value); + else if(index < super.size()) { + _a.reset(index + 1); + _a.set(index, value); + LOG.warn("Reallocated ragged array"); + } } @Override public void set(int index, double value) { - throw new NotImplementedException("Unimplemented method 'set'"); + if(index < _a._size) + _a.set(index, value); + else if(index < super.size()) { + _a.reset(index + 1); + _a.set(index, value); + LOG.warn("Reallocated ragged array"); + } } @Override public void set(int index, String value) { - throw new NotImplementedException("Unimplemented method 'set'"); + if(index < _a._size) + _a.set(index, value); + else if(index < super.size()) { + _a.reset(index + 1); + _a.set(index, value); + LOG.warn("Reallocated ragged array"); + } } @Override public void setFromOtherType(int rl, int ru, Array value) { - throw new NotImplementedException("Unimplemented method 'setFromOtherType'"); + if(rl >= 0 && rl < _a._size && ru < _a._size) + _a.setFromOtherType(rl, ru, value); + else + throw new NotImplementedException("Unimplemented method 'setFromOtherType'"); } @Override public void set(int rl, int ru, Array value) { - throw new NotImplementedException("Unimplemented method 'set'"); + if(rl >= 0 && rl < _a._size && ru < _a._size) + if(value instanceof RaggedArray) + _a.set(rl, ru, ((RaggedArray) value).getInnerArray()); + else if(_a.getClass() == value.getClass()) + _a.set(rl, ru, value); + else + throw new RuntimeException( + "RaggedArray set: value type should be same to RaggedArray type " + _a.getClass()); + else if(rl >= 0 && rl < super.size() && ru < super.size()) { + _a.reset(rl + 1); + _a.set(rl, ru, value); + LOG.warn("Reallocated ragged array"); + } } @Override public void set(int rl, int ru, Array value, int rlSrc) { - throw new NotImplementedException("Unimplemented method 'set'"); + if(rl >= 0 && rl < _a._size && ru < _a._size) + if(value instanceof RaggedArray) + _a.set(rl, ru, ((RaggedArray) value).getInnerArray(), rlSrc); + else if(_a.getClass() == value.getClass()) + _a.set(rl, ru, value, rlSrc); + else + throw new RuntimeException( + "RaggedArray set: value type should be same to RaggedArray type " + _a.getClass()); } @Override public void setNz(int rl, int ru, Array value) { - throw new NotImplementedException("Unimplemented method 'setNz'"); + if(rl >= 0 && rl < _a._size && ru < _a._size) + _a.setNz(rl, ru, value); + else + throw new NotImplementedException(); } @Override public void setFromOtherTypeNz(int rl, int ru, Array value) { - throw new NotImplementedException("Unimplemented method 'setFromOtherTypeNz'"); + if(rl >= 0 && rl < _a._size && ru < _a._size) + _a.setFromOtherTypeNz(rl, ru, value); + else + throw new NotImplementedException(); } @Override public void append(String value) { - throw new NotImplementedException("Unimplemented method 'append'"); + Array oldVals = _a.clone(); + _a.reset(super.size() + 1); + _a.set(0, oldVals.size() - 1, oldVals); + _a.set(super.size(), value); + super._size += 1; + + LOG.warn("Fully allocated ragged array"); } @Override public void append(T value) { - throw new NotImplementedException("Unimplemented method 'append'"); + Array oldVals = _a.clone(); + _a.reset(super.size() + 1); + _a.set(0, oldVals.size() - 1, oldVals); + _a.set(super.size(), value); + super._size += 1; + + LOG.warn("Fully allocated ragged array"); } @Override public Array append(Array other) { - throw new NotImplementedException("Unimplemented method 'append'"); + Array oldVals = _a.clone(); + _a.reset(super.size() + other._size + 1); + _a.set(0, oldVals.size() - 1, oldVals); + _a.set(super.size(), super.size() + other.size() - 1, other); + super._size += other.size(); + + LOG.warn("Fully allocated ragged array"); + + return this; } @Override public Array slice(int rl, int ru) { - throw new NotImplementedException("Unimplemented method 'slice'"); + if(rl >= 0 && rl < _a._size && ru < _a._size) + return _a.slice(rl, ru); + else if(rl >= 0 && ru >= _a._size) + return _a.slice(rl, _a._size - 1); + return null; } @Override public void reset(int size) { - throw new NotImplementedException("Unimplemented method 'reset'"); + _a.reset(size); + super._size = size; } @Override @@ -149,107 +254,130 @@ public byte[] getAsByteArray() { @Override public ValueType getValueType() { - throw new NotImplementedException("Unimplemented method 'getValueType'"); + return _a.getValueType(); } @Override public Pair analyzeValueType() { - throw new NotImplementedException("Unimplemented method 'analyzeValueType'"); + return _a.analyzeValueType(); } @Override public FrameArrayType getFrameArrayType() { - throw new NotImplementedException("Unimplemented method 'getFrameArrayType'"); + return FrameArrayType.RAGGED; } @Override public long getExactSerializedSize() { - throw new NotImplementedException("Unimplemented method 'getExactSerializedSize'"); + return _a.getExactSerializedSize() + 8 + 1; } @Override protected Array changeTypeBitSet() { - throw new NotImplementedException("Unimplemented method 'changeTypeBitSet'"); + return _a.changeTypeBitSet(); } @Override protected Array changeTypeBoolean() { - throw new NotImplementedException("Unimplemented method 'changeTypeBoolean'"); + return _a.changeTypeBoolean(); } @Override protected Array changeTypeDouble() { - throw new NotImplementedException("Unimplemented method 'changeTypeDouble'"); + return _a.changeTypeDouble(); } @Override protected Array changeTypeFloat() { - throw new NotImplementedException("Unimplemented method 'changeTypeFloat'"); + return _a.changeTypeFloat(); } @Override protected Array changeTypeInteger() { - throw new NotImplementedException("Unimplemented method 'changeTypeInteger'"); + return _a.changeTypeInteger(); } @Override protected Array changeTypeLong() { - throw new NotImplementedException("Unimplemented method 'changeTypeLong'"); + return _a.changeTypeLong(); } @Override protected Array changeTypeString() { - throw new NotImplementedException("Unimplemented method 'changeTypeString'"); + return _a.changeTypeString(); } @Override protected Array changeTypeCharacter() { - throw new NotImplementedException("Unimplemented method 'changeTypeCharacter'"); + return _a.changeTypeCharacter(); } @Override public void fill(String val) { - throw new NotImplementedException("Unimplemented method 'fill'"); + _a.reset(super.size()); + _a.fill(val); } @Override public void fill(T val) { - throw new NotImplementedException("Unimplemented method 'fill'"); + _a.reset(super.size()); + _a.fill(val); } @Override public boolean isShallowSerialize() { - throw new NotImplementedException("Unimplemented method 'isShallowSerialize'"); + return _a.isShallowSerialize(); } @Override public boolean isEmpty() { - throw new NotImplementedException("Unimplemented method 'isEmpty'"); + return _a.isEmpty(); } @Override + @SuppressWarnings("unchecked") public Array select(int[] indices) { - throw new NotImplementedException("Unimplemented method 'select'"); + Array ret = _a.getFrameArrayType() == FrameArrayType.OPTIONAL ? // + (Array) ArrayFactory.allocateOptional(_a.getValueType(), indices.length) : // + (Array) ArrayFactory.allocate(_a.getValueType(), indices.length); + for(int i = 0; i < indices.length; i++) + ret.set(i, get(indices[i])); + return ret; } @Override + @SuppressWarnings("unchecked") public Array select(boolean[] select, int nTrue) { - throw new NotImplementedException("Unimplemented method 'select'"); + Array ret = _a.getFrameArrayType() == FrameArrayType.OPTIONAL ? // + (Array) ArrayFactory.allocateOptional(_a.getValueType(), nTrue) : // + (Array) ArrayFactory.allocate(_a.getValueType(), nTrue); + int k = 0; + for(int i = 0; i < _a.size(); i++) { + if(select[i]) + ret.set(k++, _a.get(i)); + } + + for(int i = _a.size(); i < select.length; i++) { + if(select[i]) + ret.set(k++, get(i)); + } + + return ret; } @Override public boolean isNotEmpty(int i) { - throw new NotImplementedException("Unimplemented method 'isNotEmpty'"); + return i < _a.size() && _a.isNotEmpty(i); } @Override public Array clone() { - throw new NotImplementedException("Unimplemented method 'clone'"); + return new RaggedArray<>(_a.clone(), super._size); } @Override public double hashDouble(int idx) { - throw new NotImplementedException("Unimplemented method 'hashDouble'"); + return idx < _a.size() ? _a.hashDouble(idx) : Double.NaN; } @Override @@ -257,4 +385,26 @@ public boolean equals(Array other) { throw new NotImplementedException("Unimplemented method 'equals'"); } + @Override + public long getInMemorySize() { + return baseMemoryCost() + _a.getInMemorySize() + 8; + } + + @Override + public boolean containsNull() { + return (_a.size() < super._size) || _a.containsNull(); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(_size + 2); + sb.append(super.toString()).append("<"); + sb.append(_a.getClass().getSimpleName()).append(">:["); + for(int i = 0; i < _size - 1; i++) + sb.append(get(i)).append(","); + sb.append(get(_size - 1)); + sb.append("]"); + return sb.toString(); + } + } diff --git a/src/main/java/org/apache/sysds/runtime/frame/data/columns/StringArray.java b/src/main/java/org/apache/sysds/runtime/frame/data/columns/StringArray.java index 8eddc37707d..e24815aebaf 100644 --- a/src/main/java/org/apache/sysds/runtime/frame/data/columns/StringArray.java +++ b/src/main/java/org/apache/sysds/runtime/frame/data/columns/StringArray.java @@ -302,8 +302,9 @@ protected Array changeTypeBoolean() { firstNN = _data[i++]; } - // detect type of transform. - if(i == size()) // if all null return empty boolean. + if(firstNN == null) + // this check is similar to saying i == size(); + // this means all values were null. therefore we have an easy time retuning an empty boolean array. return ArrayFactory.allocateBoolean(size()); else if(firstNN.toLowerCase().equals("true") || firstNN.toLowerCase().equals("false")) return changeTypeBooleanStandard(); diff --git a/src/test/java/org/apache/sysds/test/component/frame/array/FrameArrayTests.java b/src/test/java/org/apache/sysds/test/component/frame/array/FrameArrayTests.java index 48f7f0bd2dd..c712789ab71 100644 --- a/src/test/java/org/apache/sysds/test/component/frame/array/FrameArrayTests.java +++ b/src/test/java/org/apache/sysds/test/component/frame/array/FrameArrayTests.java @@ -52,6 +52,7 @@ import org.apache.sysds.runtime.frame.data.columns.IntegerArray; import org.apache.sysds.runtime.frame.data.columns.LongArray; import org.apache.sysds.runtime.frame.data.columns.OptionalArray; +import org.apache.sysds.runtime.frame.data.columns.RaggedArray; import org.apache.sysds.runtime.frame.data.columns.StringArray; import org.apache.sysds.runtime.frame.data.lib.FrameLibRemoveEmpty; import org.apache.sysds.runtime.matrix.data.Pair; @@ -100,13 +101,17 @@ public static Collection data() { tests.add(new Object[] {ArrayFactory.create(new String[] {"1", "0", "1"}), FrameArrayType.STRING}); tests.add(new Object[] {ArrayFactory.create(new String[] {"1", "0", "null"}), FrameArrayType.STRING}); tests.add(new Object[] {ArrayFactory.create(new String[] {"0", "0", "null"}), FrameArrayType.STRING}); - tests.add(new Object[] {ArrayFactory.create(new String[] {"true", "false", "false"}), FrameArrayType.STRING}); - tests.add(new Object[] {ArrayFactory.create(new String[] {"True", "False", "False"}), FrameArrayType.STRING}); - tests.add(new Object[] {ArrayFactory.create(new String[] {"False", "False", "False"}), FrameArrayType.STRING}); + tests.add( + new Object[] {ArrayFactory.create(new String[] {"true", "false", "false"}), FrameArrayType.STRING}); + tests.add( + new Object[] {ArrayFactory.create(new String[] {"True", "False", "False"}), FrameArrayType.STRING}); + tests.add( + new Object[] {ArrayFactory.create(new String[] {"False", "False", "False"}), FrameArrayType.STRING}); tests.add(new Object[] {ArrayFactory.create(new String[] {"T", "F", "F"}), FrameArrayType.STRING}); tests.add(new Object[] {ArrayFactory.create(new String[] {"t", "f", "f"}), FrameArrayType.STRING}); tests.add(new Object[] {ArrayFactory.create(new String[] {"f", "t", "t"}), FrameArrayType.STRING}); - tests.add(new Object[] {ArrayFactory.create(new String[] {"true", "false", "BLAA"}), FrameArrayType.STRING}); + tests + .add(new Object[] {ArrayFactory.create(new String[] {"true", "false", "BLAA"}), FrameArrayType.STRING}); tests.add(new Object[] {ArrayFactory.create(new float[] {0.0f, 1.0f, 1.0f, 0.0f}), FrameArrayType.FP32}); tests.add(new Object[] {ArrayFactory.create(new double[] {0.0, 1.0, 1.0, 0.0}), FrameArrayType.FP64}); tests.add(new Object[] {ArrayFactory.create(new long[] {0, 1, 1, 0, 0, 1}), FrameArrayType.INT64}); @@ -114,9 +119,12 @@ public static Collection data() { tests.add(new Object[] {ArrayFactory.create(generateRandom01String(100, 324)), FrameArrayType.STRING}); tests.add(new Object[] {ArrayFactory.create(generateRandom01String(80, 22)), FrameArrayType.STRING}); tests.add(new Object[] {ArrayFactory.create(generateRandom01String(32, 221)), FrameArrayType.STRING}); - tests.add(new Object[] {ArrayFactory.create(generateRandomTrueFalseString(32, 221)), FrameArrayType.STRING}); - tests.add(new Object[] {ArrayFactory.create(generateRandomTrueFalseString(80, 221)), FrameArrayType.STRING}); - tests.add(new Object[] {ArrayFactory.create(generateRandomTrueFalseString(150, 221)), FrameArrayType.STRING}); + tests + .add(new Object[] {ArrayFactory.create(generateRandomTrueFalseString(32, 221)), FrameArrayType.STRING}); + tests + .add(new Object[] {ArrayFactory.create(generateRandomTrueFalseString(80, 221)), FrameArrayType.STRING}); + tests.add( + new Object[] {ArrayFactory.create(generateRandomTrueFalseString(150, 221)), FrameArrayType.STRING}); tests.add(new Object[] {ArrayFactory.create(generateRandomTFString(150, 221)), FrameArrayType.STRING}); tests.add(new Object[] {ArrayFactory.create(generateRandomTFString(22, 2)), FrameArrayType.STRING}); tests.add(new Object[] {ArrayFactory.create(generateRandomTFString(142, 4)), FrameArrayType.STRING}); @@ -129,8 +137,10 @@ public static Collection data() { tests.add(new Object[] {ArrayFactory.create(generateRandomNullFloatString(67, 21)), FrameArrayType.STRING}); tests.add(new Object[] {ArrayFactory.create(new String[30]), FrameArrayType.STRING}); // all null tests.add(new Object[] {ArrayFactory.create(new char[] {0, 0, 0, 0, 1, 1, 1}), FrameArrayType.CHARACTER}); - tests.add(new Object[] {ArrayFactory.create(new char[] {'t', 't', 'f', 'f', 'T'}), FrameArrayType.CHARACTER}); - tests.add(new Object[] {ArrayFactory.create(new char[] {'0', '2', '3', '4', '9'}), FrameArrayType.CHARACTER}); + tests.add( + new Object[] {ArrayFactory.create(new char[] {'t', 't', 'f', 'f', 'T'}), FrameArrayType.CHARACTER}); + tests.add( + new Object[] {ArrayFactory.create(new char[] {'0', '2', '3', '4', '9'}), FrameArrayType.CHARACTER}); tests.add(new Object[] {ArrayFactory.create(generateRandom01chars(150, 221)), FrameArrayType.CHARACTER}); tests.add(new Object[] {ArrayFactory.create(generateRandom01chars(67, 221)), FrameArrayType.CHARACTER}); tests.add(new Object[] {DDCArray.compressToDDC(ArrayFactory.create(generateRandom01chars(67, 221))), @@ -138,7 +148,8 @@ public static Collection data() { tests.add(new Object[] {DDCArray.compressToDDC(ArrayFactory.create(generateRandom01chars(30, 221))), FrameArrayType.CHARACTER}); // Long to int - tests.add(new Object[] {ArrayFactory.create(new long[] {3214, 424, 13, 22, 111, 134}), FrameArrayType.INT64}); + tests.add( + new Object[] {ArrayFactory.create(new long[] {3214, 424, 13, 22, 111, 134}), FrameArrayType.INT64}); tests.add(new Object[] {ArrayFactory.create(new double[] {// Double.NaN, 424, 13, Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, 134}), FrameArrayType.FP64}); @@ -228,10 +239,10 @@ public void getSizeEstimateVsReal() { case BOOLEAN: if(a instanceof BooleanArray) // just in case we overwrite the BitSet to boolean Array type. estSize = BooleanArray.estimateInMemorySize(a.size()); - break; + break; default: // nothing } - if(a.getFrameArrayType() == FrameArrayType.DDC) + if(a.getFrameArrayType() == FrameArrayType.DDC || a.getFrameArrayType() == FrameArrayType.RAGGED) return; if(memSize > estSize) fail("Estimated size is not smaller than actual:" + memSize + " " + estSize + "\n" + a.getValueType() + " " @@ -304,6 +315,8 @@ public void getFrameArrayType() { return; if(t == FrameArrayType.DDC)// can be many things. return; + if(t == FrameArrayType.OPTIONAL && a.getFrameArrayType() == FrameArrayType.RAGGED) + return; if(a.getFrameArrayType() == FrameArrayType.DDC) return; // can happen where DDC is wrapping Optional. @@ -354,6 +367,7 @@ public void get() { case CHARACTER: x = a.get(); break; + case RAGGED: case OPTIONAL: try { a.get(); @@ -715,7 +729,7 @@ public void toByteArray() { // just test that it serialize as byte array with no crashes a.getAsByteArray(); } - catch(DMLCompressionException e) { + catch(DMLCompressionException | NotImplementedException e) { return; // valid } } @@ -823,7 +837,7 @@ public void appendNull() { try { aa.append((String) null); - if(a.getFrameArrayType() == FrameArrayType.OPTIONAL) + if(a.getFrameArrayType() == FrameArrayType.OPTIONAL || a.getFrameArrayType() == FrameArrayType.RAGGED) assertEquals(aa.get(aa.size() - 1), null); else { switch(a.getValueType()) { @@ -870,7 +884,7 @@ public void append60Null() { for(int i = 0; i < 60; i++) aa.append((String) null); - if(a.getFrameArrayType() == FrameArrayType.OPTIONAL) + if(a.getFrameArrayType() == FrameArrayType.OPTIONAL || a.getFrameArrayType() == FrameArrayType.RAGGED) assertEquals(aa.get(aa.size() - 1), null); else { switch(a.getValueType()) { @@ -1044,7 +1058,9 @@ public void resetTestCase() { Array aa = a.clone(); aa.reset(10); - if(aa.getValueType() == ValueType.STRING || aa.getFrameArrayType() == FrameArrayType.OPTIONAL) { + if(aa.getValueType() == ValueType.STRING // + || aa.getFrameArrayType() == FrameArrayType.OPTIONAL // + || aa.getFrameArrayType() == FrameArrayType.RAGGED) { for(int i = 0; i < 10; i++) { assertEquals(null, aa.get(i)); } @@ -1268,7 +1284,7 @@ public void testAppendArray() { @SuppressWarnings("unchecked") public void testAppendValue() { Array aa = a.clone(); - boolean isOptional = aa instanceof OptionalArray; + boolean isOptional = aa instanceof OptionalArray || aa instanceof RaggedArray; try { switch(a.getValueType()) { @@ -1389,7 +1405,7 @@ public void testAppendArrayOptional() { @Test public void fillNull() { Array aa = a.clone(); - boolean isOptional = aa instanceof OptionalArray; + boolean isOptional = aa instanceof OptionalArray || aa instanceof RaggedArray; try { aa.fill((String) null); @@ -1551,7 +1567,7 @@ public void fill1ValueNull() { try { Array aa = a.clone(); - boolean isOptional = aa instanceof OptionalArray; + boolean isOptional = aa instanceof OptionalArray || aa instanceof RaggedArray; switch(a.getValueType()) { case BOOLEAN: ((Array) aa).fill((Boolean) null); @@ -1759,7 +1775,8 @@ protected static Array createDDC(FrameArrayType t, int size, int seed) { return DDCArray .compressToDDC(ArrayFactory.create(generateRandomIntegerNUniqueLengthOpt(size, seed, nUnique))); case INT64: - return DDCArray.compressToDDC(ArrayFactory.create(generateRandomLongNUniqueLengthOpt(size, seed, nUnique))); + return DDCArray + .compressToDDC(ArrayFactory.create(generateRandomLongNUniqueLengthOpt(size, seed, nUnique))); case FP32: return DDCArray .compressToDDC(ArrayFactory.create(generateRandomFloatNUniqueLengthOpt(size, seed, nUnique))); @@ -1773,47 +1790,64 @@ protected static Array createDDC(FrameArrayType t, int size, int seed) { Random r = new Random(seed); switch(r.nextInt(7)) { case 0: - return DDCArray - .compressToDDC(ArrayFactory.create(generateRandomIntegerNUniqueLengthOpt(size, seed, nUnique))); + return DDCArray.compressToDDC( + ArrayFactory.create(generateRandomIntegerNUniqueLengthOpt(size, seed, nUnique))); case 1: - return DDCArray - .compressToDDC(ArrayFactory.create(generateRandomLongNUniqueLengthOpt(size, seed, nUnique))); + return DDCArray.compressToDDC( + ArrayFactory.create(generateRandomLongNUniqueLengthOpt(size, seed, nUnique))); case 2: - return DDCArray - .compressToDDC(ArrayFactory.create(generateRandomDoubleNUniqueLengthOpt(size, seed, nUnique))); + return DDCArray.compressToDDC( + ArrayFactory.create(generateRandomDoubleNUniqueLengthOpt(size, seed, nUnique))); case 3: - return DDCArray - .compressToDDC(ArrayFactory.create(generateRandomFloatNUniqueLengthOpt(size, seed, nUnique))); + return DDCArray.compressToDDC( + ArrayFactory.create(generateRandomFloatNUniqueLengthOpt(size, seed, nUnique))); case 4: - return DDCArray - .compressToDDC(ArrayFactory.create(generateRandomCharacterNUniqueLengthOpt(size, seed, nUnique))); + return DDCArray.compressToDDC( + ArrayFactory.create(generateRandomCharacterNUniqueLengthOpt(size, seed, nUnique))); default: return DDCArray.compressToDDC(ArrayFactory.create(generateRandomBooleanOpt(size, seed))); } + case RAGGED: + Random rand = new Random(seed); + switch(rand.nextInt(7)) { + case 0: + return ArrayFactory.create(generateRandomIntegerOpt(size, seed), size); + case 1: + return ArrayFactory.create(generateRandomLongOpt(size, seed), size); + case 2: + return ArrayFactory.create(generateRandomDoubleOpt(size, seed), size); + case 3: + return ArrayFactory.create(generateRandomFloatOpt(size, seed), size); + case 4: + return ArrayFactory.create(generateRandomCharacterOpt(size, seed), size); + default: + return ArrayFactory.create(generateRandomBooleanOpt(size, seed), size); + } case DDC: Random r2 = new Random(seed); switch(r2.nextInt(7)) { case 0: - return DDCArray - .compressToDDC(ArrayFactory.create(generateRandomIntegerNUniqueLengthOpt(size, seed, nUnique))); + return DDCArray.compressToDDC( + ArrayFactory.create(generateRandomIntegerNUniqueLengthOpt(size, seed, nUnique))); case 1: - return DDCArray - .compressToDDC(ArrayFactory.create(generateRandomLongNUniqueLengthOpt(size, seed, nUnique))); + return DDCArray.compressToDDC( + ArrayFactory.create(generateRandomLongNUniqueLengthOpt(size, seed, nUnique))); case 2: - return DDCArray - .compressToDDC(ArrayFactory.create(generateRandomDoubleNUniqueLengthOpt(size, seed, nUnique))); + return DDCArray.compressToDDC( + ArrayFactory.create(generateRandomDoubleNUniqueLengthOpt(size, seed, nUnique))); case 3: - return DDCArray - .compressToDDC(ArrayFactory.create(generateRandomFloatNUniqueLengthOpt(size, seed, nUnique))); + return DDCArray.compressToDDC( + ArrayFactory.create(generateRandomFloatNUniqueLengthOpt(size, seed, nUnique))); case 4: - return DDCArray - .compressToDDC(ArrayFactory.create(generateRandomCharacterNUniqueLengthOpt(size, seed, nUnique))); + return DDCArray.compressToDDC( + ArrayFactory.create(generateRandomCharacterNUniqueLengthOpt(size, seed, nUnique))); case 5: - return DDCArray - .compressToDDC(ArrayFactory.create(generateRandomStringNUniqueLengthOpt(size, seed, nUnique, 32))); + return DDCArray.compressToDDC( + ArrayFactory.create(generateRandomStringNUniqueLengthOpt(size, seed, nUnique, 32))); default: return DDCArray.compressToDDC(ArrayFactory.create(generateRandomBooleanOpt(size, seed))); } + default: throw new DMLRuntimeException("Unsupported value type: " + t); @@ -1838,6 +1872,7 @@ protected static Array createOptional(FrameArrayType t, int size, int seed) { case CHARACTER: return ArrayFactory.create(generateRandomCharacterOpt(size, seed)); case OPTIONAL: + case RAGGED: // lets not test this case here. Random r = new Random(seed); switch(r.nextInt(7)) { case 0: @@ -1858,23 +1893,23 @@ protected static Array createOptional(FrameArrayType t, int size, int seed) { int nUnique = Math.max(size / 100, 2); switch(r2.nextInt(7)) { case 0: - return DDCArray - .compressToDDC(ArrayFactory.create(generateRandomIntegerNUniqueLengthOpt(size, seed, nUnique))); + return DDCArray.compressToDDC( + ArrayFactory.create(generateRandomIntegerNUniqueLengthOpt(size, seed, nUnique))); case 1: - return DDCArray - .compressToDDC(ArrayFactory.create(generateRandomLongNUniqueLengthOpt(size, seed, nUnique))); + return DDCArray.compressToDDC( + ArrayFactory.create(generateRandomLongNUniqueLengthOpt(size, seed, nUnique))); case 2: - return DDCArray - .compressToDDC(ArrayFactory.create(generateRandomDoubleNUniqueLengthOpt(size, seed, nUnique))); + return DDCArray.compressToDDC( + ArrayFactory.create(generateRandomDoubleNUniqueLengthOpt(size, seed, nUnique))); case 3: - return DDCArray - .compressToDDC(ArrayFactory.create(generateRandomFloatNUniqueLengthOpt(size, seed, nUnique))); + return DDCArray.compressToDDC( + ArrayFactory.create(generateRandomFloatNUniqueLengthOpt(size, seed, nUnique))); case 4: - return DDCArray - .compressToDDC(ArrayFactory.create(generateRandomCharacterNUniqueLengthOpt(size, seed, nUnique))); + return DDCArray.compressToDDC( + ArrayFactory.create(generateRandomCharacterNUniqueLengthOpt(size, seed, nUnique))); case 5: - return DDCArray - .compressToDDC(ArrayFactory.create(generateRandomStringNUniqueLengthOpt(size, seed, nUnique, 32))); + return DDCArray.compressToDDC( + ArrayFactory.create(generateRandomStringNUniqueLengthOpt(size, seed, nUnique, 32))); default: return DDCArray.compressToDDC(ArrayFactory.create(generateRandomBooleanOpt(size, seed))); } @@ -1902,6 +1937,24 @@ protected static Array create(FrameArrayType t, int size, int seed) { return ArrayFactory.create(generateRandomDouble(size, seed)); case CHARACTER: return ArrayFactory.create(generateRandomChar(size, seed)); + case RAGGED: + Random rand = new Random(seed); + switch(rand.nextInt(7)) { + case 0: + return ArrayFactory.create(generateRandomIntegerOpt(size, seed), size); + case 1: + return ArrayFactory.create(generateRandomLongOpt(size, seed), size); + case 2: + return ArrayFactory.create(generateRandomDoubleOpt(size, seed), size); + case 3: + return ArrayFactory.create(generateRandomFloatOpt(size, seed), size); + case 4: + return ArrayFactory.create(generateRandomCharacterOpt(size, seed), size); + case 5: + return ArrayFactory.create(generateRandomString(size, seed), size); + default: + return ArrayFactory.create(generateRandomBooleanOpt(size, seed), size); + } case OPTIONAL: Random r = new Random(seed); switch(r.nextInt(7)) { @@ -1923,23 +1976,23 @@ protected static Array create(FrameArrayType t, int size, int seed) { int nUnique = Math.max(size / 100, 2); switch(r2.nextInt(7)) { case 0: - return DDCArray - .compressToDDC(ArrayFactory.create(generateRandomIntegerNUniqueLengthOpt(size, seed, nUnique))); + return DDCArray.compressToDDC( + ArrayFactory.create(generateRandomIntegerNUniqueLengthOpt(size, seed, nUnique))); case 1: - return DDCArray - .compressToDDC(ArrayFactory.create(generateRandomLongNUniqueLengthOpt(size, seed, nUnique))); + return DDCArray.compressToDDC( + ArrayFactory.create(generateRandomLongNUniqueLengthOpt(size, seed, nUnique))); case 2: - return DDCArray - .compressToDDC(ArrayFactory.create(generateRandomDoubleNUniqueLengthOpt(size, seed, nUnique))); + return DDCArray.compressToDDC( + ArrayFactory.create(generateRandomDoubleNUniqueLengthOpt(size, seed, nUnique))); case 3: - return DDCArray - .compressToDDC(ArrayFactory.create(generateRandomFloatNUniqueLengthOpt(size, seed, nUnique))); + return DDCArray.compressToDDC( + ArrayFactory.create(generateRandomFloatNUniqueLengthOpt(size, seed, nUnique))); case 4: - return DDCArray - .compressToDDC(ArrayFactory.create(generateRandomCharacterNUniqueLengthOpt(size, seed, nUnique))); + return DDCArray.compressToDDC( + ArrayFactory.create(generateRandomCharacterNUniqueLengthOpt(size, seed, nUnique))); case 5: - return DDCArray - .compressToDDC(ArrayFactory.create(generateRandomStringNUniqueLengthOpt(size, seed, nUnique, 32))); + return DDCArray.compressToDDC( + ArrayFactory.create(generateRandomStringNUniqueLengthOpt(size, seed, nUnique, 32))); default: return DDCArray.compressToDDC(ArrayFactory.create(generateRandomBooleanOpt(size, seed))); }