From 1a517202eef8a5f024ca9e4ead751e78fbcd9ea5 Mon Sep 17 00:00:00 2001 From: baunsgaard Date: Sat, 12 Aug 2023 01:20:21 +0200 Subject: [PATCH] IDictionary --- .../runtime/compress/cocode/CoCodeHybrid.java | 2 +- .../colgroup/AColGroupCompressed.java | 4 +- .../compress/colgroup/AColGroupOffset.java | 4 +- .../compress/colgroup/ADictBasedColGroup.java | 21 +- .../colgroup/AMorphingMMColGroup.java | 10 +- .../sysds/runtime/compress/colgroup/ASDC.java | 4 +- .../runtime/compress/colgroup/ASDCZero.java | 4 +- .../compress/colgroup/ColGroupConst.java | 22 +- .../compress/colgroup/ColGroupDDC.java | 16 +- .../compress/colgroup/ColGroupDDCFOR.java | 34 +- .../compress/colgroup/ColGroupOLE.java | 18 +- .../compress/colgroup/ColGroupRLE.java | 16 +- .../compress/colgroup/ColGroupSDC.java | 34 +- .../compress/colgroup/ColGroupSDCFOR.java | 34 +- .../compress/colgroup/ColGroupSDCSingle.java | 26 +- .../colgroup/ColGroupSDCSingleZeros.java | 28 +- .../compress/colgroup/ColGroupSDCZeros.java | 26 +- .../colgroup/IContainADictionary.java | 4 +- .../colgroup/dictionary/ADictionary.java | 895 +---------------- .../dictionary/DictLibMatrixMult.java | 14 +- .../colgroup/dictionary/Dictionary.java | 23 +- .../colgroup/dictionary/IDictionary.java | 940 ++++++++++++++++++ .../dictionary/IdentityDictionary.java | 24 +- .../dictionary/IdentityDictionarySlice.java | 2 +- .../dictionary/MatrixBlockDictionary.java | 24 +- .../dictionary/PlaceHolderDictionary.java | 474 +++++++++ .../colgroup/dictionary/QDictionary.java | 24 +- .../compress/colgroup/mapping/AMapToData.java | 30 +- .../compress/colgroup/mapping/MapToBit.java | 6 +- .../compress/colgroup/mapping/MapToZero.java | 4 +- .../compress/colgroup/scheme/DDCScheme.java | 4 +- .../compress/colgroup/scheme/SDCScheme.java | 4 +- .../runtime/compress/lib/CLALibSeparator.java | 75 ++ .../ColGroupMorphingPerformanceCompare.java | 4 +- .../colgroup/ColGroupNegativeTests.java | 10 +- .../compress/dictionary/DictionaryTests.java | 30 +- 
.../compress/io/IOCompressionTestUtils.java | 6 +- .../io/SeparateDictionariesAndIndexes.java | 46 + 38 files changed, 1814 insertions(+), 1132 deletions(-) create mode 100644 src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/IDictionary.java create mode 100644 src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/PlaceHolderDictionary.java create mode 100644 src/main/java/org/apache/sysds/runtime/compress/lib/CLALibSeparator.java create mode 100644 src/test/java/org/apache/sysds/test/component/compress/io/SeparateDictionariesAndIndexes.java diff --git a/src/main/java/org/apache/sysds/runtime/compress/cocode/CoCodeHybrid.java b/src/main/java/org/apache/sysds/runtime/compress/cocode/CoCodeHybrid.java index 554f90d95d0..6dc53739d24 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/cocode/CoCodeHybrid.java +++ b/src/main/java/org/apache/sysds/runtime/compress/cocode/CoCodeHybrid.java @@ -39,7 +39,7 @@ protected CompressedSizeInfo coCodeColumns(CompressedSizeInfo colInfos, int k) { final int startSize = colInfos.getInfo().size(); if(startSize == 1) return colInfos; // nothing to join when there only is one column - else if(startSize <= 5) {// Greedy all compare all if small number of columns + else if(startSize <= 16) {// Greedy all compare all if small number of columns LOG.debug("Hybrid chose to do greedy cocode because of few columns"); CoCodeGreedy gd = new CoCodeGreedy(_sest, _cest, _cs); return colInfos.setInfo(gd.combine(colInfos.getInfo(), k)); diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroupCompressed.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroupCompressed.java index 00d23345f7f..97f0d8058a8 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroupCompressed.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroupCompressed.java @@ -21,7 +21,7 @@ import org.apache.sysds.runtime.DMLRuntimeException; import 
org.apache.sysds.runtime.DMLScriptException; -import org.apache.sysds.runtime.compress.colgroup.dictionary.ADictionary; +import org.apache.sysds.runtime.compress.colgroup.dictionary.IDictionary; import org.apache.sysds.runtime.compress.colgroup.dictionary.MatrixBlockDictionary; import org.apache.sysds.runtime.compress.colgroup.indexes.IColIndex; import org.apache.sysds.runtime.data.SparseBlock; @@ -205,7 +205,7 @@ public final void tsmm(MatrixBlock ret, int nRows) { protected abstract void tsmm(double[] result, int numColumns, int nRows); - protected static void tsmm(double[] result, int numColumns, int[] counts, ADictionary dict, IColIndex colIndexes) { + protected static void tsmm(double[] result, int numColumns, int[] counts, IDictionary dict, IColIndex colIndexes) { dict = dict.getMBDict(colIndexes.size()); final MatrixBlock mb = ((MatrixBlockDictionary) dict).getMatrixBlock(); if(mb.isInSparseFormat()) diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroupOffset.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroupOffset.java index fc6843515ac..b742917fe87 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroupOffset.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroupOffset.java @@ -23,7 +23,7 @@ import java.io.DataOutput; import java.io.IOException; -import org.apache.sysds.runtime.compress.colgroup.dictionary.ADictionary; +import org.apache.sysds.runtime.compress.colgroup.dictionary.IDictionary; import org.apache.sysds.runtime.compress.colgroup.indexes.IColIndex; import org.apache.sysds.utils.MemoryEstimates; @@ -46,7 +46,7 @@ public abstract class AColGroupOffset extends APreAgg { /** If the column group contains unassigned rows. 
*/ protected final boolean _zeros; - protected AColGroupOffset(IColIndex colIndices, int numRows, boolean zeros, ADictionary dict, int[] ptr, char[] data, int[] cachedCounts) { + protected AColGroupOffset(IColIndex colIndices, int numRows, boolean zeros, IDictionary dict, int[] ptr, char[] data, int[] cachedCounts) { super(colIndices, dict, cachedCounts); _numRows = numRows; _zeros = zeros; diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ADictBasedColGroup.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ADictBasedColGroup.java index d1f81a73c84..c9eece4aa1b 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ADictBasedColGroup.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ADictBasedColGroup.java @@ -24,8 +24,9 @@ import java.util.HashSet; import java.util.Set; -import org.apache.sysds.runtime.compress.colgroup.dictionary.ADictionary; +import org.apache.sysds.runtime.compress.colgroup.dictionary.IDictionary; import org.apache.sysds.runtime.compress.colgroup.dictionary.Dictionary; +import org.apache.sysds.runtime.compress.colgroup.dictionary.IDictionary; import org.apache.sysds.runtime.compress.colgroup.dictionary.IdentityDictionary; import org.apache.sysds.runtime.compress.colgroup.dictionary.MatrixBlockDictionary; import org.apache.sysds.runtime.compress.colgroup.indexes.ColIndexFactory; @@ -37,15 +38,15 @@ public abstract class ADictBasedColGroup extends AColGroupCompressed implements IContainADictionary { private static final long serialVersionUID = -3737025296618703668L; /** Distinct value tuples associated with individual bitmaps. */ - protected final ADictionary _dict; + protected final IDictionary _dict; /** - * A Abstract class for column groups that contain ADictionary for values. + * A Abstract class for column groups that contain IDictionary for values. 
* * @param colIndices The Column indexes * @param dict The dictionary to contain the distinct tuples */ - protected ADictBasedColGroup(IColIndex colIndices, ADictionary dict) { + protected ADictBasedColGroup(IColIndex colIndices, IDictionary dict) { super(colIndices); _dict = dict; if(dict == null) @@ -53,7 +54,7 @@ protected ADictBasedColGroup(IColIndex colIndices, ADictionary dict) { } - public ADictionary getDictionary() { + public IDictionary getDictionary() { return _dict; } @@ -197,14 +198,14 @@ public final AColGroup rightMultByMatrix(MatrixBlock right, IColIndex allCols) { return null; final int nVals = getNumValues(); - final ADictionary preAgg = (right.isInSparseFormat()) ? // Chose Sparse or Dense + final IDictionary preAgg = (right.isInSparseFormat()) ? // Chose Sparse or Dense rightMMPreAggSparse(nVals, right.getSparseBlock(), agCols, 0, nCol) : // sparse _dict.preaggValuesFromDense(nVals, _colIndexes, agCols, right.getDenseBlockValues(), nCol); // dense return allocateRightMultiplication(right, agCols, preAgg); } protected abstract AColGroup allocateRightMultiplication(MatrixBlock right, IColIndex colIndexes, - ADictionary preAgg); + IDictionary preAgg); /** * Find the minimum number of columns that are effected by the right multiplication @@ -269,7 +270,7 @@ protected IColIndex rightMMGetColsSparse(SparseBlock b, int retCols, IColIndex a return ColIndexFactory.create(aggregateColumns); } - private ADictionary rightMMPreAggSparse(int numVals, SparseBlock b, IColIndex aggregateColumns, int cl, int cu) { + private IDictionary rightMMPreAggSparse(int numVals, SparseBlock b, IColIndex aggregateColumns, int cl, int cu) { final double[] ret = new double[numVals * aggregateColumns.size()]; for(int h = 0; h < _colIndexes.size(); h++) { final int colIdx = _colIndexes.get(h); @@ -300,10 +301,10 @@ public final AColGroup copyAndSet(IColIndex colIndexes) { return copyAndSet(colIndexes, _dict); } - protected final AColGroup copyAndSet(ADictionary newDictionary) { 
+ protected final AColGroup copyAndSet(IDictionary newDictionary) { return copyAndSet(_colIndexes, newDictionary); } - protected abstract AColGroup copyAndSet(IColIndex colIndexes, ADictionary newDictionary); + protected abstract AColGroup copyAndSet(IColIndex colIndexes, IDictionary newDictionary); } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/AMorphingMMColGroup.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/AMorphingMMColGroup.java index bb85e786e22..fc2c3642015 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/AMorphingMMColGroup.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/AMorphingMMColGroup.java @@ -20,7 +20,7 @@ package org.apache.sysds.runtime.compress.colgroup; import org.apache.sysds.runtime.compress.DMLCompressionException; -import org.apache.sysds.runtime.compress.colgroup.dictionary.ADictionary; +import org.apache.sysds.runtime.compress.colgroup.dictionary.IDictionary; import org.apache.sysds.runtime.compress.colgroup.indexes.IColIndex; import org.apache.sysds.runtime.compress.lib.CLALibLeftMultBy; import org.apache.sysds.runtime.compress.lib.CLALibTSMM; @@ -37,14 +37,14 @@ public abstract class AMorphingMMColGroup extends AColGroupValue { private static final long serialVersionUID = -4265713396790607199L; /** - * A Abstract class for column groups that contain ADictionary for values. + * A Abstract class for column groups that contain IDictionary for values. 
* * @param colIndices The Column indexes * @param dict The dictionary to contain the distinct tuples * @param cachedCounts The cached counts of the distinct tuples (can be null since it should be possible to * reconstruct the counts on demand) */ - protected AMorphingMMColGroup(IColIndex colIndices, ADictionary dict, int[] cachedCounts) { + protected AMorphingMMColGroup(IColIndex colIndices, IDictionary dict, int[] cachedCounts) { super(colIndices, dict, cachedCounts); } @@ -161,7 +161,7 @@ protected IColIndex rightMMGetColsSparse(SparseBlock b, int nCols, IColIndex all } @Override - protected AColGroup allocateRightMultiplication(MatrixBlock right, IColIndex colIndexes, ADictionary preAgg) { + protected AColGroup allocateRightMultiplication(MatrixBlock right, IColIndex colIndexes, IDictionary preAgg) { LOG.warn("right mm should not be called directly on a morphing column group"); final double[] common = getCommon(); final int rc = right.getNumColumns(); @@ -195,7 +195,7 @@ protected AColGroup allocateRightMultiplication(MatrixBlock right, IColIndex col } protected abstract AColGroup allocateRightMultiplicationCommon(double[] common, IColIndex colIndexes, - ADictionary preAgg); + IDictionary preAgg); /** * extract common value from group and return non morphing group diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ASDC.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ASDC.java index 3c63cca7d2f..633adb3d01b 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ASDC.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ASDC.java @@ -19,7 +19,7 @@ package org.apache.sysds.runtime.compress.colgroup; -import org.apache.sysds.runtime.compress.colgroup.dictionary.ADictionary; +import org.apache.sysds.runtime.compress.colgroup.dictionary.IDictionary; import org.apache.sysds.runtime.compress.colgroup.indexes.IColIndex; import org.apache.sysds.runtime.compress.colgroup.offset.AOffset; import 
org.apache.sysds.runtime.compress.colgroup.scheme.ICLAScheme; @@ -42,7 +42,7 @@ public abstract class ASDC extends AMorphingMMColGroup implements AOffsetsGroup /** The number of rows in this column group */ protected final int _numRows; - protected ASDC(IColIndex colIndices, int numRows, ADictionary dict, AOffset offsets, int[] cachedCounts) { + protected ASDC(IColIndex colIndices, int numRows, IDictionary dict, AOffset offsets, int[] cachedCounts) { super(colIndices, dict, cachedCounts); _indexes = offsets; _numRows = numRows; diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ASDCZero.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ASDCZero.java index 23ce0be2556..77fb11e77ea 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ASDCZero.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ASDCZero.java @@ -19,7 +19,7 @@ package org.apache.sysds.runtime.compress.colgroup; -import org.apache.sysds.runtime.compress.colgroup.dictionary.ADictionary; +import org.apache.sysds.runtime.compress.colgroup.dictionary.IDictionary; import org.apache.sysds.runtime.compress.colgroup.dictionary.MatrixBlockDictionary; import org.apache.sysds.runtime.compress.colgroup.indexes.IColIndex; import org.apache.sysds.runtime.compress.colgroup.offset.AIterator; @@ -40,7 +40,7 @@ public abstract class ASDCZero extends APreAgg implements AOffsetsGroup, IContai /** The number of rows in this column group */ protected final int _numRows; - protected ASDCZero(IColIndex colIndices, int numRows, ADictionary dict, AOffset offsets, int[] cachedCounts) { + protected ASDCZero(IColIndex colIndices, int numRows, IDictionary dict, AOffset offsets, int[] cachedCounts) { super(colIndices, dict, cachedCounts); _indexes = offsets; _numRows = numRows; diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupConst.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupConst.java index 
7e99d0e24b0..493a2a71192 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupConst.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupConst.java @@ -23,9 +23,9 @@ import java.io.IOException; import org.apache.sysds.runtime.compress.DMLCompressionException; -import org.apache.sysds.runtime.compress.colgroup.dictionary.ADictionary; import org.apache.sysds.runtime.compress.colgroup.dictionary.Dictionary; import org.apache.sysds.runtime.compress.colgroup.dictionary.DictionaryFactory; +import org.apache.sysds.runtime.compress.colgroup.dictionary.IDictionary; import org.apache.sysds.runtime.compress.colgroup.dictionary.IdentityDictionary; import org.apache.sysds.runtime.compress.colgroup.dictionary.MatrixBlockDictionary; import org.apache.sysds.runtime.compress.colgroup.indexes.ColIndexFactory; @@ -58,7 +58,7 @@ public class ColGroupConst extends ADictBasedColGroup implements IContainDefault * @param colIndices The Colum indexes for the column group. * @param dict The dictionary containing one tuple for the entire compression. */ - private ColGroupConst(IColIndex colIndices, ADictionary dict) { + private ColGroupConst(IColIndex colIndices, IDictionary dict) { super(colIndices, dict); } @@ -70,7 +70,7 @@ private ColGroupConst(IColIndex colIndices, ADictionary dict) { * @param dict The dictionary to use * @return A Colgroup either const or empty. */ - public static AColGroup create(IColIndex colIndices, ADictionary dict) { + public static AColGroup create(IColIndex colIndices, IDictionary dict) { if(dict == null) return new ColGroupEmpty(colIndices); else if(dict.getNumberOfValues(colIndices.size()) > 1) { @@ -147,7 +147,7 @@ public static AColGroup create(IColIndex cols, double[] values) { * @param dict The dictionary to contain int the Constant group. * @return A Constant column group. 
*/ - public static AColGroup create(int numCols, ADictionary dict) { + public static AColGroup create(int numCols, IDictionary dict) { if(dict instanceof MatrixBlockDictionary) { MatrixBlock mbd = ((MatrixBlockDictionary) dict).getMatrixBlock(); if(mbd.getNumColumns() != numCols && mbd.getNumRows() != 1) { @@ -444,14 +444,14 @@ protected AColGroup sliceSingleColumn(int idx) { if(v == 0) return new ColGroupEmpty(colIndexes); else { - ADictionary retD = Dictionary.create(new double[] {_dict.getValue(idx)}); + IDictionary retD = Dictionary.create(new double[] {_dict.getValue(idx)}); return create(colIndexes, retD); } } @Override protected AColGroup sliceMultiColumns(int idStart, int idEnd, IColIndex outputCols) { - ADictionary retD = _dict.sliceOutColumnRange(idStart, idEnd, _colIndexes.size()); + IDictionary retD = _dict.sliceOutColumnRange(idStart, idEnd, _colIndexes.size()); return create(outputCols, retD); } @@ -467,7 +467,7 @@ public long getNumberNonZeros(int nRows) { @Override public AColGroup replace(double pattern, double replace) { - ADictionary replaced = _dict.replace(pattern, replace, _colIndexes.size()); + IDictionary replaced = _dict.replace(pattern, replace, _colIndexes.size()); return create(_colIndexes, replaced); } @@ -517,7 +517,7 @@ public CM_COV_Object centralMoment(CMOperator op, int nRows) { @Override public AColGroup rexpandCols(int max, boolean ignore, boolean cast, int nRows) { - ADictionary d = _dict.rexpandCols(max, ignore, cast, _colIndexes.size()); + IDictionary d = _dict.rexpandCols(max, ignore, cast, _colIndexes.size()); if(d == null) return ColGroupEmpty.create(max); else @@ -534,12 +534,12 @@ protected AColGroup copyAndSet(IColIndex colIndexes, double[] newDictionary) { return create(colIndexes, Dictionary.create(newDictionary)); } - protected AColGroup copyAndSet(IColIndex colIndexes, ADictionary newDictionary) { + protected AColGroup copyAndSet(IColIndex colIndexes, IDictionary newDictionary) { return create(colIndexes, 
newDictionary); } @Override - protected AColGroup allocateRightMultiplication(MatrixBlock right, IColIndex colIndexes, ADictionary preAgg) { + protected AColGroup allocateRightMultiplication(MatrixBlock right, IColIndex colIndexes, IDictionary preAgg) { if(colIndexes != null && preAgg != null) return create(colIndexes, preAgg); else @@ -548,7 +548,7 @@ protected AColGroup allocateRightMultiplication(MatrixBlock right, IColIndex col public static ColGroupConst read(DataInput in) throws IOException { IColIndex cols = ColIndexFactory.read(in); - ADictionary dict = DictionaryFactory.read(in); + IDictionary dict = DictionaryFactory.read(in); return new ColGroupConst(cols, dict); } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java index 3882fc2999f..9eb2b22cb0e 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java @@ -28,9 +28,9 @@ import org.apache.sysds.runtime.DMLRuntimeException; import org.apache.sysds.runtime.compress.CompressedMatrixBlock; import org.apache.sysds.runtime.compress.DMLCompressionException; -import org.apache.sysds.runtime.compress.colgroup.dictionary.ADictionary; import org.apache.sysds.runtime.compress.colgroup.dictionary.Dictionary; import org.apache.sysds.runtime.compress.colgroup.dictionary.DictionaryFactory; +import org.apache.sysds.runtime.compress.colgroup.dictionary.IDictionary; import org.apache.sysds.runtime.compress.colgroup.dictionary.MatrixBlockDictionary; import org.apache.sysds.runtime.compress.colgroup.indexes.ColIndexFactory; import org.apache.sysds.runtime.compress.colgroup.indexes.IColIndex; @@ -62,7 +62,7 @@ public class ColGroupDDC extends APreAgg implements IMapToDataGroup { protected final AMapToData _data; - private ColGroupDDC(IColIndex colIndexes, ADictionary dict, AMapToData data, int[] cachedCounts) 
{ + private ColGroupDDC(IColIndex colIndexes, IDictionary dict, AMapToData data, int[] cachedCounts) { super(colIndexes, dict, cachedCounts); _data = data; @@ -77,7 +77,7 @@ private ColGroupDDC(IColIndex colIndexes, ADictionary dict, AMapToData data, int } - public static AColGroup create(IColIndex colIndexes, ADictionary dict, AMapToData data, int[] cachedCounts) { + public static AColGroup create(IColIndex colIndexes, IDictionary dict, AMapToData data, int[] cachedCounts) { if(data.getUnique() == 1) return ColGroupConst.create(colIndexes, dict); else if(dict == null) @@ -431,7 +431,7 @@ public AColGroup unaryOperation(UnaryOperator op) { @Override public AColGroup binaryRowOpLeft(BinaryOperator op, double[] v, boolean isRowSafe) { - ADictionary ret = _dict.binOpLeft(op, v, _colIndexes); + IDictionary ret = _dict.binOpLeft(op, v, _colIndexes); return create(_colIndexes, ret, _data, getCachedCounts()); } @@ -442,7 +442,7 @@ public AColGroup binaryRowOpRight(BinaryOperator op, double[] v, boolean isRowSa final double[] reference = ColGroupUtils.binaryDefRowRight(op, v, _colIndexes); return ColGroupDDCFOR.create(_colIndexes, _dict, _data, getCachedCounts(), reference); } - final ADictionary ret = _dict.binOpRight(op, v, _colIndexes); + final IDictionary ret = _dict.binOpRight(op, v, _colIndexes); return create(_colIndexes, ret, _data, getCachedCounts()); } @@ -454,7 +454,7 @@ public void write(DataOutput out) throws IOException { public static ColGroupDDC read(DataInput in) throws IOException { IColIndex cols = ColIndexFactory.read(in); - ADictionary dict = DictionaryFactory.read(in); + IDictionary dict = DictionaryFactory.read(in); AMapToData data = MapToFactory.readIn(in); return new ColGroupDDC(cols, dict, data, null); } @@ -494,7 +494,7 @@ public boolean containsValue(double pattern) { } @Override - protected AColGroup allocateRightMultiplication(MatrixBlock right, IColIndex colIndexes, ADictionary preAgg) { + protected AColGroup 
allocateRightMultiplication(MatrixBlock right, IColIndex colIndexes, IDictionary preAgg) { if(preAgg != null) return create(colIndexes, preAgg, _data, getCachedCounts()); else @@ -512,7 +512,7 @@ public AColGroup sliceRows(int rl, int ru) { } @Override - protected AColGroup copyAndSet(IColIndex colIndexes, ADictionary newDictionary) { + protected AColGroup copyAndSet(IColIndex colIndexes, IDictionary newDictionary) { return create(colIndexes, newDictionary, _data, getCachedCounts()); } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCFOR.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCFOR.java index 70029e21ec0..2e7e241accf 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCFOR.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCFOR.java @@ -26,7 +26,7 @@ import org.apache.commons.lang3.NotImplementedException; import org.apache.sysds.runtime.DMLRuntimeException; -import org.apache.sysds.runtime.compress.colgroup.dictionary.ADictionary; +import org.apache.sysds.runtime.compress.colgroup.dictionary.IDictionary; import org.apache.sysds.runtime.compress.colgroup.dictionary.DictionaryFactory; import org.apache.sysds.runtime.compress.colgroup.dictionary.MatrixBlockDictionary; import org.apache.sysds.runtime.compress.colgroup.indexes.ColIndexFactory; @@ -63,14 +63,14 @@ public class ColGroupDDCFOR extends AMorphingMMColGroup implements IFrameOfRefer /** Reference values in this column group */ protected final double[] _reference; - private ColGroupDDCFOR(IColIndex colIndexes, ADictionary dict, double[] reference, AMapToData data, + private ColGroupDDCFOR(IColIndex colIndexes, IDictionary dict, double[] reference, AMapToData data, int[] cachedCounts) { super(colIndexes, dict, cachedCounts); _data = data; _reference = reference; } - public static AColGroup create(IColIndex colIndexes, ADictionary dict, AMapToData data, int[] cachedCounts, + public static 
AColGroup create(IColIndex colIndexes, IDictionary dict, AMapToData data, int[] cachedCounts, double[] reference) { final boolean allZero = ColGroupUtils.allZero(reference); if(dict == null && allZero) @@ -154,11 +154,11 @@ public AColGroup scalarOperation(ScalarOperator op) { if(op.fn instanceof Plus || op.fn instanceof Minus) return create(_colIndexes, _dict, _data, getCachedCounts(), newRef); else if(op.fn instanceof Multiply || op.fn instanceof Divide) { - final ADictionary newDict = _dict.applyScalarOp(op); + final IDictionary newDict = _dict.applyScalarOp(op); return create(_colIndexes, newDict, _data, getCachedCounts(), newRef); } else { - final ADictionary newDict = _dict.applyScalarOpWithReference(op, _reference, newRef); + final IDictionary newDict = _dict.applyScalarOpWithReference(op, _reference, newRef); return create(_colIndexes, newDict, _data, getCachedCounts(), newRef); } } @@ -166,7 +166,7 @@ else if(op.fn instanceof Multiply || op.fn instanceof Divide) { @Override public AColGroup unaryOperation(UnaryOperator op) { final double[] newRef = ColGroupUtils.unaryOperator(op, _reference); - final ADictionary newDict = _dict.applyUnaryOpWithReference(op, _reference, newRef); + final IDictionary newDict = _dict.applyUnaryOpWithReference(op, _reference, newRef); return create(_colIndexes, newDict, _data, getCachedCounts(), newRef); } @@ -180,11 +180,11 @@ public AColGroup binaryRowOpLeft(BinaryOperator op, double[] v, boolean isRowSaf return create(_colIndexes, _dict, _data, getCachedCounts(), newRef); else if(op.fn instanceof Multiply || op.fn instanceof Divide) { // possible to simply process on dict and keep reference - final ADictionary newDict = _dict.binOpLeft(op, v, _colIndexes); + final IDictionary newDict = _dict.binOpLeft(op, v, _colIndexes); return create(_colIndexes, newDict, _data, getCachedCounts(), newRef); } else { // have to apply reference while processing - final ADictionary newDict = _dict.binOpLeftWithReference(op, v, _colIndexes, 
_reference, newRef); + final IDictionary newDict = _dict.binOpLeftWithReference(op, v, _colIndexes, _reference, newRef); return create(_colIndexes, newDict, _data, getCachedCounts(), newRef); } } @@ -199,11 +199,11 @@ public AColGroup binaryRowOpRight(BinaryOperator op, double[] v, boolean isRowSa return create(_colIndexes, _dict, _data, getCachedCounts(), newRef); else if(op.fn instanceof Multiply || op.fn instanceof Divide) { // possible to simply process on dict and keep reference - final ADictionary newDict = _dict.binOpRight(op, v, _colIndexes); + final IDictionary newDict = _dict.binOpRight(op, v, _colIndexes); return create(_colIndexes, newDict, _data, getCachedCounts(), newRef); } else { // have to apply reference while processing - final ADictionary newDict = _dict.binOpRightWithReference(op, v, _colIndexes, _reference, newRef); + final IDictionary newDict = _dict.binOpRightWithReference(op, v, _colIndexes, _reference, newRef); return create(_colIndexes, newDict, _data, getCachedCounts(), newRef); } } @@ -218,7 +218,7 @@ public void write(DataOutput out) throws IOException { public static ColGroupDDCFOR read(DataInput in) throws IOException { IColIndex cols = ColIndexFactory.read(in); - ADictionary dict = DictionaryFactory.read(in); + IDictionary dict = DictionaryFactory.read(in); AMapToData data = MapToFactory.readIn(in); double[] ref = ColGroupIO.readDoubleArray(cols.size(), in); return new ColGroupDDCFOR(cols, dict, ref, data, null); @@ -241,7 +241,7 @@ public double getCost(ComputationCostEstimator e, int nRows) { @Override public AColGroup replace(double pattern, double replace) { - final ADictionary newDict = _dict.replaceWithReference(pattern, replace, _reference); + final IDictionary newDict = _dict.replaceWithReference(pattern, replace, _reference); boolean patternInReference = false; for(double d : _reference) if(pattern == d) { @@ -339,7 +339,7 @@ protected void computeColProduct(double[] c, int nRows) { @Override protected AColGroup 
sliceMultiColumns(int idStart, int idEnd, IColIndex outputCols) { - ADictionary retDict = _dict.sliceOutColumnRange(idStart, idEnd, _colIndexes.size()); + IDictionary retDict = _dict.sliceOutColumnRange(idStart, idEnd, _colIndexes.size()); final double[] newDef = new double[idEnd - idStart]; for(int i = idStart, j = 0; i < idEnd; i++, j++) newDef[j] = _reference[i]; @@ -352,7 +352,7 @@ protected AColGroup sliceSingleColumn(int idx) { if(_colIndexes.size() == 1) // early abort, only single column already. return create(retIndexes, _dict, _data, getCounts(), _reference); final double[] newDef = new double[] {_reference[idx]}; - final ADictionary retDict = _dict.sliceOutColumnRange(idx, idx + 1, _colIndexes.size()); + final IDictionary retDict = _dict.sliceOutColumnRange(idx, idx + 1, _colIndexes.size()); return create(retIndexes, retDict, _data, getCounts(), newDef); } @@ -382,7 +382,7 @@ public AColGroup extractCommon(double[] constV) { @Override public AColGroup rexpandCols(int max, boolean ignore, boolean cast, int nRows) { final int def = (int) _reference[0]; - ADictionary d = _dict.rexpandColsWithReference(max, ignore, cast, def); + IDictionary d = _dict.rexpandColsWithReference(max, ignore, cast, def); if(d == null) { if(def <= 0 || def > max) @@ -425,7 +425,7 @@ public double[] getCommon() { } @Override - protected AColGroup allocateRightMultiplicationCommon(double[] common, IColIndex colIndexes, ADictionary preAgg) { + protected AColGroup allocateRightMultiplicationCommon(double[] common, IColIndex colIndexes, IDictionary preAgg) { return create(colIndexes, preAgg, _data, getCachedCounts(), common); } @@ -436,7 +436,7 @@ public AColGroup sliceRows(int rl, int ru) { } @Override - protected AColGroup copyAndSet(IColIndex colIndexes, ADictionary newDictionary) { + protected AColGroup copyAndSet(IColIndex colIndexes, IDictionary newDictionary) { return create(colIndexes, newDictionary, _data, getCachedCounts(), _reference); } diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupOLE.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupOLE.java index cdec096da83..631da3edd17 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupOLE.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupOLE.java @@ -26,7 +26,7 @@ import org.apache.commons.lang3.NotImplementedException; import org.apache.sysds.runtime.compress.CompressionSettings; import org.apache.sysds.runtime.compress.bitmap.ABitmap; -import org.apache.sysds.runtime.compress.colgroup.dictionary.ADictionary; +import org.apache.sysds.runtime.compress.colgroup.dictionary.IDictionary; import org.apache.sysds.runtime.compress.colgroup.dictionary.Dictionary; import org.apache.sysds.runtime.compress.colgroup.dictionary.DictionaryFactory; import org.apache.sysds.runtime.compress.colgroup.indexes.ColIndexFactory; @@ -49,19 +49,19 @@ public class ColGroupOLE extends AColGroupOffset { private static final long serialVersionUID = 5723227906925121066L; - private ColGroupOLE(IColIndex colIndices, int numRows, boolean zero, ADictionary dict, char[] bitmaps, + private ColGroupOLE(IColIndex colIndices, int numRows, boolean zero, IDictionary dict, char[] bitmaps, int[] bitmapOffs, int[] counts) { super(colIndices, numRows, zero, dict, bitmapOffs, bitmaps, counts); } - protected static AColGroup create(IColIndex colIndices, int numRows, boolean zeros, ADictionary dict, char[] bitmaps, + protected static AColGroup create(IColIndex colIndices, int numRows, boolean zeros, IDictionary dict, char[] bitmaps, int[] bitmapOffs, int[] counts) { return new ColGroupOLE(colIndices, numRows, zeros, dict, bitmaps, bitmapOffs, counts); } protected static AColGroup compressOLE(IColIndex colIndexes, ABitmap ubm, int nRow, double tupleSparsity) { - ADictionary dict = DictionaryFactory.create(ubm, tupleSparsity); + IDictionary dict = DictionaryFactory.create(ubm, tupleSparsity); final int numVals = 
ubm.getNumValues(); char[][] lBitMaps = new char[numVals][]; @@ -187,7 +187,7 @@ public AColGroup scalarOperation(ScalarOperator op) { "Not implemented because dictionaries no longer should support extending by a tuple" + " Ideally implement a modification such that OLE becomes SDC group when materializing Zero tuples"); - // ADictionary rvalues = _dict.applyScalarOp(op, val0, getNumCols()); + // IDictionary rvalues = _dict.applyScalarOp(op, val0, getNumCols()); // char[] lbitmap = genOffsetBitmap(loff, loff.length); // char[] rbitmaps = Arrays.copyOf(_data, _data.length + lbitmap.length); // System.arraycopy(lbitmap, 0, rbitmaps, _data.length, lbitmap.length); @@ -230,7 +230,7 @@ public AColGroup binaryRowOpRight(BinaryOperator op, double[] v, boolean isRowSa // return new ColGroupOLE(_colIndexes, _numRows, false, applyBinaryRowOp(op, v, true, left), _data, _ptr, // getCachedCounts()); // } - // ADictionary rvalues = applyBinaryRowOp(op, v, sparseSafe, left); + // IDictionary rvalues = applyBinaryRowOp(op, v, sparseSafe, left); // char[] lbitmap = genOffsetBitmap(loff, loff.length); // char[] rbitmaps = Arrays.copyOf(_data, _data.length + lbitmap.length); // System.arraycopy(lbitmap, 0, rbitmaps, _data.length, lbitmap.length); @@ -492,7 +492,7 @@ public void leftMultByMatrixNoPreAgg(MatrixBlock matrix, MatrixBlock result, int // } @Override - protected AColGroup allocateRightMultiplication(MatrixBlock right, IColIndex colIndexes, ADictionary preAgg) { + protected AColGroup allocateRightMultiplication(MatrixBlock right, IColIndex colIndexes, IDictionary preAgg) { throw new NotImplementedException(); } @@ -642,7 +642,7 @@ public double getCost(ComputationCostEstimator e, int nRows) { public static ColGroupOLE read(DataInput in, int nRows) throws IOException { IColIndex cols = ColIndexFactory.read(in); - ADictionary dict = DictionaryFactory.read(in); + IDictionary dict = DictionaryFactory.read(in); int[] ptr = readPointers(in); char[] data = readData(in); boolean 
zeros = in.readBoolean(); @@ -655,7 +655,7 @@ public AColGroup sliceRows(int rl, int ru) { } @Override - protected AColGroup copyAndSet(IColIndex colIndexes, ADictionary newDictionary) { + protected AColGroup copyAndSet(IColIndex colIndexes, IDictionary newDictionary) { return create(colIndexes, _numRows, _zeros, newDictionary, _data, _ptr, getCachedCounts()); } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupRLE.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupRLE.java index 1840d248764..d1c62387f1f 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupRLE.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupRLE.java @@ -27,7 +27,7 @@ import org.apache.commons.lang3.NotImplementedException; import org.apache.sysds.runtime.compress.bitmap.ABitmap; -import org.apache.sysds.runtime.compress.colgroup.dictionary.ADictionary; +import org.apache.sysds.runtime.compress.colgroup.dictionary.IDictionary; import org.apache.sysds.runtime.compress.colgroup.dictionary.Dictionary; import org.apache.sysds.runtime.compress.colgroup.dictionary.DictionaryFactory; import org.apache.sysds.runtime.compress.colgroup.indexes.ColIndexFactory; @@ -50,12 +50,12 @@ public class ColGroupRLE extends AColGroupOffset { private static final long serialVersionUID = -1560710477952862791L; - private ColGroupRLE(IColIndex colIndexes, int numRows, boolean zeros, ADictionary dict, char[] bitmaps, + private ColGroupRLE(IColIndex colIndexes, int numRows, boolean zeros, IDictionary dict, char[] bitmaps, int[] bitmapOffs, int[] cachedCounts) { super(colIndexes, numRows, zeros, dict, bitmapOffs, bitmaps, cachedCounts); } - protected static AColGroup create(IColIndex colIndexes, int numRows, boolean zeros, ADictionary dict, char[] bitmaps, + protected static AColGroup create(IColIndex colIndexes, int numRows, boolean zeros, IDictionary dict, char[] bitmaps, int[] bitmapOffs, int[] cachedCounts) { if(dict == 
null) return new ColGroupEmpty(colIndexes); @@ -64,7 +64,7 @@ protected static AColGroup create(IColIndex colIndexes, int numRows, boolean zer } protected static AColGroup compressRLE(IColIndex colIndexes, ABitmap ubm, int nRow, double tupleSparsity) { - ADictionary dict = DictionaryFactory.create(ubm, tupleSparsity); + IDictionary dict = DictionaryFactory.create(ubm, tupleSparsity); // compress the bitmaps final int numVals = ubm.getNumValues(); @@ -313,7 +313,7 @@ public AColGroup binaryRowOpRight(BinaryOperator op, double[] v, boolean isRowSa return appendRun(_dict.binOpRightAndAppend(op, v, _colIndexes)); } - private AColGroup appendRun(ADictionary dict) { + private AColGroup appendRun(IDictionary dict) { // find the locations missing runs final boolean[] lind = computeZeroIndicatorVector(); // compute them as offsets... waste full @@ -710,7 +710,7 @@ private void lmDenseMatrixNoPreAggMultiCol(MatrixBlock matrix, MatrixBlock resul } @Override - protected AColGroup allocateRightMultiplication(MatrixBlock right, IColIndex colIndexes, ADictionary preAgg) { + protected AColGroup allocateRightMultiplication(MatrixBlock right, IColIndex colIndexes, IDictionary preAgg) { if(preAgg == null) return null; return create(colIndexes, _numRows, _zeros, preAgg, _data, _ptr, getCachedCounts()); @@ -954,7 +954,7 @@ public double getCost(ComputationCostEstimator e, int nRows) { public static ColGroupRLE read(DataInput in, int nRows) throws IOException { IColIndex cols = ColIndexFactory.read(in); - ADictionary dict = DictionaryFactory.read(in); + IDictionary dict = DictionaryFactory.read(in); int[] ptr = readPointers(in); char[] data = readData(in); boolean zeros = in.readBoolean(); @@ -967,7 +967,7 @@ public AColGroup sliceRows(int rl, int ru) { } @Override - protected AColGroup copyAndSet(IColIndex colIndexes, ADictionary newDictionary) { + protected AColGroup copyAndSet(IColIndex colIndexes, IDictionary newDictionary) { return create(colIndexes, _numRows, _zeros, 
newDictionary, _data, _ptr, getCachedCounts()); } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDC.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDC.java index e4977c05595..91e64468b9c 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDC.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDC.java @@ -26,7 +26,7 @@ import org.apache.sysds.runtime.DMLRuntimeException; import org.apache.sysds.runtime.compress.DMLCompressionException; -import org.apache.sysds.runtime.compress.colgroup.dictionary.ADictionary; +import org.apache.sysds.runtime.compress.colgroup.dictionary.IDictionary; import org.apache.sysds.runtime.compress.colgroup.dictionary.Dictionary; import org.apache.sysds.runtime.compress.colgroup.dictionary.DictionaryFactory; import org.apache.sysds.runtime.compress.colgroup.dictionary.MatrixBlockDictionary; @@ -64,7 +64,7 @@ public class ColGroupSDC extends ASDC implements IMapToDataGroup { /** The default value stored in this column group */ protected final double[] _defaultTuple; - protected ColGroupSDC(IColIndex colIndices, int numRows, ADictionary dict, double[] defaultTuple, AOffset offsets, + protected ColGroupSDC(IColIndex colIndices, int numRows, IDictionary dict, double[] defaultTuple, AOffset offsets, AMapToData data, int[] cachedCounts) { super(colIndices, numRows, dict, offsets, cachedCounts); if(data.getUnique() != dict.getNumberOfValues(colIndices.size())) { @@ -81,7 +81,7 @@ protected ColGroupSDC(IColIndex colIndices, int numRows, ADictionary dict, doubl _defaultTuple = defaultTuple; } - public static AColGroup create(IColIndex colIndices, int numRows, ADictionary dict, double[] defaultTuple, + public static AColGroup create(IColIndex colIndices, int numRows, IDictionary dict, double[] defaultTuple, AOffset offsets, AMapToData data, int[] cachedCounts) { final boolean allZero = ColGroupUtils.allZero(defaultTuple); if(dict == null && 
allZero) @@ -394,7 +394,7 @@ public AColGroup scalarOperation(ScalarOperator op) { final double[] newDefaultTuple = new double[_defaultTuple.length]; for(int i = 0; i < _defaultTuple.length; i++) newDefaultTuple[i] = op.executeScalar(_defaultTuple[i]); - final ADictionary nDict = _dict.applyScalarOp(op); + final IDictionary nDict = _dict.applyScalarOp(op); return create(_colIndexes, _numRows, nDict, newDefaultTuple, _indexes, _data, getCachedCounts()); } @@ -403,7 +403,7 @@ public AColGroup unaryOperation(UnaryOperator op) { final double[] newDefaultTuple = new double[_defaultTuple.length]; for(int i = 0; i < _defaultTuple.length; i++) newDefaultTuple[i] = op.fn.execute(_defaultTuple[i]); - final ADictionary nDict = _dict.applyUnaryOp(op); + final IDictionary nDict = _dict.applyUnaryOp(op); return create(_colIndexes, _numRows, nDict, newDefaultTuple, _indexes, _data, getCachedCounts()); } @@ -412,7 +412,7 @@ public AColGroup binaryRowOpLeft(BinaryOperator op, double[] v, boolean isRowSaf final double[] newDefaultTuple = new double[_defaultTuple.length]; for(int i = 0; i < _defaultTuple.length; i++) newDefaultTuple[i] = op.fn.execute(v[_colIndexes.get(i)], _defaultTuple[i]); - final ADictionary newDict = _dict.binOpLeft(op, v, _colIndexes); + final IDictionary newDict = _dict.binOpLeft(op, v, _colIndexes); return create(_colIndexes, _numRows, newDict, newDefaultTuple, _indexes, _data, getCachedCounts()); } @@ -421,7 +421,7 @@ public AColGroup binaryRowOpRight(BinaryOperator op, double[] v, boolean isRowSa final double[] newDefaultTuple = new double[_defaultTuple.length]; for(int i = 0; i < _defaultTuple.length; i++) newDefaultTuple[i] = op.fn.execute(_defaultTuple[i], v[_colIndexes.get(i)]); - final ADictionary newDict = _dict.binOpRight(op, v, _colIndexes); + final IDictionary newDict = _dict.binOpRight(op, v, _colIndexes); return create(_colIndexes, _numRows, newDict, newDefaultTuple, _indexes, _data, getCachedCounts()); } @@ -436,7 +436,7 @@ public void 
write(DataOutput out) throws IOException { public static ColGroupSDC read(DataInput in, int nRows) throws IOException { IColIndex cols = ColIndexFactory.read(in); - ADictionary dict = DictionaryFactory.read(in); + IDictionary dict = DictionaryFactory.read(in); AOffset indexes = OffsetFactory.readIn(in); AMapToData data = MapToFactory.readIn(in); double[] defaultTuple = ColGroupIO.readDoubleArray(cols.size(), in); @@ -454,7 +454,7 @@ public long getExactSizeOnDisk() { @Override public AColGroup replace(double pattern, double replace) { - ADictionary replaced = _dict.replace(pattern, replace, _colIndexes.size()); + IDictionary replaced = _dict.replace(pattern, replace, _colIndexes.size()); double[] newDefaultTuple = new double[_defaultTuple.length]; for(int i = 0; i < _defaultTuple.length; i++) newDefaultTuple[i] = _defaultTuple[i] == pattern ? replace : _defaultTuple[i]; @@ -467,12 +467,12 @@ public AColGroup extractCommon(double[] constV) { for(int i = 0; i < _colIndexes.size(); i++) constV[_colIndexes.get(i)] += _defaultTuple[i]; - ADictionary subtractedDict = _dict.subtractTuple(_defaultTuple); + IDictionary subtractedDict = _dict.subtractTuple(_defaultTuple); return ColGroupSDCZeros.create(_colIndexes, _numRows, subtractedDict, _indexes, _data, getCounts()); } public AColGroup subtractDefaultTuple() { - ADictionary subtractedDict = _dict.subtractTuple(_defaultTuple); + IDictionary subtractedDict = _dict.subtractTuple(_defaultTuple); return ColGroupSDCZeros.create(_colIndexes, _numRows, subtractedDict, _indexes, _data, getCounts()); } @@ -483,11 +483,11 @@ public CM_COV_Object centralMoment(CMOperator op, int nRows) { @Override public AColGroup rexpandCols(int max, boolean ignore, boolean cast, int nRows) { - ADictionary d = _dict.rexpandCols(max, ignore, cast, _colIndexes.size()); + IDictionary d = _dict.rexpandCols(max, ignore, cast, _colIndexes.size()); return rexpandCols(max, ignore, cast, nRows, d, _indexes, _data, getCachedCounts(), (int) _defaultTuple[0]); 
} - protected static AColGroup rexpandCols(int max, boolean ignore, boolean cast, int nRows, ADictionary d, + protected static AColGroup rexpandCols(int max, boolean ignore, boolean cast, int nRows, IDictionary d, AOffset indexes, AMapToData data, int[] counts, int def) { if(d == null) { @@ -528,7 +528,7 @@ public double getCost(ComputationCostEstimator e, int nRows) { @Override protected AColGroup sliceMultiColumns(int idStart, int idEnd, IColIndex outputCols) { - ADictionary retDict = _dict.sliceOutColumnRange(idStart, idEnd, _colIndexes.size()); + IDictionary retDict = _dict.sliceOutColumnRange(idStart, idEnd, _colIndexes.size()); final double[] newDef = new double[idEnd - idStart]; for(int i = idStart, j = 0; i < idEnd; i++, j++) newDef[j] = _defaultTuple[i]; @@ -541,7 +541,7 @@ protected AColGroup sliceSingleColumn(int idx) { if(_colIndexes.size() == 1) // early abort, only single column already. return create(retIndexes, _numRows, _dict, _defaultTuple, _indexes, _data, getCounts()); final double[] newDef = new double[] {_defaultTuple[idx]}; - final ADictionary retDict = _dict.sliceOutColumnRange(idx, idx + 1, _colIndexes.size()); + final IDictionary retDict = _dict.sliceOutColumnRange(idx, idx + 1, _colIndexes.size()); return create(retIndexes, _numRows, retDict, newDef, _indexes, _data, getCounts()); } @@ -561,7 +561,7 @@ public double[] getCommon() { } @Override - protected AColGroup allocateRightMultiplicationCommon(double[] common, IColIndex colIndexes, ADictionary preAgg) { + protected AColGroup allocateRightMultiplicationCommon(double[] common, IColIndex colIndexes, IDictionary preAgg) { return create(colIndexes, _numRows, preAgg, common, _indexes, _data, getCachedCounts()); } @@ -578,7 +578,7 @@ public AColGroup sliceRows(int rl, int ru) { } @Override - protected AColGroup copyAndSet(IColIndex colIndexes, ADictionary newDictionary) { + protected AColGroup copyAndSet(IColIndex colIndexes, IDictionary newDictionary) { return create(colIndexes, _numRows, 
newDictionary, _defaultTuple, _indexes, _data, getCachedCounts()); } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCFOR.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCFOR.java index 294a47c4372..4c46f907efc 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCFOR.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCFOR.java @@ -26,7 +26,7 @@ import org.apache.commons.lang3.NotImplementedException; import org.apache.sysds.runtime.compress.DMLCompressionException; -import org.apache.sysds.runtime.compress.colgroup.dictionary.ADictionary; +import org.apache.sysds.runtime.compress.colgroup.dictionary.IDictionary; import org.apache.sysds.runtime.compress.colgroup.dictionary.Dictionary; import org.apache.sysds.runtime.compress.colgroup.dictionary.DictionaryFactory; import org.apache.sysds.runtime.compress.colgroup.indexes.ColIndexFactory; @@ -72,7 +72,7 @@ public class ColGroupSDCFOR extends ASDC implements IMapToDataGroup, IFrameOfRef /** Reference values in this column group */ protected final double[] _reference; - private ColGroupSDCFOR(IColIndex colIndices, int numRows, ADictionary dict, AOffset indexes, AMapToData data, + private ColGroupSDCFOR(IColIndex colIndices, int numRows, IDictionary dict, AOffset indexes, AMapToData data, int[] cachedCounts, double[] reference) { super(colIndices, numRows, dict, indexes, cachedCounts); // allow for now 1 data unique. 
@@ -84,7 +84,7 @@ else if(data.getUnique() != dict.getNumberOfValues(colIndices.size())) _reference = reference; } - public static AColGroup create(IColIndex colIndexes, int numRows, ADictionary dict, AOffset offsets, AMapToData data, + public static AColGroup create(IColIndex colIndexes, int numRows, IDictionary dict, AOffset offsets, AMapToData data, int[] cachedCounts, double[] reference) { final boolean allZero = ColGroupUtils.allZero(reference); if(allZero && dict == null) @@ -157,11 +157,11 @@ public AColGroup scalarOperation(ScalarOperator op) { if(op.fn instanceof Plus || op.fn instanceof Minus) return create(_colIndexes, _numRows, _dict, _indexes, _data, getCachedCounts(), newRef); else if(op.fn instanceof Multiply || op.fn instanceof Divide) { - final ADictionary newDict = _dict.applyScalarOp(op); + final IDictionary newDict = _dict.applyScalarOp(op); return create(_colIndexes, _numRows, newDict, _indexes, _data, getCachedCounts(), newRef); } else { - final ADictionary newDict = _dict.applyScalarOpWithReference(op, _reference, newRef); + final IDictionary newDict = _dict.applyScalarOpWithReference(op, _reference, newRef); return create(_colIndexes, _numRows, newDict, _indexes, _data, getCachedCounts(), newRef); } } @@ -169,7 +169,7 @@ else if(op.fn instanceof Multiply || op.fn instanceof Divide) { @Override public AColGroup unaryOperation(UnaryOperator op) { final double[] newRef = ColGroupUtils.unaryOperator(op, _reference); - final ADictionary newDict = _dict.applyUnaryOpWithReference(op, _reference, newRef); + final IDictionary newDict = _dict.applyUnaryOpWithReference(op, _reference, newRef); return create(_colIndexes, _numRows, newDict, _indexes, _data, getCachedCounts(), newRef); } @@ -183,11 +183,11 @@ public AColGroup binaryRowOpLeft(BinaryOperator op, double[] v, boolean isRowSaf return create(_colIndexes, _numRows, _dict, _indexes, _data, getCachedCounts(), newRef); else if(op.fn instanceof Multiply || op.fn instanceof Divide) { // possible to 
simply process on dict and keep reference - final ADictionary newDict = _dict.binOpLeft(op, v, _colIndexes); + final IDictionary newDict = _dict.binOpLeft(op, v, _colIndexes); return create(_colIndexes, _numRows, newDict, _indexes, _data, getCachedCounts(), newRef); } else { // have to apply reference while processing - final ADictionary newDict = _dict.binOpLeftWithReference(op, v, _colIndexes, _reference, newRef); + final IDictionary newDict = _dict.binOpLeftWithReference(op, v, _colIndexes, _reference, newRef); return create(_colIndexes, _numRows, newDict, _indexes, _data, getCachedCounts(), newRef); } } @@ -202,11 +202,11 @@ public AColGroup binaryRowOpRight(BinaryOperator op, double[] v, boolean isRowSa return create(_colIndexes, _numRows, _dict, _indexes, _data, getCachedCounts(), newRef); else if(op.fn instanceof Multiply || op.fn instanceof Divide) { // possible to simply process on dict and keep reference - final ADictionary newDict = _dict.binOpRight(op, v, _colIndexes); + final IDictionary newDict = _dict.binOpRight(op, v, _colIndexes); return create(_colIndexes, _numRows, newDict, _indexes, _data, getCachedCounts(), newRef); } else { // have to apply reference while processing - final ADictionary newDict = _dict.binOpRightWithReference(op, v, _colIndexes, _reference, newRef); + final IDictionary newDict = _dict.binOpRightWithReference(op, v, _colIndexes, _reference, newRef); return create(_colIndexes, _numRows, newDict, _indexes, _data, getCachedCounts(), newRef); } } @@ -222,7 +222,7 @@ public void write(DataOutput out) throws IOException { public static ColGroupSDCFOR read(DataInput in, int nRows) throws IOException { IColIndex cols = ColIndexFactory.read(in); - ADictionary dict = DictionaryFactory.read(in); + IDictionary dict = DictionaryFactory.read(in); AOffset indexes = OffsetFactory.readIn(in); AMapToData data = MapToFactory.readIn(in); double[] reference = ColGroupIO.readDoubleArray(cols.size(), in); @@ -250,7 +250,7 @@ public long 
estimateInMemorySize() { @Override public AColGroup replace(double pattern, double replace) { - final ADictionary newDict = _dict.replaceWithReference(pattern, replace, _reference); + final IDictionary newDict = _dict.replaceWithReference(pattern, replace, _reference); boolean patternInReference = false; for(double d : _reference) if(pattern == d) { @@ -362,7 +362,7 @@ protected void computeColProduct(double[] c, int nRows) { @Override protected AColGroup sliceMultiColumns(int idStart, int idEnd, IColIndex outputCols) { - ADictionary retDict = _dict.sliceOutColumnRange(idStart, idEnd, _colIndexes.size()); + IDictionary retDict = _dict.sliceOutColumnRange(idStart, idEnd, _colIndexes.size()); final double[] newDef = new double[idEnd - idStart]; for(int i = idStart, j = 0; i < idEnd; i++, j++) newDef[j] = _reference[i]; @@ -375,7 +375,7 @@ protected AColGroup sliceSingleColumn(int idx) { if(_colIndexes.size() == 1) // early abort, only single column already. return create(retIndexes, _numRows, _dict, _indexes, _data, getCounts(), _reference); final double[] newDef = new double[] {_reference[idx]}; - final ADictionary retDict = _dict.sliceOutColumnRange(idx, idx + 1, _colIndexes.size()); + final IDictionary retDict = _dict.sliceOutColumnRange(idx, idx + 1, _colIndexes.size()); return create(retIndexes, _numRows, retDict, _indexes, _data, getCounts(), newDef); } @@ -412,7 +412,7 @@ public AColGroup extractCommon(double[] constV) { @Override public AColGroup rexpandCols(int max, boolean ignore, boolean cast, int nRows) { - ADictionary d = _dict.rexpandColsWithReference(max, ignore, cast, (int) _reference[0]); + IDictionary d = _dict.rexpandColsWithReference(max, ignore, cast, (int) _reference[0]); return ColGroupSDC.rexpandCols(max, ignore, cast, nRows, d, _indexes, _data, getCachedCounts(), (int) _reference[0]); } @@ -437,7 +437,7 @@ public double[] getCommon() { } @Override - protected AColGroup allocateRightMultiplicationCommon(double[] common, IColIndex colIndexes, 
ADictionary preAgg) { + protected AColGroup allocateRightMultiplicationCommon(double[] common, IColIndex colIndexes, IDictionary preAgg) { return create(colIndexes, _numRows, preAgg, _indexes, _data, getCachedCounts(), common); } @@ -451,7 +451,7 @@ public AColGroup sliceRows(int rl, int ru) { } @Override - protected AColGroup copyAndSet(IColIndex colIndexes, ADictionary newDictionary) { + protected AColGroup copyAndSet(IColIndex colIndexes, IDictionary newDictionary) { return create(colIndexes, _numRows, newDictionary, _indexes, _data, getCachedCounts(), _reference); } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCSingle.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCSingle.java index 1182d80ba57..fc8bf1aa0bc 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCSingle.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCSingle.java @@ -25,7 +25,7 @@ import java.util.Arrays; import org.apache.sysds.runtime.DMLRuntimeException; -import org.apache.sysds.runtime.compress.colgroup.dictionary.ADictionary; +import org.apache.sysds.runtime.compress.colgroup.dictionary.IDictionary; import org.apache.sysds.runtime.compress.colgroup.dictionary.Dictionary; import org.apache.sysds.runtime.compress.colgroup.dictionary.DictionaryFactory; import org.apache.sysds.runtime.compress.colgroup.indexes.ColIndexFactory; @@ -60,14 +60,14 @@ public class ColGroupSDCSingle extends ASDC { /** The default value stored in this column group */ protected final double[] _defaultTuple; - private ColGroupSDCSingle(IColIndex colIndices, int numRows, ADictionary dict, double[] defaultTuple, + private ColGroupSDCSingle(IColIndex colIndices, int numRows, IDictionary dict, double[] defaultTuple, AOffset offsets, int[] cachedCounts) { super(colIndices, numRows, dict == null ? 
Dictionary.createNoCheck(new double[colIndices.size()]) : dict, offsets, cachedCounts); _defaultTuple = defaultTuple; } - public static AColGroup create(IColIndex colIndexes, int numRows, ADictionary dict, double[] defaultTuple, + public static AColGroup create(IColIndex colIndexes, int numRows, IDictionary dict, double[] defaultTuple, AOffset offsets, int[] cachedCounts) { final boolean allZero = ColGroupUtils.allZero(defaultTuple); if(dict == null && allZero) @@ -361,7 +361,7 @@ public AColGroup scalarOperation(ScalarOperator op) { final double[] newDefaultTuple = new double[_defaultTuple.length]; for(int i = 0; i < _defaultTuple.length; i++) newDefaultTuple[i] = op.executeScalar(_defaultTuple[i]); - final ADictionary nDict = _dict.applyScalarOp(op); + final IDictionary nDict = _dict.applyScalarOp(op); return create(_colIndexes, _numRows, nDict, newDefaultTuple, _indexes, getCachedCounts()); } @@ -370,7 +370,7 @@ public AColGroup unaryOperation(UnaryOperator op) { final double[] newDefaultTuple = new double[_defaultTuple.length]; for(int i = 0; i < _defaultTuple.length; i++) newDefaultTuple[i] = op.fn.execute(_defaultTuple[i]); - final ADictionary nDict = _dict.applyUnaryOp(op); + final IDictionary nDict = _dict.applyUnaryOp(op); return create(_colIndexes, _numRows, nDict, newDefaultTuple, _indexes, getCachedCounts()); } @@ -379,7 +379,7 @@ public AColGroup binaryRowOpLeft(BinaryOperator op, double[] v, boolean isRowSaf final double[] newDefaultTuple = new double[_defaultTuple.length]; for(int i = 0; i < _defaultTuple.length; i++) newDefaultTuple[i] = op.fn.execute(v[_colIndexes.get(i)], _defaultTuple[i]); - final ADictionary newDict = _dict.binOpLeft(op, v, _colIndexes); + final IDictionary newDict = _dict.binOpLeft(op, v, _colIndexes); return create(_colIndexes, _numRows, newDict, newDefaultTuple, _indexes, getCachedCounts()); } @@ -388,7 +388,7 @@ public AColGroup binaryRowOpRight(BinaryOperator op, double[] v, boolean isRowSa final double[] newDefaultTuple = 
new double[_defaultTuple.length]; for(int i = 0; i < _defaultTuple.length; i++) newDefaultTuple[i] = op.fn.execute(_defaultTuple[i], v[_colIndexes.get(i)]); - final ADictionary newDict = _dict.binOpRight(op, v, _colIndexes); + final IDictionary newDict = _dict.binOpRight(op, v, _colIndexes); return create(_colIndexes, _numRows, newDict, newDefaultTuple, _indexes, getCachedCounts()); } @@ -402,7 +402,7 @@ public void write(DataOutput out) throws IOException { public static ColGroupSDCSingle read(DataInput in, int nRows) throws IOException { IColIndex cols = ColIndexFactory.read(in); - ADictionary dict = DictionaryFactory.read(in); + IDictionary dict = DictionaryFactory.read(in); AOffset indexes = OffsetFactory.readIn(in); double[] defaultTuple = ColGroupIO.readDoubleArray(cols.size(), in); return new ColGroupSDCSingle(cols, nRows, dict, defaultTuple, indexes, null); @@ -418,7 +418,7 @@ public long getExactSizeOnDisk() { @Override public AColGroup replace(double pattern, double replace) { - ADictionary replaced = _dict.replace(pattern, replace, _colIndexes.size()); + IDictionary replaced = _dict.replace(pattern, replace, _colIndexes.size()); double[] newDefaultTuple = new double[_defaultTuple.length]; for(int i = 0; i < _defaultTuple.length; i++) newDefaultTuple[i] = _defaultTuple[i] == pattern ? 
replace : _defaultTuple[i]; @@ -431,7 +431,7 @@ public AColGroup extractCommon(double[] constV) { for(int i = 0; i < _colIndexes.size(); i++) constV[_colIndexes.get(i)] += _defaultTuple[i]; - ADictionary subtractedDict = _dict.subtractTuple(_defaultTuple); + IDictionary subtractedDict = _dict.subtractTuple(_defaultTuple); return ColGroupSDCSingleZeros.create(_colIndexes, _numRows, subtractedDict, _indexes, getCachedCounts()); } @@ -451,7 +451,7 @@ public CM_COV_Object centralMoment(CMOperator op, int nRows) { @Override public AColGroup rexpandCols(int max, boolean ignore, boolean cast, int nRows) { - ADictionary d = _dict.rexpandCols(max, ignore, cast, _colIndexes.size()); + IDictionary d = _dict.rexpandCols(max, ignore, cast, _colIndexes.size()); final int def = (int) _defaultTuple[0]; if(d == null) { if(def <= 0 || def > max) @@ -532,7 +532,7 @@ public double[] getCommon() { } @Override - protected AColGroup allocateRightMultiplicationCommon(double[] common, IColIndex colIndexes, ADictionary preAgg) { + protected AColGroup allocateRightMultiplicationCommon(double[] common, IColIndex colIndexes, IDictionary preAgg) { return create(colIndexes, _numRows, preAgg, common, _indexes, getCachedCounts()); } @@ -545,7 +545,7 @@ public AColGroup sliceRows(int rl, int ru) { } @Override - protected AColGroup copyAndSet(IColIndex colIndexes, ADictionary newDictionary) { + protected AColGroup copyAndSet(IColIndex colIndexes, IDictionary newDictionary) { return create(colIndexes, _numRows, newDictionary, _defaultTuple, _indexes, getCachedCounts()); } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCSingleZeros.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCSingleZeros.java index 69a93d8ef15..817e061a254 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCSingleZeros.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCSingleZeros.java @@ -26,7 +26,7 @@ import 
org.apache.commons.lang3.NotImplementedException; import org.apache.sysds.runtime.compress.DMLCompressionException; -import org.apache.sysds.runtime.compress.colgroup.dictionary.ADictionary; +import org.apache.sysds.runtime.compress.colgroup.dictionary.IDictionary; import org.apache.sysds.runtime.compress.colgroup.dictionary.Dictionary; import org.apache.sysds.runtime.compress.colgroup.dictionary.DictionaryFactory; import org.apache.sysds.runtime.compress.colgroup.dictionary.MatrixBlockDictionary; @@ -60,7 +60,7 @@ public class ColGroupSDCSingleZeros extends ASDCZero { private static final long serialVersionUID = 8033235615964315078L; - private ColGroupSDCSingleZeros(IColIndex colIndices, int numRows, ADictionary dict, AOffset offsets, + private ColGroupSDCSingleZeros(IColIndex colIndices, int numRows, IDictionary dict, AOffset offsets, int[] cachedCounts) { super(colIndices, numRows, dict, offsets, cachedCounts); if(offsets.getSize() * 2 > numRows + 2) @@ -68,13 +68,13 @@ private ColGroupSDCSingleZeros(IColIndex colIndices, int numRows, ADictionary di + " vs " + _indexes + "\n" + this); } - public static AColGroup create(IColIndex colIndices, int numRows, ADictionary dict, AOffset offsets, + public static AColGroup create(IColIndex colIndices, int numRows, IDictionary dict, AOffset offsets, int[] cachedCounts) { if(dict == null) return new ColGroupEmpty(colIndices); else if(offsets.getSize() * 2 > numRows + 2) { AOffset rev = AOffset.reverse(numRows, offsets); - ADictionary empty = MatrixBlockDictionary.create(new MatrixBlock(1, colIndices.size(), true)); + IDictionary empty = MatrixBlockDictionary.create(new MatrixBlock(1, colIndices.size(), true)); return ColGroupSDCSingle.create(colIndices, numRows, empty, dict.getValues(), rev, null); } else @@ -490,7 +490,7 @@ public long estimateInMemorySize() { public AColGroup scalarOperation(ScalarOperator op) { final double val0 = op.executeScalar(0); final boolean isSparseSafeOp = val0 == 0; - final ADictionary nDict = 
_dict.applyScalarOp(op); + final IDictionary nDict = _dict.applyScalarOp(op); if(isSparseSafeOp) return create(_colIndexes, _numRows, nDict, _indexes, getCachedCounts()); else { @@ -503,7 +503,7 @@ public AColGroup scalarOperation(ScalarOperator op) { @Override public AColGroup unaryOperation(UnaryOperator op) { final double val0 = op.fn.execute(0); - final ADictionary nDict = _dict.applyUnaryOp(op); + final IDictionary nDict = _dict.applyUnaryOp(op); if(val0 == 0) return create(_colIndexes, _numRows, nDict, _indexes, getCachedCounts()); else { @@ -535,11 +535,11 @@ public boolean containsValue(double pattern) { @Override public AColGroup binaryRowOpLeft(BinaryOperator op, double[] v, boolean isRowSafe) { if(isRowSafe) { - ADictionary ret = _dict.binOpLeft(op, v, _colIndexes); + IDictionary ret = _dict.binOpLeft(op, v, _colIndexes); return ColGroupSDCSingleZeros.create(_colIndexes, _numRows, ret, _indexes, getCachedCounts()); } else { - ADictionary newDict = _dict.binOpLeft(op, v, _colIndexes); + IDictionary newDict = _dict.binOpLeft(op, v, _colIndexes); double[] defaultTuple = new double[_colIndexes.size()]; for(int i = 0; i < _colIndexes.size(); i++) defaultTuple[i] = op.fn.execute(v[_colIndexes.get(i)], 0); @@ -550,11 +550,11 @@ public AColGroup binaryRowOpLeft(BinaryOperator op, double[] v, boolean isRowSaf @Override public AColGroup binaryRowOpRight(BinaryOperator op, double[] v, boolean isRowSafe) { if(isRowSafe) { - ADictionary ret = _dict.binOpRight(op, v, _colIndexes); + IDictionary ret = _dict.binOpRight(op, v, _colIndexes); return ColGroupSDCSingleZeros.create(_colIndexes, _numRows, ret, _indexes, getCachedCounts()); } else { - ADictionary newDict = _dict.binOpRight(op, v, _colIndexes); + IDictionary newDict = _dict.binOpRight(op, v, _colIndexes); double[] defaultTuple = new double[_colIndexes.size()]; for(int i = 0; i < _colIndexes.size(); i++) defaultTuple[i] = op.fn.execute(0, v[_colIndexes.get(i)]); @@ -570,7 +570,7 @@ public void write(DataOutput 
out) throws IOException { public static ColGroupSDCSingleZeros read(DataInput in, int nRows) throws IOException { IColIndex cols = ColIndexFactory.read(in); - ADictionary dict = DictionaryFactory.read(in); + IDictionary dict = DictionaryFactory.read(in); AOffset indexes = OffsetFactory.readIn(in); return new ColGroupSDCSingleZeros(cols, nRows, dict, indexes, null); } @@ -776,7 +776,7 @@ public int getPreAggregateSize() { @Override public AColGroup replace(double pattern, double replace) { - ADictionary replaced = _dict.replace(pattern, replace, _colIndexes.size()); + IDictionary replaced = _dict.replace(pattern, replace, _colIndexes.size()); if(pattern == 0) { double[] defaultTuple = new double[_colIndexes.size()]; for(int i = 0; i < _colIndexes.size(); i++) @@ -811,7 +811,7 @@ protected int numRowsToMultiply() { } @Override - protected AColGroup allocateRightMultiplication(MatrixBlock right, IColIndex colIndexes, ADictionary preAgg) { + protected AColGroup allocateRightMultiplication(MatrixBlock right, IColIndex colIndexes, IDictionary preAgg) { if(colIndexes != null && preAgg != null) return create(colIndexes, _numRows, preAgg, _indexes, getCachedCounts()); else @@ -827,7 +827,7 @@ public AColGroup sliceRows(int rl, int ru) { } @Override - protected AColGroup copyAndSet(IColIndex colIndexes, ADictionary newDictionary) { + protected AColGroup copyAndSet(IColIndex colIndexes, IDictionary newDictionary) { return create(colIndexes, _numRows, newDictionary, _indexes, getCachedCounts()); } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCZeros.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCZeros.java index 4a6f6b50b83..c26c39c0b85 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCZeros.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCZeros.java @@ -26,7 +26,7 @@ import org.apache.sysds.runtime.DMLRuntimeException; import 
org.apache.sysds.runtime.compress.DMLCompressionException; -import org.apache.sysds.runtime.compress.colgroup.dictionary.ADictionary; +import org.apache.sysds.runtime.compress.colgroup.dictionary.IDictionary; import org.apache.sysds.runtime.compress.colgroup.dictionary.Dictionary; import org.apache.sysds.runtime.compress.colgroup.dictionary.DictionaryFactory; import org.apache.sysds.runtime.compress.colgroup.dictionary.MatrixBlockDictionary; @@ -67,7 +67,7 @@ public class ColGroupSDCZeros extends ASDCZero implements IMapToDataGroup { /** Pointers to row indexes in the dictionary. Note the dictionary has one extra entry. */ protected final AMapToData _data; - private ColGroupSDCZeros(IColIndex colIndices, int numRows, ADictionary dict, AOffset indexes, AMapToData data, + private ColGroupSDCZeros(IColIndex colIndices, int numRows, IDictionary dict, AOffset indexes, AMapToData data, int[] cachedCounts) { super(colIndices, numRows, dict, indexes, cachedCounts); if(data.getUnique() != dict.getNumberOfValues(colIndices.size())) @@ -76,7 +76,7 @@ private ColGroupSDCZeros(IColIndex colIndices, int numRows, ADictionary dict, AO _data = data; } - public static AColGroup create(IColIndex colIndices, int numRows, ADictionary dict, AOffset offsets, AMapToData data, + public static AColGroup create(IColIndex colIndices, int numRows, IDictionary dict, AOffset offsets, AMapToData data, int[] cachedCounts) { if(dict == null) return new ColGroupEmpty(colIndices); @@ -512,7 +512,7 @@ else if(op.fn instanceof Plus || (op.fn instanceof Minus && op instanceof RightS return ColGroupSDCFOR.create(_colIndexes, _numRows, _dict, _indexes, _data, getCachedCounts(), reference); } else { - final ADictionary newDict = _dict.applyScalarOp(op); + final IDictionary newDict = _dict.applyScalarOp(op); final double[] defaultTuple = ColGroupUtils.createReference(_colIndexes.size(), val0); return ColGroupSDC.create(_colIndexes, _numRows, newDict, defaultTuple, _indexes, _data, getCachedCounts()); } @@ 
-521,7 +521,7 @@ else if(op.fn instanceof Plus || (op.fn instanceof Minus && op instanceof RightS @Override public AColGroup unaryOperation(UnaryOperator op) { final double val0 = op.fn.execute(0); - final ADictionary nDict = _dict.applyUnaryOp(op); + final IDictionary nDict = _dict.applyUnaryOp(op); if(val0 == 0) return create(_colIndexes, _numRows, nDict, _indexes, _data, getCachedCounts()); else { @@ -534,7 +534,7 @@ public AColGroup unaryOperation(UnaryOperator op) { @Override public AColGroup binaryRowOpLeft(BinaryOperator op, double[] v, boolean isRowSafe) { if(isRowSafe) { - ADictionary newDict = _dict.binOpLeft(op, v, _colIndexes); + IDictionary newDict = _dict.binOpLeft(op, v, _colIndexes); return create(_colIndexes, _numRows, newDict, _indexes, _data, getCachedCounts()); } else if(op.fn instanceof Plus) { @@ -542,7 +542,7 @@ else if(op.fn instanceof Plus) { return ColGroupSDCFOR.create(_colIndexes, _numRows, _dict, _indexes, _data, getCachedCounts(), reference); } else { - ADictionary newDict = _dict.binOpLeft(op, v, _colIndexes); + IDictionary newDict = _dict.binOpLeft(op, v, _colIndexes); double[] defaultTuple = new double[_colIndexes.size()]; for(int i = 0; i < _colIndexes.size(); i++) defaultTuple[i] = op.fn.execute(v[_colIndexes.get(i)], 0); @@ -553,7 +553,7 @@ else if(op.fn instanceof Plus) { @Override public AColGroup binaryRowOpRight(BinaryOperator op, double[] v, boolean isRowSafe) { if(isRowSafe) { - ADictionary ret = _dict.binOpRight(op, v, _colIndexes); + IDictionary ret = _dict.binOpRight(op, v, _colIndexes); return create(_colIndexes, _numRows, ret, _indexes, _data, getCachedCounts()); } else if(op.fn instanceof Plus) { @@ -561,7 +561,7 @@ else if(op.fn instanceof Plus) { return ColGroupSDCFOR.create(_colIndexes, _numRows, _dict, _indexes, _data, getCachedCounts(), def); } else { - ADictionary newDict = _dict.binOpRight(op, v, _colIndexes); + IDictionary newDict = _dict.binOpRight(op, v, _colIndexes); double[] defaultTuple = new 
double[_colIndexes.size()]; for(int i = 0; i < _colIndexes.size(); i++) defaultTuple[i] = op.fn.execute(0, v[_colIndexes.get(i)]); @@ -578,7 +578,7 @@ public void write(DataOutput out) throws IOException { public static ColGroupSDCZeros read(DataInput in, int nRows) throws IOException { IColIndex cols = ColIndexFactory.read(in); - ADictionary dict = DictionaryFactory.read(in); + IDictionary dict = DictionaryFactory.read(in); AOffset indexes = OffsetFactory.readIn(in); AMapToData data = MapToFactory.readIn(in); return new ColGroupSDCZeros(cols, nRows, dict, indexes, data, null); @@ -682,7 +682,7 @@ protected void preAggregateThatRLEStructure(ColGroupRLE that, Dictionary ret) { @Override public AColGroup replace(double pattern, double replace) { - ADictionary replaced = _dict.replace(pattern, replace, _colIndexes.size()); + IDictionary replaced = _dict.replace(pattern, replace, _colIndexes.size()); if(pattern == 0) { double[] defaultTuple = new double[_colIndexes.size()]; for(int i = 0; i < _colIndexes.size(); i++) @@ -718,7 +718,7 @@ protected int numRowsToMultiply() { } @Override - protected AColGroup allocateRightMultiplication(MatrixBlock right, IColIndex colIndexes, ADictionary preAgg) { + protected AColGroup allocateRightMultiplication(MatrixBlock right, IColIndex colIndexes, IDictionary preAgg) { if(colIndexes != null && preAgg != null) return create(colIndexes, _numRows, preAgg, _indexes, _data, getCachedCounts()); else @@ -756,7 +756,7 @@ public AColGroup sliceRows(int rl, int ru) { } @Override - protected AColGroup copyAndSet(IColIndex colIndexes, ADictionary newDictionary) { + protected AColGroup copyAndSet(IColIndex colIndexes, IDictionary newDictionary) { return create(colIndexes, _numRows, newDictionary, _indexes, _data, getCachedCounts()); } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/IContainADictionary.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/IContainADictionary.java index 966233f6ad3..d7880ff95d3 
100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/IContainADictionary.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/IContainADictionary.java @@ -19,8 +19,8 @@ package org.apache.sysds.runtime.compress.colgroup; -import org.apache.sysds.runtime.compress.colgroup.dictionary.ADictionary; +import org.apache.sysds.runtime.compress.colgroup.dictionary.IDictionary; public interface IContainADictionary { - public ADictionary getDictionary(); + public IDictionary getDictionary(); } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/ADictionary.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/ADictionary.java index 973a737351e..9859455307b 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/ADictionary.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/ADictionary.java @@ -19,917 +19,64 @@ package org.apache.sysds.runtime.compress.colgroup.dictionary; -import java.io.DataOutput; -import java.io.IOException; import java.io.Serializable; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; import org.apache.sysds.runtime.compress.colgroup.indexes.IColIndex; -import org.apache.sysds.runtime.data.SparseBlock; -import org.apache.sysds.runtime.functionobjects.Builtin; import org.apache.sysds.runtime.functionobjects.ValueFunction; import org.apache.sysds.runtime.instructions.cp.CM_COV_Object; -import org.apache.sysds.runtime.matrix.data.MatrixBlock; -import org.apache.sysds.runtime.matrix.operators.BinaryOperator; -import org.apache.sysds.runtime.matrix.operators.ScalarOperator; -import org.apache.sysds.runtime.matrix.operators.UnaryOperator; /** * This dictionary class aims to encapsulate the storage and operations over unique tuple values of a column group. 
*/ -public abstract class ADictionary implements Serializable { - +public abstract class ADictionary implements IDictionary, Serializable { private static final long serialVersionUID = 9118692576356558592L; - protected static final Log LOG = LogFactory.getLog(ADictionary.class.getName()); - - public static enum DictType { - Delta, Dict, MatrixBlock, UInt8, Identity, IdentitySlice; - } - - /** - * Get all the values contained in the dictionary as a linearized double array. - * - * @return linearized double array - */ - public abstract double[] getValues(); - - /** - * Get Specific value contained in the dictionary at index. - * - * @param i The index to extract the value from - * @return The value contained at the index - */ - public abstract double getValue(int i); - - /** - * Get Specific value contain in dictionary at index. - * - * @param r Row target - * @param col Col target - * @param nCol nCol in dictionary - * @return value - */ - public abstract double getValue(int r, int col, int nCol); - - /** - * Returns the memory usage of the dictionary. - * - * @return a long value in number of bytes for the dictionary. - */ - public abstract long getInMemorySize(); - - /** - * Aggregate all the contained values, useful in value only computations where the operation is iterating through all - * values contained in the dictionary. - * - * @param init The initial Value, in cases such as Max value, this could be -infinity - * @param fn The Function to apply to values - * @return The aggregated value as a double. - */ - public abstract double aggregate(double init, Builtin fn); - - /** - * Aggregate all the contained values, with a reference offset. - * - * @param init The initial value, in cases such as Max value this could be -infinity. 
- * @param fn The function to apply to the values - * @param reference The reference offset to each value in the dictionary - * @param def If the reference should be treated as an instance of only as reference - * @return The aggregated value as a double. - */ - public abstract double aggregateWithReference(double init, Builtin fn, double[] reference, boolean def); - - /** - * Aggregate all entries in the rows. - * - * @param fn The aggregate function - * @param nCol The number of columns contained in the dictionary. - * @return Aggregates for this dictionary tuples. - */ - public abstract double[] aggregateRows(Builtin fn, int nCol); - - /** - * Aggregate all entries in the rows of the dictionary with a extra cell in the end that contains the aggregate of - * the given defaultTuple. - * - * @param fn The aggregate function - * @param defaultTuple The default tuple to aggregate in last cell - * @return Aggregates for this dictionary tuples. - */ - public abstract double[] aggregateRowsWithDefault(Builtin fn, double[] defaultTuple); - - /** - * Aggregate all entries in the rows with an offset value reference added. - * - * @param fn The aggregate function - * @param reference The reference offset to each value in the dictionary - * @return Aggregates for this dictionary tuples. - */ - public abstract double[] aggregateRowsWithReference(Builtin fn, double[] reference); - - /** - * Aggregates the columns into the target double array provided. - * - * @param c The target double array, this contains the full number of columns, therefore the colIndexes for - * this specific dictionary is needed. - * @param fn The function to apply to individual columns - * @param colIndexes The mapping to the target columns from the individual columns - */ - public abstract void aggregateCols(double[] c, Builtin fn, IColIndex colIndexes); - - /** - * Aggregates the columns into the target double array provided. 
- * - * @param c The target double array, this contains the full number of columns, therefore the colIndexes for - * this specific dictionary is needed. - * @param fn The function to apply to individual columns - * @param colIndexes The mapping to the target columns from the individual columns - * @param reference The reference offset values to add to each cell. - * @param def If the reference should be treated as a tuple as well - */ - public abstract void aggregateColsWithReference(double[] c, Builtin fn, IColIndex colIndexes, double[] reference, - boolean def); - - /** - * Allocate a new dictionary and applies the scalar operation on each cell of to then return the new dictionary. - * - * @param op The operator. - * @return The new dictionary to return. - */ - public abstract ADictionary applyScalarOp(ScalarOperator op); - - /** - * Allocate a new dictionary with one extra row and applies the scalar operation on each cell of to then return the - * new dictionary. - * - * @param op The operator - * @param v0 The new value to put into each cell in the new row - * @param nCol The number of columns in the dictionary - * @return The new dictionary to return. - */ - public abstract ADictionary applyScalarOpAndAppend(ScalarOperator op, double v0, int nCol); - - /** - * Allocate a new dictionary and apply the unary operator on each cell. - * - * @param op The operator. - * @return The new dictionary to return. - */ - public abstract ADictionary applyUnaryOp(UnaryOperator op); - - /** - * Allocate a new dictionary with one extra row and apply the unary operator on each cell. - * - * @param op The operator. - * @param v0 The new value to put into each cell in the new row - * @param nCol The number of columns in the dictionary - * @return The new dictionary to return. 
- */ - public abstract ADictionary applyUnaryOpAndAppend(UnaryOperator op, double v0, int nCol); - - /** - * Allocate a new dictionary and apply the scalar operation on each cell to then return a new dictionary. - * - * outValues[j] = op(this.values[j] + reference[i]) - newReference[i] - * - * @param op The operator to apply to each cell. - * @param reference The reference value to add before the operator. - * @param newReference The reference value to subtract after the operator. - * @return A New Dictionary. - */ - public abstract ADictionary applyScalarOpWithReference(ScalarOperator op, double[] reference, double[] newReference); - - /** - * Allocate a new dictionary and apply the scalar operation on each cell to then return a new dictionary. - * - * outValues[j] = op(this.values[j] + reference[i]) - newReference[i] - * - * @param op The unary operator to apply to each cell. - * @param reference The reference value to add before the operator. - * @param newReference The reference value to subtract after the operator. - * @return A New Dictionary. - */ - public abstract ADictionary applyUnaryOpWithReference(UnaryOperator op, double[] reference, double[] newReference); - - /** - * Apply binary row operation on the left side - * - * @param op The operation to this dictionary - * @param v The values to use on the left hand side. - * @param colIndexes The column indexes to consider inside v. - * @return A new dictionary containing the updated values. - */ - public abstract ADictionary binOpLeft(BinaryOperator op, double[] v, IColIndex colIndexes); - - /** - * Apply binary row operation on the left side with one extra row evaluating with zeros. - * - * @param op The operation to this dictionary - * @param v The values to use on the left hand side. - * @param colIndexes The column indexes to consider inside v. - * @return A new dictionary containing the updated values. 
- */ - public abstract ADictionary binOpLeftAndAppend(BinaryOperator op, double[] v, IColIndex colIndexes); - - /** - * Apply the binary operator such that each value is offset by the reference before application. Then put the result - * into the new dictionary, but offset it by the new reference. - * - * outValues[j] = op(v[colIndexes[i]], this.values[j] + reference[i]) - newReference[i] - * - * - * @param op The operation to apply on the dictionary values. - * @param v The values to use on the left side of the operator. - * @param colIndexes The column indexes to use. - * @param reference The reference value to add before operator. - * @param newReference The reference value to subtract after operator. - * @return A new dictionary. - */ - public abstract ADictionary binOpLeftWithReference(BinaryOperator op, double[] v, IColIndex colIndexes, - double[] reference, double[] newReference); - - /** - * Apply binary row operation on the right side. - * - * @param op The operation to this dictionary - * @param v The values to use on the right hand side. - * @param colIndexes The column indexes to consider inside v. - * @return A new dictionary containing the updated values. - */ - public abstract ADictionary binOpRight(BinaryOperator op, double[] v, IColIndex colIndexes); + public abstract IDictionary clone(); - /** - * Apply binary row operation on the right side with one extra row evaluating with zeros. - * - * @param op The operation to this dictionary - * @param v The values to use on the right hand side. - * @param colIndexes The column indexes to consider inside v. - * @return A new dictionary containing the updated values. - */ - public abstract ADictionary binOpRightAndAppend(BinaryOperator op, double[] v, IColIndex colIndexes); - - /** - * Apply binary row operation on the right side as with no columns to extract from v. 
- * - * @param op The operation to this dictionary - * @param v The values to apply on the dictionary (same number of cols as the dictionary) - * @return A new dictionary containing the updated values. - */ - public abstract ADictionary binOpRight(BinaryOperator op, double[] v); - - /** - * Apply the binary operator such that each value is offset by the reference before application. Then put the result - * into the new dictionary, but offset it by the new reference. - * - * outValues[j] = op(this.values[j] + reference[i], v[colIndexes[i]]) - newReference[i] - * - * @param op The operation to apply on the dictionary values. - * @param v The values to use on the right side of the operator. - * @param colIndexes The column indexes to use. - * @param reference The reference value to add before operator. - * @param newReference The reference value to subtract after operator. - * @return A new dictionary. - */ - public abstract ADictionary binOpRightWithReference(BinaryOperator op, double[] v, IColIndex colIndexes, - double[] reference, double[] newReference); - - /** - * Returns a deep clone of the dictionary. - */ - public abstract ADictionary clone(); - - /** - * Write the dictionary to a DataOutput. - * - * @param out the output sink to write the dictionary to. - * @throws IOException if the sink fails. - */ - public abstract void write(DataOutput out) throws IOException; - - /** - * Calculate the space consumption if the dictionary is stored on disk. - * - * @return the long count of bytes to store the dictionary. - */ - public abstract long getExactSizeOnDisk(); - - /** - * Get the dictionary type this dictionary is. - * - * @return The Dictionary type this is. - */ - public abstract DictType getDictType(); - - /** - * Get the number of distinct tuples given that the column group has n columns - * - * @param ncol The number of Columns in the ColumnGroup. - * @return the number of value tuples contained in the dictionary. 
- */ - public abstract int getNumberOfValues(int ncol); - - /** - * Method used as a pre-aggregate of each tuple in the dictionary, to single double values. - * - * Note if the number of columns is one the actual dictionaries values are simply returned. - * - * - * @param nrColumns The number of columns in the ColGroup to know how to get the values from the dictionary. - * @return a double array containing the row sums from this dictionary. - */ - public abstract double[] sumAllRowsToDouble(int nrColumns); - - /** - * Do exactly the same as the sumAllRowsToDouble but also sum the array given to a extra index in the end of the - * array. - * - * @param defaultTuple The default row to sum in the end index returned. - * @return a double array containing the row sums from this dictionary. - */ - public abstract double[] sumAllRowsToDoubleWithDefault(double[] defaultTuple); - - /** - * Method used as a pre-aggregate of each tuple in the dictionary, to single double values with a reference. - * - * @param reference The reference values to add to each cell. - * @return a double array containing the row sums from this dictionary. - */ - public abstract double[] sumAllRowsToDoubleWithReference(double[] reference); - - /** - * Method used as a pre-aggregate of each tuple in the dictionary, to single double values. - * - * Note if the number of columns is one the actual dictionaries values are simply returned. - * - * @param nrColumns The number of columns in the ColGroup to know how to get the values from the dictionary. - * @return a double array containing the row sums from this dictionary. - */ - public abstract double[] sumAllRowsToDoubleSq(int nrColumns); - - /** - * Method used as a pre-aggregate of each tuple in the dictionary, to single double values. But adds another cell to - * the return with an extra value that is the sum of the given defaultTuple. - * - * @param defaultTuple The default row to sum in the end index returned. 
- * @return a double array containing the row sums from this dictionary. - */ - public abstract double[] sumAllRowsToDoubleSqWithDefault(double[] defaultTuple); - - /** - * Method used as a pre-aggregate of each tuple in the dictionary, to single double values. - * - * @param reference The reference values to add to each cell. - * @return a double array containing the row sums from this dictionary. - */ - public abstract double[] sumAllRowsToDoubleSqWithReference(double[] reference); - - /** - * Method to product all rows to a column vector. - * - * @param nrColumns The number of columns in the ColGroup to know how to get the values from the dictionary. - * @return A row product - */ - public abstract double[] productAllRowsToDouble(int nrColumns); - - /** - * Method to product all rows to a column vector with a default value added in the end. - * - * @param defaultTuple The default row that aggregate to last cell - * @return A row product - */ - public abstract double[] productAllRowsToDoubleWithDefault(double[] defaultTuple); - - /** - * Method to product all rows to a column vector with a reference values added to all cells, and a reference product - * in the end - * - * @param reference The reference row - * @return A row product - */ - public abstract double[] productAllRowsToDoubleWithReference(double[] reference); - - /** - * Get the column sum of the values contained in the dictionary - * - * @param c The output array allocated to contain all column groups output. - * @param counts The counts of the individual tuples. - * @param colIndexes The columns indexes of the parent column group, this indicate where to put the column sum into - * the c output. - */ - public abstract void colSum(double[] c, int[] counts, IColIndex colIndexes); - - /** - * Get the column sum of the values contained in the dictionary - * - * @param c The output array allocated to contain all column groups output. - * @param counts The counts of the individual tuples. 
- * @param colIndexes The columns indexes of the parent column group, this indicate where to put the column sum into - * the c output. - */ - public abstract void colSumSq(double[] c, int[] counts, IColIndex colIndexes); - - /** - * Get the column sum of the values contained in the dictionary with an offset reference value added to each cell. - * - * @param c The output array allocated to contain all column groups output. - * @param counts The counts of the individual tuples. - * @param colIndexes The columns indexes of the parent column group, this indicate where to put the column sum into - * the c output. - * @param reference The reference values to add to each cell. - */ - public abstract void colSumSqWithReference(double[] c, int[] counts, IColIndex colIndexes, double[] reference); - - /** - * Get the sum of the values contained in the dictionary - * - * @param counts The counts of the individual tuples - * @param nCol The number of columns contained - * @return The sum scaled by the counts provided. - */ - public abstract double sum(int[] counts, int nCol); - - /** - * Get the square sum of the values contained in the dictionary - * - * @param counts The counts of the individual tuples - * @param nCol The number of columns contained - * @return The square sum scaled by the counts provided. - */ - public abstract double sumSq(int[] counts, int nCol); - - /** - * Get the square sum of the values contained in the dictionary with a reference offset on each value. - * - * @param counts The counts of the individual tuples - * @param reference The reference value - * @return The square sum scaled by the counts and reference. - */ - public abstract double sumSqWithReference(int[] counts, double[] reference); - - /** - * Get a string representation of the dictionary, that considers the layout of the data. - * - * @param colIndexes The number of columns in the dictionary. - * @return A string that is nicer to print. 
- */ - public abstract String getString(int colIndexes); - - /** - * Modify the dictionary by removing columns not within the index range. - * - * @param idxStart The column index to start at. - * @param idxEnd The column index to end at (not inclusive) - * @param previousNumberOfColumns The number of columns contained in the dictionary. - * @return A dictionary containing the sliced out columns values only. - */ - public abstract ADictionary sliceOutColumnRange(int idxStart, int idxEnd, int previousNumberOfColumns); - - /** - * Detect if the dictionary contains a specific value. - * - * @param pattern The value to search for - * @return true if the value is contained else false. - */ - public abstract boolean containsValue(double pattern); - - /** - * Detect if the dictionary contains a specific value with reference offset. - * - * @param pattern The pattern/ value to search for - * @param reference The reference double array. - * @return true if the value is contained else false. - */ - public abstract boolean containsValueWithReference(double pattern, double[] reference); - - /** - * Calculate the number of non zeros in the dictionary. The number of non zeros should be scaled with the counts - * given. This gives the exact number of non zero values in the parent column group. - * - * @param counts The counts of each dictionary entry - * @param nCol The number of columns in this dictionary - * @return The nonZero count - */ - public abstract long getNumberNonZeros(int[] counts, int nCol); - - /** - * Calculate the number of non zeros in the dictionary. - * - * Each value in the dictionary should be added to the reference value. - * - * The number of non zeros should be scaled with the given counts. - * - * @param counts The Counts of each dict entry. - * @param reference The reference vector. - * @param nRows The number of rows in the input. - * @return The NonZero Count. 
- */ - public abstract long getNumberNonZerosWithReference(int[] counts, double[] reference, int nRows); - - /** - * Copies and adds the dictionary entry from this dictionary to the d dictionary - * - * @param v the target dictionary (dense double array) - * @param fr the from index - * @param to the to index - * @param nCol the number of columns - */ - public abstract void addToEntry(double[] v, int fr, int to, int nCol); - - /** - * copies and adds the dictonary entry from this dictionary yo the d dictionary rep times. - * - * @param v the target dictionary (dense double array) - * @param fr the from index - * @param to the to index - * @param nCol the number of columns - * @param rep the number of repetitions to apply (simply multiply do not loop) - */ - public abstract void addToEntry(double[] v, int fr, int to, int nCol, int rep); - - public abstract void addToEntryVectorized(double[] v, int f1, int f2, int f3, int f4, int f5, int f6, int f7, int f8, - int t1, int t2, int t3, int t4, int t5, int t6, int t7, int t8, int nCol); - - /** - * Allocate a new dictionary where the tuple given is subtracted from all tuples in the previous dictionary. - * - * @param tuple a double list representing a tuple, it is given that the tuple with is the same as this dictionaries. - * @return a new instance of dictionary with the tuple subtracted. - */ - public abstract ADictionary subtractTuple(double[] tuple); - - /** - * Get this dictionary as a MatrixBlock dictionary. This allows us to use optimized kernels coded elsewhere in the - * system, such as matrix multiplication. - * - * Return null if the matrix is empty. - * - * @param nCol The number of columns contained in this column group. - * @return A Dictionary containing a MatrixBlock. - */ - public abstract MatrixBlockDictionary getMBDict(int nCol); - - /** - * Scale all tuples contained in the dictionary by the scaling factor given in the int list. 
- * - * @param scaling The amount to multiply the given tuples with - * @param nCol The number of columns contained in this column group. - * @return A New dictionary (since we don't want to modify the underlying dictionary) - */ - public abstract ADictionary scaleTuples(int[] scaling, int nCol); - - /** - * Pre Aggregate values for Right Matrix Multiplication. - * - * @param numVals The number of values contained in this dictionary - * @param colIndexes The column indexes that is associated with the parent column group - * @param aggregateColumns The column to aggregate, this is preprocessed, to find remove consideration for empty - * columns - * @param b The values in the right hand side matrix - * @param cut The number of columns in b. - * @return A new dictionary with the pre aggregated values. - */ - public abstract ADictionary preaggValuesFromDense(final int numVals, final IColIndex colIndexes, - final IColIndex aggregateColumns, final double[] b, final int cut); - - /** - * Make a copy of the values, and replace all values that match pattern with replacement value. If needed add a new - * column index. - * - * @param pattern The value to look for - * @param replace The value to replace the other value with - * @param nCol The number of columns contained in the dictionary. - * @return A new Column Group, reusing the index structure but with new values. - */ - public abstract ADictionary replace(double pattern, double replace, int nCol); - - /** - * Make a copy of the values, and replace all values that match pattern with replacement value. If needed add a new - * column index. With reference such that each value in the dict is considered offset by the values contained in the - * reference. - * - * @param pattern The value to look for - * @param replace The value to replace the other value with - * @param reference The reference tuple to add to all entries when replacing - * @return A new Column Group, reusing the index structure but with new values. 
- */ - public abstract ADictionary replaceWithReference(double pattern, double replace, double[] reference); - - /** - * Calculate the product of the dictionary weighted by counts. - * - * @param ret The result dense double array (containing one value) - * @param counts The count of individual tuples - * @param nCol Number of columns in the dictionary. - */ - public abstract void product(double[] ret, int[] counts, int nCol); - - /** - * Calculate the product of the dictionary weighted by counts with a default value added . - * - * @param ret The result dense double array (containing one value) - * @param counts The count of individual tuples - * @param def The default tuple - * @param defCount The count of the default tuple - */ - public abstract void productWithDefault(double[] ret, int[] counts, double[] def, int defCount); - - /** - * Calculate the product of the dictionary weighted by counts and offset by reference - * - * @param ret The result dense double array (containing one value) - * @param counts The counts of each entry in the dictionary - * @param reference The reference value. - * @param refCount The number of occurrences of the ref value. - */ - public abstract void productWithReference(double[] ret, int[] counts, double[] reference, int refCount); - - /** - * Calculate the column product of the dictionary weighted by counts. - * - * @param res The result vector to put the result into - * @param counts The weighted count of individual tuples - * @param colIndexes The column indexes. - */ - public abstract void colProduct(double[] res, int[] counts, IColIndex colIndexes); - - /** - * Calculate the column product of the dictionary weighted by counts. - * - * @param res The result vector to put the result into - * @param counts The weighted count of individual tuples - * @param colIndexes The column indexes. - * @param reference The reference value. 
- */ - public abstract void colProductWithReference(double[] res, int[] counts, IColIndex colIndexes, double[] reference); - - /** - * Central moment function to calculate the central moment of this column group. MUST be on a single column - * dictionary. - * - * @param fn The value function to apply - * @param counts The weight of individual tuples - * @param nRows The number of rows in total of the column group - * @return The central moment Object - */ public final CM_COV_Object centralMoment(ValueFunction fn, int[] counts, int nRows) { return centralMoment(new CM_COV_Object(), fn, counts, nRows); } - /** - * Central moment function to calculate the central moment of this column group. MUST be on a single column - * dictionary. - * - * @param ret The Central Moment object to be modified and returned - * @param fn The value function to apply - * @param counts The weight of individual tuples - * @param nRows The number of rows in total of the column group - * @return The central moment Object - */ - public abstract CM_COV_Object centralMoment(CM_COV_Object ret, ValueFunction fn, int[] counts, int nRows); - - /** - * Central moment function to calculate the central moment of this column group with a default offset on all missing - * tuples. MUST be on a single column dictionary. - * - * @param fn The value function to apply - * @param counts The weight of individual tuples - * @param def The default values to offset the tuples with - * @param nRows The number of rows in total of the column group - * @return The central moment Object - */ public final CM_COV_Object centralMomentWithDefault(ValueFunction fn, int[] counts, double def, int nRows) { return centralMomentWithDefault(new CM_COV_Object(), fn, counts, def, nRows); } - /** - * Central moment function to calculate the central moment of this column group with a default offset on all missing - * tuples. MUST be on a single column dictionary. 
- * - * @param ret The Central Moment object to be modified and returned - * @param fn The value function to apply - * @param counts The weight of individual tuples - * @param def The default values to offset the tuples with - * @param nRows The number of rows in total of the column group - * @return The central moment Object - */ - public abstract CM_COV_Object centralMomentWithDefault(CM_COV_Object ret, ValueFunction fn, int[] counts, double def, - int nRows); - - /** - * Central moment function to calculate the central moment of this column group with a reference offset on each - * tuple. MUST be on a single column dictionary. - * - * @param fn The value function to apply - * @param counts The weight of individual tuples - * @param reference The reference values to offset the tuples with - * @param nRows The number of rows in total of the column group - * @return The central moment Object - */ public final CM_COV_Object centralMomentWithReference(ValueFunction fn, int[] counts, double reference, int nRows) { return centralMomentWithReference(new CM_COV_Object(), fn, counts, reference, nRows); } - /** - * Central moment function to calculate the central moment of this column group with a reference offset on each - * tuple. MUST be on a single column dictionary. 
- * - * @param ret The Central Moment object to be modified and returned - * @param fn The value function to apply - * @param counts The weight of individual tuples - * @param reference The reference values to offset the tuples with - * @param nRows The number of rows in total of the column group - * @return The central moment Object - */ - public abstract CM_COV_Object centralMomentWithReference(CM_COV_Object ret, ValueFunction fn, int[] counts, - double reference, int nRows); - - /** - * Rexpand the dictionary (one hot encode) - * - * @param max the tuple width of the output - * @param ignore If we should ignore zero and negative values - * @param cast If we should cast all double values to whole integer values - * @param nCol The number of columns in the dictionary already (should be 1) - * @return A new dictionary - */ - public abstract ADictionary rexpandCols(int max, boolean ignore, boolean cast, int nCol); - - /** - * Rexpand the dictionary (one hot encode) - * - * @param max the tuple width of the output - * @param ignore If we should ignore zero and negative values - * @param cast If we should cast all double values to whole integer values - * @param reference A reference value to add to all tuples before expanding - * @return A new dictionary - */ - public abstract ADictionary rexpandColsWithReference(int max, boolean ignore, boolean cast, int reference); - - /** - * Get the sparsity of the dictionary. - * - * @return a sparsity between 0 and 1 - */ - public abstract double getSparsity(); - - /** - * Multiply the v value with the dictionary entry at dictIdx and add it to the ret matrix at the columns specified in - * the int array. - * - * @param v Value to multiply - * @param ret Output dense double array location - * @param off Offset into the ret array that the "row" output starts at - * @param dictIdx The dictionary entry to multiply. - * @param cols The columns to multiply into of the output. 
- */ - public abstract void multiplyScalar(double v, double[] ret, int off, int dictIdx, IColIndex cols); - - /** - * Transpose self matrix multiplication with a scaling factor on each pair of values. - * - * @param counts The scaling factor - * @param rows The row indexes - * @param cols The col indexes - * @param ret The output matrix block - */ - protected abstract void TSMMWithScaling(int[] counts, IColIndex rows, IColIndex cols, MatrixBlock ret); - - /** - * Matrix multiplication of dictionaries - * - * Note the left is this, and it is transposed - * - * @param right Right hand side of multiplication - * @param rowsLeft Offset rows on the left - * @param colsRight Offset cols on the right - * @param result The output matrix block - */ - protected abstract void MMDict(ADictionary right, IColIndex rowsLeft, IColIndex colsRight, MatrixBlock result); - - /** - * Matrix multiplication of dictionaries left side dense and transposed right side is this. - * - * @param left Dense left side - * @param rowsLeft Offset rows on the left - * @param colsRight Offset cols on the right - * @param result The output matrix block - */ - protected abstract void MMDictDense(double[] left, IColIndex rowsLeft, IColIndex colsRight, MatrixBlock result); - - /** - * Matrix multiplication of dictionaries left side sparse and transposed right side is this. 
- * - * @param left Sparse left side - * @param rowsLeft Offset rows on the left - * @param colsRight Offset cols on the right - * @param result The output matrix block - */ - protected abstract void MMDictSparse(SparseBlock left, IColIndex rowsLeft, IColIndex colsRight, MatrixBlock result); - - /** - * Matrix multiplication but allocate output in upper triangle and twice if on diagonal, note this is left - * - * @param right Right side - * @param rowsLeft Offset rows on the left - * @param colsRight Offset cols on the right - * @param result The output matrix block - */ - protected abstract void TSMMToUpperTriangle(ADictionary right, IColIndex rowsLeft, IColIndex colsRight, - MatrixBlock result); - - /** - * Matrix multiplication but allocate output in upper triangle and twice if on diagonal, note this is right - * - * @param left Dense left side - * @param rowsLeft Offset rows on the left - * @param colsRight Offset cols on the right - * @param result The output matrix block - */ - protected abstract void TSMMToUpperTriangleDense(double[] left, IColIndex rowsLeft, IColIndex colsRight, - MatrixBlock result); - - /** - * Matrix multiplication but allocate output in upper triangle and twice if on diagonal, note this is right - * - * @param left Sparse left side - * @param rowsLeft Offset rows on the left - * @param colsRight Offset cols on the right - * @param result The output matrix block - */ - protected abstract void TSMMToUpperTriangleSparse(SparseBlock left, IColIndex rowsLeft, IColIndex colsRight, - MatrixBlock result); - - /** - * Matrix multiplication but allocate output in upper triangle and twice if on diagonal, note this is left - * - * @param right Right side - * @param rowsLeft Offset rows on the left - * @param colsRight Offset cols on the right - * @param scale Scale factor - * @param result The output matrix block - */ - protected abstract void TSMMToUpperTriangleScaling(ADictionary right, IColIndex rowsLeft, IColIndex colsRight, - int[] scale, 
MatrixBlock result); - - /** - * Matrix multiplication but allocate output in upper triangle and twice if on diagonal, note this is right - * - * @param left Dense left side - * @param rowsLeft Offset rows on the left - * @param colsRight Offset cols on the right - * @param scale Scale factor - * @param result The output matrix block - */ - protected abstract void TSMMToUpperTriangleDenseScaling(double[] left, IColIndex rowsLeft, IColIndex colsRight, - int[] scale, MatrixBlock result); - - /** - * Matrix multiplication but allocate output in upper triangle and twice if on diagonal, note this is right - * - * @param left Sparse left side - * @param rowsLeft Offset rows on the left - * @param colsRight Offset cols on the right - * @param scale Scale factor - * @param result The output matrix block - */ - protected abstract void TSMMToUpperTriangleSparseScaling(SparseBlock left, IColIndex rowsLeft, IColIndex colsRight, - int[] scale, MatrixBlock result); + @Override + public final boolean equals(Object o) { + if(o instanceof IDictionary) + return equals((IDictionary) o); + return false; + } /** - * Cbind this dictionary with that dictionary + * Make a double into a string, if the double is a whole number then return it without decimal points * - * @param that the right hand side dictionary to cbind - * @param nCol the right hand side number of columns - * @return The combined dictionary + * @param v The value + * @return The string */ - public abstract ADictionary cbind(ADictionary that, int nCol); - - protected static String doubleToString(double v) { + public static String doubleToString(double v) { if(v == (long) v) return Long.toString(((long) v)); else return Double.toString(v); } - protected static void correctNan(double[] res, IColIndex colIndexes) { - // since there is no nan values every in a dictionary, we exploit that - // nan oly occur if we multiplied infinity with 0. + /** + * Correct Nan Values in an result. 
If there are any NaN values in the given Res then they are replaced with 0. + * + * @param res The array to correct + * @param colIndexes The column indexes. + */ + public static void correctNan(double[] res, IColIndex colIndexes) { + // since there is no nan values in most dictionaries, we exploit that + // nan only occur if we multiplied infinity with 0. for(int j = 0; j < colIndexes.size(); j++) { final int cix = colIndexes.get(j); res[cix] = Double.isNaN(res[cix]) ? 0 : res[cix]; } } - - @Override - public final boolean equals(Object o) { - if(o instanceof ADictionary) - return equals((ADictionary) o); - return false; - } - - public abstract ADictionary reorder(int[] reorder); - - public abstract boolean equals(ADictionary o); } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/DictLibMatrixMult.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/DictLibMatrixMult.java index 21b4cfd24b0..4c4f07d98b5 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/DictLibMatrixMult.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/DictLibMatrixMult.java @@ -63,12 +63,12 @@ else if(row > col) // swap because in lower triangle * @param result The result matrix * @param counts The scaling factors */ - public static void MMDictsWithScaling(ADictionary left, ADictionary right, IColIndex leftRows, + public static void MMDictsWithScaling(IDictionary left, IDictionary right, IColIndex leftRows, IColIndex rightColumns, MatrixBlock result, int[] counts) { LOG.warn("Inefficient double allocation of dictionary"); final boolean modifyRight = right.getInMemorySize() > left.getInMemorySize(); - final ADictionary rightM = modifyRight ? right.scaleTuples(counts, rightColumns.size()) : right; - final ADictionary leftM = modifyRight ? left : left.scaleTuples(counts, leftRows.size()); + final IDictionary rightM = modifyRight ? 
right.scaleTuples(counts, rightColumns.size()) : right; + final IDictionary leftM = modifyRight ? left : left.scaleTuples(counts, leftRows.size()); MMDicts(leftM, rightM, leftRows, rightColumns, result); } @@ -81,7 +81,7 @@ public static void MMDictsWithScaling(ADictionary left, ADictionary right, IColI * @param cols The cols of the dictionary * @param ret The output to add the results to */ - public static void TSMMDictionaryWithScaling(ADictionary dict, int[] counts, IColIndex rows, IColIndex cols, + public static void TSMMDictionaryWithScaling(IDictionary dict, int[] counts, IColIndex rows, IColIndex cols, MatrixBlock ret) { dict.TSMMWithScaling(counts, rows, cols, ret); } @@ -96,7 +96,7 @@ public static void TSMMDictionaryWithScaling(ADictionary dict, int[] counts, ICo * @param colsRight The column indexes on the right hand side * @param result The result matrix to put the results into. */ - public static void MMDicts(ADictionary left, ADictionary right, IColIndex rowsLeft, IColIndex colsRight, + public static void MMDicts(IDictionary left, IDictionary right, IColIndex rowsLeft, IColIndex colsRight, MatrixBlock result) { left.MMDict(right, rowsLeft, colsRight, result); } @@ -121,7 +121,7 @@ public static void MMDicts(ADictionary left, ADictionary right, IColIndex rowsLe * @param colsRight cols for the right dictionary * @param result the result */ - public static void TSMMToUpperTriangle(ADictionary left, ADictionary right, IColIndex rowsLeft, IColIndex colsRight, + public static void TSMMToUpperTriangle(IDictionary left, IDictionary right, IColIndex rowsLeft, IColIndex colsRight, MatrixBlock result) { left.TSMMToUpperTriangle(right, rowsLeft, colsRight, result); } @@ -147,7 +147,7 @@ public static void TSMMToUpperTriangle(ADictionary left, ADictionary right, ICol * @param scale A multiplier to each dictionary entry * @param result The result */ - public static void TSMMToUpperTriangleScaling(ADictionary left, ADictionary right, IColIndex rowsLeft, + public 
static void TSMMToUpperTriangleScaling(IDictionary left, IDictionary right, IColIndex rowsLeft, IColIndex colsRight, int[] scale, MatrixBlock result) { left.TSMMToUpperTriangleScaling(left, rowsLeft, colsRight, scale, result); } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/Dictionary.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/Dictionary.java index 632db578e6e..343cda04e15 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/Dictionary.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/Dictionary.java @@ -574,7 +574,6 @@ public void colProductWithReference(double[] res, int[] counts, IColIndex colInd for(int j = 0; j < nCol; j++) res[colIndexes.get(j)] *= Math.pow(_values[off + j] + reference[j], cntk); } - correctNan(res, colIndexes); } @@ -1032,61 +1031,61 @@ public void multiplyScalar(double v, double[] ret, int off, int dictIdx, IColInd } @Override - protected void TSMMWithScaling(int[] counts, IColIndex rows, IColIndex cols, MatrixBlock ret) { + public void TSMMWithScaling(int[] counts, IColIndex rows, IColIndex cols, MatrixBlock ret) { DictLibMatrixMult.TSMMDictsDenseWithScaling(_values, rows, cols, counts, ret); } @Override - protected void MMDict(ADictionary right, IColIndex rowsLeft, IColIndex colsRight, MatrixBlock result) { + public void MMDict(ADictionary right, IColIndex rowsLeft, IColIndex colsRight, MatrixBlock result) { right.MMDictDense(_values, rowsLeft, colsRight, result); } @Override - protected void MMDictDense(double[] left, IColIndex rowsLeft, IColIndex colsRight, MatrixBlock result) { + public void MMDictDense(double[] left, IColIndex rowsLeft, IColIndex colsRight, MatrixBlock result) { DictLibMatrixMult.MMDictsDenseDense(left, _values, rowsLeft, colsRight, result); } @Override - protected void MMDictSparse(SparseBlock left, IColIndex rowsLeft, IColIndex colsRight, MatrixBlock result) { + public void 
MMDictSparse(SparseBlock left, IColIndex rowsLeft, IColIndex colsRight, MatrixBlock result) { DictLibMatrixMult.MMDictsSparseDense(left, _values, rowsLeft, colsRight, result); } @Override - protected void TSMMToUpperTriangle(ADictionary right, IColIndex rowsLeft, IColIndex colsRight, MatrixBlock result) { + public void TSMMToUpperTriangle(ADictionary right, IColIndex rowsLeft, IColIndex colsRight, MatrixBlock result) { right.TSMMToUpperTriangleDense(_values, rowsLeft, colsRight, result); } @Override - protected void TSMMToUpperTriangleDense(double[] left, IColIndex rowsLeft, IColIndex colsRight, MatrixBlock result) { + public void TSMMToUpperTriangleDense(double[] left, IColIndex rowsLeft, IColIndex colsRight, MatrixBlock result) { DictLibMatrixMult.MMToUpperTriangleDenseDense(left, _values, rowsLeft, colsRight, result); } @Override - protected void TSMMToUpperTriangleSparse(SparseBlock left, IColIndex rowsLeft, IColIndex colsRight, + public void TSMMToUpperTriangleSparse(SparseBlock left, IColIndex rowsLeft, IColIndex colsRight, MatrixBlock result) { DictLibMatrixMult.MMToUpperTriangleSparseDense(left, _values, rowsLeft, colsRight, result); } @Override - protected void TSMMToUpperTriangleScaling(ADictionary right, IColIndex rowsLeft, IColIndex colsRight, int[] scale, + public void TSMMToUpperTriangleScaling(ADictionary right, IColIndex rowsLeft, IColIndex colsRight, int[] scale, MatrixBlock result) { right.TSMMToUpperTriangleDenseScaling(_values, rowsLeft, colsRight, scale, result); } @Override - protected void TSMMToUpperTriangleDenseScaling(double[] left, IColIndex rowsLeft, IColIndex colsRight, int[] scale, + public void TSMMToUpperTriangleDenseScaling(double[] left, IColIndex rowsLeft, IColIndex colsRight, int[] scale, MatrixBlock result) { DictLibMatrixMult.TSMMToUpperTriangleDenseDenseScaling(left, _values, rowsLeft, colsRight, scale, result); } @Override - protected void TSMMToUpperTriangleSparseScaling(SparseBlock left, IColIndex rowsLeft, IColIndex 
colsRight, + public void TSMMToUpperTriangleSparseScaling(SparseBlock left, IColIndex rowsLeft, IColIndex colsRight, int[] scale, MatrixBlock result) { DictLibMatrixMult.TSMMToUpperTriangleSparseDenseScaling(left, _values, rowsLeft, colsRight, scale, result); } @Override - public boolean equals(ADictionary o) { + public boolean equals(IDictionary o) { if(o instanceof Dictionary) return Arrays.equals(_values, ((Dictionary) o)._values); else if(o instanceof MatrixBlockDictionary) { diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/IDictionary.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/IDictionary.java new file mode 100644 index 00000000000..097b2fb9cd2 --- /dev/null +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/IDictionary.java @@ -0,0 +1,940 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.sysds.runtime.compress.colgroup.dictionary; + +import java.io.DataOutput; +import java.io.IOException; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.sysds.runtime.compress.colgroup.indexes.IColIndex; +import org.apache.sysds.runtime.data.SparseBlock; +import org.apache.sysds.runtime.functionobjects.Builtin; +import org.apache.sysds.runtime.functionobjects.ValueFunction; +import org.apache.sysds.runtime.instructions.cp.CM_COV_Object; +import org.apache.sysds.runtime.matrix.data.MatrixBlock; +import org.apache.sysds.runtime.matrix.operators.BinaryOperator; +import org.apache.sysds.runtime.matrix.operators.ScalarOperator; +import org.apache.sysds.runtime.matrix.operators.UnaryOperator; + +public interface IDictionary { + + public static final Log LOG = LogFactory.getLog(IDictionary.class.getName()); + + public static enum DictType { + Delta, Dict, MatrixBlock, UInt8, Identity, IdentitySlice; + } + + /** + * Get all the values contained in the dictionary as a linearized double array. + * + * @return linearized double array + */ + public double[] getValues(); + + /** + * Get Specific value contained in the dictionary at index. + * + * @param i The index to extract the value from + * @return The value contained at the index + */ + public double getValue(int i); + + /** + * Get Specific value contain in dictionary at index. + * + * @param r Row target + * @param col Col target + * @param nCol nCol in dictionary + * @return value + */ + public double getValue(int r, int col, int nCol); + + /** + * Returns the memory usage of the dictionary. + * + * @return a long value in number of bytes for the dictionary. + */ + public long getInMemorySize(); + + /** + * Aggregate all the contained values, useful in value only computations where the operation is iterating through all + * values contained in the dictionary. 
+ * + * @param init The initial Value, in cases such as Max value, this could be -infinity + * @param fn The Function to apply to values + * @return The aggregated value as a double. + */ + public double aggregate(double init, Builtin fn); + + /** + * Aggregate all the contained values, with a reference offset. + * + * @param init The initial value, in cases such as Max value this could be -infinity. + * @param fn The function to apply to the values + * @param reference The reference offset to each value in the dictionary + * @param def If the reference should be treated as an instance or only as a reference + * @return The aggregated value as a double. + */ + public double aggregateWithReference(double init, Builtin fn, double[] reference, boolean def); + + /** + * Aggregate all entries in the rows. + * + * @param fn The aggregate function + * @param nCol The number of columns contained in the dictionary. + * @return Aggregates for this dictionary tuples. + */ + public double[] aggregateRows(Builtin fn, int nCol); + + /** + * Aggregate all entries in the rows of the dictionary with an extra cell at the end that contains the aggregate of + * the given defaultTuple. + * + * @param fn The aggregate function + * @param defaultTuple The default tuple to aggregate in last cell + * @return Aggregates for this dictionary tuples. + */ + public double[] aggregateRowsWithDefault(Builtin fn, double[] defaultTuple); + + /** + * Aggregate all entries in the rows with an offset value reference added. + * + * @param fn The aggregate function + * @param reference The reference offset to each value in the dictionary + * @return Aggregates for this dictionary tuples. + */ + public double[] aggregateRowsWithReference(Builtin fn, double[] reference); + + /** + * Aggregates the columns into the target double array provided. + * + * @param c The target double array, this contains the full number of columns, therefore the colIndexes for + * this specific dictionary is needed.
+ * @param fn The function to apply to individual columns + * @param colIndexes The mapping to the target columns from the individual columns + */ + public void aggregateCols(double[] c, Builtin fn, IColIndex colIndexes); + + /** + * Aggregates the columns into the target double array provided. + * + * @param c The target double array, this contains the full number of columns, therefore the colIndexes for + * this specific dictionary is needed. + * @param fn The function to apply to individual columns + * @param colIndexes The mapping to the target columns from the individual columns + * @param reference The reference offset values to add to each cell. + * @param def If the reference should be treated as a tuple as well + */ + public void aggregateColsWithReference(double[] c, Builtin fn, IColIndex colIndexes, double[] reference, + boolean def); + + /** + * Allocate a new dictionary, apply the scalar operation on each cell, and return the new dictionary. + * + * @param op The operator. + * @return The new dictionary to return. + */ + public IDictionary applyScalarOp(ScalarOperator op); + + /** + * Allocate a new dictionary with one extra row, apply the scalar operation on each cell, and return the + * new dictionary. + * + * @param op The operator + * @param v0 The new value to put into each cell in the new row + * @param nCol The number of columns in the dictionary + * @return The new dictionary to return. + */ + public IDictionary applyScalarOpAndAppend(ScalarOperator op, double v0, int nCol); + + /** + * Allocate a new dictionary and apply the unary operator on each cell. + * + * @param op The operator. + * @return The new dictionary to return. + */ + public IDictionary applyUnaryOp(UnaryOperator op); + + /** + * Allocate a new dictionary with one extra row and apply the unary operator on each cell. + * + * @param op The operator.
+ * @param v0 The new value to put into each cell in the new row + * @param nCol The number of columns in the dictionary + * @return The new dictionary to return. + */ + public IDictionary applyUnaryOpAndAppend(UnaryOperator op, double v0, int nCol); + + /** + * Allocate a new dictionary, apply the scalar operation on each cell, and return the new dictionary. + * + * outValues[j] = op(this.values[j] + reference[i]) - newReference[i] + * + * @param op The operator to apply to each cell. + * @param reference The reference value to add before the operator. + * @param newReference The reference value to subtract after the operator. + * @return A New Dictionary. + */ + public IDictionary applyScalarOpWithReference(ScalarOperator op, double[] reference, double[] newReference); + + /** + * Allocate a new dictionary, apply the unary operation on each cell, and return the new dictionary. + * + * outValues[j] = op(this.values[j] + reference[i]) - newReference[i] + * + * @param op The unary operator to apply to each cell. + * @param reference The reference value to add before the operator. + * @param newReference The reference value to subtract after the operator. + * @return A New Dictionary. + */ + public IDictionary applyUnaryOpWithReference(UnaryOperator op, double[] reference, double[] newReference); + + /** + * Apply binary row operation on the left side + * + * @param op The operation to this dictionary + * @param v The values to use on the left hand side. + * @param colIndexes The column indexes to consider inside v. + * @return A new dictionary containing the updated values. + */ + public IDictionary binOpLeft(BinaryOperator op, double[] v, IColIndex colIndexes); + + /** + * Apply binary row operation on the left side with one extra row evaluating with zeros. + * + * @param op The operation to this dictionary + * @param v The values to use on the left hand side. + * @param colIndexes The column indexes to consider inside v.
+ * @return A new dictionary containing the updated values. + */ + public IDictionary binOpLeftAndAppend(BinaryOperator op, double[] v, IColIndex colIndexes); + + /** + * Apply the binary operator such that each value is offset by the reference before application. Then put the result + * into the new dictionary, but offset it by the new reference. + * + * outValues[j] = op(v[colIndexes[i]], this.values[j] + reference[i]) - newReference[i] + * + * + * @param op The operation to apply on the dictionary values. + * @param v The values to use on the left side of the operator. + * @param colIndexes The column indexes to use. + * @param reference The reference value to add before operator. + * @param newReference The reference value to subtract after operator. + * @return A new dictionary. + */ + public IDictionary binOpLeftWithReference(BinaryOperator op, double[] v, IColIndex colIndexes, double[] reference, + double[] newReference); + + /** + * Apply binary row operation on the right side. + * + * @param op The operation to this dictionary + * @param v The values to use on the right hand side. + * @param colIndexes The column indexes to consider inside v. + * @return A new dictionary containing the updated values. + */ + public IDictionary binOpRight(BinaryOperator op, double[] v, IColIndex colIndexes); + + /** + * Apply binary row operation on the right side with one extra row evaluating with zeros. + * + * @param op The operation to this dictionary + * @param v The values to use on the right hand side. + * @param colIndexes The column indexes to consider inside v. + * @return A new dictionary containing the updated values. + */ + public IDictionary binOpRightAndAppend(BinaryOperator op, double[] v, IColIndex colIndexes); + + /** + * Apply binary row operation on the right side as with no columns to extract from v. 
+ * + * @param op The operation to this dictionary + * @param v The values to apply on the dictionary (same number of cols as the dictionary) + * @return A new dictionary containing the updated values. + */ + public IDictionary binOpRight(BinaryOperator op, double[] v); + + /** + * Apply the binary operator such that each value is offset by the reference before application. Then put the result + * into the new dictionary, but offset it by the new reference. + * + * outValues[j] = op(this.values[j] + reference[i], v[colIndexes[i]]) - newReference[i] + * + * @param op The operation to apply on the dictionary values. + * @param v The values to use on the right side of the operator. + * @param colIndexes The column indexes to use. + * @param reference The reference value to add before operator. + * @param newReference The reference value to subtract after operator. + * @return A new dictionary. + */ + public IDictionary binOpRightWithReference(BinaryOperator op, double[] v, IColIndex colIndexes, double[] reference, + double[] newReference); + + /** + * Returns a deep clone of the dictionary. + */ + public IDictionary clone(); + + /** + * Write the dictionary to a DataOutput. + * + * @param out the output sink to write the dictionary to. + * @throws IOException if the sink fails. + */ + public void write(DataOutput out) throws IOException; + + /** + * Calculate the space consumption if the dictionary is stored on disk. + * + * @return the long count of bytes to store the dictionary. + */ + public long getExactSizeOnDisk(); + + /** + * Get the dictionary type this dictionary is. + * + * @return The Dictionary type this is. + */ + public DictType getDictType(); + + /** + * Get the number of distinct tuples given that the column group has n columns + * + * @param ncol The number of Columns in the ColumnGroup. + * @return the number of value tuples contained in the dictionary. 
+ */ + public int getNumberOfValues(int ncol); + + /** + * Method used as a pre-aggregate of each tuple in the dictionary, to single double values. + * + * Note if the number of columns is one the actual dictionaries values are simply returned. + * + * + * @param nrColumns The number of columns in the ColGroup to know how to get the values from the dictionary. + * @return a double array containing the row sums from this dictionary. + */ + public double[] sumAllRowsToDouble(int nrColumns); + + /** + * Do exactly the same as the sumAllRowsToDouble but also sum the array given to a extra index in the end of the + * array. + * + * @param defaultTuple The default row to sum in the end index returned. + * @return a double array containing the row sums from this dictionary. + */ + public double[] sumAllRowsToDoubleWithDefault(double[] defaultTuple); + + /** + * Method used as a pre-aggregate of each tuple in the dictionary, to single double values with a reference. + * + * @param reference The reference values to add to each cell. + * @return a double array containing the row sums from this dictionary. + */ + public double[] sumAllRowsToDoubleWithReference(double[] reference); + + /** + * Method used as a pre-aggregate of each tuple in the dictionary, to single double values. + * + * Note if the number of columns is one the actual dictionaries values are simply returned. + * + * @param nrColumns The number of columns in the ColGroup to know how to get the values from the dictionary. + * @return a double array containing the row sums from this dictionary. + */ + public double[] sumAllRowsToDoubleSq(int nrColumns); + + /** + * Method used as a pre-aggregate of each tuple in the dictionary, to single double values. But adds another cell to + * the return with an extra value that is the sum of the given defaultTuple. + * + * @param defaultTuple The default row to sum in the end index returned. + * @return a double array containing the row sums from this dictionary. 
+ */ + public double[] sumAllRowsToDoubleSqWithDefault(double[] defaultTuple); + + /** + * Method used as a pre-aggregate of each tuple in the dictionary, to single double values. + * + * @param reference The reference values to add to each cell. + * @return a double array containing the row sums from this dictionary. + */ + public double[] sumAllRowsToDoubleSqWithReference(double[] reference); + + /** + * Method to product all rows to a column vector. + * + * @param nrColumns The number of columns in the ColGroup to know how to get the values from the dictionary. + * @return A row product + */ + public double[] productAllRowsToDouble(int nrColumns); + + /** + * Method to product all rows to a column vector with a default value added in the end. + * + * @param defaultTuple The default row that aggregate to last cell + * @return A row product + */ + public double[] productAllRowsToDoubleWithDefault(double[] defaultTuple); + + /** + * Method to product all rows to a column vector with a reference values added to all cells, and a reference product + * in the end + * + * @param reference The reference row + * @return A row product + */ + public double[] productAllRowsToDoubleWithReference(double[] reference); + + /** + * Get the column sum of the values contained in the dictionary + * + * @param c The output array allocated to contain all column groups output. + * @param counts The counts of the individual tuples. + * @param colIndexes The columns indexes of the parent column group, this indicate where to put the column sum into + * the c output. + */ + public void colSum(double[] c, int[] counts, IColIndex colIndexes); + + /** + * Get the column sum of the values contained in the dictionary + * + * @param c The output array allocated to contain all column groups output. + * @param counts The counts of the individual tuples. + * @param colIndexes The columns indexes of the parent column group, this indicate where to put the column sum into + * the c output. 
+ */ + public void colSumSq(double[] c, int[] counts, IColIndex colIndexes); + + /** + * Get the column sum of the values contained in the dictionary with an offset reference value added to each cell. + * + * @param c The output array allocated to contain all column groups output. + * @param counts The counts of the individual tuples. + * @param colIndexes The columns indexes of the parent column group, this indicate where to put the column sum into + * the c output. + * @param reference The reference values to add to each cell. + */ + public void colSumSqWithReference(double[] c, int[] counts, IColIndex colIndexes, double[] reference); + + /** + * Get the sum of the values contained in the dictionary + * + * @param counts The counts of the individual tuples + * @param nCol The number of columns contained + * @return The sum scaled by the counts provided. + */ + public double sum(int[] counts, int nCol); + + /** + * Get the square sum of the values contained in the dictionary + * + * @param counts The counts of the individual tuples + * @param nCol The number of columns contained + * @return The square sum scaled by the counts provided. + */ + public double sumSq(int[] counts, int nCol); + + /** + * Get the square sum of the values contained in the dictionary with a reference offset on each value. + * + * @param counts The counts of the individual tuples + * @param reference The reference value + * @return The square sum scaled by the counts and reference. + */ + public double sumSqWithReference(int[] counts, double[] reference); + + /** + * Get a string representation of the dictionary, that considers the layout of the data. + * + * @param colIndexes The number of columns in the dictionary. + * @return A string that is nicer to print. + */ + public String getString(int colIndexes); + + /** + * Modify the dictionary by removing columns not within the index range. + * + * @param idxStart The column index to start at. 
+ * @param idxEnd The column index to end at (not inclusive) + * @param previousNumberOfColumns The number of columns contained in the dictionary. + * @return A dictionary containing the sliced out columns values only. + */ + public IDictionary sliceOutColumnRange(int idxStart, int idxEnd, int previousNumberOfColumns); + + /** + * Detect if the dictionary contains a specific value. + * + * @param pattern The value to search for + * @return true if the value is contained else false. + */ + public boolean containsValue(double pattern); + + /** + * Detect if the dictionary contains a specific value with reference offset. + * + * @param pattern The pattern/ value to search for + * @param reference The reference double array. + * @return true if the value is contained else false. + */ + public boolean containsValueWithReference(double pattern, double[] reference); + + /** + * Calculate the number of non zeros in the dictionary. The number of non zeros should be scaled with the counts + * given. This gives the exact number of non zero values in the parent column group. + * + * @param counts The counts of each dictionary entry + * @param nCol The number of columns in this dictionary + * @return The nonZero count + */ + public long getNumberNonZeros(int[] counts, int nCol); + + /** + * Calculate the number of non zeros in the dictionary. + * + * Each value in the dictionary should be added to the reference value. + * + * The number of non zeros should be scaled with the given counts. + * + * @param counts The Counts of each dict entry. + * @param reference The reference vector. + * @param nRows The number of rows in the input. + * @return The NonZero Count. 
+ */ + public long getNumberNonZerosWithReference(int[] counts, double[] reference, int nRows); + + /** + * Copies and adds the dictionary entry from this dictionary to the target dictionary v. + * + * @param v the target dictionary (dense double array) + * @param fr the from index + * @param to the to index + * @param nCol the number of columns + */ + public void addToEntry(double[] v, int fr, int to, int nCol); + + /** + * Copies and adds the dictionary entry from this dictionary to the target dictionary v, rep times. + * + * @param v the target dictionary (dense double array) + * @param fr the from index + * @param to the to index + * @param nCol the number of columns + * @param rep the number of repetitions to apply (simply multiply, do not loop) + */ + public void addToEntry(double[] v, int fr, int to, int nCol, int rep); + + /** + * Vectorized add to entry, this call helps with a bit of locality for the cache. + * + * @param v The target dictionary (dense double array) + * @param f1 from index 1 + * @param f2 from index 2 + * @param f3 from index 3 + * @param f4 from index 4 + * @param f5 from index 5 + * @param f6 from index 6 + * @param f7 from index 7 + * @param f8 from index 8 + * @param t1 to index 1 + * @param t2 to index 2 + * @param t3 to index 3 + * @param t4 to index 4 + * @param t5 to index 5 + * @param t6 to index 6 + * @param t7 to index 7 + * @param t8 to index 8 + * @param nCol Number of columns in the dictionary + */ + public void addToEntryVectorized(double[] v, int f1, int f2, int f3, int f4, int f5, int f6, int f7, int f8, int t1, + int t2, int t3, int t4, int t5, int t6, int t7, int t8, int nCol); + + /** + * Allocate a new dictionary where the tuple given is subtracted from all tuples in the previous dictionary. + * + * @param tuple a double list representing a tuple; it is given that the tuple width is the same as this dictionary's. + * @return a new instance of dictionary with the tuple subtracted.
+ */ + public IDictionary subtractTuple(double[] tuple); + + /** + * Get this dictionary as a MatrixBlock dictionary. This allows us to use optimized kernels coded elsewhere in the + * system, such as matrix multiplication. + * + * Return null if the matrix is empty. + * + * @param nCol The number of columns contained in this column group. + * @return A Dictionary containing a MatrixBlock. + */ + public MatrixBlockDictionary getMBDict(int nCol); + + /** + * Scale all tuples contained in the dictionary by the scaling factor given in the int list. + * + * @param scaling The amount to multiply the given tuples with + * @param nCol The number of columns contained in this column group. + * @return A New dictionary (since we don't want to modify the underlying dictionary) + */ + public IDictionary scaleTuples(int[] scaling, int nCol); + + /** + * Pre Aggregate values for Right Matrix Multiplication. + * + * @param numVals The number of values contained in this dictionary + * @param colIndexes The column indexes that is associated with the parent column group + * @param aggregateColumns The column to aggregate, this is preprocessed, to find remove consideration for empty + * columns + * @param b The values in the right hand side matrix + * @param cut The number of columns in b. + * @return A new dictionary with the pre aggregated values. + */ + public IDictionary preaggValuesFromDense(final int numVals, final IColIndex colIndexes, + final IColIndex aggregateColumns, final double[] b, final int cut); + + /** + * Make a copy of the values, and replace all values that match pattern with replacement value. If needed add a new + * column index. + * + * @param pattern The value to look for + * @param replace The value to replace the other value with + * @param nCol The number of columns contained in the dictionary. + * @return A new Column Group, reusing the index structure but with new values. 
+ */ + public IDictionary replace(double pattern, double replace, int nCol); + + /** + * Make a copy of the values, and replace all values that match pattern with replacement value. If needed add a new + * column index. With reference such that each value in the dict is considered offset by the values contained in the + * reference. + * + * @param pattern The value to look for + * @param replace The value to replace the other value with + * @param reference The reference tuple to add to all entries when replacing + * @return A new Column Group, reusing the index structure but with new values. + */ + public IDictionary replaceWithReference(double pattern, double replace, double[] reference); + + /** + * Calculate the product of the dictionary weighted by counts. + * + * @param ret The result dense double array (containing one value) + * @param counts The count of individual tuples + * @param nCol Number of columns in the dictionary. + */ + public void product(double[] ret, int[] counts, int nCol); + + /** + * Calculate the product of the dictionary weighted by counts with a default value added . + * + * @param ret The result dense double array (containing one value) + * @param counts The count of individual tuples + * @param def The default tuple + * @param defCount The count of the default tuple + */ + public void productWithDefault(double[] ret, int[] counts, double[] def, int defCount); + + /** + * Calculate the product of the dictionary weighted by counts and offset by reference + * + * @param ret The result dense double array (containing one value) + * @param counts The counts of each entry in the dictionary + * @param reference The reference value. + * @param refCount The number of occurrences of the ref value. + */ + public void productWithReference(double[] ret, int[] counts, double[] reference, int refCount); + + /** + * Calculate the column product of the dictionary weighted by counts. 
+ * + * @param res The result vector to put the result into + * @param counts The weighted count of individual tuples + * @param colIndexes The column indexes. + */ + public void colProduct(double[] res, int[] counts, IColIndex colIndexes); + + /** + * Calculate the column product of the dictionary weighted by counts. + * + * @param res The result vector to put the result into + * @param counts The weighted count of individual tuples + * @param colIndexes The column indexes. + * @param reference The reference value. + */ + public void colProductWithReference(double[] res, int[] counts, IColIndex colIndexes, double[] reference); + + /** + * Central moment function to calculate the central moment of this column group. MUST be on a single column + * dictionary. + * + * @param fn The value function to apply + * @param counts The weight of individual tuples + * @param nRows The number of rows in total of the column group + * @return The central moment Object + */ + public CM_COV_Object centralMoment(ValueFunction fn, int[] counts, int nRows); + + /** + * Central moment function to calculate the central moment of this column group. MUST be on a single column + * dictionary. + * + * @param ret The Central Moment object to be modified and returned + * @param fn The value function to apply + * @param counts The weight of individual tuples + * @param nRows The number of rows in total of the column group + * @return The central moment Object + */ + public CM_COV_Object centralMoment(CM_COV_Object ret, ValueFunction fn, int[] counts, int nRows); + + /** + * Central moment function to calculate the central moment of this column group with a default offset on all missing + * tuples. MUST be on a single column dictionary. 
+ * + * @param fn The value function to apply + * @param counts The weight of individual tuples + * @param def The default values to offset the tuples with + * @param nRows The number of rows in total of the column group + * @return The central moment Object + */ + public CM_COV_Object centralMomentWithDefault(ValueFunction fn, int[] counts, double def, int nRows); + + /** + * Central moment function to calculate the central moment of this column group with a default offset on all missing + * tuples. MUST be on a single column dictionary. + * + * @param ret The Central Moment object to be modified and returned + * @param fn The value function to apply + * @param counts The weight of individual tuples + * @param def The default values to offset the tuples with + * @param nRows The number of rows in total of the column group + * @return The central moment Object + */ + public CM_COV_Object centralMomentWithDefault(CM_COV_Object ret, ValueFunction fn, int[] counts, double def, + int nRows); + + /** + * Central moment function to calculate the central moment of this column group with a reference offset on each + * tuple. MUST be on a single column dictionary. + * + * @param fn The value function to apply + * @param counts The weight of individual tuples + * @param reference The reference values to offset the tuples with + * @param nRows The number of rows in total of the column group + * @return The central moment Object + */ + public CM_COV_Object centralMomentWithReference(ValueFunction fn, int[] counts, double reference, int nRows); + + /** + * Central moment function to calculate the central moment of this column group with a reference offset on each + * tuple. MUST be on a single column dictionary. 
+ * + * @param ret The Central Moment object to be modified and returned + * @param fn The value function to apply + * @param counts The weight of individual tuples + * @param reference The reference values to offset the tuples with + * @param nRows The number of rows in total of the column group + * @return The central moment Object + */ + public CM_COV_Object centralMomentWithReference(CM_COV_Object ret, ValueFunction fn, int[] counts, double reference, + int nRows); + + /** + * Rexpand the dictionary (one hot encode) + * + * @param max the tuple width of the output + * @param ignore If we should ignore zero and negative values + * @param cast If we should cast all double values to whole integer values + * @param nCol The number of columns in the dictionary already (should be 1) + * @return A new dictionary + */ + public IDictionary rexpandCols(int max, boolean ignore, boolean cast, int nCol); + + /** + * Rexpand the dictionary (one hot encode) + * + * @param max the tuple width of the output + * @param ignore If we should ignore zero and negative values + * @param cast If we should cast all double values to whole integer values + * @param reference A reference value to add to all tuples before expanding + * @return A new dictionary + */ + public IDictionary rexpandColsWithReference(int max, boolean ignore, boolean cast, int reference); + + /** + * Get the sparsity of the dictionary. + * + * @return a sparsity between 0 and 1 + */ + public double getSparsity(); + + /** + * Multiply the v value with the dictionary entry at dictIdx and add it to the ret matrix at the columns specified in + * the int array. + * + * @param v Value to multiply + * @param ret Output dense double array location + * @param off Offset into the ret array that the "row" output starts at + * @param dictIdx The dictionary entry to multiply. + * @param cols The columns to multiply into of the output. 
+ */ + public void multiplyScalar(double v, double[] ret, int off, int dictIdx, IColIndex cols); + + /** + * Transpose self matrix multiplication with a scaling factor on each pair of values. + * + * @param counts The scaling factor + * @param rows The row indexes + * @param cols The col indexes + * @param ret The output matrix block + */ + public void TSMMWithScaling(int[] counts, IColIndex rows, IColIndex cols, MatrixBlock ret); + + /** + * Matrix multiplication of dictionaries + * + * Note the left is this, and it is transposed + * + * @param right Right hand side of multiplication + * @param rowsLeft Offset rows on the left + * @param colsRight Offset cols on the right + * @param result The output matrix block + */ + public void MMDict(IDictionary right, IColIndex rowsLeft, IColIndex colsRight, MatrixBlock result); + + /** + * Matrix multiplication of dictionaries left side dense and transposed right side is this. + * + * @param left Dense left side + * @param rowsLeft Offset rows on the left + * @param colsRight Offset cols on the right + * @param result The output matrix block + */ + public void MMDictDense(double[] left, IColIndex rowsLeft, IColIndex colsRight, MatrixBlock result); + + /** + * Matrix multiplication of dictionaries left side sparse and transposed right side is this. 
+ * + * @param left Sparse left side + * @param rowsLeft Offset rows on the left + * @param colsRight Offset cols on the right + * @param result The output matrix block + */ + public void MMDictSparse(SparseBlock left, IColIndex rowsLeft, IColIndex colsRight, MatrixBlock result); + + /** + * Matrix multiplication but allocate output in upper triangle and twice if on diagonal, note this is left + * + * @param right Right side + * @param rowsLeft Offset rows on the left + * @param colsRight Offset cols on the right + * @param result The output matrix block + */ + public void TSMMToUpperTriangle(IDictionary right, IColIndex rowsLeft, IColIndex colsRight, MatrixBlock result); + + /** + * Matrix multiplication but allocate output in upper triangle and twice if on diagonal, note this is right + * + * @param left Dense left side + * @param rowsLeft Offset rows on the left + * @param colsRight Offset cols on the right + * @param result The output matrix block + */ + public void TSMMToUpperTriangleDense(double[] left, IColIndex rowsLeft, IColIndex colsRight, MatrixBlock result); + + /** + * Matrix multiplication but allocate output in upper triangle and twice if on diagonal, note this is right + * + * @param left Sparse left side + * @param rowsLeft Offset rows on the left + * @param colsRight Offset cols on the right + * @param result The output matrix block + */ + public void TSMMToUpperTriangleSparse(SparseBlock left, IColIndex rowsLeft, IColIndex colsRight, MatrixBlock result); + + /** + * Matrix multiplication but allocate output in upper triangle and twice if on diagonal, note this is left + * + * @param right Right side + * @param rowsLeft Offset rows on the left + * @param colsRight Offset cols on the right + * @param scale Scale factor + * @param result The output matrix block + */ + public void TSMMToUpperTriangleScaling(IDictionary right, IColIndex rowsLeft, IColIndex colsRight, int[] scale, + MatrixBlock result); + + /** + * Matrix multiplication but allocate 
output in upper triangle and twice if on diagonal, note this is right + * + * @param left Dense left side + * @param rowsLeft Offset rows on the left + * @param colsRight Offset cols on the right + * @param scale Scale factor + * @param result The output matrix block + */ + public void TSMMToUpperTriangleDenseScaling(double[] left, IColIndex rowsLeft, IColIndex colsRight, int[] scale, + MatrixBlock result); + + /** + * Matrix multiplication but allocate output in upper triangle and twice if on diagonal, note this is right + * + * @param left Sparse left side + * @param rowsLeft Offset rows on the left + * @param colsRight Offset cols on the right + * @param scale Scale factor + * @param result The output matrix block + */ + public void TSMMToUpperTriangleSparseScaling(SparseBlock left, IColIndex rowsLeft, IColIndex colsRight, int[] scale, + MatrixBlock result); + + /** + * Cbind this dictionary with that dictionary + * + * @param that the right hand side dictionary to cbind + * @param nCol the right hand side number of columns + * @return The combined dictionary + */ + public IDictionary cbind(IDictionary that, int nCol); + + /** + * Indicate if this object is equal to another; this takes semantic equivalence into account. + * + * @param o The other object + * @return If they are equal + */ + public boolean equals(Object o); + + /** + * Indicate if the other dictionary is equal to this. + * + * @param o The other object + * @return If it is equal + */ + public boolean equals(IDictionary o); + + /** + * Reorder the elements in the dictionary based on the reorder specification given. + * + * @param reorder The order to move to.
+ * @return A new Dictionary that is reordered. + */ + public IDictionary reorder(int[] reorder); + +} diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/IdentityDictionary.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/IdentityDictionary.java index ca740c1a952..11113e4e765 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/IdentityDictionary.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/IdentityDictionary.java @@ -538,64 +538,64 @@ public void multiplyScalar(double v, double[] ret, int off, int dictIdx, IColInd } @Override - protected void TSMMWithScaling(int[] counts, IColIndex rows, IColIndex cols, MatrixBlock ret) { + public void TSMMWithScaling(int[] counts, IColIndex rows, IColIndex cols, MatrixBlock ret) { getMBDict().TSMMWithScaling(counts, rows, cols, ret); } @Override - protected void MMDict(ADictionary right, IColIndex rowsLeft, IColIndex colsRight, MatrixBlock result) { + public void MMDict(ADictionary right, IColIndex rowsLeft, IColIndex colsRight, MatrixBlock result) { getMBDict().MMDict(right, rowsLeft, colsRight, result); // should replace with add to right to output cells. } @Override - protected void MMDictDense(double[] left, IColIndex rowsLeft, IColIndex colsRight, MatrixBlock result) { + public void MMDictDense(double[] left, IColIndex rowsLeft, IColIndex colsRight, MatrixBlock result) { getMBDict().MMDictDense(left, rowsLeft, colsRight, result); // should replace with add to right to output cells.
} @Override - protected void MMDictSparse(SparseBlock left, IColIndex rowsLeft, IColIndex colsRight, MatrixBlock result) { + public void MMDictSparse(SparseBlock left, IColIndex rowsLeft, IColIndex colsRight, MatrixBlock result) { getMBDict().MMDictSparse(left, rowsLeft, colsRight, result); } @Override - protected void TSMMToUpperTriangle(ADictionary right, IColIndex rowsLeft, IColIndex colsRight, MatrixBlock result) { + public void TSMMToUpperTriangle(ADictionary right, IColIndex rowsLeft, IColIndex colsRight, MatrixBlock result) { getMBDict().TSMMToUpperTriangle(right, rowsLeft, colsRight, result); } @Override - protected void TSMMToUpperTriangleDense(double[] left, IColIndex rowsLeft, IColIndex colsRight, MatrixBlock result) { + public void TSMMToUpperTriangleDense(double[] left, IColIndex rowsLeft, IColIndex colsRight, MatrixBlock result) { getMBDict().TSMMToUpperTriangleDense(left, rowsLeft, colsRight, result); } @Override - protected void TSMMToUpperTriangleSparse(SparseBlock left, IColIndex rowsLeft, IColIndex colsRight, + public void TSMMToUpperTriangleSparse(SparseBlock left, IColIndex rowsLeft, IColIndex colsRight, MatrixBlock result) { getMBDict().TSMMToUpperTriangleSparse(left, rowsLeft, colsRight, result); } @Override - protected void TSMMToUpperTriangleScaling(ADictionary right, IColIndex rowsLeft, IColIndex colsRight, int[] scale, + public void TSMMToUpperTriangleScaling(ADictionary right, IColIndex rowsLeft, IColIndex colsRight, int[] scale, MatrixBlock result) { getMBDict().TSMMToUpperTriangleScaling(right, rowsLeft, colsRight, scale, result); } @Override - protected void TSMMToUpperTriangleDenseScaling(double[] left, IColIndex rowsLeft, IColIndex colsRight, int[] scale, + public void TSMMToUpperTriangleDenseScaling(double[] left, IColIndex rowsLeft, IColIndex colsRight, int[] scale, MatrixBlock result) { getMBDict().TSMMToUpperTriangleDenseScaling(left, rowsLeft, colsRight, scale, result); } @Override - protected void 
TSMMToUpperTriangleSparseScaling(SparseBlock left, IColIndex rowsLeft, IColIndex colsRight, - int[] scale, MatrixBlock result) { + public void TSMMToUpperTriangleSparseScaling(SparseBlock left, IColIndex rowsLeft, IColIndex colsRight, int[] scale, + MatrixBlock result) { getMBDict().TSMMToUpperTriangleSparseScaling(left, rowsLeft, colsRight, scale, result); } @Override - public boolean equals(ADictionary o) { + public boolean equals(IDictionary o) { if(o instanceof IdentityDictionary) return ((IdentityDictionary) o).nRowCol == nRowCol; diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/IdentityDictionarySlice.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/IdentityDictionarySlice.java index 6c072a2e139..d2493dfb184 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/IdentityDictionarySlice.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/IdentityDictionarySlice.java @@ -279,7 +279,7 @@ public double getSparsity() { } @Override - public boolean equals(ADictionary o) { + public boolean equals(IDictionary o) { if(o instanceof IdentityDictionarySlice) { IdentityDictionarySlice os = ((IdentityDictionarySlice) o); return os.nRowCol == nRowCol && os.l == l && os.u == u; diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/MatrixBlockDictionary.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/MatrixBlockDictionary.java index 48d0b62dd50..7a8c7174886 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/MatrixBlockDictionary.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/MatrixBlockDictionary.java @@ -2015,7 +2015,7 @@ private void multiplyScalarDense(double v, double[] ret, int off, int dictIdx, I } @Override - protected void TSMMWithScaling(int[] counts, IColIndex rows, IColIndex cols, MatrixBlock ret) { + public void TSMMWithScaling(int[] counts, 
IColIndex rows, IColIndex cols, MatrixBlock ret) { if(_data.isInSparseFormat()) DictLibMatrixMult.TSMMDictsSparseWithScaling(_data.getSparseBlock(), rows, cols, counts, ret); else @@ -2023,7 +2023,7 @@ protected void TSMMWithScaling(int[] counts, IColIndex rows, IColIndex cols, Mat } @Override - protected void MMDict(ADictionary right, IColIndex rowsLeft, IColIndex colsRight, MatrixBlock result) { + public void MMDict(ADictionary right, IColIndex rowsLeft, IColIndex colsRight, MatrixBlock result) { if(_data.isInSparseFormat()) right.MMDictSparse(_data.getSparseBlock(), rowsLeft, colsRight, result); else @@ -2031,7 +2031,7 @@ protected void MMDict(ADictionary right, IColIndex rowsLeft, IColIndex colsRight } @Override - protected void MMDictDense(double[] left, IColIndex rowsLeft, IColIndex colsRight, MatrixBlock result) { + public void MMDictDense(double[] left, IColIndex rowsLeft, IColIndex colsRight, MatrixBlock result) { if(_data.isInSparseFormat()) DictLibMatrixMult.MMDictsDenseSparse(left, _data.getSparseBlock(), rowsLeft, colsRight, result); else @@ -2039,7 +2039,7 @@ protected void MMDictDense(double[] left, IColIndex rowsLeft, IColIndex colsRigh } @Override - protected void MMDictSparse(SparseBlock left, IColIndex rowsLeft, IColIndex colsRight, MatrixBlock result) { + public void MMDictSparse(SparseBlock left, IColIndex rowsLeft, IColIndex colsRight, MatrixBlock result) { if(_data.isInSparseFormat()) DictLibMatrixMult.MMDictsSparseSparse(left, _data.getSparseBlock(), rowsLeft, colsRight, result); @@ -2048,7 +2048,7 @@ protected void MMDictSparse(SparseBlock left, IColIndex rowsLeft, IColIndex cols } @Override - protected void TSMMToUpperTriangle(ADictionary right, IColIndex rowsLeft, IColIndex colsRight, MatrixBlock result) { + public void TSMMToUpperTriangle(ADictionary right, IColIndex rowsLeft, IColIndex colsRight, MatrixBlock result) { if(_data.isInSparseFormat()) right.TSMMToUpperTriangleSparse(_data.getSparseBlock(), rowsLeft, colsRight, result); else 
@@ -2056,7 +2056,7 @@ protected void TSMMToUpperTriangle(ADictionary right, IColIndex rowsLeft, IColIn } @Override - protected void TSMMToUpperTriangleDense(double[] left, IColIndex rowsLeft, IColIndex colsRight, MatrixBlock result) { + public void TSMMToUpperTriangleDense(double[] left, IColIndex rowsLeft, IColIndex colsRight, MatrixBlock result) { if(_data.isInSparseFormat()) DictLibMatrixMult.MMToUpperTriangleDenseSparse(left, _data.getSparseBlock(), rowsLeft, colsRight, result); else @@ -2064,7 +2064,7 @@ protected void TSMMToUpperTriangleDense(double[] left, IColIndex rowsLeft, IColI } @Override - protected void TSMMToUpperTriangleSparse(SparseBlock left, IColIndex rowsLeft, IColIndex colsRight, + public void TSMMToUpperTriangleSparse(SparseBlock left, IColIndex rowsLeft, IColIndex colsRight, MatrixBlock result) { if(_data.isInSparseFormat()) DictLibMatrixMult.MMToUpperTriangleSparseSparse(left, _data.getSparseBlock(), rowsLeft, colsRight, result); @@ -2073,7 +2073,7 @@ protected void TSMMToUpperTriangleSparse(SparseBlock left, IColIndex rowsLeft, I } @Override - protected void TSMMToUpperTriangleScaling(ADictionary right, IColIndex rowsLeft, IColIndex colsRight, int[] scale, + public void TSMMToUpperTriangleScaling(ADictionary right, IColIndex rowsLeft, IColIndex colsRight, int[] scale, MatrixBlock result) { if(_data.isInSparseFormat()) right.TSMMToUpperTriangleSparseScaling(_data.getSparseBlock(), rowsLeft, colsRight, scale, result); @@ -2082,7 +2082,7 @@ protected void TSMMToUpperTriangleScaling(ADictionary right, IColIndex rowsLeft, } @Override - protected void TSMMToUpperTriangleDenseScaling(double[] left, IColIndex rowsLeft, IColIndex colsRight, int[] scale, + public void TSMMToUpperTriangleDenseScaling(double[] left, IColIndex rowsLeft, IColIndex colsRight, int[] scale, MatrixBlock result) { if(_data.isInSparseFormat()) DictLibMatrixMult.TSMMToUpperTriangleDenseSparseScaling(left, _data.getSparseBlock(), rowsLeft, colsRight, @@ -2093,8 +2093,8 @@ 
protected void TSMMToUpperTriangleDenseScaling(double[] left, IColIndex rowsLeft } @Override - protected void TSMMToUpperTriangleSparseScaling(SparseBlock left, IColIndex rowsLeft, IColIndex colsRight, - int[] scale, MatrixBlock result) { + public void TSMMToUpperTriangleSparseScaling(SparseBlock left, IColIndex rowsLeft, IColIndex colsRight, int[] scale, + MatrixBlock result) { if(_data.isInSparseFormat()) DictLibMatrixMult.TSMMToUpperTriangleSparseSparseScaling(left, _data.getSparseBlock(), rowsLeft, colsRight, scale, result); @@ -2104,7 +2104,7 @@ protected void TSMMToUpperTriangleSparseScaling(SparseBlock left, IColIndex rows } @Override - public boolean equals(ADictionary o) { + public boolean equals(IDictionary o) { if(o instanceof MatrixBlockDictionary) return _data.equals(((MatrixBlockDictionary) o)._data); else if(o instanceof Dictionary) { diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/PlaceHolderDictionary.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/PlaceHolderDictionary.java new file mode 100644 index 00000000000..3b03ef3e080 --- /dev/null +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/PlaceHolderDictionary.java @@ -0,0 +1,474 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysds.runtime.compress.colgroup.dictionary; + +import java.io.DataOutput; +import java.io.IOException; + +import org.apache.sysds.runtime.compress.colgroup.indexes.IColIndex; +import org.apache.sysds.runtime.data.SparseBlock; +import org.apache.sysds.runtime.functionobjects.Builtin; +import org.apache.sysds.runtime.functionobjects.ValueFunction; +import org.apache.sysds.runtime.instructions.cp.CM_COV_Object; +import org.apache.sysds.runtime.matrix.data.MatrixBlock; +import org.apache.sysds.runtime.matrix.operators.BinaryOperator; +import org.apache.sysds.runtime.matrix.operators.ScalarOperator; +import org.apache.sysds.runtime.matrix.operators.UnaryOperator; + +/** + * This dictionary is a PlaceHolder Dictionary that contains no logic and no data. + * + * It serves as a low-overhead writable dictionary for ColumnGroups that can be written only containing their index
+ */ +public class PlaceHolderDictionary extends ADictionary { + + @Override + public double[] getValues() { + throw new UnsupportedOperationException("Unimplemented method 'getValues'"); + } + + @Override + public double getValue(int i) { + throw new UnsupportedOperationException("Unimplemented method 'getValue'"); + } + + @Override + public double getValue(int r, int col, int nCol) { + throw new UnsupportedOperationException("Unimplemented method 'getValue'"); + } + + @Override + public long getInMemorySize() { + throw new UnsupportedOperationException("Unimplemented method 'getInMemorySize'"); + } + + @Override + public double aggregate(double init, Builtin fn) { + throw new UnsupportedOperationException("Unimplemented method 'aggregate'"); + } + + @Override + public double aggregateWithReference(double init, Builtin fn, double[] reference, boolean def) { + throw new UnsupportedOperationException("Unimplemented method 'aggregateWithReference'"); + } + + @Override + public double[] aggregateRows(Builtin fn, int nCol) { + throw new UnsupportedOperationException("Unimplemented method 'aggregateRows'"); + } + + @Override + public double[] aggregateRowsWithDefault(Builtin fn, double[] defaultTuple) { + throw new UnsupportedOperationException("Unimplemented method 'aggregateRowsWithDefault'"); + } + + @Override + public double[] aggregateRowsWithReference(Builtin fn, double[] reference) { + throw new UnsupportedOperationException("Unimplemented method 'aggregateRowsWithReference'"); + } + + @Override + public void aggregateCols(double[] c, Builtin fn, IColIndex colIndexes) { + throw new UnsupportedOperationException("Unimplemented method 'aggregateCols'"); + } + + @Override + public void aggregateColsWithReference(double[] c, Builtin fn, IColIndex colIndexes, double[] reference, + boolean def) { + throw new UnsupportedOperationException("Unimplemented method 'aggregateColsWithReference'"); + } + + @Override + public ADictionary applyScalarOp(ScalarOperator op) { + 
throw new UnsupportedOperationException("Unimplemented method 'applyScalarOp'"); + } + + @Override + public ADictionary applyScalarOpAndAppend(ScalarOperator op, double v0, int nCol) { + throw new UnsupportedOperationException("Unimplemented method 'applyScalarOpAndAppend'"); + } + + @Override + public ADictionary applyUnaryOp(UnaryOperator op) { + throw new UnsupportedOperationException("Unimplemented method 'applyUnaryOp'"); + } + + @Override + public ADictionary applyUnaryOpAndAppend(UnaryOperator op, double v0, int nCol) { + throw new UnsupportedOperationException("Unimplemented method 'applyUnaryOpAndAppend'"); + } + + @Override + public ADictionary applyScalarOpWithReference(ScalarOperator op, double[] reference, double[] newReference) { + throw new UnsupportedOperationException("Unimplemented method 'applyScalarOpWithReference'"); + } + + @Override + public ADictionary applyUnaryOpWithReference(UnaryOperator op, double[] reference, double[] newReference) { + throw new UnsupportedOperationException("Unimplemented method 'applyUnaryOpWithReference'"); + } + + @Override + public ADictionary binOpLeft(BinaryOperator op, double[] v, IColIndex colIndexes) { + throw new UnsupportedOperationException("Unimplemented method 'binOpLeft'"); + } + + @Override + public ADictionary binOpLeftAndAppend(BinaryOperator op, double[] v, IColIndex colIndexes) { + throw new UnsupportedOperationException("Unimplemented method 'binOpLeftAndAppend'"); + } + + @Override + public ADictionary binOpLeftWithReference(BinaryOperator op, double[] v, IColIndex colIndexes, double[] reference, + double[] newReference) { + throw new UnsupportedOperationException("Unimplemented method 'binOpLeftWithReference'"); + } + + @Override + public ADictionary binOpRight(BinaryOperator op, double[] v, IColIndex colIndexes) { + throw new UnsupportedOperationException("Unimplemented method 'binOpRight'"); + } + + @Override + public ADictionary binOpRightAndAppend(BinaryOperator op, double[] v, IColIndex 
colIndexes) { + throw new UnsupportedOperationException("Unimplemented method 'binOpRightAndAppend'"); + } + + @Override + public ADictionary binOpRight(BinaryOperator op, double[] v) { + throw new UnsupportedOperationException("Unimplemented method 'binOpRight'"); + } + + @Override + public ADictionary binOpRightWithReference(BinaryOperator op, double[] v, IColIndex colIndexes, double[] reference, + double[] newReference) { + throw new UnsupportedOperationException("Unimplemented method 'binOpRightWithReference'"); + } + + @Override + public void write(DataOutput out) throws IOException { + throw new UnsupportedOperationException("Unimplemented method 'write'"); + } + + @Override + public long getExactSizeOnDisk() { + throw new UnsupportedOperationException("Unimplemented method 'getExactSizeOnDisk'"); + } + + @Override + public DictType getDictType() { + throw new UnsupportedOperationException("Unimplemented method 'getDictType'"); + } + + @Override + public int getNumberOfValues(int ncol) { + throw new UnsupportedOperationException("Unimplemented method 'getNumberOfValues'"); + } + + @Override + public double[] sumAllRowsToDouble(int nrColumns) { + throw new UnsupportedOperationException("Unimplemented method 'sumAllRowsToDouble'"); + } + + @Override + public double[] sumAllRowsToDoubleWithDefault(double[] defaultTuple) { + throw new UnsupportedOperationException("Unimplemented method 'sumAllRowsToDoubleWithDefault'"); + } + + @Override + public double[] sumAllRowsToDoubleWithReference(double[] reference) { + throw new UnsupportedOperationException("Unimplemented method 'sumAllRowsToDoubleWithReference'"); + } + + @Override + public double[] sumAllRowsToDoubleSq(int nrColumns) { + throw new UnsupportedOperationException("Unimplemented method 'sumAllRowsToDoubleSq'"); + } + + @Override + public double[] sumAllRowsToDoubleSqWithDefault(double[] defaultTuple) { + throw new UnsupportedOperationException("Unimplemented method 'sumAllRowsToDoubleSqWithDefault'"); + } 
+ + @Override + public double[] sumAllRowsToDoubleSqWithReference(double[] reference) { + throw new UnsupportedOperationException("Unimplemented method 'sumAllRowsToDoubleSqWithReference'"); + } + + @Override + public double[] productAllRowsToDouble(int nrColumns) { + throw new UnsupportedOperationException("Unimplemented method 'productAllRowsToDouble'"); + } + + @Override + public double[] productAllRowsToDoubleWithDefault(double[] defaultTuple) { + throw new UnsupportedOperationException("Unimplemented method 'productAllRowsToDoubleWithDefault'"); + } + + @Override + public double[] productAllRowsToDoubleWithReference(double[] reference) { + throw new UnsupportedOperationException("Unimplemented method 'productAllRowsToDoubleWithReference'"); + } + + @Override + public void colSum(double[] c, int[] counts, IColIndex colIndexes) { + throw new UnsupportedOperationException("Unimplemented method 'colSum'"); + } + + @Override + public void colSumSq(double[] c, int[] counts, IColIndex colIndexes) { + throw new UnsupportedOperationException("Unimplemented method 'colSumSq'"); + } + + @Override + public void colSumSqWithReference(double[] c, int[] counts, IColIndex colIndexes, double[] reference) { + throw new UnsupportedOperationException("Unimplemented method 'colSumSqWithReference'"); + } + + @Override + public double sum(int[] counts, int nCol) { + throw new UnsupportedOperationException("Unimplemented method 'sum'"); + } + + @Override + public double sumSq(int[] counts, int nCol) { + throw new UnsupportedOperationException("Unimplemented method 'sumSq'"); + } + + @Override + public double sumSqWithReference(int[] counts, double[] reference) { + throw new UnsupportedOperationException("Unimplemented method 'sumSqWithReference'"); + } + + @Override + public String getString(int colIndexes) { + throw new UnsupportedOperationException("Unimplemented method 'getString'"); + } + + @Override + public ADictionary sliceOutColumnRange(int idxStart, int idxEnd, int 
previousNumberOfColumns) { + throw new UnsupportedOperationException("Unimplemented method 'sliceOutColumnRange'"); + } + + @Override + public boolean containsValue(double pattern) { + throw new UnsupportedOperationException("Unimplemented method 'containsValue'"); + } + + @Override + public boolean containsValueWithReference(double pattern, double[] reference) { + throw new UnsupportedOperationException("Unimplemented method 'containsValueWithReference'"); + } + + @Override + public long getNumberNonZeros(int[] counts, int nCol) { + throw new UnsupportedOperationException("Unimplemented method 'getNumberNonZeros'"); + } + + @Override + public long getNumberNonZerosWithReference(int[] counts, double[] reference, int nRows) { + throw new UnsupportedOperationException("Unimplemented method 'getNumberNonZerosWithReference'"); + } + + @Override + public void addToEntry(double[] v, int fr, int to, int nCol) { + throw new UnsupportedOperationException("Unimplemented method 'addToEntry'"); + } + + @Override + public void addToEntry(double[] v, int fr, int to, int nCol, int rep) { + throw new UnsupportedOperationException("Unimplemented method 'addToEntry'"); + } + + @Override + public void addToEntryVectorized(double[] v, int f1, int f2, int f3, int f4, int f5, int f6, int f7, int f8, int t1, + int t2, int t3, int t4, int t5, int t6, int t7, int t8, int nCol) { + throw new UnsupportedOperationException("Unimplemented method 'addToEntryVectorized'"); + } + + @Override + public ADictionary subtractTuple(double[] tuple) { + throw new UnsupportedOperationException("Unimplemented method 'subtractTuple'"); + } + + @Override + public MatrixBlockDictionary getMBDict(int nCol) { + throw new UnsupportedOperationException("Unimplemented method 'getMBDict'"); + } + + @Override + public ADictionary scaleTuples(int[] scaling, int nCol) { + throw new UnsupportedOperationException("Unimplemented method 'scaleTuples'"); + } + + @Override + public ADictionary preaggValuesFromDense(int 
numVals, IColIndex colIndexes, IColIndex aggregateColumns, double[] b, + int cut) { + throw new UnsupportedOperationException("Unimplemented method 'preaggValuesFromDense'"); + } + + @Override + public ADictionary replace(double pattern, double replace, int nCol) { + throw new UnsupportedOperationException("Unimplemented method 'replace'"); + } + + @Override + public ADictionary replaceWithReference(double pattern, double replace, double[] reference) { + throw new UnsupportedOperationException("Unimplemented method 'replaceWithReference'"); + } + + @Override + public void product(double[] ret, int[] counts, int nCol) { + throw new UnsupportedOperationException("Unimplemented method 'product'"); + } + + @Override + public void productWithDefault(double[] ret, int[] counts, double[] def, int defCount) { + throw new UnsupportedOperationException("Unimplemented method 'productWithDefault'"); + } + + @Override + public void productWithReference(double[] ret, int[] counts, double[] reference, int refCount) { + throw new UnsupportedOperationException("Unimplemented method 'productWithReference'"); + } + + @Override + public void colProduct(double[] res, int[] counts, IColIndex colIndexes) { + throw new UnsupportedOperationException("Unimplemented method 'colProduct'"); + } + + @Override + public void colProductWithReference(double[] res, int[] counts, IColIndex colIndexes, double[] reference) { + throw new UnsupportedOperationException("Unimplemented method 'colProductWithReference'"); + } + + @Override + public CM_COV_Object centralMoment(CM_COV_Object ret, ValueFunction fn, int[] counts, int nRows) { + throw new UnsupportedOperationException("Unimplemented method 'centralMoment'"); + } + + @Override + public CM_COV_Object centralMomentWithDefault(CM_COV_Object ret, ValueFunction fn, int[] counts, double def, + int nRows) { + throw new UnsupportedOperationException("Unimplemented method 'centralMomentWithDefault'"); + } + + @Override + public CM_COV_Object 
centralMomentWithReference(CM_COV_Object ret, ValueFunction fn, int[] counts, double reference, + int nRows) { + throw new UnsupportedOperationException("Unimplemented method 'centralMomentWithReference'"); + } + + @Override + public ADictionary rexpandCols(int max, boolean ignore, boolean cast, int nCol) { + throw new UnsupportedOperationException("Unimplemented method 'rexpandCols'"); + } + + @Override + public ADictionary rexpandColsWithReference(int max, boolean ignore, boolean cast, int reference) { + throw new UnsupportedOperationException("Unimplemented method 'rexpandColsWithReference'"); + } + + @Override + public double getSparsity() { + throw new UnsupportedOperationException("Unimplemented method 'getSparsity'"); + } + + @Override + public void multiplyScalar(double v, double[] ret, int off, int dictIdx, IColIndex cols) { + throw new UnsupportedOperationException("Unimplemented method 'multiplyScalar'"); + } + + @Override + public void TSMMWithScaling(int[] counts, IColIndex rows, IColIndex cols, MatrixBlock ret) { + throw new UnsupportedOperationException("Unimplemented method 'TSMMWithScaling'"); + } + + @Override + public void MMDict(ADictionary right, IColIndex rowsLeft, IColIndex colsRight, MatrixBlock result) { + throw new UnsupportedOperationException("Unimplemented method 'MMDict'"); + } + + @Override + public void MMDictDense(double[] left, IColIndex rowsLeft, IColIndex colsRight, MatrixBlock result) { + throw new UnsupportedOperationException("Unimplemented method 'MMDictDense'"); + } + + @Override + public void MMDictSparse(SparseBlock left, IColIndex rowsLeft, IColIndex colsRight, MatrixBlock result) { + throw new UnsupportedOperationException("Unimplemented method 'MMDictSparse'"); + } + + @Override + public void TSMMToUpperTriangle(ADictionary right, IColIndex rowsLeft, IColIndex colsRight, MatrixBlock result) { + throw new UnsupportedOperationException("Unimplemented method 'TSMMToUpperTriangle'"); + } + + @Override + public void 
TSMMToUpperTriangleDense(double[] left, IColIndex rowsLeft, IColIndex colsRight, MatrixBlock result) { + throw new UnsupportedOperationException("Unimplemented method 'TSMMToUpperTriangleDense'"); + } + + @Override + public void TSMMToUpperTriangleSparse(SparseBlock left, IColIndex rowsLeft, IColIndex colsRight, + MatrixBlock result) { + throw new UnsupportedOperationException("Unimplemented method 'TSMMToUpperTriangleSparse'"); + } + + @Override + public void TSMMToUpperTriangleScaling(ADictionary right, IColIndex rowsLeft, IColIndex colsRight, int[] scale, + MatrixBlock result) { + throw new UnsupportedOperationException("Unimplemented method 'TSMMToUpperTriangleScaling'"); + } + + @Override + public void TSMMToUpperTriangleDenseScaling(double[] left, IColIndex rowsLeft, IColIndex colsRight, int[] scale, + MatrixBlock result) { + throw new UnsupportedOperationException("Unimplemented method 'TSMMToUpperTriangleDenseScaling'"); + } + + @Override + public void TSMMToUpperTriangleSparseScaling(SparseBlock left, IColIndex rowsLeft, IColIndex colsRight, int[] scale, + MatrixBlock result) { + throw new UnsupportedOperationException("Unimplemented method 'TSMMToUpperTriangleSparseScaling'"); + } + + @Override + public ADictionary cbind(ADictionary that, int nCol) { + throw new UnsupportedOperationException("Unimplemented method 'cbind'"); + } + + @Override + public boolean equals(IDictionary o) { + throw new UnsupportedOperationException("Unimplemented method 'equals'"); + } + + @Override + public ADictionary reorder(int[] reorder) { + throw new UnsupportedOperationException("Unimplemented method 'reorder'"); + } + + @Override + public ADictionary clone() { + throw new UnsupportedOperationException("Unimplemented method 'clone'"); + } + +} diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/QDictionary.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/QDictionary.java index 3c6f2f78e47..5f8afae3cee 100644 --- 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/QDictionary.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/QDictionary.java @@ -546,61 +546,61 @@ public void multiplyScalar(double v, double[] ret, int off, int dictIdx, IColInd } @Override - protected void TSMMWithScaling(int[] counts, IColIndex rows, IColIndex cols, MatrixBlock ret) { + public void TSMMWithScaling(int[] counts, IColIndex rows, IColIndex cols, MatrixBlock ret) { throw new NotImplementedException(); } @Override - protected void MMDict(ADictionary right, IColIndex rowsLeft, IColIndex colsRight, MatrixBlock result) { + public void MMDict(ADictionary right, IColIndex rowsLeft, IColIndex colsRight, MatrixBlock result) { throw new NotImplementedException(); } @Override - protected void MMDictDense(double[] left, IColIndex rowsLeft, IColIndex colsRight, MatrixBlock result) { + public void MMDictDense(double[] left, IColIndex rowsLeft, IColIndex colsRight, MatrixBlock result) { throw new NotImplementedException(); } @Override - protected void MMDictSparse(SparseBlock left, IColIndex rowsLeft, IColIndex colsRight, MatrixBlock result) { + public void MMDictSparse(SparseBlock left, IColIndex rowsLeft, IColIndex colsRight, MatrixBlock result) { throw new NotImplementedException(); } @Override - protected void TSMMToUpperTriangle(ADictionary right, IColIndex rowsLeft, IColIndex colsRight, MatrixBlock result) { + public void TSMMToUpperTriangle(ADictionary right, IColIndex rowsLeft, IColIndex colsRight, MatrixBlock result) { throw new NotImplementedException(); } @Override - protected void TSMMToUpperTriangleDense(double[] left, IColIndex rowsLeft, IColIndex colsRight, MatrixBlock result) { + public void TSMMToUpperTriangleDense(double[] left, IColIndex rowsLeft, IColIndex colsRight, MatrixBlock result) { throw new NotImplementedException(); } @Override - protected void TSMMToUpperTriangleSparse(SparseBlock left, IColIndex rowsLeft, IColIndex colsRight, + 
public void TSMMToUpperTriangleSparse(SparseBlock left, IColIndex rowsLeft, IColIndex colsRight, MatrixBlock result) { throw new NotImplementedException(); } @Override - protected void TSMMToUpperTriangleScaling(ADictionary right, IColIndex rowsLeft, IColIndex colsRight, int[] scale, + public void TSMMToUpperTriangleScaling(ADictionary right, IColIndex rowsLeft, IColIndex colsRight, int[] scale, MatrixBlock result) { throw new NotImplementedException(); } @Override - protected void TSMMToUpperTriangleDenseScaling(double[] left, IColIndex rowsLeft, IColIndex colsRight, int[] scale, + public void TSMMToUpperTriangleDenseScaling(double[] left, IColIndex rowsLeft, IColIndex colsRight, int[] scale, MatrixBlock result) { throw new NotImplementedException(); } @Override - protected void TSMMToUpperTriangleSparseScaling(SparseBlock left, IColIndex rowsLeft, IColIndex colsRight, - int[] scale, MatrixBlock result) { + public void TSMMToUpperTriangleSparseScaling(SparseBlock left, IColIndex rowsLeft, IColIndex colsRight, int[] scale, + MatrixBlock result) { throw new NotImplementedException(); } @Override - public boolean equals(ADictionary o) { + public boolean equals(IDictionary o) { throw new NotImplementedException(); } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/AMapToData.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/AMapToData.java index 9ff97a91c5b..76cfea08823 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/AMapToData.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/AMapToData.java @@ -28,8 +28,8 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.sysds.runtime.compress.colgroup.IMapToDataGroup; -import org.apache.sysds.runtime.compress.colgroup.dictionary.ADictionary; import org.apache.sysds.runtime.compress.colgroup.dictionary.Dictionary; +import 
org.apache.sysds.runtime.compress.colgroup.dictionary.IDictionary; import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory.MAP_TYPE; import org.apache.sysds.runtime.compress.colgroup.offset.AOffset; import org.apache.sysds.runtime.compress.colgroup.offset.AOffsetIterator; @@ -413,7 +413,7 @@ public final int[] getCounts() { * @param ret The output dictionary to aggregate into * @param nCol The number of columns */ - public final void preAggregateDDC_DDC(AMapToData tm, ADictionary td, Dictionary ret, int nCol) { + public final void preAggregateDDC_DDC(AMapToData tm, IDictionary td, Dictionary ret, int nCol) { if(nCol == 1) preAggregateDDC_DDCSingleCol(tm, td.getValues(), ret.getValues()); else @@ -441,7 +441,7 @@ protected void preAggregateDDC_DDCSingleCol(AMapToData tm, double[] td, double[] * @param ret The output dictionary to aggregate into * @param nCol The number of columns */ - protected void preAggregateDDC_DDCMultiCol(AMapToData tm, ADictionary td, double[] v, int nCol) { + protected void preAggregateDDC_DDCMultiCol(AMapToData tm, IDictionary td, double[] v, int nCol) { final int sz = size(); final int h = sz % 8; for(int r = 0; r < h; r++) @@ -464,7 +464,7 @@ protected void preAggregateDDC_DDCMultiCol(AMapToData tm, ADictionary td, double * @param ret The output dictionary to aggregate into * @param nCol The number of columns in output and td dictionary */ - public final void preAggregateDDC_SDCZ(AMapToData tm, ADictionary td, AOffset tof, Dictionary ret, int nCol) { + public final void preAggregateDDC_SDCZ(AMapToData tm, IDictionary td, AOffset tof, Dictionary ret, int nCol) { if(nCol == 1) preAggregateDDC_SDCZSingleCol(tm, td.getValues(), tof, ret.getValues()); else @@ -485,7 +485,7 @@ public void preAggregateDDC_SDCZSingleCol(AMapToData tm, double[] td, AOffset to v[to] += td[fr]; } - public void preAggregateDDC_SDCZMultiCol(AMapToData tm, ADictionary td, AOffset tof, double[] v, int nCol) { + public void 
preAggregateDDC_SDCZMultiCol(AMapToData tm, IDictionary td, AOffset tof, double[] v, int nCol) { final AOffsetIterator it = tof.getOffsetIterator(); final int size = tm.size() - 1; int i = (size > 8) ? preAggregateDDC_SDCZMultiCol_vect(tm, td, v, nCol, it, size) : 0; @@ -502,7 +502,7 @@ public void preAggregateDDC_SDCZMultiCol(AMapToData tm, ADictionary td, AOffset td.addToEntry(v, fr, to, nCol); } - private int preAggregateDDC_SDCZMultiCol_vect(AMapToData tm, ADictionary td, double[] v, int nCol, + private int preAggregateDDC_SDCZMultiCol_vect(AMapToData tm, IDictionary td, double[] v, int nCol, AOffsetIterator it, int size) { final int h = size % 8; int i = 0; @@ -538,7 +538,7 @@ private int preAggregateDDC_SDCZMultiCol_vect(AMapToData tm, ADictionary td, dou * @param ret The output dictionary to aggregate into * @param nCol The number of columns in output and td dictionary */ - public final void preAggregateSDCZ_DDC(AMapToData tm, ADictionary td, AOffset of, Dictionary ret, int nCol) { + public final void preAggregateSDCZ_DDC(AMapToData tm, IDictionary td, AOffset of, Dictionary ret, int nCol) { if(nCol == 1) preAggregateSDCZ_DDCSingleCol(tm, td.getValues(), of, ret.getValues()); else @@ -556,7 +556,7 @@ protected void preAggregateSDCZ_DDCSingleCol(AMapToData tm, double[] td, AOffset v[getIndex(size)] += td[tm.getIndex(tv)]; } - protected void preAggregateSDCZ_DDCMultiCol(AMapToData tm, ADictionary td, AOffset of, double[] v, int nCol) { + protected void preAggregateSDCZ_DDCMultiCol(AMapToData tm, IDictionary td, AOffset of, double[] v, int nCol) { final AOffsetIterator itThis = of.getOffsetIterator(); final int size = size() - 1; int i = (size > 8) ? 
preAggregateSDCZ_DDCMultiCol_vect(tm, td, v, nCol, itThis, size) : 0; @@ -569,7 +569,7 @@ protected void preAggregateSDCZ_DDCMultiCol(AMapToData tm, ADictionary td, AOffs td.addToEntry(v, tm.getIndex(tv), getIndex(size), nCol); } - private int preAggregateSDCZ_DDCMultiCol_vect(AMapToData tm, ADictionary td, double[] v, int nCol, + private int preAggregateSDCZ_DDCMultiCol_vect(AMapToData tm, IDictionary td, double[] v, int nCol, AOffsetIterator it, int size) { final int h = size % 8; int i = 0; @@ -596,7 +596,7 @@ private int preAggregateSDCZ_DDCMultiCol_vect(AMapToData tm, ADictionary td, dou return i; } - public final void preAggregateSDCZ_SDCZ(AMapToData tm, ADictionary td, AOffset tof, AOffset of, Dictionary ret, + public final void preAggregateSDCZ_SDCZ(AMapToData tm, IDictionary td, AOffset tof, AOffset of, Dictionary ret, int nCol) { if(nCol == 1) preAggregateSDCZ_SDCZSingleCol(tm, td.getValues(), tof, of, ret.getValues()); @@ -639,7 +639,7 @@ else if(tv < v) { preAggregateSDCZ_SDCZMultiCol_tail(tm, this, Dictionary.create(td), dv, 1, itThat, itThis, tSize, size, i, j); } - protected void preAggregateSDCZ_SDCZMultiCol(AMapToData tm, ADictionary td, AOffset tof, AOffset of, double[] dv, + protected void preAggregateSDCZ_SDCZMultiCol(AMapToData tm, IDictionary td, AOffset tof, AOffset of, double[] dv, int nCol) { final AOffsetIterator itThat = tof.getOffsetIterator(); final AOffsetIterator itThis = of.getOffsetIterator(); @@ -673,7 +673,7 @@ else if(tv < v) { preAggregateSDCZ_SDCZMultiCol_tail(tm, this, td, dv, nCol, itThat, itThis, tSize, size, i, j); } - protected static void preAggregateSDCZ_SDCZMultiCol_tail(AMapToData tm, AMapToData m, ADictionary td, double[] dv, + protected static void preAggregateSDCZ_SDCZMultiCol_tail(AMapToData tm, AMapToData m, IDictionary td, double[] dv, int nCol, AOffsetIterator itThat, AOffsetIterator itThis, int tSize, int size, int i, int j) { int tv = itThat.value(); int v = itThis.value(); @@ -709,7 +709,7 @@ protected static 
void preAggregateSDCZ_SDCZMultiCol_tail(AMapToData tm, AMapToDa } } - public void preAggregateRLE_DDC(int[] ptr, char[] data, ADictionary td, Dictionary ret, int nCol) { + public void preAggregateRLE_DDC(int[] ptr, char[] data, IDictionary td, Dictionary ret, int nCol) { if(nCol == 1) preAggregateRLE_DDCSingleCol(ptr, data, td.getValues(), ret.getValues()); else @@ -729,7 +729,7 @@ protected void preAggregateRLE_DDCSingleCol(int[] ptr, char[] data, double[] td, } } - protected void preAggregateRLE_DDCMultiCol(int[] ptr, char[] data, ADictionary td, double[] ret, int nCol) { + protected void preAggregateRLE_DDCMultiCol(int[] ptr, char[] data, IDictionary td, double[] ret, int nCol) { // find each index in RLE, and aggregate into those. for(int k = 0; k < ret.length / nCol; k++) { // for each run in RLE final int blen = ptr[k + 1]; @@ -742,7 +742,7 @@ protected void preAggregateRLE_DDCMultiCol(int[] ptr, char[] data, ADictionary t } } - public void preAggregateDDC_RLE(int[] ptr, char[] data, ADictionary td, Dictionary ret, int nCol) { + public void preAggregateDDC_RLE(int[] ptr, char[] data, IDictionary td, Dictionary ret, int nCol) { // find each index in RLE, and aggregate into those. 
double[] v = ret.getValues(); for(int k = 0; k < ptr.length - 1; k++) { // for each run in RLE diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToBit.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToBit.java index 8aec0f8ef36..1f40fe5e052 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToBit.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToBit.java @@ -27,7 +27,7 @@ import org.apache.commons.lang3.NotImplementedException; import org.apache.sysds.runtime.compress.colgroup.IMapToDataGroup; -import org.apache.sysds.runtime.compress.colgroup.dictionary.ADictionary; +import org.apache.sysds.runtime.compress.colgroup.dictionary.IDictionary; import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory.MAP_TYPE; import org.apache.sysds.runtime.frame.data.columns.BitSetArray; import org.apache.sysds.utils.MemoryEstimates; @@ -198,14 +198,14 @@ private void preAggregateDDCSingleColBitBit(MapToBit tmb, double[] td, double[] } @Override - public void preAggregateDDC_DDCMultiCol(AMapToData tm, ADictionary td, double[] v, int nCol) { + public void preAggregateDDC_DDCMultiCol(AMapToData tm, IDictionary td, double[] v, int nCol) { if(tm instanceof MapToBit) preAggregateDDCMultiColBitBit((MapToBit) tm, td, v, nCol); else // fallback super.preAggregateDDC_DDCMultiCol(tm, td, v, nCol); } - private void preAggregateDDCMultiColBitBit(MapToBit tmb, ADictionary td, double[] v, int nCol) { + private void preAggregateDDCMultiColBitBit(MapToBit tmb, IDictionary td, double[] v, int nCol) { JoinBitSets j = new JoinBitSets(tmb, this, _size); diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToZero.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToZero.java index d15cf099530..6b384a7a0a3 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToZero.java +++ 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToZero.java @@ -26,7 +26,7 @@ import org.apache.commons.lang3.NotImplementedException; import org.apache.sysds.runtime.compress.colgroup.IMapToDataGroup; -import org.apache.sysds.runtime.compress.colgroup.dictionary.ADictionary; +import org.apache.sysds.runtime.compress.colgroup.dictionary.IDictionary; import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory.MAP_TYPE; public class MapToZero extends AMapToData { @@ -120,7 +120,7 @@ public void preAggregateDDC_DDCSingleCol(AMapToData tm, double[] td, double[] v) } @Override - public void preAggregateDDC_DDCMultiCol(AMapToData tm, ADictionary td, double[] v, int nCol) { + public void preAggregateDDC_DDCMultiCol(AMapToData tm, IDictionary td, double[] v, int nCol) { final int sz = size(); for(int r = 0; r < sz; r++) td.addToEntry(v, tm.getIndex(r), 0, nCol); diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/scheme/DDCScheme.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/scheme/DDCScheme.java index 2401946cae2..6c05bdf30aa 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/scheme/DDCScheme.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/scheme/DDCScheme.java @@ -20,13 +20,13 @@ package org.apache.sysds.runtime.compress.colgroup.scheme; import org.apache.sysds.runtime.compress.colgroup.ColGroupDDC; -import org.apache.sysds.runtime.compress.colgroup.dictionary.ADictionary; +import org.apache.sysds.runtime.compress.colgroup.dictionary.IDictionary; import org.apache.sysds.runtime.compress.colgroup.indexes.IColIndex; public abstract class DDCScheme extends ACLAScheme { // TODO make it into a soft reference - protected ADictionary lastDict; + protected IDictionary lastDict; protected DDCScheme(IColIndex cols) { super(cols); diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/scheme/SDCScheme.java 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/scheme/SDCScheme.java index b4231681c94..dd6446945ea 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/scheme/SDCScheme.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/scheme/SDCScheme.java @@ -24,14 +24,14 @@ import org.apache.sysds.runtime.compress.colgroup.ASDC; import org.apache.sysds.runtime.compress.colgroup.ASDCZero; import org.apache.sysds.runtime.compress.colgroup.ColGroupSDCFOR; -import org.apache.sysds.runtime.compress.colgroup.dictionary.ADictionary; +import org.apache.sysds.runtime.compress.colgroup.dictionary.IDictionary; import org.apache.sysds.runtime.compress.colgroup.indexes.IColIndex; import org.apache.sysds.runtime.matrix.data.MatrixBlock; public abstract class SDCScheme extends ACLAScheme { // TODO make it into a soft reference - protected ADictionary lastDict; + protected IDictionary lastDict; protected SDCScheme(IColIndex cols) { super(cols);
diff --git a/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibSeparator.java b/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibSeparator.java
new file mode 100644
index 00000000000..41000680a56
--- /dev/null
+++ b/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibSeparator.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.runtime.compress.lib;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.lang3.NotImplementedException;
+import org.apache.sysds.runtime.compress.colgroup.AColGroup;
+import org.apache.sysds.runtime.compress.colgroup.dictionary.IDictionary;
+
+/**
+ * Library for separating a list of column groups from their dictionaries, and for combining the separated parts back
+ * together into column groups.
+ */
+public interface CLALibSeparator {
+
+	/**
+	 * Split a given list of column groups into separate subparts.
+	 * <p>
+	 * No dictionaries are extracted yet: the returned dictionary list is empty and the given list is used unchanged as
+	 * the index structures.
+	 * 
+	 * @param g the list of groups to separate.
+	 * @return A split of the groups and their dictionaries.
+	 */
+	public static SeparatedGroups split(List<AColGroup> g) {
+		// Placeholder: no dictionary is pulled out of the groups yet.
+		List<IDictionary> dicts = new ArrayList<>();
+
+		return new SeparatedGroups(dicts, g);
+	}
+
+	/**
+	 * Combine a set of separated groups back together.
+	 * 
+	 * @param s A Separated group of indexStructures
+	 * @return The column groups combined with their dictionaries
+	 * @throws NotImplementedException always, combining is not implemented yet
+	 */
+	public static List<AColGroup> combine(SeparatedGroups s) {
+		throw new NotImplementedException("Combining separated column groups is not implemented");
+	}
+
+	/** Holder pairing a list of dictionaries with a list of index structures (column groups). */
+	public static class SeparatedGroups {
+		/** The dictionaries of the separated groups */
+		public final List<IDictionary> dicts;
+		/** The column groups the dictionaries were separated from */
+		public final List<AColGroup> indexStructures;
+
+		private SeparatedGroups(List<IDictionary> dicts, List<AColGroup> indexStructures) {
+			this.dicts = dicts;
+			this.indexStructures = indexStructures;
+		}
+
+		@Override
+		public String toString() {
+			StringBuilder sb = new StringBuilder();
+			sb.append(dicts);
+			sb.append(indexStructures);
+			return sb.toString();
+		}
+	}
+
+}
diff --git a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupMorphingPerformanceCompare.java b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupMorphingPerformanceCompare.java index 1382da1184e..1e11b9c6715 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupMorphingPerformanceCompare.java +++ b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupMorphingPerformanceCompare.java @@ -28,7 +28,7 @@ import org.apache.sysds.runtime.compress.colgroup.ColGroupSDC; import org.apache.sysds.runtime.compress.colgroup.ColGroupSDCZeros; import org.apache.sysds.runtime.compress.colgroup.ColGroupUtils; -import org.apache.sysds.runtime.compress.colgroup.dictionary.ADictionary; +import org.apache.sysds.runtime.compress.colgroup.dictionary.IDictionary; import org.apache.sysds.runtime.compress.colgroup.indexes.IColIndex; import org.apache.sysds.runtime.compress.colgroup.mapping.AMapToData; import org.apache.sysds.runtime.compress.colgroup.offset.AIterator; @@ -159,7 +159,7 @@ public SDCNoMorph(ColGroupSDC g) { null); } - protected SDCNoMorph(IColIndex colIndices, int numRows, ADictionary dict, double[] defaultTuple, AOffset offsets, + protected SDCNoMorph(IColIndex colIndices, int numRows, IDictionary dict, double[] defaultTuple, AOffset offsets, AMapToData data, int[] cachedCounts) { super(colIndices, numRows, dict,
defaultTuple, offsets, data, cachedCounts); diff --git a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupNegativeTests.java b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupNegativeTests.java index 8257351e22f..e89d41295ff 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupNegativeTests.java +++ b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupNegativeTests.java @@ -37,7 +37,7 @@ import org.apache.sysds.runtime.compress.colgroup.ColGroupRLE; import org.apache.sysds.runtime.compress.colgroup.ColGroupSDCSingleZeros; import org.apache.sysds.runtime.compress.colgroup.ColGroupSDCZeros; -import org.apache.sysds.runtime.compress.colgroup.dictionary.ADictionary; +import org.apache.sysds.runtime.compress.colgroup.dictionary.IDictionary; import org.apache.sysds.runtime.compress.colgroup.dictionary.Dictionary; import org.apache.sysds.runtime.compress.colgroup.indexes.ColIndexFactory; import org.apache.sysds.runtime.compress.colgroup.indexes.IColIndex; @@ -365,13 +365,13 @@ public ICLAScheme getCompressionScheme() { } @Override - protected AColGroup allocateRightMultiplication(MatrixBlock right, IColIndex colIndexes, ADictionary preAgg) { + protected AColGroup allocateRightMultiplication(MatrixBlock right, IColIndex colIndexes, IDictionary preAgg) { // TODO Auto-generated method stub return null; } @Override - protected AColGroup copyAndSet(IColIndex colIndexes, ADictionary newDictionary) { + protected AColGroup copyAndSet(IColIndex colIndexes, IDictionary newDictionary) { // TODO Auto-generated method stub return null; } @@ -603,13 +603,13 @@ public ICLAScheme getCompressionScheme() { } @Override - protected AColGroup allocateRightMultiplication(MatrixBlock right, IColIndex colIndexes, ADictionary preAgg) { + protected AColGroup allocateRightMultiplication(MatrixBlock right, IColIndex colIndexes, IDictionary preAgg) { // TODO Auto-generated method stub return null; } 
@Override - protected AColGroup copyAndSet(IColIndex colIndexes, ADictionary newDictionary) { + protected AColGroup copyAndSet(IColIndex colIndexes, IDictionary newDictionary) { // TODO Auto-generated method stub return null; } diff --git a/src/test/java/org/apache/sysds/test/component/compress/dictionary/DictionaryTests.java b/src/test/java/org/apache/sysds/test/component/compress/dictionary/DictionaryTests.java index e1827d03bfb..91707565f3c 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/dictionary/DictionaryTests.java +++ b/src/test/java/org/apache/sysds/test/component/compress/dictionary/DictionaryTests.java @@ -30,7 +30,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.sysds.runtime.compress.DMLCompressionException; -import org.apache.sysds.runtime.compress.colgroup.dictionary.ADictionary; +import org.apache.sysds.runtime.compress.colgroup.dictionary.IDictionary; import org.apache.sysds.runtime.compress.colgroup.dictionary.Dictionary; import org.apache.sysds.runtime.compress.colgroup.dictionary.MatrixBlockDictionary; import org.apache.sysds.runtime.functionobjects.Builtin; @@ -51,10 +51,10 @@ public class DictionaryTests { private final int nRow; private final int nCol; - private final ADictionary a; - private final ADictionary b; + private final IDictionary a; + private final IDictionary b; - public DictionaryTests(ADictionary a, ADictionary b, int nRow, int nCol) { + public DictionaryTests(IDictionary a, IDictionary b, int nRow, int nCol) { this.nRow = nRow; this.nCol = nCol; this.a = a; @@ -241,8 +241,8 @@ public void replace() { final int c = rand.nextInt(nCol); final double v = a.getValue(r, c, nCol); final double rep = rand.nextDouble(); - final ADictionary aRep = a.replace(v, rep, nCol); - final ADictionary bRep = b.replace(v, rep, nCol); + final IDictionary aRep = a.replace(v, rep, nCol); + final IDictionary bRep = b.replace(v, rep, nCol); assertEquals(aRep.getValue(r, c, 
nCol), rep, 0.0000001); assertEquals(bRep.getValue(r, c, nCol), rep, 0.0000001); } @@ -256,8 +256,8 @@ public void replaceWitReference() { final double before = a.getValue(r, c, nCol); final double v = before + 1.0; final double rep = rand.nextDouble() * 500; - final ADictionary aRep = a.replaceWithReference(v, rep, reference); - final ADictionary bRep = b.replaceWithReference(v, rep, reference); + final IDictionary aRep = a.replaceWithReference(v, rep, reference); + final IDictionary bRep = b.replaceWithReference(v, rep, reference); assertEquals(aRep.getValue(r, c, nCol), bRep.getValue(r, c, nCol), 0.0000001); assertNotEquals(before, aRep.getValue(r, c, nCol), 0.00001); } @@ -266,8 +266,8 @@ public void replaceWitReference() { public void rexpandCols() { if(nCol == 1) { int max = (int) a.aggregate(0, Builtin.getBuiltinFnObject(BuiltinCode.MAX)); - final ADictionary aR = a.rexpandCols(max + 1, true, false, nCol); - final ADictionary bR = b.rexpandCols(max + 1, true, false, nCol); + final IDictionary aR = a.rexpandCols(max + 1, true, false, nCol); + final IDictionary bR = b.rexpandCols(max + 1, true, false, nCol); compare(aR, bR, nRow, max + 1); } } @@ -316,8 +316,8 @@ public void rexpandColsWithReference(int reference) { if(nCol == 1) { int max = (int) a.aggregate(0, Builtin.getBuiltinFnObject(BuiltinCode.MAX)); - final ADictionary aR = a.rexpandColsWithReference(max + 1, true, false, reference); - final ADictionary bR = b.rexpandColsWithReference(max + 1, true, false, reference); + final IDictionary aR = a.rexpandColsWithReference(max + 1, true, false, reference); + final IDictionary bR = b.rexpandColsWithReference(max + 1, true, false, reference); if(aR == null && bR == null) return; // valid compare(aR, bR, nRow, max + 1); @@ -346,8 +346,8 @@ public void sliceOutColumnRange() { Random r = new Random(2323); int s = r.nextInt(nCol); int e = r.nextInt(nCol - s) + s + 1; - ADictionary ad = a.sliceOutColumnRange(s, e, nCol); - ADictionary bd = 
b.sliceOutColumnRange(s, e, nCol); + IDictionary ad = a.sliceOutColumnRange(s, e, nCol); + IDictionary bd = b.sliceOutColumnRange(s, e, nCol); compare(ad, bd, nRow, e - s); } @@ -411,7 +411,7 @@ public void containsValueWithReference(double value, double[] reference) { b.containsValueWithReference(value, reference)); } - private static void compare(ADictionary a, ADictionary b, int nRow, int nCol) { + private static void compare(IDictionary a, IDictionary b, int nRow, int nCol) { for(int i = 0; i < nRow; i++) for(int j = 0; j < nCol; j++) assertEquals(a.getValue(i, j, nCol), b.getValue(i, j, nCol), 0.0001); diff --git a/src/test/java/org/apache/sysds/test/component/compress/io/IOCompressionTestUtils.java b/src/test/java/org/apache/sysds/test/component/compress/io/IOCompressionTestUtils.java index 557049e5c06..6e3a638ddff 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/io/IOCompressionTestUtils.java +++ b/src/test/java/org/apache/sysds/test/component/compress/io/IOCompressionTestUtils.java @@ -34,7 +34,7 @@ public class IOCompressionTestUtils { static final AtomicInteger id = new AtomicInteger(0); - public static void deleteDirectory(File file) { + public synchronized static void deleteDirectory(File file) { synchronized(IOCompressionTestUtils.lock) { File[] files = file.listFiles(); if(files == null) @@ -48,7 +48,7 @@ public static void deleteDirectory(File file) { } } - public static String getName(String nameBeginning) { + public synchronized static String getName(String nameBeginning) { return nameBeginning + "testWrite" + id.incrementAndGet() + ".cla"; } @@ -62,7 +62,7 @@ protected static void verifyEquivalence(MatrixBlock a, MatrixBlock b) { // assertTrue("Disk size is not equivalent", a.getExactSizeOnDisk() > b.getExactSizeOnDisk()); } - public static MatrixBlock read(String path) { + public synchronized static MatrixBlock read(String path) { try { return ReaderCompressed.readCompressedMatrixFromHDFS(path); } diff --git 
a/src/test/java/org/apache/sysds/test/component/compress/io/SeparateDictionariesAndIndexes.java b/src/test/java/org/apache/sysds/test/component/compress/io/SeparateDictionariesAndIndexes.java
new file mode 100644
index 00000000000..459c2fe8318
--- /dev/null
+++ b/src/test/java/org/apache/sysds/test/component/compress/io/SeparateDictionariesAndIndexes.java
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.sysds.test.component.compress.io;
+
+import static org.junit.Assert.assertNotNull;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.sysds.runtime.compress.CompressedMatrixBlock;
+import org.apache.sysds.runtime.compress.CompressedMatrixBlockFactory;
+import org.apache.sysds.runtime.compress.lib.CLALibSeparator;
+import org.apache.sysds.runtime.compress.lib.CLALibSeparator.SeparatedGroups;
+import org.apache.sysds.runtime.matrix.data.MatrixBlock;
+import org.apache.sysds.test.TestUtils;
+import org.junit.Test;
+
+/** Component test for splitting the column groups of a compressed matrix block with {@link CLALibSeparator}. */
+public class SeparateDictionariesAndIndexes {
+
+	protected static final Log LOG = LogFactory.getLog(SeparateDictionariesAndIndexes.class.getName());
+
+	/** Compress a generated matrix block and verify that splitting its column groups returns a populated result. */
+	@Test
+	public void separate() {
+		// The generated block is passed through ceil and appended to itself twice before it is compressed.
+		MatrixBlock mb = TestUtils.generateTestMatrixBlock(100, 5, 0, 9, 1.0, 1342);
+		mb = TestUtils.ceil(mb);
+		mb = mb.append(mb).append(mb);
+		CompressedMatrixBlock cmb = (CompressedMatrixBlock) CompressedMatrixBlockFactory.compress(mb).getLeft();
+
+		SeparatedGroups s = CLALibSeparator.split(cmb.getColGroups());
+		LOG.debug(s);
+		assertNotNull(s);
+		assertNotNull(s.dicts);
+		assertNotNull(s.indexStructures);
+	}
+
+}