Skip to content

Commit

Permalink
more specialization
Browse files Browse the repository at this point in the history
  • Loading branch information
Baunsgaard committed Aug 30, 2023
1 parent ed4d651 commit 296137b
Show file tree
Hide file tree
Showing 8 changed files with 220 additions and 55 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -158,8 +158,10 @@ private void encodeGeneric(MatrixBlock data, AMapToData d, int col) {

@Override
protected Pair<ICLAScheme, AColGroup> tryUpdateAndEncode(MatrixBlock data, IColIndex columns) {
if(data.isEmpty())
if(data.isEmpty()){
map.increment(0.0, data.getNumRows());
return new Pair<>(this, new ColGroupEmpty(columns));
}
final int nRow = data.getNumRows();

final AMapToData d = MapToFactory.create(nRow, map.size());
Expand All @@ -173,9 +175,7 @@ protected Pair<ICLAScheme, AColGroup> tryUpdateAndEncode(MatrixBlock data, IColI

private void encodeAndUpdate(MatrixBlock data, AMapToData d, int col) {
final int max = d.getMaxPossible();
if(data.isEmpty())
d.fill(map.getId(0.0));
else if(data.isInSparseFormat())
if(data.isInSparseFormat())
encodeAndUpdateSparse(data, d, col, max);
else if(data.getDenseBlock().isContiguous())
encodeAndUpdateDense(data, d, col, max);
Expand All @@ -186,6 +186,7 @@ else if(data.getDenseBlock().isContiguous())
private void encodeAndUpdateSparse(MatrixBlock data, AMapToData d, int col, int max) {
final int nRow = data.getNumRows();
final SparseBlock sb = data.getSparseBlock();

for(int i = 0; i < nRow; i++) {
int id = map.increment(sb.get(i, col));
if(id >= max)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@

package org.apache.sysds.runtime.compress.colgroup.scheme;

import org.apache.sysds.runtime.compress.DMLCompressionException;
import org.apache.sysds.runtime.compress.colgroup.AColGroup;
import org.apache.sysds.runtime.compress.colgroup.ASDC;
import org.apache.sysds.runtime.compress.colgroup.ASDCZero;
Expand Down Expand Up @@ -56,39 +55,26 @@ public SDCSchemeMC(IColIndex cols, DblArrayCountHashMap map, DblArray def) {

protected SDCSchemeMC(ASDC g) {
super(g.getColIndices());
try {
this.lastDict = g.getDictionary();
final MatrixBlockDictionary mbd = lastDict.getMBDict(this.cols.size());
final MatrixBlock mbDict = mbd != null ? mbd.getMatrixBlock() : new MatrixBlock(1, this.cols.size(), 0.0);
final int dictRows = mbDict.getNumRows();
final int dictCols = mbDict.getNumColumns();

// Read the mapping data and materialize map.
map = new DblArrayCountHashMap(dictRows * 2);
final ReaderColumnSelection reader = ReaderColumnSelection.createReader(mbDict, //
ColIndexFactory.create(dictCols), false, 0, dictRows);
emptyRow = new DblArray(new double[dictCols]);
DblArray d = null;
int r = 0;
while((d = reader.nextRow()) != null) {

final int row = reader.getCurrentRowIndex();
if(row != r) {
map.increment(emptyRow, row - r);
r = row;
}
map.increment(d);
}
if(r < dictRows) {
map.increment(emptyRow, dictRows - r);
}

def = new DblArray(g.getCommon());
this.lastDict = g.getDictionary();
final MatrixBlockDictionary mbd = lastDict.getMBDict(this.cols.size());
final MatrixBlock mbDict = mbd != null ? mbd.getMatrixBlock() : new MatrixBlock(1, this.cols.size(), 0.0);
final int dictRows = mbDict.getNumRows();
final int dictCols = mbDict.getNumColumns();

// Read the mapping data and materialize map.
map = new DblArrayCountHashMap(dictRows * 2);
final ReaderColumnSelection reader = ReaderColumnSelection.createReader(mbDict, //
ColIndexFactory.create(dictCols), false, 0, dictRows);
emptyRow = new DblArray(new double[dictCols]);
DblArray d = null;
while((d = reader.nextRow()) != null) {
// This leverages the fact that our non-transposed readers never skip a row.
map.increment(d);
}
catch(Exception e) {
throw new DMLCompressionException(g.getDictionary().toString());
}

def = new DblArray(g.getCommon());

}

protected SDCSchemeMC(ASDCZero g) {
Expand Down Expand Up @@ -145,8 +131,8 @@ private AMapToData encode(MatrixBlock data, ReaderColumnSelection reader, IntArr

DblArray cellVals;
ACount<DblArray> emptyIdx = map.getC(emptyRow);
IntArrayList dt = new IntArrayList();

IntArrayList dt = new IntArrayList();
int r = 0;
while((cellVals = reader.nextRow()) != null) {
final int row = reader.getCurrentRowIndex();
Expand All @@ -171,6 +157,9 @@ private AMapToData encode(MatrixBlock data, ReaderColumnSelection reader, IntArr
r++;
}
}
else {
r++;
}
}
if(emptyIdx != null) {
// empty is non default.
Expand Down Expand Up @@ -215,7 +204,7 @@ private ICLAScheme update(MatrixBlock data, ReaderColumnSelection reader, IColIn
map.increment(emptyRow, row - r);
r = row;
}
if(!cellVals.equals(def))
if(!cellVals.equals(def))
map.increment(cellVals);
}
if(!defIsEmpty) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -367,8 +367,8 @@ private AColGroup encodeSparseT(MatrixBlock data, IColIndex columns) {
final IntArrayList dt = getCachedArray(1);

final int zeroId = map.getId(0.0);
for(int i = 0; i < data.getNumColumns(); i++) {
if(aix[apos] == i) {
for(int i = 0; i < data.getNumColumns() ; i++) {
if(apos < alen && aix[apos] == i) {
if(!Util.eq(aval[apos], def)) {
off.appendValue(i);
dt.appendValue(map.getId(aval[apos]));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,27 +35,25 @@
public class ReaderColumnSelectionSparse extends ReaderColumnSelection {

private final SparseBlock a;
private final DblArray empty;

protected ReaderColumnSelectionSparse(MatrixBlock data, IColIndex colIndexes, int rl, int ru) {
super(colIndexes, rl, Math.min(ru, data.getNumRows()) - 1);
a = data.getSparseBlock();
empty = new DblArray(new double[colIndexes.size()]);
}

protected final DblArray getNextRow() {
while(_rl < _ru) {
_rl++;
if(a.isEmpty(_rl))
continue; // if empty easy skip
_rl++;
if(a.isEmpty(_rl))
return empty;

final boolean zeroResult = processInRange(_rl);
final boolean zeroResult = processInRange(_rl);

if(zeroResult)
continue; // skip if no values found were in my cols

return reusableReturn;
}
return null;
if(zeroResult)
return empty; // skip if no values found were in my cols

return reusableReturn;
}

final boolean processInRange(final int r) {
Expand All @@ -81,6 +79,7 @@ else if(_colIndexes.get(skip) > aix[j])
else
reusableArr[skip++] = 0;
}

if(zeroResult)
return true; // skip if no values found were in my cols

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
import org.junit.Test;

public abstract class SchemeTestBase {
protected final Log LOG = LogFactory.getLog(SchemeTestBase.class.getName());
protected static final Log LOG = LogFactory.getLog(SchemeTestBase.class.getName());

static {
CompressedMatrixBlock.debug = true;
Expand All @@ -51,7 +51,6 @@ public void testEncode() {

MatrixBlock in = TestUtils
.round(TestUtils.generateTestMatrixBlock(20, src.getNumColumns(), 0, distinct, 0.9, 7));

AColGroup out = sh.encode(in);
MatrixBlock d = new MatrixBlock(in.getNumRows(), in.getNumColumns(), false);
d.allocateBlock();
Expand Down Expand Up @@ -230,6 +229,34 @@ public void testUpdateSparseT() {
TestUtils.compareMatricesBitAvgDistance(inSlice, LibMatrixReorg.transpose(d), 0, 0);
}

/**
 * Checks that a scheme can be updated with a sparse, transposed input and can afterwards encode that same input,
 * and that decompressing the encoded group reproduces the leading {@code src.getNumColumns()} rows of the input.
 *
 * The input is built from a block constructed with the value 0.0 with one extra block constructed with the value
 * 1.0 appended to it (presumably row-wise, since both blocks have 100 columns -- confirm against
 * {@code MatrixBlock.append}).
 */
@Test
public void testUpdateSparseTEmptyColumn() {
	MatrixBlock in = new MatrixBlock(src.getNumColumns(), 100, 0.0);
	MatrixBlock b = new MatrixBlock(1, 100, 1.0);
	in = in.append(b, false);
	in.denseToSparse(true);
	// The test is only meaningful on the sparse code path, so fail loudly if the conversion did not happen.
	if(!in.isInSparseFormat())
		throw new RuntimeException("Test input was expected to be in sparse format after denseToSparse(true)");
	try {
		sh.encodeT(in);
	}
	catch(NullPointerException ignored) {
		// Deliberately ignored: encoding with the not-yet-updated scheme may fail because the input can contain
		// values the scheme does not know. Either outcome (exception or no exception) is acceptable here; the
		// assertions below only concern the updated scheme.
	}
	// Update a copy so the shared scheme instance is left untouched.
	ICLAScheme shc = sh.clone();
	shc = shc.updateT(in);

	AColGroup out = shc.encodeT(in); // should be possible now.
	MatrixBlock d = new MatrixBlock(in.getNumColumns(), src.getNumColumns(), false);
	d.allocateBlock();
	out.decompressToDenseBlock(d.getDenseBlock(), 0, in.getNumColumns());
	// Compare only the rows of the transposed input that correspond to the scheme's columns.
	MatrixBlock inSlice = in.slice(0, src.getNumColumns() - 1, 0, in.getNumColumns() - 1);
	d.recomputeNonZeros();
	TestUtils.compareMatricesBitAvgDistance(inSlice, LibMatrixReorg.transpose(d), 0, 0);
}

@Test
public void testUpdateLargeBlock() {
try {
Expand Down Expand Up @@ -422,6 +449,15 @@ public void testUpdateAndEncodeSparseT() {
testUpdateAndEncodeT(in);
}

/**
 * Runs the combined update-and-encode check on a sparse, transposed input made of a block constructed with 0.0
 * followed by one appended block constructed with 1.0.
 */
@Test
public void testUpdateAndEncodeSparseTEmptyColumn() {
	final MatrixBlock zeros = new MatrixBlock(src.getNumColumns(), 10, 0.0);
	final MatrixBlock ones = new MatrixBlock(1, 10, 1.0);
	// presumably a row-wise append, since both blocks have 10 columns -- confirm against MatrixBlock.append
	final MatrixBlock combined = zeros.append(ones, false);
	combined.denseToSparse(true);
	testUpdateAndEncodeT(combined);
}

@Test
public void testUpdateAndEncodeLarge() {
double newVal = distinct + 4;
Expand All @@ -441,6 +477,57 @@ public void testUpdateAndEncodeLargeT() {
testUpdateAndEncodeT(in);
}

/**
 * Update-and-encode on a generated 100-row input whose upper value argument is {@code distinct + 300}, i.e. well
 * beyond the number of distinct values the scheme under test was built with.
 */
@Test
public void testUpdateAndEncodeManyNew() {
	final int nCol = src.getNumColumns();
	final double upperValue = distinct + 300;
	// presumably arguments are (rows, cols, min, max, sparsity, seed) -- confirm against TestUtils
	final MatrixBlock rounded = TestUtils.round(TestUtils.generateTestMatrixBlock(100, nCol, 0, upperValue, 1.0, 7));
	testUpdateAndEncode(rounded);
}

/**
 * Transposed variant: update-and-encode on a generated input with {@code src.getNumColumns()} rows and 100
 * columns, using {@code distinct + 300} as the upper value argument.
 */
@Test
public void testUpdateAndEncodeTManyNew() {
	final int nRowT = src.getNumColumns();
	final double upperValue = distinct + 300;
	// presumably arguments are (rows, cols, min, max, sparsity, seed) -- confirm against TestUtils
	final MatrixBlock rounded = TestUtils.round(TestUtils.generateTestMatrixBlock(nRowT, 100, 0, upperValue, 1.0, 7));
	testUpdateAndEncodeT(rounded);
}

/**
 * Update-and-encode on a generated input with 100 rows and {@code src.getNumColumns() + 100} columns, generated
 * with 0.1 as the fifth argument (presumably the sparsity) and {@code distinct + 300} as the upper value argument.
 */
@Test
public void testUpdateAndEncodeSparseManyNew() {
	final int nColWide = src.getNumColumns() + 100;
	final double upperValue = distinct + 300;
	final MatrixBlock rounded = TestUtils
		.round(TestUtils.generateTestMatrixBlock(100, nColWide, 0, upperValue, 0.1, 7));
	testUpdateAndEncode(rounded);
}

/**
 * Transposed variant on a generated input with {@code src.getNumColumns()} rows and 100 columns, generated with
 * 0.1 as the fifth argument (presumably the sparsity) and {@code distinct + 300} as the upper value argument.
 */
@Test
public void testUpdateAndEncodeSparseTManyNew() {
	final int nRowT = src.getNumColumns();
	final double upperValue = distinct + 300;
	final MatrixBlock rounded = TestUtils
		.round(TestUtils.generateTestMatrixBlock(nRowT, 100, 0, upperValue, 0.1, 7));
	testUpdateAndEncodeT(rounded);
}

/**
 * Update-and-encode on a generated 100-row input that is first passed through
 * {@code ReadersTestCompareReaders.createMock} (presumably to emulate a large block -- confirm against that helper).
 */
@Test
public void testUpdateAndEncodeLargeManyNew() {
	final int nCol = src.getNumColumns();
	final double upperValue = distinct + 300;
	final MatrixBlock rounded = TestUtils
		.round(TestUtils.generateTestMatrixBlock(100, nCol, 0, upperValue, 1.0, 7));

	// Keep the declared type MatrixBlock so the same test overload is selected as before.
	final MatrixBlock mocked = ReadersTestCompareReaders.createMock(rounded);
	testUpdateAndEncode(mocked);
}

/**
 * Transposed variant on a generated input with {@code src.getNumColumns()} rows and 100 columns that is first
 * passed through {@code ReadersTestCompareReaders.createMock} (presumably to emulate a large block -- confirm
 * against that helper).
 */
@Test
public void testUpdateAndEncodeLargeTManyNew() {
	final int nRowT = src.getNumColumns();
	final double upperValue = distinct + 300;
	final MatrixBlock rounded = TestUtils
		.round(TestUtils.generateTestMatrixBlock(nRowT, 100, 0, upperValue, 1.0, 7));
	// Keep the declared type MatrixBlock so the same test overload is selected as before.
	final MatrixBlock mocked = ReadersTestCompareReaders.createMock(rounded);
	testUpdateAndEncodeT(mocked);
}

@Test
public void testUpdateAndEncodeEmpty() {
MatrixBlock in = new MatrixBlock(100, src.getNumColumns(), 0);
Expand Down Expand Up @@ -482,10 +569,10 @@ public void testUpdateAndEncodeT(MatrixBlock in) {
try {
Pair<ICLAScheme, AColGroup> r = sh.clone().updateAndEncodeT(in);
AColGroup out = r.getValue();
MatrixBlock d = new MatrixBlock(in.getNumRows(), src.getNumColumns(), false);
MatrixBlock d = new MatrixBlock(in.getNumColumns(), src.getNumColumns(), false);
d.allocateBlock();
out.decompressToDenseBlock(d.getDenseBlock(), 0, in.getNumRows());
MatrixBlock inSlice = in.slice(0, src.getNumColumns() - 1, 0, in.getNumRows() - 1);
out.decompressToDenseBlock(d.getDenseBlock(), 0, in.getNumColumns());
MatrixBlock inSlice = in.slice(0, src.getNumColumns() - 1, 0, in.getNumColumns() - 1);
d.recomputeNonZeros();
TestUtils.compareMatricesBitAvgDistance(inSlice, LibMatrixReorg.transpose(d), 0, 0);
}
Expand Down
Loading

0 comments on commit 296137b

Please sign in to comment.