Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tpetra: use GIDs to convert from point matrix to block matrix #12914

Merged
merged 2 commits into from
Apr 15, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 69 additions & 26 deletions packages/tpetra/core/src/Tpetra_BlockCrsMatrix_Helpers_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,10 @@ namespace Tpetra {
RCP<const map_type> meshColMap = createMeshMap<LO,GO,Node>(blockSize, pointColMap);
if(meshColMap.is_null()) throw std::runtime_error("ERROR: Cannot create mesh colmap");

auto localMeshColMap = meshColMap->getLocalMap();
auto localPointColMap = pointColMap.getLocalMap();
auto localPointRowMap = pointRowMap.getLocalMap();

const map_type &pointDomainMap = *(pointMatrix.getDomainMap());
RCP<const map_type> meshDomainMap = createMeshMap<LO,GO,Node>(blockSize, pointDomainMap);

Expand Down Expand Up @@ -362,9 +366,22 @@ namespace Tpetra {
blockRowptr(i) = offset_b;

const LO offset_p = pointRowptr(i*blockSize);
const LO offset_p_max = pointRowptr((i+1)*blockSize);

LO filled_block = 0;
for (LO p_i=0; p_i<offset_p_max-offset_p; ++p_i) {
auto bcol_GID = localPointColMap.getGlobalElement(pointColind(offset_p + p_i))/blockSize;
auto lcol_GID = localMeshColMap.getLocalElement(bcol_GID);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is lcol_GIDa local ID? If so, would it be better to call it lcol_LID?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it was supposed to be bcol_LID good catch!


for (LO k=0; k<offset_b_max-offset_b; ++k) {
blockColind(offset_b + k) = pointColind(offset_p + k * blockSize)/blockSize;
bool visited = false;
for (LO k=0; k<filled_block; ++k) {
if (blockColind(offset_b + k) == lcol_GID)
visited = true;
}
if (!visited) {
blockColind(offset_b + filled_block) = lcol_GID;
++filled_block;
}
}
});

Expand Down Expand Up @@ -411,35 +428,61 @@ namespace Tpetra {
const offset_type bs2 = blockSize * blockSize;

auto meshCrsGraph = getBlockCrsGraph(pointMatrix, blockSize);

auto localMeshColMap = meshCrsGraph->getColMap()->getLocalMap();
auto localPointColMap = pointMatrix.getColMap()->getLocalMap();

{
TEUCHOS_FUNC_TIME_MONITOR("Tpetra::convertToBlockCrsMatrix::fillBlockCrsMatrix");
auto pointLocalGraph = pointMatrix.getCrsGraph()->getLocalGraphDevice();
auto pointRowptr = pointLocalGraph.row_map;
auto pointColind = pointLocalGraph.entries;

offset_type block_rows = pointRowptr.extent(0) == 0 ? 0 : (pointRowptr.extent(0)-1)/blockSize;
values_type blockValues("values", meshCrsGraph->getLocalNumEntries()*bs2);
auto pointValues = pointMatrix.getLocalValuesDevice (Access::ReadOnly);
auto blockRowptr = meshCrsGraph->getLocalGraphDevice().row_map;

Kokkos::parallel_for("copyblockValues",range_type(0,block_rows),KOKKOS_LAMBDA(const LO i) {
const offset_type blkBeg = blockRowptr[i];
const offset_type numBlocks = blockRowptr[i+1] - blkBeg;

// For each block in the row...
for (offset_type block=0; block < numBlocks; block++) {

// For each entry in the block...
for(LO little_row=0; little_row<blockSize; little_row++) {
offset_type point_row_offset = pointRowptr[i*blockSize + little_row];
for(LO little_col=0; little_col<blockSize; little_col++) {
blockValues((blkBeg+block) * bs2 + little_row * blockSize + little_col) =
pointValues[point_row_offset + block*blockSize + little_col];
}
}
{
auto pointLocalGraph = pointMatrix.getCrsGraph()->getLocalGraphDevice();
auto pointRowptr = pointLocalGraph.row_map;
auto pointColind = pointLocalGraph.entries;

}
offset_type block_rows = pointRowptr.extent(0) == 0 ? 0 : (pointRowptr.extent(0)-1)/blockSize;
auto pointValues = pointMatrix.getLocalValuesDevice (Access::ReadOnly);
auto blockRowptr = meshCrsGraph->getLocalGraphDevice().row_map;
auto blockColind = meshCrsGraph->getLocalGraphDevice().entries;

row_map_type pointGColind("pointGColind", pointColind.extent(0));

Kokkos::parallel_for("computePointGColind",range_type(0,pointColind.extent(0)),KOKKOS_LAMBDA(const LO i) {
pointGColind(i) = localPointColMap.getGlobalElement(pointColind(i));
});

row_map_type blockGColind("blockGColind", blockColind.extent(0));

Kokkos::parallel_for("computeBlockGColind",range_type(0,blockGColind.extent(0)),KOKKOS_LAMBDA(const LO i) {
blockGColind(i) = localMeshColMap.getGlobalElement(blockColind(i));
});

Kokkos::parallel_for("copyblockValues",range_type(0,block_rows),KOKKOS_LAMBDA(const LO i) {
const offset_type blkBeg = blockRowptr[i];
const offset_type numBlocks = blockRowptr[i+1] - blkBeg;

for (offset_type point_i=0; point_i < pointRowptr[i*blockSize + 1] - pointRowptr[i*blockSize]; point_i++) {

offset_type block_inv=static_cast<offset_type>(-1);
offset_type little_col_inv=static_cast<offset_type>(-1);
for (offset_type block_2=0; block_2 < numBlocks; block_2++) {
for (LO little_col_2=0; little_col_2 < blockSize; little_col_2++) {
if (blockGColind(blkBeg+block_2)*blockSize + little_col_2 == pointGColind(pointRowptr[i*blockSize] + point_i)) {
block_inv = block_2;
little_col_inv = little_col_2;
break;
}
}
if (block_inv!=static_cast<offset_type>(-1))
break;
}

for(LO little_row=0; little_row<blockSize; little_row++) {
blockValues((blkBeg+block_inv) * bs2 + little_row * blockSize + little_col_inv) = pointValues[pointRowptr[i*blockSize+little_row] + point_i];
}
}
});
}
blockMatrix = rcp(new block_crs_matrix_type(*meshCrsGraph, blockValues, blockSize));
Kokkos::DefaultExecutionSpace().fence();
}
Expand Down
Loading