Skip to content

Commit

Permalink
Add router_cores and get_core_at (#446)
Browse files Browse the repository at this point in the history
### Issue
Somewhat related to #439 

### Description
get_core_at can be a useful API provided by coordinate manager. There
are scenarios where we want to learn what is located at a specific core
location. This obviously can't be offered for LOGICAL coord system, but
for others it is possible.
Since there are also some places in the code which request specifically
some cores which might be router_only cores, I've also added
router_cores to CoordinateManager and SocDescriptor (see
Cluster::test_setup_interface or
Cluster::broadcast_pcie_tensix_risc_reset)

### List of the changes
- Add a translate_coord_to overload that takes a raw coordinate and its coordinate system, so the caller doesn't need to know which core type is located there
- Add router cores everywhere
- Restructured constants a bit to follow tensix, dram, eth, arc, pci
ordering.
- Wrote a test to verify new behavior

### Testing
Added tests which test the new API.

### API Changes
There are no API changes in this PR.
  • Loading branch information
broskoTT authored Jan 21, 2025
1 parent ddf6425 commit 558328f
Show file tree
Hide file tree
Showing 17 changed files with 284 additions and 86 deletions.
3 changes: 2 additions & 1 deletion device/api/umd/device/blackhole_coordinate_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ class BlackholeCoordinateManager : public CoordinateManager {
const tt_xy_pair& arc_grid_size,
const std::vector<tt_xy_pair>& arc_cores,
const tt_xy_pair& pcie_grid_size,
const std::vector<tt_xy_pair>& pcie_cores);
const std::vector<tt_xy_pair>& pcie_cores,
const std::vector<tt_xy_pair>& router_cores);

protected:
void assert_coordinate_manager_constructor() override;
Expand Down
61 changes: 32 additions & 29 deletions device/api/umd/device/blackhole_implementation.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ enum class arc_message_type {
// DEVICE_DATA
const static tt_xy_pair TENSIX_GRID_SIZE = {14, 10};
// clang-format off
const static std::vector<tt_xy_pair> TENSIX_CORES = {{
const static std::vector<tt_xy_pair> TENSIX_CORES = {
{1, 2}, {2, 2}, {3, 2}, {4, 2}, {5, 2}, {6, 2}, {7, 2}, {10, 2}, {11, 2}, {12, 2}, {13, 2}, {14, 2}, {15, 2}, {16, 2},
{1, 3}, {2, 3}, {3, 3}, {4, 3}, {5, 3}, {6, 3}, {7, 3}, {10, 3}, {11, 3}, {12, 3}, {13, 3}, {14, 3}, {15, 3}, {16, 3},
{1, 4}, {2, 4}, {3, 4}, {4, 4}, {5, 4}, {6, 4}, {7, 4}, {10, 4}, {11, 4}, {12, 4}, {13, 4}, {14, 4}, {15, 4}, {16, 4},
Expand All @@ -72,29 +72,45 @@ const static std::vector<tt_xy_pair> TENSIX_CORES = {{
{1, 9}, {2, 9}, {3, 9}, {4, 9}, {5, 9}, {6, 9}, {7, 9}, {10, 9}, {11, 9}, {12, 9}, {13, 9}, {14, 9}, {15, 9}, {16, 9},
{1, 10}, {2, 10}, {3, 10}, {4, 10}, {5, 10}, {6, 10}, {7, 10}, {10, 10}, {11, 10}, {12, 10}, {13, 10}, {14, 10}, {15, 10}, {16, 10},
{1, 11}, {2, 11}, {3, 11}, {4, 11}, {5, 11}, {6, 11}, {7, 11}, {10, 11}, {11, 11}, {12, 11}, {13, 11}, {14, 11}, {15, 11}, {16, 11},
}};
};
// clang-format on

const std::size_t NUM_DRAM_BANKS = 8;
const std::size_t NUM_NOC_PORTS_PER_DRAM_BANK = 3;
static const tt_xy_pair DRAM_GRID_SIZE = {NUM_DRAM_BANKS, NUM_NOC_PORTS_PER_DRAM_BANK};
// clang-format off
static const std::vector<tt_xy_pair> DRAM_CORES = {
{
{0, 0}, {0, 1}, {0, 11},
{0, 2}, {0, 10}, {0, 3},
{0, 9}, {0, 4}, {0, 8},
{0, 5}, {0, 7}, {0, 6},
{9, 0}, {9, 1}, {9, 11},
{9, 2}, {9, 10}, {9, 3},
{9, 9}, {9, 4}, {9, 8},
{9, 5}, {9, 7}, {9, 6}}};
{0, 0}, {0, 1}, {0, 11},
{0, 2}, {0, 10}, {0, 3},
{0, 9}, {0, 4}, {0, 8},
{0, 5}, {0, 7}, {0, 6},
{9, 0}, {9, 1}, {9, 11},
{9, 2}, {9, 10}, {9, 3},
{9, 9}, {9, 4}, {9, 8},
{9, 5}, {9, 7}, {9, 6}};
// clang-format on

// TODO: DRAM locations should be deleted. We keep it for compatibility with
// the existing code in clients which rely on DRAM_LOCATIONS.
static const std::vector<tt_xy_pair> DRAM_LOCATIONS = DRAM_CORES;

static const tt_xy_pair ETH_GRID_SIZE = {14, 1};
static const std::vector<tt_xy_pair> ETH_CORES = {
{1, 1},
{2, 1},
{3, 1},
{4, 1},
{5, 1},
{6, 1},
{7, 1},
{10, 1},
{11, 1},
{12, 1},
{13, 1},
{14, 1},
{15, 1},
{16, 1}};
static const std::vector<tt_xy_pair> ETH_LOCATIONS = ETH_CORES;

static const tt_xy_pair ARC_GRID_SIZE = {1, 1};
static const std::vector<tt_xy_pair> ARC_CORES = {{8, 0}};
static const std::vector<tt_xy_pair> ARC_LOCATIONS = ARC_CORES;
Expand All @@ -104,23 +120,10 @@ static const std::vector<tt_xy_pair> PCIE_CORES_TYPE2 = {{{2, 0}}};
static const std::vector<tt_xy_pair> PCI_LOCATIONS = PCIE_CORES_TYPE2;
static const std::vector<tt_xy_pair> PCIE_CORES_TYPE1 = {{{11, 0}}};

static const tt_xy_pair ETH_GRID_SIZE = {14, 1};
static const std::vector<tt_xy_pair> ETH_CORES = {
{{1, 1},
{2, 1},
{3, 1},
{4, 1},
{5, 1},
{6, 1},
{7, 1},
{10, 1},
{11, 1},
{12, 1},
{13, 1},
{14, 1},
{15, 1},
{16, 1}}};
static const std::vector<tt_xy_pair> ETH_LOCATIONS = ETH_CORES;
// Locations of the Blackhole router cores. There is deliberately no matching *_GRID_SIZE constant:
// router cores are not laid out in a regular grid.
// NOTE(review): presumably these are in the same coordinate system as the other *_CORES lists — confirm.
// NOTE(review): {2, 0} is also listed in PCIE_CORES_TYPE2 — confirm this overlap is intended for boards
// that use the TYPE2 PCIe core.
static const std::vector<tt_xy_pair> ROUTER_CORES = {
{1, 0}, {2, 0}, {3, 0}, {4, 0}, {5, 0}, {6, 0}, {7, 0}, {10, 0}, {12, 0}, {13, 0}, {14, 0}, {15, 0},
{16, 0}, {8, 1}, {8, 2}, {8, 3}, {8, 4}, {8, 5}, {8, 6}, {8, 7}, {8, 8}, {8, 9}, {8, 10}, {8, 11}};

// Return to std::array instead of std::vector once we get std::span support in C++20
static const std::vector<uint32_t> T6_X_LOCATIONS = {1, 2, 3, 4, 5, 6, 7, 10, 11, 12, 13, 14, 15, 16};
static const std::vector<uint32_t> T6_Y_LOCATIONS = {2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
Expand Down
30 changes: 20 additions & 10 deletions device/api/umd/device/coordinate_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@

class CoordinateManager {
public:
CoordinateManager(CoordinateManager& other) = default;
virtual ~CoordinateManager() = default;

/*
* Creates a Coordinate Manager object.
* Board type and is_chip_remote are used only for Blackhole, since PCIe cores are different
Expand All @@ -37,7 +40,8 @@ class CoordinateManager {
const tt_xy_pair& arc_grid_size,
const std::vector<tt_xy_pair>& arc_cores,
const tt_xy_pair& pcie_grid_size,
const std::vector<tt_xy_pair>& pcie_cores);
const std::vector<tt_xy_pair>& pcie_cores,
const std::vector<tt_xy_pair>& router_cores);

static std::shared_ptr<CoordinateManager> create_coordinate_manager(
tt::ARCH arch,
Expand All @@ -49,7 +53,6 @@ class CoordinateManager {
const bool is_chip_remote = false);

static size_t get_num_harvested(const size_t harvesting_mask);

static std::vector<size_t> get_harvested_indices(const size_t harvesting_mask);

// Harvesting mask is reported by hardware in the order of physical layout. This function returns a more suitable
Expand All @@ -59,22 +62,18 @@ class CoordinateManager {
static uint32_t shuffle_tensix_harvesting_mask_to_noc0_coords(
tt::ARCH arch, uint32_t tensix_harvesting_logical_layout);

CoordinateManager(CoordinateManager& other) = default;

tt::umd::CoreCoord translate_coord_to(const tt::umd::CoreCoord core_coord, const CoordSystem coord_system);
tt::umd::CoreCoord translate_coord_to(const tt::umd::CoreCoord core_coord, const CoordSystem coord_system) const;
tt::umd::CoreCoord translate_coord_to(
const tt_xy_pair core, const CoordSystem input_coord_system, const CoordSystem target_coord_system) const;

std::vector<tt::umd::CoreCoord> get_cores(const CoreType core_type) const;
tt_xy_pair get_grid_size(const CoreType core_type) const;

std::vector<tt::umd::CoreCoord> get_harvested_cores(const CoreType core_type) const;
tt_xy_pair get_harvested_grid_size(const CoreType core_type) const;

virtual ~CoordinateManager() = default;

size_t get_tensix_harvesting_mask() const;

size_t get_dram_harvesting_mask() const;

size_t get_eth_harvesting_mask() const;

private:
Expand All @@ -88,6 +87,7 @@ class CoordinateManager {
* returned from create-ethernet-map, so each bit is responsible for one row of the actual physical
* row of the tensix cores on the chip. Harvesting mask is shuffled in constructor to match the NOC
* layout of the tensix cores.
* Router cores don't have a grid size, since they are not laid out in a regular fashion.
*/
CoordinateManager(
const bool noc_translation_enabled,
Expand All @@ -103,7 +103,8 @@ class CoordinateManager {
const tt_xy_pair& arc_grid_size,
const std::vector<tt_xy_pair>& arc_cores,
const tt_xy_pair& pcie_grid_size,
const std::vector<tt_xy_pair>& pcie_cores);
const std::vector<tt_xy_pair>& pcie_cores,
const std::vector<tt_xy_pair>& router_cores);

void initialize();

Expand All @@ -114,6 +115,7 @@ class CoordinateManager {
virtual void translate_eth_coords();
virtual void translate_arc_coords();
virtual void translate_pcie_coords();
virtual void translate_router_coords();

void identity_map_physical_cores();
void add_core_translation(const tt::umd::CoreCoord& core_coord, const tt_xy_pair& physical_pair);
Expand Down Expand Up @@ -185,8 +187,13 @@ class CoordinateManager {
*/
virtual void fill_arc_physical_translated_mapping() = 0;

// Maps full CoreCoord from any CoordSystem to physical coordinates.
std::map<tt::umd::CoreCoord, tt_xy_pair> to_physical_map;
// Maps physical coordinates given a target CoordSystem to full CoreCoord.
std::map<std::pair<tt_xy_pair, CoordSystem>, tt::umd::CoreCoord> from_physical_map;
// Maps coordinates in the designated CoordSystem to a full CoreCoord at that location holding the right CoreType.
// Doesn't include logical CoordSystem.
std::map<std::pair<tt_xy_pair, CoordSystem>, tt::umd::CoreCoord> to_core_type_map;

// Whether NOC translation is enabled on chip.
// This flag affects how Translated coords are calculated. If translation is enabled on the chip, then we can
Expand All @@ -212,4 +219,7 @@ class CoordinateManager {

tt_xy_pair pcie_grid_size;
const std::vector<tt_xy_pair> pcie_cores;

// Router cores don't have a grid size, since they are not laid out in a regular fashion.
const std::vector<tt_xy_pair> router_cores;
};
3 changes: 2 additions & 1 deletion device/api/umd/device/grayskull_coordinate_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ class GrayskullCoordinateManager : public CoordinateManager {
const tt_xy_pair& arc_grid_size,
const std::vector<tt_xy_pair>& arc_cores,
const tt_xy_pair& pcie_grid_size,
const std::vector<tt_xy_pair>& pcie_cores);
const std::vector<tt_xy_pair>& pcie_cores,
const std::vector<tt_xy_pair>& router_cores);

protected:
void fill_tensix_physical_translated_mapping() override;
Expand Down
24 changes: 15 additions & 9 deletions device/api/umd/device/grayskull_implementation.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ enum class arc_message_type {

// DEVICE_DATA
static const tt_xy_pair TENSIX_GRID_SIZE = {12, 10};
static const std::vector<tt_xy_pair> TENSIX_CORES = {{
static const std::vector<tt_xy_pair> TENSIX_CORES = {
{1, 1}, {2, 1}, {3, 1}, {4, 1}, {5, 1}, {6, 1}, {7, 1}, {8, 1}, {9, 1}, {10, 1}, {11, 1}, {12, 1},
{1, 2}, {2, 2}, {3, 2}, {4, 2}, {5, 2}, {6, 2}, {7, 2}, {8, 2}, {9, 2}, {10, 2}, {11, 2}, {12, 2},
{1, 3}, {2, 3}, {3, 3}, {4, 3}, {5, 3}, {6, 3}, {7, 3}, {8, 3}, {9, 3}, {10, 3}, {11, 3}, {12, 3},
Expand All @@ -116,26 +116,32 @@ static const std::vector<tt_xy_pair> TENSIX_CORES = {{
{1, 9}, {2, 9}, {3, 9}, {4, 9}, {5, 9}, {6, 9}, {7, 9}, {8, 9}, {9, 9}, {10, 9}, {11, 9}, {12, 9},
{1, 10}, {2, 10}, {3, 10}, {4, 10}, {5, 10}, {6, 10}, {7, 10}, {8, 10}, {9, 10}, {10, 10}, {11, 10}, {12, 10},
{1, 11}, {2, 11}, {3, 11}, {4, 11}, {5, 11}, {6, 11}, {7, 11}, {8, 11}, {9, 11}, {10, 11}, {11, 11}, {12, 11},
}};
};

const std::size_t NUM_DRAM_BANKS = 8;
const std::size_t NUM_NOC_PORTS_PER_DRAM_BANK = 1;
static const tt_xy_pair DRAM_GRID_SIZE = {NUM_DRAM_BANKS, NUM_NOC_PORTS_PER_DRAM_BANK};
static const std::vector<tt_xy_pair> DRAM_CORES = {{{1, 0}, {1, 6}, {4, 0}, {4, 6}, {7, 0}, {7, 6}, {10, 0}, {10, 6}}};

static const tt_xy_pair ETH_GRID_SIZE = {0, 0};
static const std::vector<tt_xy_pair> ETH_CORES = {};
static const std::vector<tt_xy_pair> DRAM_CORES = {{1, 0}, {1, 6}, {4, 0}, {4, 6}, {7, 0}, {7, 6}, {10, 0}, {10, 6}};
// TODO: DRAM locations should be deleted. We keep it for compatibility with
// the existing code in clients which rely on DRAM_LOCATIONS.
static const std::vector<tt_xy_pair> DRAM_LOCATIONS = DRAM_CORES;

static const tt_xy_pair ETH_GRID_SIZE = {0, 0};
static const std::vector<tt_xy_pair> ETH_CORES = {};
static const std::array<xy_pair, 0> ETH_LOCATIONS = {};

static const tt_xy_pair ARC_GRID_SIZE = {1, 1};
static const std::vector<tt_xy_pair> ARC_CORES = {{{0, 2}}};
static const std::vector<tt_xy_pair> ARC_CORES = {{0, 2}};
static const std::vector<tt_xy_pair> ARC_LOCATIONS = ARC_CORES;

static const tt_xy_pair PCIE_GRID_SIZE = {1, 1};
static const std::vector<tt_xy_pair> PCIE_CORES = {{{0, 4}}};
static const std::vector<tt_xy_pair> PCIE_CORES = {{0, 4}};
static const std::vector<tt_xy_pair> PCI_LOCATIONS = PCIE_CORES;
static const std::array<xy_pair, 0> ETH_LOCATIONS = {};

// Locations of the Grayskull router cores. There is deliberately no matching *_GRID_SIZE constant:
// router cores are not laid out in a regular grid.
// NOTE(review): presumably these are in the same coordinate system as the other *_CORES lists — confirm.
static const std::vector<tt_xy_pair> ROUTER_CORES = {
{0, 0}, {0, 11}, {0, 1}, {0, 10}, {0, 9}, {0, 3}, {0, 8}, {0, 7}, {0, 5}, {0, 6}, {12, 0}, {11, 0}, {2, 0},
{3, 0}, {9, 0}, {8, 0}, {5, 0}, {6, 0}, {12, 6}, {11, 6}, {2, 6}, {3, 6}, {9, 6}, {8, 6}, {5, 6}, {6, 6}};

// Return to std::array instead of std::vector once we get std::span support in C++20
static const std::vector<uint32_t> T6_X_LOCATIONS = {12, 1, 11, 2, 10, 3, 9, 4, 8, 5, 7, 6};
static const std::vector<uint32_t> T6_Y_LOCATIONS = {11, 1, 10, 2, 9, 3, 8, 4, 7, 5};
Expand Down
50 changes: 50 additions & 0 deletions device/api/umd/device/tt_core_coordinates.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,48 @@ enum class CoordSystem : std::uint8_t {
TRANSLATED,
};

// Converts a CoreType enumerator to its spelled-out name (e.g. CoreType::TENSIX -> "TENSIX").
// Any value that is not one of the enumerators handled below yields "UNKNOWN".
static inline std::string to_str(const CoreType core_type) {
    // Start from the fallback and overwrite it when a known enumerator matches.
    const char* label = "UNKNOWN";
    switch (core_type) {
        case CoreType::ARC:
            label = "ARC";
            break;
        case CoreType::DRAM:
            label = "DRAM";
            break;
        case CoreType::ACTIVE_ETH:
            label = "ACTIVE_ETH";
            break;
        case CoreType::IDLE_ETH:
            label = "IDLE_ETH";
            break;
        case CoreType::PCIE:
            label = "PCIE";
            break;
        case CoreType::TENSIX:
            label = "TENSIX";
            break;
        case CoreType::ROUTER_ONLY:
            label = "ROUTER_ONLY";
            break;
        case CoreType::HARVESTED:
            label = "HARVESTED";
            break;
        case CoreType::ETH:
            label = "ETH";
            break;
        case CoreType::WORKER:
            label = "WORKER";
            break;
        default:
            break;
    }
    return label;
}

// Converts a CoordSystem enumerator to its spelled-out name (e.g. CoordSystem::PHYSICAL -> "PHYSICAL").
// Any value that is not one of the enumerators handled below yields "UNKNOWN".
static inline std::string to_str(const CoordSystem coord_system) {
    // Start from the fallback and overwrite it when a known enumerator matches.
    const char* label = "UNKNOWN";
    switch (coord_system) {
        case CoordSystem::LOGICAL:
            label = "LOGICAL";
            break;
        case CoordSystem::PHYSICAL:
            label = "PHYSICAL";
            break;
        case CoordSystem::VIRTUAL:
            label = "VIRTUAL";
            break;
        case CoordSystem::TRANSLATED:
            label = "TRANSLATED";
            break;
        default:
            break;
    }
    return label;
}

namespace tt::umd {

struct CoreCoord : public tt_xy_pair {
Expand All @@ -51,6 +93,9 @@ struct CoreCoord : public tt_xy_pair {
CoreCoord(const size_t x, const size_t y, const CoreType type, const CoordSystem coord_system) :
tt_xy_pair(x, y), core_type(type), coord_system(coord_system) {}

// Builds a CoreCoord from an existing (x, y) pair instead of separate x and y values,
// tagging it with the given core type and the coordinate system the pair is expressed in.
CoreCoord(const tt_xy_pair core, const CoreType type, const CoordSystem coord_system) :
tt_xy_pair(core), core_type(type), coord_system(coord_system) {}

CoreType core_type;
CoordSystem coord_system;

Expand Down Expand Up @@ -80,6 +125,11 @@ struct CoreCoord : public tt_xy_pair {
}
return coord_system < o.coord_system;
}

// Renders this coordinate as "CoreCoord: (x, y, <core type>, <coord system>)".
// The core type and coordinate system names come from the global-namespace to_str overloads.
std::string to_str() const {
    std::string text = "CoreCoord: (";
    text += std::to_string(x);
    text += ", ";
    text += std::to_string(y);
    text += ", ";
    text += ::to_str(core_type);
    text += ", ";
    text += ::to_str(coord_system);
    text += ")";
    return text;
}
};

} // namespace tt::umd
Expand Down
1 change: 1 addition & 0 deletions device/api/umd/device/tt_soc_descriptor.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ class tt_SocDescriptor {
std::unordered_map<tt_xy_pair, int> ethernet_core_channel_map;
std::vector<std::size_t> trisc_sizes; // Most of software stack assumes same trisc size for whole chip..
std::string device_descriptor_file_path = std::string("");
std::vector<tt_xy_pair> router_cores;

int overlay_version;
int unpacker_version;
Expand Down
3 changes: 2 additions & 1 deletion device/api/umd/device/wormhole_coordinate_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ class WormholeCoordinateManager : public CoordinateManager {
const tt_xy_pair& arc_grid_size,
const std::vector<tt_xy_pair>& arc_cores,
const tt_xy_pair& pcie_grid_size,
const std::vector<tt_xy_pair>& pcie_cores);
const std::vector<tt_xy_pair>& pcie_cores,
const std::vector<tt_xy_pair>& router_cores);

protected:
void fill_tensix_physical_translated_mapping() override;
Expand Down
Loading

0 comments on commit 558328f

Please sign in to comment.