Skip to content

Commit

Permalink
Read noc translation enabled
Browse files Browse the repository at this point in the history
  • Loading branch information
pjanevskiTT committed Feb 14, 2025
1 parent fa8c747 commit c3c11d3
Show file tree
Hide file tree
Showing 6 changed files with 25 additions and 20 deletions.
2 changes: 1 addition & 1 deletion device/api/umd/device/chip/chip.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ class Chip {
protected:
void wait_chip_to_be_ready();

virtual void wait_eth_cores_training(const uint32_t timeout_per_core_ms = 1000);
virtual void wait_eth_cores_training(const uint32_t timeout_ms = 5000);
};

} // namespace tt::umd
2 changes: 1 addition & 1 deletion device/api/umd/device/chip/local_chip.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,6 @@ class LocalChip : public Chip {
void initialize_tlb_manager();

protected:
void wait_eth_cores_training(const uint32_t timeout_per_core_ms = 1000) override;
void wait_eth_cores_training(const uint32_t timeout_ms = 5000) override;
};
} // namespace tt::umd
5 changes: 4 additions & 1 deletion device/blackhole/blackhole_coordinate_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,10 @@ void BlackholeCoordinateManager::translate_eth_coords() {
}

void BlackholeCoordinateManager::fill_eth_physical_translated_mapping() {
const size_t num_harvested_channels = CoordinateManager::get_num_harvested(harvesting_masks.eth_harvesting_mask);
size_t num_harvested_channels = CoordinateManager::get_num_harvested(harvesting_masks.eth_harvesting_mask);
if (eth_cores.size() == 0) {
num_harvested_channels = 0;
}
for (size_t eth_channel = 0; eth_channel < eth_cores.size() - num_harvested_channels; eth_channel++) {
const size_t translated_x = eth_channel + blackhole::eth_translated_coordinate_start_x;
const size_t translated_y = blackhole::eth_translated_coordinate_start_y;
Expand Down
2 changes: 1 addition & 1 deletion device/chip/chip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,6 @@ const ChipInfo& Chip::get_chip_info() { return chip_info_; }

void Chip::wait_chip_to_be_ready() { wait_eth_cores_training(); }

void Chip::wait_eth_cores_training(const uint32_t timeout_per_core) {}
// Base-class version of the ETH training wait: it performs no work and ignores
// timeout_ms. LocalChip supplies an overriding implementation.
void Chip::wait_eth_cores_training(const uint32_t timeout_ms) {
    // Nothing to wait for in the base class.
}

} // namespace tt::umd
22 changes: 9 additions & 13 deletions device/chip/local_chip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,46 +53,42 @@ TTDevice* LocalChip::get_tt_device() { return tt_device_.get(); }

// Reports whether this chip is MMIO capable; a LocalChip always answers true.
bool LocalChip::is_mmio_capable() const {
    return true;
}

void LocalChip::wait_eth_cores_training(const uint32_t timeout_per_core_ms) {
void LocalChip::wait_eth_cores_training(const uint32_t timeout_ms) {
if (get_tt_device()->get_arch() != tt::ARCH::BLACKHOLE) {
return;
}

const std::vector<CoreCoord> eth_cores = get_soc_descriptor().get_cores(CoreType::ETH);
TTDevice* tt_device = get_tt_device();
auto start = std::chrono::system_clock::now();
for (const CoreCoord& eth_core : eth_cores) {
TTDevice* tt_device = get_tt_device();

const tt_xy_pair eth_core_pair = {eth_core.x, eth_core.y};

uint32_t postcode;
auto start = std::chrono::system_clock::now();

while (true) {
tt_device->read_from_device(&postcode, eth_core_pair, blackhole::BOOT_RESULTS_ADDR, sizeof(postcode));

if (postcode == blackhole::POSTCODE_ETH_INIT_PASS) {
return;
break;
}

if (postcode == blackhole::POSTCODE_ETH_INIT_FAIL) {
// TODO: Exception should be thrown here. ETH connections are very flaky
// on Blackhole right now. When this is fixed we can throw the exception here.
// Since we are not going to do any remote IO at the moment it is fine to just log the error.
log_error("Eth core ({}, {}) failed to initialize", eth_core_pair.x, eth_core_pair.y);
return;
break;
}

auto end = std::chrono::system_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
if (duration.count() > timeout_per_core_ms) {
if (duration.count() > timeout_ms) {
// TODO: Exception should be thrown here. ETH connections are very flaky
// on Blackhole right now. When this is fixed we can throw the exception here.
// Since we are not going to do any remote IO at the moment it is fine to just log the error.
log_error(
"Timed out after waiting {} seconds for eth core ({}, {}) to initialize",
timeout_per_core_ms,
eth_core_pair.x,
eth_core_pair.y);
return;
log_error("ETH training timed out after {} ms", timeout_ms);
break;
}
}
}
Expand Down
12 changes: 9 additions & 3 deletions device/tt_device/blackhole_tt_device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,16 +98,22 @@ ChipInfo BlackholeTTDevice::get_chip_info() {
// It is expected that this entry is always available.
chip_info.chip_uid.asic_location = telemetry->read_entry(blackhole::TAG_ASIC_ID);

// For now, NOC translation is disabled on all Blackhole boards.
// TODO: read this information when it becomes available.
chip_info.noc_translation_enabled = false;
const uint64_t niu_cfg_addr = 0x80050100;
uint32_t niu_cfg;
read_from_device(&niu_cfg, tt_xy_pair{8, 0}, niu_cfg_addr, sizeof(uint32_t));

chip_info.noc_translation_enabled = ((niu_cfg >> 14) & 0x1) != 0;

// It is expected that these entries are always available.
chip_info.chip_uid.board_id = ((uint64_t)telemetry->read_entry(blackhole::TAG_BOARD_ID_HIGH) << 32) |
(telemetry->read_entry(blackhole::TAG_BOARD_ID_LOW));

chip_info.board_type = get_board_type_from_board_id(chip_info.chip_uid.board_id);

if (chip_info.board_type == BoardType::P100) {
chip_info.harvesting_masks.eth_harvesting_mask = 0;
}

return chip_info;
}

Expand Down

0 comments on commit c3c11d3

Please sign in to comment.