Skip to content

Commit

Permalink
Draft IRON implementation for init values
Browse files Browse the repository at this point in the history
  • Loading branch information
abisca committed Oct 31, 2024
1 parent d4db482 commit acf2e52
Show file tree
Hide file tree
Showing 4 changed files with 285 additions and 0 deletions.
4 changes: 4 additions & 0 deletions python/dialects/aie.py
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,7 @@ def __init__(
via_DMA=None,
plio=None,
disable_synchronization=None,
initValues=None,
):
self.datatype = try_convert_np_type_to_mlir_type(datatype)
if not isinstance(consumerTiles, List):
Expand All @@ -387,6 +388,8 @@ def __init__(
dimensionsFromStreamPerConsumer = []
if dimensionsToStream is None:
dimensionsToStream = []
if initValues is None:
initValues = []
of_Ty = TypeAttr.get(ObjectFifoType.get(self.datatype))
super().__init__(
sym_name=name,
Expand All @@ -399,6 +402,7 @@ def __init__(
via_DMA=via_DMA,
plio=plio,
disable_synchronization=disable_synchronization,
initValues=initValues,
)

def acquire(self, port, num_elem):
Expand Down
77 changes: 77 additions & 0 deletions test/npu-xrt/memtile_repeat/init_values_repeat/aie2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
#
# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# (c) Copyright 2024 AMD Inc.

# REQUIRES: ryzen_ai, valid_xchess_license
#
# RUN: %python %S/aie2.py 4 > ./aie2.mlir
# RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --aie-generate-npu --aie-generate-xclbin --no-compile-host --xclbin-name=final.xclbin --npu-insts-name=insts.txt ./aie2.mlir
# RUN: clang %S/test.cpp -o test.exe -std=c++17 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags
# RUN: %run_on_npu ./test.exe -x final.xclbin -i insts.txt -k MLIR_AIE -l 4 | FileCheck %s
# CHECK: PASS!
import numpy as np
import sys

from aie.dialects.aie import *
from aie.dialects.aiex import *
from aie.extras.context import mlir_mod_ctx

# Defaults for the design; each can be overridden positionally:
#   aie2.py [N] [device] [col]
N = 2
depth = 2
dev = AIEDevice.npu1_1col
col = 0
# memtile_repeat_count = 2

# Positional arguments without the script name.
_cli_args = sys.argv[1:]

# 1st argument: number of int32 elements per object.
if len(_cli_args) >= 1:
    N = int(_cli_args[0])
# Computed after N is final so that it follows a command-line override.
data_out_size = N * depth  # * (memtile_repeat_count + 1)

# 2nd argument: target device name.
if len(_cli_args) >= 2:
    _device_name = _cli_args[1]
    if _device_name == "npu":
        dev = AIEDevice.npu1_1col
    elif _device_name == "xcvc1902":
        dev = AIEDevice.xcvc1902
    else:
        raise ValueError("[ERROR] Device name {} is unknown".format(_device_name))

# 3rd argument: column in which the tiles are placed.
if len(_cli_args) >= 3:
    col = int(_cli_args[2])


def init_repeat():
    """Describe the design inside an MLIR module context and print the module.

    Reads the module-level settings `dev`, `col`, `N` and `data_out_size`.
    """
    with mlir_mod_ctx() as ctx:

        @device(dev)
        def device_body():
            # Object type used by both fifos, and the (larger) type of the
            # third runtime-sequence argument.
            obj_ty = np.ndarray[(N,), np.dtype[np.int32]]
            out_buf_ty = np.ndarray[(data_out_size,), np.dtype[np.int32]]

            # Tile declarations: tile(col, 0), tile(col, 1), tile(col, 2)
            shim_tile = tile(col, 0)
            mem_tile = tile(col, 1)
            compute_tile = tile(col, 2)

            # AIE-array data movement with object fifos:
            # "in" runs mem tile -> compute tile, "out" runs compute tile ->
            # shim tile, and the two are linked together.
            fifo_in = object_fifo("in", mem_tile, compute_tile, 2, obj_ty)
            fifo_out = object_fifo("out", compute_tile, shim_tile, 2, obj_ty)
            # fifo_in.set_memtile_repeat(memtile_repeat_count)
            object_fifo_link(fifo_in, fifo_out)

            # To/from AIE-array data movement.
            # A and B are declared but not referenced; only C is used, as the
            # memory operand of the DMA attached to the "out" fifo.
            @runtime_sequence(obj_ty, obj_ty, out_buf_ty)
            def sequence(A, B, C):
                npu_dma_memcpy_nd(
                    metadata=fifo_out,
                    bd_id=0,
                    mem=C,
                    sizes=[1, 1, 1, data_out_size],
                )
                dma_wait(fifo_out)

        # Emit the generated MLIR on stdout (the RUN line redirects it to a file).
        print(ctx.module)


init_repeat()
195 changes: 195 additions & 0 deletions test/npu-xrt/memtile_repeat/init_values_repeat/test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
//===- test.cpp -------------------------------------------000---*- C++ -*-===//
//
// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// Copyright (C) 2023, Advanced Micro Devices, Inc.
//
//===----------------------------------------------------------------------===//

#include <boost/program_options.hpp>
#include <algorithm>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

#include "xrt/xrt_bo.h"
#include "xrt/xrt_device.h"
#include "xrt/xrt_kernel.h"

namespace po = boost::program_options;

// Ensure that the option `name` was supplied on the command line and that the
// file it names can be opened for reading.
//
// vm_in: parsed program options in which `name` is looked up.
// name:  key of an option whose value is a file path.
//
// Throws std::runtime_error when the option is absent or the file cannot be
// opened.
void check_arg_file_exists(po::variables_map &vm_in, std::string name) {
  // Guard: the option must be present before its value can be read.
  if (vm_in.count(name) == 0)
    throw std::runtime_error("Error: no " + name + " file was provided\n");

  const std::string path = vm_in[name].as<std::string>();

  // Opening the file is the existence check; the stream is not read from.
  std::ifstream probe(path);
  if (probe)
    return;

  throw std::runtime_error("The " + name + " file " + path +
                           " does not exist.\n");
}

// Read an instruction sequence from a text file.
//
// Every line must begin with one hexadecimal value, which is stored as a
// 32-bit word; anything following that value on the same line is ignored.
//
// instr_path: path of the text file to read.
//
// Returns the words in file order (empty for an empty file).
//
// Throws std::runtime_error if the file cannot be opened or if a line does
// not start with a parsable hexadecimal value.
std::vector<uint32_t> load_instr_sequence(std::string instr_path) {
  std::ifstream instr_file(instr_path);
  // Without this check an unreadable file would be indistinguishable from an
  // empty one and would silently produce an empty instruction sequence.
  if (!instr_file) {
    throw std::runtime_error("Unable to open instruction file " + instr_path +
                             "\n");
  }

  std::vector<uint32_t> instr_v;
  std::string line;
  while (std::getline(instr_file, line)) {
    std::istringstream iss(line);
    uint32_t word;
    if (!(iss >> std::hex >> word)) {
      throw std::runtime_error("Unable to parse instruction file\n");
    }
    instr_v.push_back(word);
  }
  return instr_v;
}

// Host-side test driver.
//
// Parses the command line, loads the xclbin and the instruction sequence,
// runs the selected kernel once and checks that output element i equals
// i + 1 for the first `--length` elements.
//
// Prints "PASS!" and returns 0 when every element matches; returns 1 on a
// usage error, on --help, when no matching kernel exists, or on any mismatch.
int main(int argc, const char *argv[]) {
  // Program arguments parsing
  po::options_description desc("Allowed options");
  desc.add_options()("help,h", "produce help message")(
      "xclbin,x", po::value<std::string>()->required(),
      "the input xclbin path")(
      "kernel,k", po::value<std::string>()->required(),
      "the kernel name in the XCLBIN (for instance PP_PRE_FD)")(
      "verbosity,v", po::value<int>()->default_value(0),
      "the verbosity of the output")(
      "instr,i", po::value<std::string>()->required(),
      "path of file containing userspace instructions to be sent to the LX6")(
      "length,l", po::value<int>()->default_value(4096),
      "the length of the transfer in int32_t")(
      "repeat,r", po::value<int>()->default_value(3),
      "the memtile repeat count");
  po::variables_map vm;

  try {
    po::store(po::parse_command_line(argc, argv, desc), vm);

    // Handle --help before notify(): notify() enforces the required options
    // and would otherwise reject a bare `--help` with a missing-option error.
    if (vm.count("help")) {
      std::cout << desc << std::endl;
      return 1;
    }

    po::notify(vm);
  } catch (const std::exception &ex) {
    std::cerr << ex.what() << "\n\n";
    std::cerr << "Usage:\n" << desc << std::endl;
    return 1;
  }

  check_arg_file_exists(vm, "xclbin");
  check_arg_file_exists(vm, "instr");

  std::vector<uint32_t> instr_v =
      load_instr_sequence(vm["instr"].as<std::string>());

  int verbosity = vm["verbosity"].as<int>();
  if (verbosity >= 1)
    std::cout << "Sequence instr count: " << instr_v.size() << std::endl;

  int N = vm["length"].as<int>();
  // N sizes the buffers and bounds the verification loop, so a non-positive
  // value cannot describe a valid transfer.
  if (N <= 0) {
    std::cerr << "Error: --length must be positive (got " << N << ")"
              << std::endl;
    return 1;
  }
  // Only referenced by the commented-out sizing expressions below.
  [[maybe_unused]] int repeat_count = vm["repeat"].as<int>();

  // Start the XRT test code
  // Get a device handle
  unsigned int device_index = 0;
  auto device = xrt::device(device_index);

  // Load the xclbin
  if (verbosity >= 1)
    std::cout << "Loading xclbin: " << vm["xclbin"].as<std::string>()
              << std::endl;
  auto xclbin = xrt::xclbin(vm["xclbin"].as<std::string>());

  if (verbosity >= 1)
    std::cout << "Kernel opcode: " << vm["kernel"].as<std::string>()
              << std::endl;
  std::string Node = vm["kernel"].as<std::string>();

  // Get the kernel from the xclbin: the first one whose name starts with the
  // requested kernel name.
  auto xkernels = xclbin.get_kernels();
  auto kernelIt = std::find_if(xkernels.begin(), xkernels.end(),
                               [&Node](xrt::xclbin::kernel &k) {
                                 auto name = k.get_name();
                                 std::cout << "Name: " << name << std::endl;
                                 return name.rfind(Node, 0) == 0;
                               });
  // Dereferencing the end iterator is undefined behaviour, so report a
  // missing kernel explicitly instead.
  if (kernelIt == xkernels.end()) {
    std::cerr << "Error: no kernel whose name starts with \"" << Node
              << "\" was found in the xclbin" << std::endl;
    return 1;
  }
  auto xkernel = *kernelIt;
  auto kernelName = xkernel.get_name();

  if (verbosity >= 1)
    std::cout << "Registering xclbin: " << vm["xclbin"].as<std::string>()
              << "\n";

  device.register_xclbin(xclbin);

  // get a hardware context
  if (verbosity >= 1)
    std::cout << "Getting hardware context." << std::endl;
  xrt::hw_context context(device, xclbin.get_uuid());

  // get a kernel handle
  if (verbosity >= 1)
    std::cout << "Getting handle to kernel:" << kernelName << std::endl;
  auto kernel = xrt::kernel(context, kernelName);

  // Buffer sizes are expressed in the element type actually stored.
  auto bo_instr = xrt::bo(device, instr_v.size() * sizeof(uint32_t),
                          XCL_BO_FLAGS_CACHEABLE, kernel.group_id(1));
  auto bo_inA = xrt::bo(device, N * sizeof(int32_t), XRT_BO_FLAGS_HOST_ONLY,
                        kernel.group_id(3));
  auto bo_inB = xrt::bo(device, N * sizeof(int32_t), XRT_BO_FLAGS_HOST_ONLY,
                        kernel.group_id(4));
  // NOTE(review): the accompanying aie2.py sizes its output transfer as
  // N * depth elements - confirm that N elements is large enough here.
  auto bo_out = xrt::bo(device, N /** (repeat_count + 1)*/ * sizeof(int32_t),
                        XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(5));

  if (verbosity >= 1)
    std::cout << "Writing data into buffer objects." << std::endl;

  // int32_t *bufInA = bo_inA.map<int32_t *>();
  // std::vector<uint32_t> srcVecA;
  // for (int i = 0; i < N; i++)
  //   srcVecA.push_back(i + 1);
  // memcpy(bufInA, srcVecA.data(), (srcVecA.size() * sizeof(uint32_t)));

  void *bufInstr = bo_instr.map<void *>();
  std::memcpy(bufInstr, instr_v.data(), instr_v.size() * sizeof(uint32_t));

  bo_instr.sync(XCL_BO_SYNC_BO_TO_DEVICE);
  bo_inA.sync(XCL_BO_SYNC_BO_TO_DEVICE);

  if (verbosity >= 1)
    std::cout << "Running Kernel." << std::endl;
  unsigned int opcode = 3;
  auto run = kernel(opcode, bo_instr, instr_v.size(), bo_inA, bo_inB, bo_out);
  run.wait();

  bo_out.sync(XCL_BO_SYNC_BO_FROM_DEVICE);

  uint32_t *bufOut = bo_out.map<uint32_t *>();

  // Element i of the output is expected to hold i + 1.
  int errors = 0;
  for (int i = 0; i < N /** (repeat_count + 1)*/; i++) {
    uint32_t ref = static_cast<uint32_t>(i) + 1;
    if (bufOut[i] != ref) {
      std::cout << "error at index[" << i << "]: expected " << ref << " got "
                << bufOut[i] << std::endl;
      errors++;
    }
  }

  if (!errors) {
    std::cout << std::endl << "PASS!" << std::endl << std::endl;
    return 0;
  } else {
    std::cout << std::endl
              << errors << " mismatches." << std::endl
              << std::endl;
    std::cout << std::endl << "fail." << std::endl << std::endl;
    return 1;
  }
}
9 changes: 9 additions & 0 deletions test/python/objFifo.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,15 @@ def objFifo_example():
of2 = object_fifo("of2", T_, C_, 2, np.ndarray[(256,), np.dtype[np.int32]])
of2.set_via_shared_mem(ObjectFifoPort.Consume)

of3 = object_fifo(
"of3",
M,
C_,
2,
np.ndarray[(2,), np.dtype[np.int32]],
initValues = [],
)

C = Core(T_)
bb = Block.create_at_start(C.body)
with InsertionPoint(bb):
Expand Down

0 comments on commit acf2e52

Please sign in to comment.