From 3d36c42505bbc17af054b8b411464d6a9a456bf5 Mon Sep 17 00:00:00 2001 From: Gregory Jun Date: Sun, 15 Dec 2024 19:40:19 -0600 Subject: [PATCH] minimal example --- .../basic/passthrough_kernel/Makefile | 2 +- .../passthrough_kernel_alt.py | 68 ++++++++++++++++--- 2 files changed, 59 insertions(+), 11 deletions(-) diff --git a/programming_examples/basic/passthrough_kernel/Makefile b/programming_examples/basic/passthrough_kernel/Makefile index 721a0cac94..393494882a 100755 --- a/programming_examples/basic/passthrough_kernel/Makefile +++ b/programming_examples/basic/passthrough_kernel/Makefile @@ -20,7 +20,7 @@ trace_size = 8192 PASSTHROUGH_SIZE = ${data_size} aie_py_src=${targetname}.py -use_alt?=0 +use_alt?=1 ifeq (${use_alt}, 1) aie_py_src=${targetname}_alt.py diff --git a/programming_examples/basic/passthrough_kernel/passthrough_kernel_alt.py b/programming_examples/basic/passthrough_kernel/passthrough_kernel_alt.py index 2f4260ea8a..fefa5980d2 100644 --- a/programming_examples/basic/passthrough_kernel/passthrough_kernel_alt.py +++ b/programming_examples/basic/passthrough_kernel/passthrough_kernel_alt.py @@ -33,27 +33,75 @@ def device_body(): # Tile declarations ShimTile = tile(0, 0) + MemTile = tile(0, 1) ComputeTile2 = tile(0, 2) # Set up a circuit-switched flow from core to shim for tracing information if trace_size > 0: flow(ComputeTile2, WireBundle.Trace, 0, ShimTile, WireBundle.DMA, 1) - + # AIE-array data movement with object fifos - of_in = object_fifo("in", ShimTile, ComputeTile2, 2, line_ty) - of_out = object_fifo("out", ComputeTile2, ShimTile, 2, line_ty) + # Input + # Original code + # of_in = object_fifo("in", ShimTile, ComputeTile2, 2, line_ty) + # of_out = object_fifo("out", ComputeTile2, ShimTile, 2, line_ty) + +################################################################################################ + + # Passes if Object FIFO size is the same + # of_in0 = object_fifo("in0", ShimTile, MemTile, 2, line_ty) + # of_in1 = object_fifo("in1", 
MemTile, ComputeTile2, 2, line_ty, + # ( + # [ + # (2, 512), + # (512, 1), + # ] + # ), + # ) + # object_fifo_link(of_in0, of_in1) + +################################################################################################ + + # Does not complete and times out if Object FIFO size is different and there are transforms + of_in0 = object_fifo("in0", ShimTile, MemTile, 2, vector_ty) + of_in1 = object_fifo("in1", MemTile, ComputeTile2, 2, line_ty, + ( + [ + # comment to make it work + (8, 512), # 8 to account for different fifo sizes + (512, 1), + ] + ), + ) + object_fifo_link(of_in0, of_in1) + + # After Stateful Transform + # ^bb4: // 2 preds: ^bb3, ^bb5 + # aie.use_lock(%in0_cons_cons_lock, AcquireGreaterEqual, 1) + # aie.dma_bd(%in0_cons_buff_0 : memref<4096xui8>, 0, 1024, [, ]) + # aie.use_lock(%in0_cons_prod_lock, Release, 1) + # aie.next_bd ^bb5 + + # As can be seen with streaming transformation memref and 1024 do not align. Commenting out the transformation aligns this + # so that "aie.dma_bd(%in0_cons_buff_0 : memref<4096xui8>, 0, 4096)" and functions correctly just for the passthrough kernel + +################################################################################################ + + # Output + of_out0 = object_fifo("out0", MemTile, ShimTile, 2, line_ty) + of_out1 = object_fifo("out1", ComputeTile2, MemTile, 2, line_ty) + object_fifo_link(of_out1, of_out0) # Set up compute tiles - # Compute tile 2 @core(ComputeTile2, "passThrough.cc.o") def core_body(): for _ in range_(sys.maxsize): - elemOut = of_out.acquire(ObjectFifoPort.Produce, 1) - elemIn = of_in.acquire(ObjectFifoPort.Consume, 1) + elemOut = of_out1.acquire(ObjectFifoPort.Produce, 1) + elemIn = of_in1.acquire(ObjectFifoPort.Consume, 1) passThroughLine(elemIn, elemOut, lineWidthInBytes) - of_in.release(ObjectFifoPort.Consume, 1) - of_out.release(ObjectFifoPort.Produce, 1) + of_in1.release(ObjectFifoPort.Consume, 1) + of_out1.release(ObjectFifoPort.Produce, 1) # 
print(ctx.module.operation.verify()) @@ -68,10 +116,10 @@ def sequence(inTensor, outTensor, notUsed): offset=N, ) in_task = shim_dma_single_bd_task( - of_in, inTensor, sizes=[1, 1, 1, N], issue_token=True + of_in0, inTensor, sizes=[1, 1, 1, N], issue_token=True ) out_task = shim_dma_single_bd_task( - of_out, outTensor, sizes=[1, 1, 1, N], issue_token=True + of_out0, outTensor, sizes=[1, 1, 1, N], issue_token=True ) dma_start_task(in_task, out_task)