Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Host stream #816

Open
wants to merge 12 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@ __pycache__
machines/*/V*
machines/*/obj_dir/
xcelium.d/
*.vpd
124 changes: 124 additions & 0 deletions examples/cuda/test_host_stream/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
# Copyright (c) 2021, University of Washington All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification,
# are permitted provided that the following conditions are met:
#
# Redistributions of source code must retain the above copyright notice, this list
# of conditions and the following disclaimer.
#
# Redistributions in binary form must reproduce the above copyright notice, this
# list of conditions and the following disclaimer in the documentation and/or
# other materials provided with the distribution.
#
# Neither the name of the copyright holder nor the names of its contributors may
# be used to endorse or promote products derived from this software without
# specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# This Makefile compiles, links, and executes examples Run `make help`
# to see the available targets for the selected platform.

################################################################################
# environment.mk verifies the build environment and sets the following
# makefile variables:
#
# LIBRAIRES_PATH: The path to the libraries directory
# HARDWARE_PATH: The path to the hardware directory
# EXAMPLES_PATH: The path to the examples directory
# BASEJUMP_STL_DIR: Path to a clone of BaseJump STL
# BSG_MANYCORE_DIR: Path to a clone of BSG Manycore
###############################################################################

REPLICANT_PATH:=$(shell git rev-parse --show-toplevel)

include $(REPLICANT_PATH)/environment.mk
SPMD_SRC_PATH = $(BSG_MANYCORE_DIR)/software/spmd

# KERNEL_NAME is the name of the CUDA-Lite Kernel
KERNEL_NAME = kernel_host_stream

###############################################################################
# Host code compilation flags and flow
###############################################################################

# TEST_SOURCES is a list of source files that need to be compiled.
# NOTE: the host program in this test is C++ (main.cpp); the previous value
# (main.c) referenced a file that does not exist in this directory.
TEST_SOURCES = main.cpp

DEFINES += -D_XOPEN_SOURCE=500 -D_BSD_SOURCE -D_DEFAULT_SOURCE
CDEFINES +=
CXXDEFINES +=

FLAGS = -g -Wall -Wno-unused-function -Wno-unused-variable
CFLAGS += -std=c99 $(FLAGS)
CXXFLAGS += -std=c++11 $(FLAGS)

# compilation.mk defines rules for compilation of C/C++
include $(EXAMPLES_PATH)/compilation.mk

###############################################################################
# Host code link flags and flow
###############################################################################

# link.mk defines rules for linking of the final execution binary.
include $(EXAMPLES_PATH)/link.mk

###############################################################################
# Device code compilation flow
###############################################################################

# BSG_MANYCORE_KERNELS is a list of manycore executables that should
# be built before executing.
BSG_MANYCORE_KERNELS = kernel.riscv

# Tile Group Dimensions: the kernel forms a 1-D chain of CHAIN_LEN tiles.
TILE_GROUP_DIM_X = 4
TILE_GROUP_DIM_Y = 1

kernel.riscv: kernel.rvo

RISCV_DEFINES += -Dbsg_tiles_X=$(TILE_GROUP_DIM_X)
RISCV_DEFINES += -Dbsg_tiles_Y=$(TILE_GROUP_DIM_Y)

include $(EXAMPLES_PATH)/cuda/riscv.mk

###############################################################################
# Execution flow
#
# C_ARGS: Use this to pass arguments that you want to appear in argv
#         For SPMD tests C arguments are: <Path to RISC-V Binary> <Test Name>
#
# SIM_ARGS: Use this to pass arguments to the simulator
###############################################################################
C_ARGS ?= $(BSG_MANYCORE_KERNELS) $(KERNEL_NAME)

SIM_ARGS ?=

# Include platform-specific execution rules
include $(EXAMPLES_PATH)/execution.mk

###############################################################################
# Regression Flow
###############################################################################

regression: exec.log
	@grep "BSG REGRESSION TEST .*PASSED.*" $< > /dev/null

.DEFAULT_GOAL := help

.PHONY: clean

clean:
	rm -rf *.ld

32 changes: 32 additions & 0 deletions examples/cuda/test_host_stream/kernel.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
// This kernel forwards packets along a chain of SPSC queues: each tile
// receives from its own queue and sends to the next tile's queue, streaming
// data injected by the host and eventually back to the host.

#include "bsg_manycore.h"
#include "bsg_set_tile_x_y.h"
#include "bsg_manycore_spsc_queue.hpp"

#define BUFFER_ELS 10
#define CHAIN_LEN 4
#define NUM_PACKETS 100

extern "C" __attribute__ ((noinline))
int kernel_host_stream(int *buffer_chain, int *buffer_count)
{
int *recv_buffer = &buffer_chain[0] + (__bsg_id * BUFFER_ELS);
int *recv_count = &buffer_count[0] + (__bsg_id);

int *send_buffer = &buffer_chain[0] + ((__bsg_id+1) * BUFFER_ELS);
int *send_count = &buffer_count[0] + (__bsg_id+1);

bsg_manycore_spsc_queue_recv<int, BUFFER_ELS> recv_spsc(recv_buffer, recv_count);
bsg_manycore_spsc_queue_send<int, BUFFER_ELS> send_spsc(send_buffer, send_count);

int data;
while(1)
{
data = recv_spsc.recv();
send_spsc.send(data);
}

return 0;
}

165 changes: 165 additions & 0 deletions examples/cuda/test_host_stream/main.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
// Copyright (c) 2019, University of Washington All rights reserved.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// Redistributions of source code must retain the above copyright notice, this list
// of conditions and the following disclaimer.
//
// Redistributions in binary form must reproduce the above copyright notice, this
// list of conditions and the following disclaimer in the documentation and/or
// other materials provided with the distribution.
//
// Neither the name of the copyright holder nor the names of its contributors may
// be used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
// ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include <bsg_manycore_tile.h>
#include <bsg_manycore_errno.h>
#include <bsg_manycore_tile.h>
#include <bsg_manycore_loader.h>
#include <bsg_manycore_cuda.h>
#include <stdlib.h>
#include <time.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <stdio.h>
#include <bsg_manycore_regression.h>
#include <bsg_manycore_responder.h>
#include <algorithm>
#include <vector>
#include <bsg_manycore_spsc_queue.hpp>

#define ALLOC_NAME "default_allocator"
#define TEST_BYTE 0xcd

#define BUFFER_ELS 10
#define CHAIN_LEN 4
#define NUM_PACKETS 100

/*!
 * Runs the host_stream kernel on a 4x1 (CHAIN_LEN x 1) tile group.
 * This test streams data from circular buffers on the host, through the
 * manycore tiles in a chain, and then back to the host. Validation checks
 * that the data received matches the data pattern sent.
 *
 * This test demonstrates how the host can run concurrently with manycore
 * code in a streaming or cooperative manner.
 */

int kernel_host_stream(int argc, char **argv) {
int rc;
char *bin_path, *test_name;
struct arguments_path args = {NULL, NULL};

argp_parse (&argp_path, argc, argv, 0, 0, &args);
bin_path = args.path;
test_name = args.name;

bsg_pr_test_info("Running the CUDA Device Memset Kernel on a grid of one 2x2 tile group.\n\n");

/*****************************************************************************************************************
* Define path to binary.
* Initialize device, load binary and unfreeze tiles.
******************************************************************************************************************/
hb_mc_device_t *device = (hb_mc_device_t *) malloc(sizeof(hb_mc_device_t));
BSG_CUDA_CALL(hb_mc_device_init(device, test_name, 0));
BSG_CUDA_CALL(hb_mc_device_program_init(device, bin_path, ALLOC_NAME, 0));
hb_mc_manycore_t *mc = device->mc;
hb_mc_pod_id_t pod_id = device->default_pod_id;
hb_mc_pod_t *pod = &device->pods[pod_id];

/*****************************************************************************************************************
*
******************************************************************************************************************/
eva_t buffer_device;
eva_t count_device;
BSG_CUDA_CALL(hb_mc_device_malloc(device, BUFFER_ELS * (CHAIN_LEN+1) * sizeof(int), &buffer_device));
BSG_CUDA_CALL(hb_mc_device_malloc(device, (CHAIN_LEN+1) * sizeof(int), &count_device));

BSG_CUDA_CALL(hb_mc_device_memset(device, &count_device, 0, (CHAIN_LEN+1) * sizeof(int)));

/*****************************************************************************************************************
* Define block_size_x/y: amount of work for each tile group
* Define tg_dim_x/y: number of tiles in each tile group
* Calculate grid_dim_x/y: number of tile groups needed based on block_size_x/y
******************************************************************************************************************/
hb_mc_dimension_t tg_dim = { .x = CHAIN_LEN, .y = 1 };

hb_mc_dimension_t grid_dim = { .x = 1, .y = 1 };


/*****************************************************************************************************************
* Prepare list of input arguments for kernel.
******************************************************************************************************************/
uint32_t cuda_argv[2] = {buffer_device, count_device};

/*****************************************************************************************************************
* Enquque grid of tile groups, pass in grid and tile group dimensions, kernel name, number and list of input arguments
******************************************************************************************************************/
BSG_CUDA_CALL(hb_mc_kernel_enqueue (device, grid_dim, tg_dim, "kernel_host_stream", 2, cuda_argv));

/*****************************************************************************************************************
* Launch and execute all tile groups on device and wait for all to finish.
******************************************************************************************************************/

int packets_sent = 0;
int packets_recv = 0;
int mismatch = 0;
void *src, *dst;

eva_t send_count_eva = count_device;
eva_t send_buffer_eva = buffer_device;
bsg_manycore_spsc_queue_send<int, BUFFER_ELS> send_spsc(device, send_buffer_eva, send_count_eva);

eva_t recv_count_eva = count_device + CHAIN_LEN * sizeof(int);
eva_t recv_buffer_eva = buffer_device + (CHAIN_LEN * BUFFER_ELS * sizeof(int));
bsg_manycore_spsc_queue_recv<int, BUFFER_ELS> recv_spsc(device, recv_buffer_eva, recv_count_eva);
BSG_CUDA_CALL(hb_mc_manycore_host_request_fence(mc, -1));
BSG_CUDA_CALL(hb_mc_device_pod_try_launch_tile_groups(device, pod));
do
{
int send_data = packets_sent;
if (send_spsc.try_send(send_data))
{
packets_sent++;
}

int recv_data;
if (recv_spsc.try_recv(&recv_data))
{
if (recv_data != packets_recv++)
{
mismatch = 1;
}
}

// Check for finish
hb_mc_device_pod_wait_for_tile_group_finish_any(device, pod, 1);
} while (packets_recv < NUM_PACKETS);

/*****************************************************************************************************************
* Freeze the tiles and memory manager cleanup.
******************************************************************************************************************/
BSG_CUDA_CALL(hb_mc_device_finish(device));

// Fail if data is not expected
if (mismatch) {
return HB_MC_FAIL;
}
return HB_MC_SUCCESS;
}

declare_program_main("test_host_stream", kernel_host_stream);
Loading