Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Host stream #816

Open
wants to merge 12 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@ __pycache__
machines/*/V*
machines/*/obj_dir/
xcelium.d/
*.vpd
124 changes: 124 additions & 0 deletions examples/cuda/test_host_stream/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
# Copyright (c) 2021, University of Washington All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification,
# are permitted provided that the following conditions are met:
#
# Redistributions of source code must retain the above copyright notice, this list
# of conditions and the following disclaimer.
#
# Redistributions in binary form must reproduce the above copyright notice, this
# list of conditions and the following disclaimer in the documentation and/or
# other materials provided with the distribution.
#
# Neither the name of the copyright holder nor the names of its contributors may
# be used to endorse or promote products derived from this software without
# specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# This Makefile compiles, links, and executes examples Run `make help`
# to see the available targets for the selected platform.

################################################################################
# environment.mk verifies the build environment and sets the following
# makefile variables:
#
# LIBRAIRES_PATH: The path to the libraries directory
# HARDWARE_PATH: The path to the hardware directory
# EXAMPLES_PATH: The path to the examples directory
# BASEJUMP_STL_DIR: Path to a clone of BaseJump STL
# BSG_MANYCORE_DIR: Path to a clone of BSG Manycore
###############################################################################

REPLICANT_PATH:=$(shell git rev-parse --show-toplevel)

include $(REPLICANT_PATH)/environment.mk
SPMD_SRC_PATH = $(BSG_MANYCORE_DIR)/software/spmd

# KERNEL_NAME is the name of the CUDA-Lite Kernel
KERNEL_NAME = kernel_host_stream

###############################################################################
# Host code compilation flags and flow
###############################################################################

# TEST_SOURCES is a list of source files that need to be compiled.
# NOTE: the host program in this test is C++ (main.cpp); the previous value
# (main.c) referenced a file that does not exist in this directory.
TEST_SOURCES = main.cpp

DEFINES += -D_XOPEN_SOURCE=500 -D_BSD_SOURCE -D_DEFAULT_SOURCE
CDEFINES +=
CXXDEFINES +=

FLAGS = -g -Wall -Wno-unused-function -Wno-unused-variable
CFLAGS += -std=c99 $(FLAGS)
CXXFLAGS += -std=c++11 $(FLAGS)

# compilation.mk defines rules for compilation of C/C++
include $(EXAMPLES_PATH)/compilation.mk

###############################################################################
# Host code link flags and flow
###############################################################################

# link.mk defines rules for linking of the final execution binary.
include $(EXAMPLES_PATH)/link.mk

###############################################################################
# Device code compilation flow
###############################################################################

# BSG_MANYCORE_KERNELS is a list of manycore executables that should
# be built before executing.
BSG_MANYCORE_KERNELS = kernel.riscv

# Tile Group Dimensions: the kernel forms a 1-D chain of CHAIN_LEN tiles.
TILE_GROUP_DIM_X = 4
TILE_GROUP_DIM_Y = 1

kernel.riscv: kernel.rvo

RISCV_DEFINES += -Dbsg_tiles_X=$(TILE_GROUP_DIM_X)
RISCV_DEFINES += -Dbsg_tiles_Y=$(TILE_GROUP_DIM_Y)

include $(EXAMPLES_PATH)/cuda/riscv.mk

###############################################################################
# Execution flow
#
# C_ARGS: Use this to pass arguments that you want to appear in argv
#         For SPMD tests C arguments are: <Path to RISC-V Binary> <Test Name>
#
# SIM_ARGS: Use this to pass arguments to the simulator
###############################################################################
C_ARGS ?= $(BSG_MANYCORE_KERNELS) $(KERNEL_NAME)

SIM_ARGS ?=

# Include platform-specific execution rules
include $(EXAMPLES_PATH)/execution.mk

###############################################################################
# Regression Flow
###############################################################################

regression: exec.log
	@grep "BSG REGRESSION TEST .*PASSED.*" $< > /dev/null

.DEFAULT_GOAL := help

.PHONY: clean

clean:
	rm -rf *.ld

32 changes: 32 additions & 0 deletions examples/cuda/test_host_stream/kernel.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
// This kernel forwards packets along a chain of SPSC queues: each tile
// receives from its own queue and sends to the next tile's queue, streaming
// data injected by the host and eventually back to the host.

#include "bsg_manycore.h"
#include "bsg_set_tile_x_y.h"
#include "bsg_manycore_spsc_queue.hpp"

#define BUFFER_ELS 10
#define CHAIN_LEN 4
#define NUM_PACKETS 100

extern "C" __attribute__ ((noinline))
int kernel_host_stream(int *buffer_chain, int *buffer_count)
{
int *recv_buffer = &buffer_chain[0] + (__bsg_id * BUFFER_ELS);
int *recv_count = &buffer_count[0] + (__bsg_id);

int *send_buffer = &buffer_chain[0] + ((__bsg_id+1) * BUFFER_ELS);
int *send_count = &buffer_count[0] + (__bsg_id+1);

bsg_manycore_spsc_queue_recv<int, BUFFER_ELS> recv_spsc(recv_buffer, recv_count);
bsg_manycore_spsc_queue_send<int, BUFFER_ELS> send_spsc(send_buffer, send_count);

int data;
while(1)
{
data = recv_spsc.recv();
send_spsc.send(data);
}

return 0;
}

165 changes: 165 additions & 0 deletions examples/cuda/test_host_stream/main.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
// Copyright (c) 2019, University of Washington All rights reserved.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// Redistributions of source code must retain the above copyright notice, this list
// of conditions and the following disclaimer.
//
// Redistributions in binary form must reproduce the above copyright notice, this
// list of conditions and the following disclaimer in the documentation and/or
// other materials provided with the distribution.
//
// Neither the name of the copyright holder nor the names of its contributors may
// be used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
// ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include <bsg_manycore_tile.h>
#include <bsg_manycore_errno.h>
#include <bsg_manycore_tile.h>
#include <bsg_manycore_loader.h>
#include <bsg_manycore_cuda.h>
#include <stdlib.h>
#include <time.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <stdio.h>
#include <bsg_manycore_regression.h>
#include <bsg_manycore_responder.h>
#include <algorithm>
#include <vector>
#include <bsg_manycore_spsc_queue.hpp>

#define ALLOC_NAME "default_allocator"
#define TEST_BYTE 0xcd

#define BUFFER_ELS 10
#define CHAIN_LEN 4
#define NUM_PACKETS 100

/*!
 * Runs the host_stream kernel on a 4x1 (CHAIN_LEN x 1) tile group.
 * This test streams data from circular buffers on the host, through the
 * manycore tiles in a chain, and then back to the host. Validation checks
 * that the data received matches the data pattern sent.
 *
 * This test demonstrates how the host can run concurrently with manycore
 * code in a streaming or cooperative manner.
 */

int kernel_host_stream(int argc, char **argv) {
int rc;
char *bin_path, *test_name;
struct arguments_path args = {NULL, NULL};

argp_parse (&argp_path, argc, argv, 0, 0, &args);
bin_path = args.path;
test_name = args.name;

bsg_pr_test_info("Running the CUDA Device Memset Kernel on a grid of one 2x2 tile group.\n\n");

/*****************************************************************************************************************
* Define path to binary.
* Initialize device, load binary and unfreeze tiles.
******************************************************************************************************************/
hb_mc_device_t *device = (hb_mc_device_t *) malloc(sizeof(hb_mc_device_t));
BSG_CUDA_CALL(hb_mc_device_init(device, test_name, 0));
BSG_CUDA_CALL(hb_mc_device_program_init(device, bin_path, ALLOC_NAME, 0));
hb_mc_manycore_t *mc = device->mc;
hb_mc_pod_id_t pod_id = device->default_pod_id;
hb_mc_pod_t *pod = &device->pods[pod_id];

/*****************************************************************************************************************
*
******************************************************************************************************************/
eva_t buffer_device;
eva_t count_device;
BSG_CUDA_CALL(hb_mc_device_malloc(device, BUFFER_ELS * (CHAIN_LEN+1) * sizeof(int), &buffer_device));
BSG_CUDA_CALL(hb_mc_device_malloc(device, (CHAIN_LEN+1) * sizeof(int), &count_device));

BSG_CUDA_CALL(hb_mc_device_memset(device, &count_device, 0, (CHAIN_LEN+1) * sizeof(int)));

/*****************************************************************************************************************
* Define block_size_x/y: amount of work for each tile group
* Define tg_dim_x/y: number of tiles in each tile group
* Calculate grid_dim_x/y: number of tile groups needed based on block_size_x/y
******************************************************************************************************************/
hb_mc_dimension_t tg_dim = { .x = CHAIN_LEN, .y = 1 };

hb_mc_dimension_t grid_dim = { .x = 1, .y = 1 };


/*****************************************************************************************************************
* Prepare list of input arguments for kernel.
******************************************************************************************************************/
uint32_t cuda_argv[2] = {buffer_device, count_device};

/*****************************************************************************************************************
* Enquque grid of tile groups, pass in grid and tile group dimensions, kernel name, number and list of input arguments
******************************************************************************************************************/
BSG_CUDA_CALL(hb_mc_kernel_enqueue (device, grid_dim, tg_dim, "kernel_host_stream", 2, cuda_argv));

/*****************************************************************************************************************
* Launch and execute all tile groups on device and wait for all to finish.
******************************************************************************************************************/

int packets_sent = 0;
int packets_recv = 0;
int mismatch = 0;
void *src, *dst;

eva_t send_count_eva = count_device;
eva_t send_buffer_eva = buffer_device;
bsg_manycore_spsc_queue_send<int, BUFFER_ELS> send_spsc(device, send_buffer_eva, send_count_eva);

eva_t recv_count_eva = count_device + CHAIN_LEN * sizeof(int);
eva_t recv_buffer_eva = buffer_device + (CHAIN_LEN * BUFFER_ELS * sizeof(int));
bsg_manycore_spsc_queue_recv<int, BUFFER_ELS> recv_spsc(device, recv_buffer_eva, recv_count_eva);
BSG_CUDA_CALL(hb_mc_manycore_host_request_fence(mc, -1));
BSG_CUDA_CALL(hb_mc_device_pod_try_launch_tile_groups(device, pod));
do
{
int send_data = packets_sent;
if (send_spsc.try_send(send_data))
{
packets_sent++;
}

int recv_data;
if (recv_spsc.try_recv(&recv_data))
{
if (recv_data != packets_recv++)
{
mismatch = 1;
}
}

// Check for finish
hb_mc_device_pod_wait_for_tile_group_finish_any(device, pod, 1);
} while (packets_recv < NUM_PACKETS);

/*****************************************************************************************************************
* Freeze the tiles and memory manager cleanup.
******************************************************************************************************************/
BSG_CUDA_CALL(hb_mc_device_finish(device));

// Fail if data is not expected
if (mismatch) {
return HB_MC_FAIL;
}
return HB_MC_SUCCESS;
}

declare_program_main("test_host_stream", kernel_host_stream);
Loading