Skip to content

Commit

Permalink
cleaning up/adding kernel code to repo
Browse files Browse the repository at this point in the history
  • Loading branch information
eafurst authored and mrutt92 committed May 3, 2021
1 parent 4ff7aa6 commit 4d2d896
Show file tree
Hide file tree
Showing 4 changed files with 272 additions and 34 deletions.
34 changes: 28 additions & 6 deletions examples/graphit/test_pr_nibble/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,29 @@ GRAPH_PATH := $(GRAPHIT_PATH)/test/graphs/darpa-eval/jhu.mtx
TEST_NAME = main
# KERNEL_NAME is the name of the CUDA-Lite Kernel
KERNEL_NAME = pr_nibble
HOST_TARGET := $(TEST_NAME).profile

BASE_VERSIONS += hybrid-update

# Iteration indices for which a separate kernel version name is generated.
ITERATIONS := 0 1 2 3 4 5 6 7 8 9
# Compose a version name "<base-version>-iteration-<iter>".
#   $1: base version   $2: iteration index
v-from-basev-and-iter = $1-iteration-$2
# Inverse helpers: replace "-iteration-" with a space, then pick word 1
# (the base version) or word 2 (the iteration index) of the result.
basev-from-v = $(word 1,$(subst -iteration-, ,$1))
iter-from-v = $(word 2,$(subst -iteration-, ,$1))

# One version name for every (iteration, base version) pair.
VERSIONS := $(foreach i,$(ITERATIONS),$(foreach v,$(BASE_VERSIONS),\
$(call v-from-basev-and-iter,$v,$i)))

# Directory of each generated version, located under kernel/.
VERSION-DIRS := $(foreach v,$(VERSIONS),kernel/$v)

# Create a version directory by recursively copying its base version.
# $@ is kernel/<base>-iteration-<n>, so basev-from-v yields kernel/<base>.
# The directories are declared phony, so the copy recipe runs every time one
# of them is requested, whether or not it already exists.
.PHONY: $(VERSION-DIRS)
$(VERSION-DIRS):
cp -r $(call basev-from-v,$@) $@

# versions:        create every version directory.
# bleach-versions: delete every version directory.
.PHONY: versions bleach-versions
versions: $(VERSION-DIRS)
bleach-versions:
rm -rf $(VERSION-DIRS)

VERSIONS = hybrid
DEFAULT_VERSION := hybrid
KERNEL_DEFAULT := kernel/$(DEFAULT_VERSION)/kernel.cpp

Expand Down Expand Up @@ -131,28 +152,29 @@ SIM_ARGS ?=
# Include platform-specific execution rules
include $(EXAMPLES_PATH)/execution.mk

HOST_TARGET := $(TEST_NAME).profile

$(VERSIONS): %: kernel/%/$(HOST_TARGET).log

ALIASES = vanilla_stats.csv vcache_stats.csv
ALIASES = vanilla_stats.csv vcache_stats.csv dramsim3epoch.json dramsim3.json dramsim3.tag.json dramsim3.txt
$(ALIASES): $(HOST_TARGET).log ;
$(HOST_TARGET).log: kernel.riscv $(HOST_TARGET)
$(HOST_TARGET).log: $(HOST_TARGET) kernel.riscv
./$(HOST_TARGET) $(SIM_ARGS) +c_args="kernel.riscv $(DEFAULT_VERSION) $(C_ARGS)" 2>&1 | tee $@


KERNEL_ALIASES = $(foreach a,$(ALIASES),kernel/%/$a)
.PRECIOUS: $(KERNEL_ALIASES)
$(KERNEL_ALIASES): kernel/%/$(HOST_TARGET).log ;
kernel/%/$(HOST_TARGET).log: kernel/%/kernel.riscv $(HOST_TARGET)
# Run the host binary against the kernel of one version directory and capture
# its combined stdout/stderr in kernel/<version>/$(HOST_TARGET).log.
#   EXEC_PATH   : directory of the target (kernel/<version>), trailing '/' removed
#   KERNEL_PATH : EXEC_PATH prefixed with $(CURRENT_PATH)
#   _VERSION    : last path component of EXEC_PATH, passed to the host program
# The recipe changes into EXEC_PATH first, so the log is written with tee under
# the target's file name only ($(notdir $@)). The previous recipe used
# $(notdir $a); $a is not set inside a recipe, so tee received no file name
# and the log target was never created.
kernel/%/$(HOST_TARGET).log: $(HOST_TARGET) kernel/%/kernel.riscv
	$(eval EXEC_PATH := $(patsubst %/,%,$(dir $@)))
	$(eval KERNEL_PATH := $(CURRENT_PATH)/$(EXEC_PATH))
	$(eval _VERSION := $(notdir $(EXEC_PATH)))
	cd $(EXEC_PATH) && \
	$(CURRENT_PATH)/$(HOST_TARGET) $(SIM_ARGS) +c_args="$(KERNEL_PATH)/kernel.riscv $(_VERSION) $(C_ARGS)" \
	2>&1 | tee $(notdir $@)

versions: $(foreach v,$(VERSIONS),kernel/$v/$(HOST_TARGET).log)
.PRECIOUS: %.log

all-versions: $(foreach v,$(VERSIONS),kernel/$v/$(HOST_TARGET).log)

###############################################################################
# Regression Flow
Expand Down
229 changes: 229 additions & 0 deletions examples/graphit/test_pr_nibble/kernel/hybrid/kernel.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,229 @@
//#define DEBUG
#include <bsg_manycore.h>

#ifdef DEBUG
#define BSG_TILE_GROUP_X_DIM 1
#define BSG_TILE_GROUP_Y_DIM 1
#define bsg_tiles_X BSG_TILE_GROUP_X_DIM
#define bsg_tiles_Y BSG_TILE_GROUP_Y_DIM
#else
#include <bsg_set_tile_x_y.h>
// #define BSG_TILE_GROUP_X_DIM 16
// #define BSG_TILE_GROUP_Y_DIM 8
#endif

#include <bsg_tile_group_barrier.hpp>
bsg_barrier<bsg_tiles_X, bsg_tiles_Y> barrier;

#include <pr_nibble.hpp>
#include <cstring>

#ifdef DEBUG
#define pr_dbg(fmt, ...) \
bsg_printf(fmt, ##__VA_ARGS__)
#else
#define pr_dbg(fmt, ...)
#endif

// Global array pointers shared by the functors in this file. The section
// attribute places each pointer variable in the ".dram" section.
// NOTE(review): presumably each one points at a per-vertex array that the
// host sets up before launching the kernels -- confirm against main.cpp.
// p: incremented by updateSelf, zeroed by p_generated_vector_op_apply_func_0.
__attribute__((section(".dram"))) float * __restrict p;
// old_rank: read by updateEdge and updateSelf.
__attribute__((section(".dram"))) float * __restrict old_rank;
// new_rank: accumulated by updateEdge, reset by updateSelf, read by filter_frontier.
__attribute__((section(".dram"))) float * __restrict new_rank;
// out_degree: divisor in updateEdge and threshold scale in filter_frontier.
__attribute__((section(".dram"))) int * __restrict out_degree;
// generated_tmp_vector_3: copied into out_degree by generated_vector_op_apply_func_4.
__attribute__((section(".dram"))) int * __restrict generated_tmp_vector_3;
// Disabled double-precision constants; the functors use local float values instead.
//__attribute__((section(".dram"))) double alpha = 0.15;
//__attribute__((section(".dram"))) double epsilon = (double) 1e-6;

/**
 * Pull-direction edge traversal over the incoming-edge lists.
 *
 * For every destination vertex in the [first, last) range that
 * local_range(V, ...) reports, walks the entries of in_neighbors between
 * in_indices[dst] and in_indices[dst + 1] and calls apply_func(src, dst) for
 * each source whose from_vertexset entry is non-zero.
 *
 * @param in_indices     per-vertex offsets into in_neighbors (entry dst and dst + 1 are read)
 * @param in_neighbors   source vertex ids of the incoming edges
 * @param from_vertexset per-vertex flags; non-zero selects a source vertex
 * @param apply_func     callable invoked as apply_func(src, dst)
 * @param V              value forwarded to local_range
 * @param E              unused
 * @param block_size_x   unused
 * @return always 0
 */
template <typename APPLY_FUNC>
int edgeset_apply_pull_parallel_from_vertexset(int *in_indices, int *in_neighbors, int *from_vertexset, APPLY_FUNC apply_func, int V, int E, int block_size_x)
{
    int first, last;
    local_range(V, &first, &last);

    for (int dst = first; dst < last; ++dst) {
        const int edge_count = in_indices[dst + 1] - in_indices[dst];
        int *sources = &in_neighbors[in_indices[dst]];

        for (int k = 0; k < edge_count; ++k) {
            const int src = sources[k];
            if (!from_vertexset[src]) {
                continue; // source is not part of the selected vertex set
            }
            apply_func(src, dst);
        }
    }
    return 0;
}

/**
 * Push-direction edge traversal over the outgoing-edge lists.
 *
 * For every source vertex in the [first, last) range that
 * local_range(V, ...) reports and whose from_vertexset entry is non-zero,
 * walks the entries of out_neighbors between out_indices[src] and
 * out_indices[src + 1] and calls apply_func(src, dst) for each of them.
 *
 * NOTE(review): dst is not limited to this caller's [first, last) range. If
 * several tiles run this concurrently and apply_func writes per-dst state
 * (updateEdge writes new_rank[dst]), those updates may race -- confirm.
 *
 * @param out_indices    per-vertex offsets into out_neighbors (entry src and src + 1 are read)
 * @param out_neighbors  destination vertex ids of the outgoing edges
 * @param from_vertexset per-vertex flags; non-zero selects a source vertex
 * @param apply_func     callable invoked as apply_func(src, dst)
 * @param V              value forwarded to local_range
 * @param E              unused
 * @param block_size_x   unused
 * @return always 0
 */
template <typename APPLY_FUNC>
int edgeset_apply_push_parallel_from_vertexset(int *out_indices, int *out_neighbors, int *from_vertexset, APPLY_FUNC apply_func, int V, int E, int block_size_x)
{
    int first, last;
    local_range(V, &first, &last);

    for (int src = first; src < last; ++src) {
        if (!from_vertexset[src]) {
            continue; // only selected vertices push along their edges
        }
        const int edge_count = out_indices[src + 1] - out_indices[src];
        int *targets = &out_neighbors[out_indices[src]];

        for (int k = 0; k < edge_count; ++k) {
            apply_func(src, targets[k]);
        }
    }
    return 0;
}


/// Per-vertex functor: copies generated_tmp_vector_3[vertex] into out_degree[vertex].
struct generated_vector_op_apply_func_4
{
    void operator() (int vertex)
    {
        const int value = generated_tmp_vector_3[vertex];
        out_degree[vertex] = value;
    }
};
/// Per-vertex functor: resets new_rank[vertex] to zero.
struct new_rank_generated_vector_op_apply_func_2
{
    void operator() (int vertex)
    {
        new_rank[vertex] = 0.0f;
    }
};
/// Per-vertex functor: resets old_rank[vertex] to zero.
struct old_rank_generated_vector_op_apply_func_1
{
    void operator() (int vertex)
    {
        old_rank[vertex] = 0.0f;
    }
};
/// Per-vertex functor: resets p[vertex] to zero.
struct p_generated_vector_op_apply_func_0
{
    void operator() (int vertex)
    {
        p[vertex] = 0.0f;
    }
};
/// Edge functor: adds to new_rank[dst] the share
/// ((1 - alpha) / (1 + alpha)) * old_rank[src] / out_degree[src], with alpha = 0.15.
struct updateEdge
{
    void operator() (int src, int dst)
    {
        float alpha = 0.15;
        // Factor applied to the source's old rank before dividing by its out-degree.
        const float push_factor = ((1) - alpha) / ((1) + alpha);
        // Multiply, divide and add stay in one expression so the floating-point
        // evaluation order is unchanged.
        new_rank[dst] = new_rank[dst] + ((push_factor * old_rank[src]) / out_degree[src]);
    }
};
/// Vertex functor: adds ((2 * alpha) / (1 + alpha)) * old_rank[v] to p[v]
/// (alpha = 0.15) and then clears new_rank[v].
struct updateSelf
{
    void operator() (int v)
    {
        float alpha = 0.15;
        // Fraction of the vertex's old rank that is moved into p.
        const float keep_factor = ((2) * alpha) / ((1) + alpha);
        // Multiply and add stay in one expression so the floating-point
        // evaluation is unchanged.
        p[v] = p[v] + (keep_factor * old_rank[v]);
        new_rank[v] = 0;
    }
};
/// Vertex predicate: true when new_rank[v] is non-zero and strictly greater
/// than out_degree[v] * epsilon, with epsilon = 1e-6.
/// (A variant testing old_rank instead of new_rank was left disabled in the
/// original source.)
struct filter_frontier
{
    bool operator() (int v)
    {
        float epsilon = (float) 1e-6;
        // A zero rank never qualifies; return before out_degree is read.
        if (new_rank[v] == 0) {
            return 0;
        }
        return (new_rank[v]) > ((out_degree[v] * epsilon));
    }
};

extern "C" int __attribute__ ((noinline)) p_generated_vector_op_apply_func_0_kernel(int V) {
int start, end;
local_range(V, &start, &end);
for (int iter_x = start; iter_x < end; iter_x++) {
p_generated_vector_op_apply_func_0()(iter_x);
}
barrier.sync();
return 0;
}
extern "C" int __attribute__ ((noinline)) old_rank_generated_vector_op_apply_func_1_kernel(int V) {
int start, end;
local_range(V, &start, &end);
for (int iter_x = start; iter_x < end; iter_x++) {
old_rank_generated_vector_op_apply_func_1()(iter_x);
}
barrier.sync();
return 0;
}
extern "C" int __attribute__ ((noinline)) new_rank_generated_vector_op_apply_func_2_kernel(int V) {
int start, end;
local_range(V, &start, &end);
for (int iter_x = start; iter_x < end; iter_x++) {
new_rank_generated_vector_op_apply_func_2()(iter_x);
}
barrier.sync();
return 0;
}
extern "C" int __attribute__ ((noinline)) generated_vector_op_apply_func_4_kernel(int V) {
int start, end;
local_range(V, &start, &end);
for (int iter_x = start; iter_x < end; iter_x++) {
generated_vector_op_apply_func_4()(iter_x);
}
barrier.sync();
return 0;
}
extern "C" int __attribute__ ((noinline)) updateSelf_kernel(int * frontier, int V, int tag_c) {
bsg_cuda_print_stat_start(tag_c);
barrier.sync();
int start, end;
local_range(V, &start, &end);
for (int iter_x = start; iter_x < end; iter_x++) {
if(frontier[iter_x]) { updateSelf()(iter_x); }
}
bsg_cuda_print_stat_end(tag_c);
barrier.sync();
return 0;
}
extern "C" int __attribute__ ((noinline)) edgeset_apply_pull_parallel_from_vertexset_call(int *in_indices, int *in_neighbors, int *frontier, int V, int E, int block_size_x, int tag_c) {
barrier.sync();
//pr_dbg("%i: on update edges %i\n", bsg_id, tag_c);
bsg_cuda_print_stat_start(tag_c);
bsg_saif_start();
edgeset_apply_pull_parallel_from_vertexset(in_indices, in_neighbors, frontier, updateEdge(), V, E, block_size_x);
bsg_saif_end();
bsg_cuda_print_stat_end(tag_c);
barrier.sync();
return 0;
}

extern "C" int __attribute__ ((noinline)) edgeset_apply_push_parallel_from_vertexset_call(int *out_indices, int *out_neighbors, int *frontier, int V, int E, int block_size_x, int tag_c) {
barrier.sync();
bsg_cuda_print_stat_start(tag_c);
bsg_saif_start();
edgeset_apply_push_parallel_from_vertexset(out_indices, out_neighbors, frontier, updateEdge(), V, E, block_size_x);
bsg_saif_end();
bsg_cuda_print_stat_end(tag_c);
barrier.sync();
return 0;
}

/**
 * Kernel entry point: rebuilds the frontier flags in next5.
 *
 * For every index in the range reported by local_range(V, ...) that is also
 * below V, next5[v] is first cleared and then set to 1 when filter_frontier
 * accepts the vertex. The loop stops at the first index that is not below V.
 * The stat region tagged tag_c is opened before the first barrier.sync() and
 * closed before the final barrier.sync().
 *
 * @param next5        output per-vertex flags (0 or 1)
 * @param V            vertex-count bound; also forwarded to local_range
 * @param block_size_x unused
 * @param tag_c        tag passed to bsg_cuda_print_stat_start / _end
 * @return always 0
 */
extern "C" int __attribute__ ((noinline)) filter_frontier_where_call(int * next5, int V, int block_size_x, int tag_c) {
    bsg_cuda_print_stat_start(tag_c);
    barrier.sync();

    int first, last;
    local_range(V, &first, &last);

    for (int v = first; v < last; ++v) {
        if (v >= V) {
            break; // past the last vertex
        }
        // Clear first, then set: same store order as the original code.
        next5[v] = 0;
        if (filter_frontier()(v)) {
            next5[v] = 1;
        }
    }

    bsg_cuda_print_stat_end(tag_c);
    barrier.sync();
    return 0;
}


32 changes: 11 additions & 21 deletions examples/graphit/test_pr_nibble/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,11 @@ int launch(int argc, char ** argv){
std::string ucode_path = input.getRISCVFile();

int iter = 0;
// std::string iterstrbase = "iteration-";
// auto pos = ucode_path.find(iterstrbase);
// auto iterstr = ucode_path.substr(pos+iterstrbase.size(), std::string::npos);
// std::stringstream ss(iterstr);
// ss >> iter;
std::string iterstrbase = "iteration-";
auto pos = ucode_path.find(iterstrbase);
auto iterstr = ucode_path.substr(pos+iterstrbase.size(), std::string::npos);
std::stringstream ss(iterstr);
ss >> iter;
std::cerr << "iteration: " << iter << std::endl;

int version = 0; //default to vertex pull
Expand Down Expand Up @@ -84,7 +84,6 @@ int launch(int argc, char ** argv){
float epsilon = ((float) 1e-06) ;
int start_vertex = ROOT;
Vector<int32_t> frontier = Vector<int32_t>(hammerblade::builtin_getVerticesHB(edges));
//Vector<int32_t> next_frontier = Vector<int32_t>(hammerblade::builtin_getVerticesHB(edges));

std::vector<int32_t> hfrontier(edges.num_nodes(), 0);
std::vector<float> p(edges.num_nodes(), (float) 0.0);
Expand Down Expand Up @@ -122,12 +121,7 @@ int launch(int argc, char ** argv){

std::cerr << "start of while loop\n";
int tag_c = 0;
//double host_rank[edges.num_nodes()];
//ofstream prog_file;
//prog_file.open("./progress.txt");
//prog_file << "starting computation w/ root vertex: " << start_vertex << std::endl;
//while ( builtin_getVertexSetSizeHB(frontier, edges.num_nodes()) != 0)
//while ( iter < 16)
for(int i = 0; i < 1; i++)
{
int f_sz = 0;
Expand All @@ -136,10 +130,10 @@ int launch(int argc, char ** argv){
case 0: //vertex pull
std::cerr << "pull kernel\n";
std::cerr << "run update self vertex kernel\n";
device->enqueueJob("updateSelf_kernel",hb_mc_dimension(X,Y), {edges.num_nodes(), tag_c});
device->enqueueJob("updateSelf_kernel",hb_mc_dimension(X,Y), {frontier.getAddr(), edges.num_nodes(), tag_c});
device->runJobs();
tag_c++;
std::cerr << "run update edges kernel on iter : " << iter << "\n";
std::cerr << "run update edges kernel on iter : " << iter << "\n";
device->enqueueJob("edgeset_apply_pull_parallel_from_vertexset_call", hb_mc_dimension(X,Y),{edges.getInIndicesAddr() , edges.getInNeighborsAddr(), frontier.getAddr(), edges.num_nodes(), edges.num_edges(), edges.num_nodes(), tag_c});
device->runJobs();
tag_c++;
Expand All @@ -154,25 +148,25 @@ int launch(int argc, char ** argv){
case 1: //vertex push
std::cerr << "push kernel\n";
std::cerr << "run update self vertex kernel\n";
device->enqueueJob("updateSelf_kernel",hb_mc_dimension(X,Y), {edges.num_nodes(), tag_c});
device->enqueueJob("updateSelf_kernel",hb_mc_dimension(X,Y), {frontier.getAddr(), edges.num_nodes(), tag_c});
device->runJobs();
tag_c++;
std::cerr << "run update edges kernel on iter : " << iter << "\n";
device->enqueueJob("edgeset_apply_push_parallel_from_vertexset_call", hb_mc_dimension(X,Y),{edges.getOutIndicesAddr() , edges.getOutNeighborsAddr(), frontier.getAddr(), edges.num_nodes(), edges.num_edges(), edges.num_nodes(), tag_c});
device->runJobs();
tag_c++;
std::cerr << "swap arrays\n";
hammerblade::swap_global_arrays<float>(new_rank_dev, old_rank_dev);
std::cerr << "create next frontier\n";
device->enqueueJob("filter_frontier_where_call", hb_mc_dimension(X,Y),{frontier.getAddr(), edges.num_nodes(), edges.num_edges(), tag_c});
device->runJobs();
std::cerr << "swap arrays\n";
hammerblade::swap_global_arrays<float>(new_rank_dev, old_rank_dev);
f_sz = builtin_getVertexSetSizeHB(frontier, edges.num_nodes());
std::cerr << "size of frontier after iteration " << iter << " : " << f_sz << std::endl;
break;
case 2: //blocked pull
std::cerr << "blocked pull kernel\n";
std::cerr << "run update self vertex kernel\n";
device->enqueueJob("updateSelf_kernel",hb_mc_dimension(X,Y), {edges.num_nodes(), tag_c});
device->enqueueJob("updateSelf_kernel",hb_mc_dimension(X,Y), {frontier.getAddr(), edges.num_nodes(), tag_c});
device->runJobs();
tag_c++;
std::cerr << "run update edges kernel on iter : " << iter << "\n";
Expand All @@ -191,13 +185,9 @@ int launch(int argc, char ** argv){
tag_c++;

iter++;
//prog_file << "finished iteration: " << iter << std::endl;
}
std::cerr << "*******end of program********\n";
//prog_file << "*******end of program********\n";
std::cerr << "took: " << iter << " iterations to complete\n";
//prog_file << "took: " << iter << " iterations to complete\n";
//prog_file.close();
if(VERIFY) {
ofstream ver_file;
ver_file.open("./rank.txt");
Expand Down
Loading

0 comments on commit 4d2d896

Please sign in to comment.