From 91c1b3194f8ea8cdb8d4717c11570daa485a4b34 Mon Sep 17 00:00:00 2001 From: christopher Date: Fri, 1 Mar 2024 16:15:27 -0500 Subject: [PATCH 01/76] White space and code reformatting Fix Link order on Linux --- CMakeLists.txt | 10 +- gs_patterns.c | 1784 ++++++++++++++++++++++++------------------------ 2 files changed, 905 insertions(+), 889 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 88a6775..3c36ba9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,9 +1,17 @@ cmake_minimum_required(VERSION 3.1...3.25) +set (CMAKE_VERBOSE_MAKEFILE "1") + project( gs_patterns VERSION 1.0 LANGUAGES C) add_executable(gs_patterns gs_patterns.c) -set (CMAKE_C_FLAGS "-lz -lm") + +#set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -lm -lz") +#set(CMAKE_LINK_LIBRARY_FLAG "-lz -lm") +#set(CMAKE_C_LINK_LIBRARY_FLAG "-lm -lz") +set(CMAKE_C_STANDARD_LIBRARIES "-lm -lz ${CMAKE_C_STANDARD_LIBRARIES}") + +#set (CMAKE_C_FLAGS "-lm -lz") diff --git a/gs_patterns.c b/gs_patterns.c index e7cdbab..1e5d6e0 100644 --- a/gs_patterns.c +++ b/gs_patterns.c @@ -53,40 +53,40 @@ typedef uintptr_t addr_t; //FROM DR SOURCE //DR trace struct _trace_entry_t { - unsigned short type; // 2 bytes: trace_type_t - unsigned short size; - union { - addr_t addr; - unsigned char length[sizeof(addr_t)]; - }; + unsigned short type; // 2 bytes: trace_type_t + unsigned short size; + union { + addr_t addr; + unsigned char length[sizeof(addr_t)]; + }; } __attribute__((packed)); typedef struct _trace_entry_t trace_entry_t; static inline int popcount(uint64_t x) { int c; - + for (c = 0; x != 0; x >>= 1) - if (x & 1) - c++; + if (x & 1) + c++; return c; } //string tools int startswith(const char *a, const char *b) { - if(strncmp(b, a, strlen(b)) == 0) - return 1; - return 0; + if (strncmp(b, a, strlen(b)) == 0) + return 1; + return 0; } int endswith(const char *a, const char *b) { - int idx = strlen(a); - int preidx = strlen(b); + int idx = strlen(a); + int preidx = strlen(b); - if (preidx >= idx) + if 
(preidx >= idx) + return 0; + if (strncmp(b, &a[idx - preidx], preidx) == 0) + return 1; return 0; - if(strncmp(b, &a[idx-preidx], preidx) == 0) - return 1; - return 0; } //https://stackoverflow.com/questions/779875/what-function-is-to-replace-a-substring-from-a-string-in-c @@ -102,10 +102,10 @@ char *str_replace(char *orig, char *rep, char *with) { // sanity checks and initialization if (!orig) return NULL; - + if (!rep) - return orig; - + return orig; + len_rep = strlen(rep); if (len_rep == 0) return NULL; // empty rep causes infinite loop during count @@ -140,910 +140,918 @@ char *str_replace(char *orig, char *rep, char *with) { return result; } -char * get_str(char * line, char * bparse, char * aparse) { +char *get_str(char *line, char *bparse, char *aparse) { - char * sline; + char *sline; - sline = str_replace(line, bparse, ""); - sline = str_replace(sline, aparse, ""); + sline = str_replace(line, bparse, ""); + sline = str_replace(sline, aparse, ""); - return sline; + return sline; } -int cnt_str(char * line, char c) { +int cnt_str(char *line, char c) { - int cnt = 0; - for(int i=0; line[i] != '\0'; i++){ - if (line[i] == c) - cnt++; - } + int cnt = 0; + for (int i = 0; line[i] != '\0'; i++) { + if (line[i] == c) + cnt++; + } - return cnt; + return cnt; } - -void translate_iaddr(char * binary, char * source_line, addr_t iaddr) { - - int i = 0; - int ntranslated = 0; - char path[1024]; - char cmd[1024]; - FILE *fp; - - sprintf(cmd, "addr2line -e %s 0x%lx", binary, iaddr); - - /* Open the command for reading. */ - fp = popen(cmd, "r"); - if (fp == NULL) { - printf("Failed to run command\n" ); - exit(1); - } - - /* Read the output a line at a time - output it. 
*/ - while (fgets(path, sizeof(path), fp) != NULL) { - strcpy(source_line, path); - source_line[strcspn(source_line, "\n")] = 0; - } - - /* close */ - pclose(fp); - - return; + +void translate_iaddr(char *binary, char *source_line, addr_t iaddr) { + + int i = 0; + int ntranslated = 0; + char path[1024]; + char cmd[1024]; + FILE *fp; + + sprintf(cmd, "addr2line -e %s 0x%lx", binary, iaddr); + + /* Open the command for reading. */ + fp = popen(cmd, "r"); + if (fp == NULL) { + printf("Failed to run command\n"); + exit(1); + } + + /* Read the output a line at a time - output it. */ + while (fgets(path, sizeof(path), fp) != NULL) { + strcpy(source_line, path); + source_line[strcspn(source_line, "\n")] = 0; + } + + /* close */ + pclose(fp); + + return; } -int drline_read(gzFile fp, trace_entry_t * val, trace_entry_t ** p_val, int * edx) { - - int idx; - - idx = (*edx)/sizeof(trace_entry_t); - //first read - if (*p_val == NULL) { - *edx = gzread(fp, val, sizeof(trace_entry_t)*NBUFS); - *p_val = val; - - } else if (*p_val == &val[idx]) { - *edx = gzread(fp, val, sizeof(trace_entry_t)*NBUFS); - *p_val = val; - } - - if (*edx == 0) - return 0; - - return 1; +int drline_read(gzFile fp, trace_entry_t *val, trace_entry_t **p_val, int *edx) { + + int idx; + + idx = (*edx) / sizeof(trace_entry_t); + //first read + if (*p_val == NULL) { + *edx = gzread(fp, val, sizeof(trace_entry_t) * NBUFS); + *p_val = val; + + } else if (*p_val == &val[idx]) { + *edx = gzread(fp, val, sizeof(trace_entry_t) * NBUFS); + *p_val = val; + } + + if (*edx == 0) + return 0; + + return 1; } -int main(int argc, char ** argv) { - - //generic - int i, j, k, m, n, w; - int iwindow = 0; - int iret = 0; - int ret; - int did_opcode = 0; - int windowfull = 0; - int byte; - int do_gs_traces = 0; - int do_filter = 1; - int64_t ngs = 0; - char *eptr; - char binary[1024]; - char srcline[1024]; - - //dtrace vars - int64_t drtrace_lines = 0; - trace_entry_t * drline; - trace_entry_t * drline2; - trace_entry_t * 
p_drtrace = NULL; - static trace_entry_t drtrace[NBUFS]; - gzFile fp_drtrace; - FILE * fp_gs; - - //metrics - int gs; - uint64_t opcodes = 0; - uint64_t opcodes_mem = 0; - uint64_t addrs = 0; - uint64_t other = 0; - int64_t maddr_prev; - int64_t maddr; - int64_t mcl; - int64_t gather_bytes_hist[100] = {0}; - int64_t scatter_bytes_hist[100] = {0}; - double gather_cnt = 0.0; - double scatter_cnt = 0.0; - double other_cnt = 0.0; - double gather_score = 0.0; - double gather_occ_avg = 0.0; - double scatter_occ_avg = 0.0; - - //windows - int w_rw_idx; - int w_idx; - addr_t iaddr; - static int64_t w_iaddrs[2][IWINDOW]; - static int64_t w_bytes[2][IWINDOW]; - static int64_t w_maddr[2][IWINDOW][VBYTES]; - static int64_t w_cnt[2][IWINDOW]; - - //First pass to find top gather / scatters - static char gather_srcline[NGS][1024]; - static addr_t gather_iaddrs[NGS] = {0}; - static int64_t gather_icnt[NGS] = {0}; - static int64_t gather_occ[NGS] = {0}; - static char scatter_srcline[NGS][1024]; - static addr_t scatter_iaddrs[NGS] = {0}; - static int64_t scatter_icnt[NGS] = {0}; - static int64_t scatter_occ[NGS] = {0}; - - //Second Pass - int dotrace; - int bestcnt; - int bestidx; - int gather_ntop = 0; - int scatter_ntop = 0; - static int gather_offset[NTOP] = {0}; - static int scatter_offset[NTOP] = {0}; - - static addr_t best_iaddr; - static addr_t gather_tot[NTOP] = {0}; - static addr_t scatter_tot[NTOP] = {0}; - static addr_t gather_top[NTOP] = {0}; - static addr_t gather_top_idx[NTOP] = {0}; - static addr_t scatter_top[NTOP] = {0}; - static addr_t scatter_top_idx[NTOP] = {0}; - static addr_t gather_base[NTOP] = {0}; - static addr_t scatter_base[NTOP] = {0}; - static int64_t * gather_patterns[NTOP] = {0}; - static int64_t * scatter_patterns[NTOP] = {0}; - - for(j=0; jtype >= 0xa) && (drline->type <= 0x10)) || (drline->type == 0x1e) ) { - - //iaddr - iaddr = drline->addr; - - //nops - opcodes++; - did_opcode = 1; - - /***********************/ - /** MEM 0x00 and 0x01 **/ - 
/***********************/ - } else if ( (drline->type == 0x0) || (drline->type == 0x1) ) { - - w_rw_idx = drline->type; - - //printf("M DRTRACE -- iaddr: %016lx addr: %016lx cl_start: %d bytes: %d\n", - // iaddr, drline->addr, drline->addr % 64, drline->size); - - if ((++mcnt % PERSAMPLE) == 0) { + + uint64_t mcnt = 0; + uint64_t unique_iaddrs = 0; + int unsynced = 0; + uint64_t unsync_cnt = 0; + addr_t ciaddr; + + printf("First pass to find top gather / scatter iaddresses\n"); + fflush(stdout); + + //read dr trace entries instrs + //printf("%16s %16s %16s %16s %16s %16s\n", "iaddr", "rw", "byte", "bytes", "cnt", "maddr"); + while (drline_read(fp_drtrace, drtrace, &p_drtrace, &iret)) { + + //decode drtrace + drline = p_drtrace; + + /*****************************/ + /** INSTR 0xa-0x10 and 0x1e **/ + /*****************************/ + if (((drline->type >= 0xa) && (drline->type <= 0x10)) || (drline->type == 0x1e)) { + + //iaddr + iaddr = drline->addr; + + //nops + opcodes++; + did_opcode = 1; + + /***********************/ + /** MEM 0x00 and 0x01 **/ + /***********************/ + } else if ((drline->type == 0x0) || (drline->type == 0x1)) { + + w_rw_idx = drline->type; + + //printf("M DRTRACE -- iaddr: %016lx addr: %016lx cl_start: %d bytes: %d\n", + // iaddr, drline->addr, drline->addr % 64, drline->size); + + if ((++mcnt % PERSAMPLE) == 0) { #if SAMPLE - break; + break; #endif - printf("."); - fflush(stdout); - } - - //is iaddr in window - w_idx = -1; - for (i=0; i= VBYTES) || - (w_cnt[w_rw_idx][w_idx] >= VBYTES) ) { - - /***************************/ - //do analysis - /***************************/ - //i = each window - for(w=0; w<2; w++) { - - for (i=0; i -1); - - //previous addr - if (j==0) - maddr_prev = maddr - 1; - - //gather / scatter - if ( maddr != maddr_prev) { - if ( (gs == -1) && (abs(maddr - maddr_prev) > 1) ) - gs = w; - } - maddr_prev = maddr; - } - - for (j=0; jaddr / drline->size; - w_bytes[w_rw_idx][w_idx] += drline->size; - - //num access per iaddr in 
loop - w_cnt[w_rw_idx][w_idx]++; - - if (did_opcode) { - - opcodes_mem++; - addrs++; - did_opcode = 0; - - } else { - addrs++; - } - - /***********************/ - /** SOMETHING ELSE **/ - /***********************/ - } else { - other++; - } - - p_drtrace++; - drtrace_lines++; - - } //while drtrace - - //metrics - gather_occ_avg /= gather_cnt; - scatter_occ_avg /= scatter_cnt; - - printf("\n RESULTS \n"); - - //close files - gzclose(fp_drtrace); - - printf("DRTRACE STATS\n"); - printf("DRTRACE LINES: %16lu\n", drtrace_lines); - printf("OPCODES: %16lu\n", opcodes); - printf("MEMOPCODES: %16lu\n", opcodes_mem); - printf("LOAD/STORES: %16lu\n", addrs); - printf("OTHER: %16lu\n", other); - - printf("\n"); - - printf("GATHER/SCATTER STATS: \n"); - printf("LOADS per GATHER: %16.3f\n", gather_occ_avg); - printf("STORES per SCATTER: %16.3f\n", scatter_occ_avg); - printf("GATHER COUNT: %16.3f (log2)\n", log(gather_cnt) / log(2.0)); - printf("SCATTER COUNT: %16.3f (log2)\n", log(scatter_cnt) / log(2.0)); - printf("OTHER COUNT: %16.3f (log2)\n", log(other_cnt) / log(2.0)); - - //Find source lines - - //Must have symbol - printf("\nSymbol table lookup for gathers..."); fflush(stdout); - gather_cnt = 0.0; - for(k=0; k bestcnt) { - bestcnt = gather_icnt[k]; - best_iaddr = gather_iaddrs[k]; - bestidx = k; - } - + printf("."); + fflush(stdout); + } + + //is iaddr in window + w_idx = -1; + for (i = 0; i < IWINDOW; i++) { + + //new iaddr + if (w_iaddrs[w_rw_idx][i] == -1) { + w_idx = i; + break; + + //iaddr exists + } else if (w_iaddrs[w_rw_idx][i] == iaddr) { + w_idx = i; + break; + } + } + + //new window + if ((w_idx == -1) || (w_bytes[w_rw_idx][w_idx] >= VBYTES) || + (w_cnt[w_rw_idx][w_idx] >= VBYTES)) { + + /***************************/ + //do analysis + /***************************/ + //i = each window + for (w = 0; w < 2; w++) { + + for (i = 0; i < IWINDOW; i++) { + + if (w_iaddrs[w][i] == -1) + break; + + byte = w_bytes[w][i] / w_cnt[w][i]; + + //First pass + //Determine + 
//gather/scatter? + gs = -1; + for (j = 0; j < w_cnt[w][i]; j++) { + + //address and cl + maddr = w_maddr[w][i][j]; + assert(maddr > -1); + + //previous addr + if (j == 0) + maddr_prev = maddr - 1; + + //gather / scatter + if (maddr != maddr_prev) { + if ((gs == -1) && (abs(maddr - maddr_prev) > 1)) + gs = w; + } + maddr_prev = maddr; + } + + for (j = 0; j < w_cnt[w][i]; j++) { + + if (gs == -1) { + other_cnt++; + continue; + } + } + + if (gs == 0) { + + gather_occ_avg += w_cnt[w][i]; + gather_cnt += 1.0; + + for (k = 0; k < NGS; k++) { + if (gather_iaddrs[k] == 0) { + gather_iaddrs[k] = w_iaddrs[w][i]; + gather_icnt[k]++; + gather_occ[k] += w_cnt[w][i]; + break; + } + + if (gather_iaddrs[k] == w_iaddrs[w][i]) { + gather_icnt[k]++; + gather_occ[k] += w_cnt[w][i]; + break; + } + + } + + } else if (gs == 1) { + + scatter_occ_avg += w_cnt[w][i]; + scatter_cnt += 1.0; + + for (k = 0; k < NGS; k++) { + if (scatter_iaddrs[k] == 0) { + scatter_iaddrs[k] = w_iaddrs[w][i]; + scatter_icnt[k]++; + scatter_occ[k] += w_cnt[w][i]; + break; + } + + if (scatter_iaddrs[k] == w_iaddrs[w][i]) { + scatter_icnt[k]++; + scatter_occ[k] += w_cnt[w][i]; + break; + } + } + } + } //WINDOW i + + w_idx = 0; + + //reset windows + for (i = 0; i < IWINDOW; i++) { + w_iaddrs[w][i] = -1; + w_bytes[w][i] = 0; + w_cnt[w][i] = 0; + for (j = 0; j < VBYTES; j++) + w_maddr[w][i][j] = -1; + } + } // rw w + } //analysis + + //Set window values + w_iaddrs[w_rw_idx][w_idx] = iaddr; + w_maddr[w_rw_idx][w_idx][w_cnt[w_rw_idx][w_idx]] = drline->addr / drline->size; + w_bytes[w_rw_idx][w_idx] += drline->size; + + //num access per iaddr in loop + w_cnt[w_rw_idx][w_idx]++; + + if (did_opcode) { + + opcodes_mem++; + addrs++; + did_opcode = 0; + + } else { + addrs++; + } + + /***********************/ + /** SOMETHING ELSE **/ + /***********************/ + } else { + other++; + } + + p_drtrace++; + drtrace_lines++; + + } //while drtrace + + //metrics + gather_occ_avg /= gather_cnt; + scatter_occ_avg /= scatter_cnt; + 
+ printf("\n RESULTS \n"); + + //close files + gzclose(fp_drtrace); + + printf("DRTRACE STATS\n"); + printf("DRTRACE LINES: %16lu\n", drtrace_lines); + printf("OPCODES: %16lu\n", opcodes); + printf("MEMOPCODES: %16lu\n", opcodes_mem); + printf("LOAD/STORES: %16lu\n", addrs); + printf("OTHER: %16lu\n", other); + + printf("\n"); + + printf("GATHER/SCATTER STATS: \n"); + printf("LOADS per GATHER: %16.3f\n", gather_occ_avg); + printf("STORES per SCATTER: %16.3f\n", scatter_occ_avg); + printf("GATHER COUNT: %16.3f (log2)\n", log(gather_cnt) / log(2.0)); + printf("SCATTER COUNT: %16.3f (log2)\n", log(scatter_cnt) / log(2.0)); + printf("OTHER COUNT: %16.3f (log2)\n", log(other_cnt) / log(2.0)); + + //Find source lines + + //Must have symbol + printf("\nSymbol table lookup for gathers..."); + fflush(stdout); + gather_cnt = 0.0; + for (k = 0; k < NGS; k++) { + + if (gather_iaddrs[k] == 0) + break; + + translate_iaddr(binary, gather_srcline[k], gather_iaddrs[k]); + + if (startswith(gather_srcline[k], "?")) + gather_icnt[k] = 0; + + gather_cnt += gather_icnt[k]; } - - if (best_iaddr == 0) { - break; - - } else { + printf("done.\n"); + //printf("\nTOP GATHERS\n"); + + //Get top gathers + gather_ntop = 0; + for (j = 0; j < NTOP; j++) { + + bestcnt = 0; + best_iaddr = 0; + bestidx = -1; + + for (k = 0; k < NGS; k++) { + + if (gather_icnt[k] == 0) + continue; + + if (gather_iaddrs[k] == 0) { + break; + } - gather_ntop++; - //printf("GIADDR -- %016lx: %16lu -- %s\n", - // gather_iaddrs[bestidx], gather_icnt[bestidx], gather_srcline[bestidx]); - - gather_top[j] = best_iaddr; - gather_top_idx[j] = bestidx; - gather_tot[j] = gather_icnt[bestidx]; - gather_icnt[bestidx] = 0; - + if (gather_icnt[k] > bestcnt) { + bestcnt = gather_icnt[k]; + best_iaddr = gather_iaddrs[k]; + bestidx = k; + } + + } + + if (best_iaddr == 0) { + break; + + } else { + + gather_ntop++; + //printf("GIADDR -- %016lx: %16lu -- %s\n", + // gather_iaddrs[bestidx], gather_icnt[bestidx], gather_srcline[bestidx]); + 
+ gather_top[j] = best_iaddr; + gather_top_idx[j] = bestidx; + gather_tot[j] = gather_icnt[bestidx]; + gather_icnt[bestidx] = 0; + + } } - } - - //Find source lines - scatter_cnt = 0.0; - - printf("Symbol table lookup for scatters..."); fflush(stdout); - //Check it is not a library - for(k=0; k bestcnt) { - bestcnt = scatter_icnt[k]; - best_iaddr = scatter_iaddrs[k]; - bestidx = k; - } + printf("done.\n"); + + //Get top scatters + //printf("\nTOP SCATTERS\n"); + scatter_ntop = 0; + for (j = 0; j < NTOP; j++) { + + bestcnt = 0; + best_iaddr = 0; + bestidx = -1; + + for (k = 0; k < NGS; k++) { + + if (scatter_icnt[k] == 0) + continue; + + if (scatter_iaddrs[k] == 0) { + break; + } + + if (scatter_icnt[k] > bestcnt) { + bestcnt = scatter_icnt[k]; + best_iaddr = scatter_iaddrs[k]; + bestidx = k; + } + } + + if (best_iaddr == 0) { + break; + + } else { + + scatter_ntop++; + scatter_top[j] = best_iaddr; + scatter_top_idx[j] = bestidx; + scatter_tot[j] = scatter_icnt[bestidx]; + scatter_icnt[bestidx] = 0; + //printf("SIADDR -- %016lx: %16lu -- %s\n", + // scatter_top[j], scatter_tot[j], scatter_srcline[bestidx]); + } } - - if (best_iaddr == 0) { - break; - - } else { - scatter_ntop++; - scatter_top[j] = best_iaddr; - scatter_top_idx[j] = bestidx; - scatter_tot[j] = scatter_icnt[bestidx]; - scatter_icnt[bestidx] = 0; - //printf("SIADDR -- %016lx: %16lu -- %s\n", - // scatter_top[j], scatter_tot[j], scatter_srcline[bestidx]); + //Second Pass + + //Open trace + fp_drtrace = gzopen(argv[1], "hrb"); + if (fp_drtrace == NULL) { + printf("ERROR: Could not open %s!\n", argv[1]); + exit(-1); } - } - - //Second Pass - - //Open trace - fp_drtrace = gzopen(argv[1], "hrb"); - if (fp_drtrace == NULL) { - printf("ERROR: Could not open %s!\n", argv[1]); - exit(-1); - } - - mcnt = 0; - iret = 0; - p_drtrace = NULL; - int breakout = 0; - printf("\nSecond pass to fill gather / scatter subtraces\n"); fflush(stdout); - while ( drline_read(fp_drtrace, drtrace, &p_drtrace, &iret) && !breakout ) 
{ - - //decode drtrace - drline = p_drtrace; - - /*****************************/ - /** INSTR 0xa-0x10 and 0x1e **/ - /*****************************/ - if ( ((drline->type >= 0xa) && (drline->type <= 0x10)) || (drline->type == 0x1e) ) { - - //iaddr - iaddr = drline->addr; - - - /***********************/ - /** MEM 0x00 and 0x01 **/ - /***********************/ - } else if ( (drline->type == 0x0) || (drline->type == 0x1) ) { - - maddr = drline->addr / drline->size; - - if ((++mcnt % PERSAMPLE) == 0) { + + mcnt = 0; + iret = 0; + p_drtrace = NULL; + int breakout = 0; + printf("\nSecond pass to fill gather / scatter subtraces\n"); + fflush(stdout); + while (drline_read(fp_drtrace, drtrace, &p_drtrace, &iret) && !breakout) { + + //decode drtrace + drline = p_drtrace; + + /*****************************/ + /** INSTR 0xa-0x10 and 0x1e **/ + /*****************************/ + if (((drline->type >= 0xa) && (drline->type <= 0x10)) || (drline->type == 0x1e)) { + + //iaddr + iaddr = drline->addr; + + + /***********************/ + /** MEM 0x00 and 0x01 **/ + /***********************/ + } else if ((drline->type == 0x0) || (drline->type == 0x1)) { + + maddr = drline->addr / drline->size; + + if ((++mcnt % PERSAMPLE) == 0) { #if SAMPLE - break; + break; #endif - printf("."); - fflush(stdout); - } - - //gather ? - if (drline->type == 0x0) { - - for(i=0; i= PSIZE) { - printf("WARNING: Need to increase PSIZE. Truncating trace...\n"); - breakout = 1; - } - //printf("g -- %d % d\n", i, gather_offset[i]); fflush(stdout); - gather_patterns[i][ gather_offset[i]++ ] = (int64_t) (maddr - gather_base[i]); - - break; - } - } - - //scatter ? - } else { - - for(i=0; i= PSIZE) { - printf("WARNING: Need to increase PSIZE. 
Truncating trace...\n"); - breakout = 1; - } - scatter_patterns[i][ scatter_offset[i]++ ] = (int64_t) (maddr - scatter_base[i]); - break; - } - } - } - - } //MEM - - p_drtrace++; - - } //while drtrace - - gzclose(fp_drtrace); - - printf("\n"); - - //Normalize - int64_t smallest; - for(i=0; itype == 0x0) { + + for (i = 0; i < gather_ntop; i++) { + + //found it + if (iaddr == gather_top[i]) { + + if (gather_base[i] == 0) + gather_base[i] = maddr; + + //Add index + if (gather_offset[i] >= PSIZE) { + printf("WARNING: Need to increase PSIZE. Truncating trace...\n"); + breakout = 1; + } + //printf("g -- %d % d\n", i, gather_offset[i]); fflush(stdout); + gather_patterns[i][gather_offset[i]++] = (int64_t) (maddr - gather_base[i]); + + break; + } + } + + //scatter ? + } else { + + for (i = 0; i < scatter_ntop; i++) { + + //found it + if (iaddr == scatter_top[i]) { + + //set base + if (scatter_base[i] == 0) + scatter_base[i] = maddr; + + //Add index + if (scatter_offset[i] >= PSIZE) { + printf("WARNING: Need to increase PSIZE. Truncating trace...\n"); + breakout = 1; + } + scatter_patterns[i][scatter_offset[i]++] = (int64_t) (maddr - scatter_base[i]); + break; + } + } + } + + } //MEM + + p_drtrace++; + + } //while drtrace + + gzclose(fp_drtrace); + + printf("\n"); - smallest *= -1; //Normalize - for(j=0; j 1025) ? 
1026 : sidx; - n_stride[sidx]++; + gs_info = str_replace(argv[1], ".gz", ".txt"); + fp2 = fopen(gs_info, "w"); + if (fp2 == NULL) { + printf("ERROR: Could not open %s!\n", gs_info); + exit(-1); } - for(j=0; j<1027; j++) { - if (n_stride[j] > 0) { - unique_strides++; - } + //Header + fprintf(fp, "[ "); + fprintf(fp2, "#sourceline, g/s, indices, percentage of g/s in trace\n"); + + printf("\n"); + for (i = 0; i < gather_ntop; i++) { + printf("***************************************************************************************\n"); + + unique_strides = 0; + for (j = 0; j < 1027; j++) + n_stride[j] = 0; + + for (j = 1; j < gather_offset[i]; j++) { + sidx = gather_patterns[i][j] - gather_patterns[i][j - 1] + 513; + sidx = (sidx < 1) ? 0 : sidx; + sidx = (sidx > 1025) ? 1026 : sidx; + n_stride[sidx]++; + } + + for (j = 0; j < 1027; j++) { + if (n_stride[j] > 0) { + unique_strides++; + } + } + + outbounds = (double) (n_stride[0] + n_stride[1026]) / (double) gather_offset[i]; + + //if ( ( (unique_strides > NSTRIDES) || (outbounds > OUTTHRESH) ) && (gather_offset[i] > USTRIDES ) ){ + if (1) { + + //create a binary file + FILE *fp_bin; + char *bin_name; + bin_name = str_replace(argv[1], ".gz", ".sbin"); + printf("%s\n", bin_name); + fp_bin = fopen(bin_name, "w"); + if (fp_bin == NULL) { + printf("ERROR: Could not open %s!\n", bin_name); + exit(-1); + } + + printf("GIADDR -- %p\n", gather_top[i]); + printf("SRCLINE -- %s\n", gather_srcline[gather_top_idx[i]]); + printf("GATHER %c -- %6.3f%c (512-bit chunks)\n", + '%', 100.0 * (double) gather_tot[i] / gather_cnt, '%'); + printf("NDISTS -- %ld\n", gather_offset[i]); + + int64_t nlcnt = 0; + for (j = 0; j < gather_offset[i]; j++) { + + if (j < 39) { + printf("%10ld ", gather_patterns[i][j]); + fflush(stdout); + if ((++nlcnt % 13) == 0) + printf("\n"); + + } else if (j >= (gather_offset[i] - 39)) { + printf("%10ld ", gather_patterns[i][j]); + fflush(stdout); + if ((++nlcnt % 13) == 0) + printf("\n"); + + } else if (j == 39) + 
printf("...\n"); + } + printf("\n"); + printf("DIST HISTOGRAM --\n"); + + for (j = 0; j < 1027; j++) { + if (n_stride[j] > 0) { + if (j == 0) + printf("%6s: %ld\n", "< -512", n_stride[j]); + else if (j == 1026) + printf("%6s: %ld\n", "> 512", n_stride[j]); + else + printf("%6d: %ld\n", j - 513, n_stride[j]); + } + } + + if (first_spatter) { + first_spatter = 0; + fprintf(fp, " {\"kernel\":\"Gather\", \"pattern\":["); + + } else { + fprintf(fp, ",\n {\"kernel\":\"Gather\", \"pattern\":["); + } + + fwrite(gather_patterns[i], sizeof(uint64_t), gather_offset[i], fp_bin); + fclose(fp_bin); + + for (j = 0; j < gather_offset[i] - 1; j++) + fprintf(fp, "%ld,", gather_patterns[i][j]); + fprintf(fp, "%ld", gather_patterns[i][gather_offset[i] - 1]); + + fprintf(fp, "], \"count\":1}"); + + fprintf(fp2, "%s,G,%ld,%6.3f\n", + gather_srcline[gather_top_idx[i]], gather_offset[i], + 100.0 * (double) gather_tot[i] / gather_cnt); + } + printf("***************************************************************************************\n\n"); } - outbounds = (double) (n_stride[0] + n_stride[1026]) / (double) gather_offset[i]; - - //if ( ( (unique_strides > NSTRIDES) || (outbounds > OUTTHRESH) ) && (gather_offset[i] > USTRIDES ) ){ - if (1) { - - //create a binary file - FILE * fp_bin; - char * bin_name; - bin_name = str_replace(argv[1], ".gz", ".sbin"); - printf("%s\n", bin_name); - fp_bin = fopen(bin_name, "w"); - if (fp_bin == NULL) { - printf("ERROR: Could not open %s!\n", bin_name); - exit(-1); - } - - printf("GIADDR -- %p\n", gather_top[i]); - printf("SRCLINE -- %s\n", gather_srcline[ gather_top_idx[i] ] ); - printf("GATHER %c -- %6.3f%c (512-bit chunks)\n", - '%', 100.0 * (double) gather_tot[i] / gather_cnt, '%'); - printf("NDISTS -- %ld\n", gather_offset[i]); - - int64_t nlcnt = 0; - for(j=0; j= (gather_offset[i] - 39)) { - printf("%10ld ", gather_patterns[i][j]); fflush(stdout); - if (( ++nlcnt % 13) == 0) - printf("\n"); - - } else if (j == 39) - printf("...\n"); - } - 
printf("\n"); - printf("DIST HISTOGRAM --\n"); - - for(j=0; j<1027; j++) { - if (n_stride[j] > 0) { - if (j == 0) - printf("%6s: %ld\n", "< -512", n_stride[j]); - else if (j == 1026) - printf("%6s: %ld\n", "> 512", n_stride[j]); - else - printf("%6d: %ld\n", j-513, n_stride[j]); - } - } - - if (first_spatter) { - first_spatter = 0; - fprintf(fp, " {\"kernel\":\"Gather\", \"pattern\":["); - - } else { - fprintf(fp, ",\n {\"kernel\":\"Gather\", \"pattern\":["); - } - - fwrite(gather_patterns[i], sizeof(uint64_t), gather_offset[i], fp_bin); - fclose(fp_bin); - - for(j=0; j 1025) ? 1026 : sidx; - n_stride[sidx]++; + printf("\n"); + for (i = 0; i < scatter_ntop; i++) { + printf("***************************************************************************************\n"); + + unique_strides = 0; + for (j = 0; j < 1027; j++) + n_stride[j] = 0; + + for (j = 1; j < scatter_offset[i]; j++) { + sidx = scatter_patterns[i][j] - scatter_patterns[i][j - 1] + 513; + sidx = (sidx < 1) ? 0 : sidx; + sidx = (sidx > 1025) ? 
1026 : sidx; + n_stride[sidx]++; + } + + for (j = 0; j < 1027; j++) { + if (n_stride[j] > 0) { + unique_strides++; + } + } + + outbounds = (double) (n_stride[0] + n_stride[1026]) / (double) scatter_offset[i]; + + //if (((unique_strides > NSTRIDES) | (outbounds > OUTTHRESH) ) && (scatter_offset[i] > USTRIDES) ){ + if (1) { + + //create a binary file + FILE *fp_bin; + char *bin_name; + bin_name = str_replace(argv[1], ".gz", ".sbin"); + printf("%s\n", bin_name); + fp_bin = fopen(bin_name, "w"); + if (fp_bin == NULL) { + printf("ERROR: Could not open %s!\n", bin_name); + exit(-1); + } + + printf("SIADDR -- %p\n", scatter_top[i]); + printf("SRCLINE -- %s\n", scatter_srcline[scatter_top_idx[i]]); + printf("SCATTER %c -- %6.3f%c (512-bit chunks)\n", + '%', 100.0 * (double) scatter_tot[i] / scatter_cnt, '%'); + printf("NDISTS -- %ld\n", scatter_offset[i]); + + int64_t nlcnt = 0; + for (j = 0; j < scatter_offset[i]; j++) { + + if (j < 39) { + printf("%10ld ", scatter_patterns[i][j]); + fflush(stdout); + if ((++nlcnt % 13) == 0) + printf("\n"); + + } else if (j >= (scatter_offset[i] - 39)) { + printf("%10ld ", scatter_patterns[i][j]); + fflush(stdout); + if ((++nlcnt % 13) == 0) + printf("\n"); + + } else if (j == 39) + printf("...\n"); + } + printf("\n"); + printf("DIST HISTOGRAM --\n"); + for (j = 0; j < 1027; j++) { + if (n_stride[j] > 0) { + if (j == 0) + printf("%6s: %ld\n", "< -512", n_stride[j]); + else if (j == 1026) + printf("%6s: %ld\n", "> 512", n_stride[j]); + else + printf("%6d: %ld\n", j - 513, n_stride[j]); + } + } + + if (first_spatter) { + first_spatter = 0; + fprintf(fp, " {\"kernel\":\"Scatter\", \"pattern\":["); + + } else { + fprintf(fp, ", {\"kernel\":\"Scatter\", \"pattern\":["); + } + + fwrite(scatter_patterns[i], sizeof(uint64_t), scatter_offset[i], fp_bin); + fclose(fp_bin); + + for (j = 0; j < scatter_offset[i] - 1; j++) + fprintf(fp, "%ld,", scatter_patterns[i][j]); + fprintf(fp, "%ld", scatter_patterns[i][scatter_offset[i] - 1]); + fprintf(fp, 
"], \"count\":1}"); + + fprintf(fp2, "%s,S,%ld,%6.3f\n", + scatter_srcline[scatter_top_idx[i]], scatter_offset[i], + 100.0 * (double) scatter_tot[i] / scatter_cnt); + } + printf("***************************************************************************************\n\n"); } - - for(j=0; j<1027; j++) { - if (n_stride[j] > 0) { - unique_strides++; - } + + //Footer + fprintf(fp, " ]"); + fclose(fp); + fclose(fp2); + + + for (i = 0; i < NTOP; i++) { + free(gather_patterns[i]); + free(scatter_patterns[i]); } - - outbounds = (double) (n_stride[0] + n_stride[1026]) / (double) scatter_offset[i]; - - //if (((unique_strides > NSTRIDES) | (outbounds > OUTTHRESH) ) && (scatter_offset[i] > USTRIDES) ){ - if (1) { - - //create a binary file - FILE * fp_bin; - char * bin_name; - bin_name = str_replace(argv[1], ".gz", ".sbin"); - printf("%s\n", bin_name); - fp_bin = fopen(bin_name, "w"); - if (fp_bin == NULL) { - printf("ERROR: Could not open %s!\n", bin_name); - exit(-1); - } - - printf("SIADDR -- %p\n", scatter_top[i]); - printf("SRCLINE -- %s\n", scatter_srcline[ scatter_top_idx[i]]); - printf("SCATTER %c -- %6.3f%c (512-bit chunks)\n", - '%', 100.0 * (double) scatter_tot[i] / scatter_cnt, '%'); - printf("NDISTS -- %ld\n", scatter_offset[i]); - - int64_t nlcnt = 0; - for(j=0; j= (scatter_offset[i] - 39)) { - printf("%10ld ", scatter_patterns[i][j]); fflush(stdout); - if (( ++nlcnt % 13) == 0) - printf("\n"); - - } else if (j == 39) - printf("...\n"); - } - printf("\n"); - printf("DIST HISTOGRAM --\n"); - for(j=0; j<1027; j++) { - if (n_stride[j] > 0) { - if (j == 0) - printf("%6s: %ld\n", "< -512", n_stride[j]); - else if (j == 1026) - printf("%6s: %ld\n", "> 512", n_stride[j]); - else - printf("%6d: %ld\n", j-513, n_stride[j]); - } - } - - if (first_spatter) { - first_spatter = 0; - fprintf(fp, " {\"kernel\":\"Scatter\", \"pattern\":["); - - } else { - fprintf(fp, ", {\"kernel\":\"Scatter\", \"pattern\":["); - } - - fwrite(scatter_patterns[i], sizeof(uint64_t), 
scatter_offset[i], fp_bin); - fclose(fp_bin); - - for(j=0; j Date: Thu, 28 Mar 2024 00:16:13 -0400 Subject: [PATCH 02/76] extracted normalize and create_spatter_file --- gs_patterns.c | 794 +++++++++++++++++++++++++++----------------------- 1 file changed, 423 insertions(+), 371 deletions(-) diff --git a/gs_patterns.c b/gs_patterns.c index 1e5d6e0..ee42d67 100644 --- a/gs_patterns.c +++ b/gs_patterns.c @@ -211,6 +211,311 @@ int drline_read(gzFile fp, trace_entry_t *val, trace_entry_t **p_val, int *edx) return 1; } +void create_spatter_file( + const char* trace_file_name, + int gather_ntop, + int scatter_ntop, + int* gather_offset, + int* scatter_offset, + int64_t** gather_patterns, + int64_t** scatter_patterns, + /* */ + addr_t* gather_tot, + addr_t* scatter_tot, + addr_t* gather_top, + addr_t* gather_top_idx, + addr_t* scatter_top, + addr_t* scatter_top_idx, + /* */ + char** gather_srcline, + char** scatter_srcline, + double gather_cnt, + double scatter_cnt +) +{ + int i = 0; + int j = 0; + + //Create stride histogram and create spatter + int sidx; + int first_spatter = 1; + int unique_strides; + int64_t idx, pidx; + int64_t n_stride[1027]; + double outbounds; + //print + + //Create spatter file + FILE *fp, *fp2; + char *json_name, *gs_info; + json_name = str_replace(trace_file_name, ".gz", ".json"); + fp = fopen(json_name, "w"); + if (fp == NULL) { + printf("ERROR: Could not open %s!\n", json_name); + exit(-1); + } + gs_info = str_replace(trace_file_name, ".gz", ".txt"); + fp2 = fopen(gs_info, "w"); + if (fp2 == NULL) { + printf("ERROR: Could not open %s!\n", gs_info); + exit(-1); + } + + //Header + fprintf(fp, "[ "); + fprintf(fp2, "#sourceline, g/s, indices, percentage of g/s in trace\n"); + + printf("\n"); + for (i = 0; i < gather_ntop; i++) { + printf("***************************************************************************************\n"); + + unique_strides = 0; + for (j = 0; j < 1027; j++) + n_stride[j] = 0; + + for (j = 1; j < gather_offset[i]; j++) { 
+ sidx = gather_patterns[i][j] - gather_patterns[i][j - 1] + 513; + sidx = (sidx < 1) ? 0 : sidx; + sidx = (sidx > 1025) ? 1026 : sidx; + n_stride[sidx]++; + } + + for (j = 0; j < 1027; j++) { + if (n_stride[j] > 0) { + unique_strides++; + } + } + + outbounds = (double) (n_stride[0] + n_stride[1026]) / (double) gather_offset[i]; + + //if ( ( (unique_strides > NSTRIDES) || (outbounds > OUTTHRESH) ) && (gather_offset[i] > USTRIDES ) ){ + if (1) { + + //create a binary file + FILE *fp_bin; + char *bin_name; + bin_name = str_replace(trace_file_name, ".gz", ".sbin"); + printf("%s\n", bin_name); + fp_bin = fopen(bin_name, "w"); + if (fp_bin == NULL) { + printf("ERROR: Could not open %s!\n", bin_name); + exit(-1); + } + + printf("GIADDR -- %p\n", gather_top[i]); + printf("SRCLINE -- %s\n", gather_srcline[gather_top_idx[i]]); + printf("GATHER %c -- %6.3f%c (512-bit chunks)\n", + '%', 100.0 * (double) gather_tot[i] / gather_cnt, '%'); + printf("NDISTS -- %ld\n", gather_offset[i]); + + int64_t nlcnt = 0; + for (j = 0; j < gather_offset[i]; j++) { + + if (j < 39) { + printf("%10ld ", gather_patterns[i][j]); + fflush(stdout); + if ((++nlcnt % 13) == 0) + printf("\n"); + + } else if (j >= (gather_offset[i] - 39)) { + printf("%10ld ", gather_patterns[i][j]); + fflush(stdout); + if ((++nlcnt % 13) == 0) + printf("\n"); + + } else if (j == 39) + printf("...\n"); + } + printf("\n"); + printf("DIST HISTOGRAM --\n"); + + for (j = 0; j < 1027; j++) { + if (n_stride[j] > 0) { + if (j == 0) + printf("%6s: %ld\n", "< -512", n_stride[j]); + else if (j == 1026) + printf("%6s: %ld\n", "> 512", n_stride[j]); + else + printf("%6d: %ld\n", j - 513, n_stride[j]); + } + } + + if (first_spatter) { + first_spatter = 0; + fprintf(fp, " {\"kernel\":\"Gather\", \"pattern\":["); + + } else { + fprintf(fp, ",\n {\"kernel\":\"Gather\", \"pattern\":["); + } + + fwrite(gather_patterns[i], sizeof(uint64_t), gather_offset[i], fp_bin); + fclose(fp_bin); + + for (j = 0; j < gather_offset[i] - 1; j++) + 
fprintf(fp, "%ld,", gather_patterns[i][j]); + fprintf(fp, "%ld", gather_patterns[i][gather_offset[i] - 1]); + + fprintf(fp, "], \"count\":1}"); + + fprintf(fp2, "%s,G,%ld,%6.3f\n", + gather_srcline[gather_top_idx[i]], gather_offset[i], + 100.0 * (double) gather_tot[i] / gather_cnt); + } + printf("***************************************************************************************\n\n"); + } + + printf("\n"); + for (i = 0; i < scatter_ntop; i++) { + printf("***************************************************************************************\n"); + + unique_strides = 0; + for (j = 0; j < 1027; j++) + n_stride[j] = 0; + + for (j = 1; j < scatter_offset[i]; j++) { + sidx = scatter_patterns[i][j] - scatter_patterns[i][j - 1] + 513; + sidx = (sidx < 1) ? 0 : sidx; + sidx = (sidx > 1025) ? 1026 : sidx; + n_stride[sidx]++; + } + + for (j = 0; j < 1027; j++) { + if (n_stride[j] > 0) { + unique_strides++; + } + } + + outbounds = (double) (n_stride[0] + n_stride[1026]) / (double) scatter_offset[i]; + + //if (((unique_strides > NSTRIDES) | (outbounds > OUTTHRESH) ) && (scatter_offset[i] > USTRIDES) ){ + if (1) { + + //create a binary file + FILE *fp_bin; + char *bin_name; + bin_name = str_replace(trace_file_name, ".gz", ".sbin"); + printf("%s\n", bin_name); + fp_bin = fopen(bin_name, "w"); + if (fp_bin == NULL) { + printf("ERROR: Could not open %s!\n", bin_name); + exit(-1); + } + + printf("SIADDR -- %p\n", scatter_top[i]); + printf("SRCLINE -- %s\n", scatter_srcline[scatter_top_idx[i]]); + printf("SCATTER %c -- %6.3f%c (512-bit chunks)\n", + '%', 100.0 * (double) scatter_tot[i] / scatter_cnt, '%'); + printf("NDISTS -- %ld\n", scatter_offset[i]); + + int64_t nlcnt = 0; + for (j = 0; j < scatter_offset[i]; j++) { + + if (j < 39) { + printf("%10ld ", scatter_patterns[i][j]); + fflush(stdout); + if ((++nlcnt % 13) == 0) + printf("\n"); + + } else if (j >= (scatter_offset[i] - 39)) { + printf("%10ld ", scatter_patterns[i][j]); + fflush(stdout); + if ((++nlcnt % 13) == 0) + 
printf("\n"); + + } else if (j == 39) + printf("...\n"); + } + printf("\n"); + printf("DIST HISTOGRAM --\n"); + for (j = 0; j < 1027; j++) { + if (n_stride[j] > 0) { + if (j == 0) + printf("%6s: %ld\n", "< -512", n_stride[j]); + else if (j == 1026) + printf("%6s: %ld\n", "> 512", n_stride[j]); + else + printf("%6d: %ld\n", j - 513, n_stride[j]); + } + } + + if (first_spatter) { + first_spatter = 0; + fprintf(fp, " {\"kernel\":\"Scatter\", \"pattern\":["); + + } else { + fprintf(fp, ", {\"kernel\":\"Scatter\", \"pattern\":["); + } + + fwrite(scatter_patterns[i], sizeof(uint64_t), scatter_offset[i], fp_bin); + fclose(fp_bin); + + for (j = 0; j < scatter_offset[i] - 1; j++) + fprintf(fp, "%ld,", scatter_patterns[i][j]); + fprintf(fp, "%ld", scatter_patterns[i][scatter_offset[i] - 1]); + fprintf(fp, "], \"count\":1}"); + + fprintf(fp2, "%s,S,%ld,%6.3f\n", + scatter_srcline[scatter_top_idx[i]], scatter_offset[i], + 100.0 * (double) scatter_tot[i] / scatter_cnt); + } + printf("***************************************************************************************\n\n"); + } + + //Footer + fprintf(fp, " ]"); + fclose(fp); + fclose(fp2); + +} + +void normalize_stats( + int gather_ntop, + int scatter_ntop, + int* gather_offset, + int* scatter_offset, + int64_t** gather_patterns, + int64_t** scatter_patterns +) +{ + int i = 0; + int j = 0; + + //Normalize + int64_t smallest; + for (i = 0; i < gather_ntop; i++) { + + //Find smallest + smallest = 0; + for (j = 0; j < gather_offset[i]; j++) { + if (gather_patterns[i][j] < smallest) + smallest = gather_patterns[i][j]; + } + + smallest *= -1; + //Normalize + for (j = 0; j < gather_offset[i]; j++) { + gather_patterns[i][j] += smallest; + } + } + + for (i = 0; i < scatter_ntop; i++) { + + //Find smallest + smallest = 0; + for (j = 0; j < scatter_offset[i]; j++) { + if (scatter_patterns[i][j] < smallest) + smallest = scatter_patterns[i][j]; + } + smallest *= -1; + + //Normalize + for (j = 0; j < scatter_offset[i]; j++) { + 
scatter_patterns[i][j] += smallest; + } + } +} + + int main(int argc, char **argv) { //generic @@ -408,9 +713,9 @@ int main(int argc, char **argv) { //do analysis /***************************/ //i = each window - for (w = 0; w < 2; w++) { + for (w = 0; w < 2; w++) { // 2 - for (i = 0; i < IWINDOW; i++) { + for (i = 0; i < IWINDOW; i++) { // 1024 if (w_iaddrs[w][i] == -1) break; @@ -447,7 +752,7 @@ int main(int argc, char **argv) { } } - if (gs == 0) { + if (gs == 0) { // GATHER gather_occ_avg += w_cnt[w][i]; gather_cnt += 1.0; @@ -468,7 +773,7 @@ int main(int argc, char **argv) { } - } else if (gs == 1) { + } else if (gs == 1) { // SCATTER scatter_occ_avg += w_cnt[w][i]; scatter_cnt += 1.0; @@ -533,6 +838,11 @@ int main(int argc, char **argv) { } //while drtrace + + + + + //metrics gather_occ_avg /= gather_cnt; scatter_occ_avg /= scatter_cnt; @@ -640,411 +950,153 @@ int main(int argc, char **argv) { } printf("done.\n"); - //Get top scatters - //printf("\nTOP SCATTERS\n"); - scatter_ntop = 0; - for (j = 0; j < NTOP; j++) { - - bestcnt = 0; - best_iaddr = 0; - bestidx = -1; - - for (k = 0; k < NGS; k++) { - - if (scatter_icnt[k] == 0) - continue; - - if (scatter_iaddrs[k] == 0) { - break; - } - - if (scatter_icnt[k] > bestcnt) { - bestcnt = scatter_icnt[k]; - best_iaddr = scatter_iaddrs[k]; - bestidx = k; - } - } - - if (best_iaddr == 0) { - break; - - } else { - - scatter_ntop++; - scatter_top[j] = best_iaddr; - scatter_top_idx[j] = bestidx; - scatter_tot[j] = scatter_icnt[bestidx]; - scatter_icnt[bestidx] = 0; - //printf("SIADDR -- %016lx: %16lu -- %s\n", - // scatter_top[j], scatter_tot[j], scatter_srcline[bestidx]); - } - } - - //Second Pass - - //Open trace - fp_drtrace = gzopen(argv[1], "hrb"); - if (fp_drtrace == NULL) { - printf("ERROR: Could not open %s!\n", argv[1]); - exit(-1); - } - - mcnt = 0; - iret = 0; - p_drtrace = NULL; - int breakout = 0; - printf("\nSecond pass to fill gather / scatter subtraces\n"); - fflush(stdout); - while 
(drline_read(fp_drtrace, drtrace, &p_drtrace, &iret) && !breakout) { - - //decode drtrace - drline = p_drtrace; - - /*****************************/ - /** INSTR 0xa-0x10 and 0x1e **/ - /*****************************/ - if (((drline->type >= 0xa) && (drline->type <= 0x10)) || (drline->type == 0x1e)) { - - //iaddr - iaddr = drline->addr; - - - /***********************/ - /** MEM 0x00 and 0x01 **/ - /***********************/ - } else if ((drline->type == 0x0) || (drline->type == 0x1)) { - - maddr = drline->addr / drline->size; - - if ((++mcnt % PERSAMPLE) == 0) { -#if SAMPLE - break; -#endif - printf("."); - fflush(stdout); - } - - //gather ? - if (drline->type == 0x0) { - - for (i = 0; i < gather_ntop; i++) { - - //found it - if (iaddr == gather_top[i]) { - - if (gather_base[i] == 0) - gather_base[i] = maddr; - - //Add index - if (gather_offset[i] >= PSIZE) { - printf("WARNING: Need to increase PSIZE. Truncating trace...\n"); - breakout = 1; - } - //printf("g -- %d % d\n", i, gather_offset[i]); fflush(stdout); - gather_patterns[i][gather_offset[i]++] = (int64_t) (maddr - gather_base[i]); - - break; - } - } - - //scatter ? - } else { - - for (i = 0; i < scatter_ntop; i++) { - - //found it - if (iaddr == scatter_top[i]) { - - //set base - if (scatter_base[i] == 0) - scatter_base[i] = maddr; - - //Add index - if (scatter_offset[i] >= PSIZE) { - printf("WARNING: Need to increase PSIZE. 
Truncating trace...\n"); - breakout = 1; - } - scatter_patterns[i][scatter_offset[i]++] = (int64_t) (maddr - scatter_base[i]); - break; - } - } - } - - } //MEM - - p_drtrace++; - - } //while drtrace - - gzclose(fp_drtrace); - - printf("\n"); - - //Normalize - int64_t smallest; - for (i = 0; i < gather_ntop; i++) { - - //Find smallest - smallest = 0; - for (j = 0; j < gather_offset[i]; j++) { - if (gather_patterns[i][j] < smallest) - smallest = gather_patterns[i][j]; - } - - smallest *= -1; - //Normalize - for (j = 0; j < gather_offset[i]; j++) { - gather_patterns[i][j] += smallest; - } - } - - for (i = 0; i < scatter_ntop; i++) { - - //Find smallest - smallest = 0; - for (j = 0; j < scatter_offset[i]; j++) { - if (scatter_patterns[i][j] < smallest) - smallest = scatter_patterns[i][j]; - } - smallest *= -1; - - //Normalize - for (j = 0; j < scatter_offset[i]; j++) { - scatter_patterns[i][j] += smallest; - } - } - - //Create stride histogram and create spatter - int sidx; - int first_spatter = 1; - int unique_strides; - int64_t idx, pidx; - int64_t n_stride[1027]; - double outbounds; - //print - - - //Create spatter file - FILE *fp, *fp2; - char *json_name, *gs_info; - json_name = str_replace(argv[1], ".gz", ".json"); - fp = fopen(json_name, "w"); - if (fp == NULL) { - printf("ERROR: Could not open %s!\n", json_name); - exit(-1); - } - gs_info = str_replace(argv[1], ".gz", ".txt"); - fp2 = fopen(gs_info, "w"); - if (fp2 == NULL) { - printf("ERROR: Could not open %s!\n", gs_info); - exit(-1); - } - - //Header - fprintf(fp, "[ "); - fprintf(fp2, "#sourceline, g/s, indices, percentage of g/s in trace\n"); - - printf("\n"); - for (i = 0; i < gather_ntop; i++) { - printf("***************************************************************************************\n"); - - unique_strides = 0; - for (j = 0; j < 1027; j++) - n_stride[j] = 0; - - for (j = 1; j < gather_offset[i]; j++) { - sidx = gather_patterns[i][j] - gather_patterns[i][j - 1] + 513; - sidx = (sidx < 1) ? 
0 : sidx; - sidx = (sidx > 1025) ? 1026 : sidx; - n_stride[sidx]++; - } + //Get top scatters + //printf("\nTOP SCATTERS\n"); + scatter_ntop = 0; + for (j = 0; j < NTOP; j++) { - for (j = 0; j < 1027; j++) { - if (n_stride[j] > 0) { - unique_strides++; - } - } + bestcnt = 0; + best_iaddr = 0; + bestidx = -1; - outbounds = (double) (n_stride[0] + n_stride[1026]) / (double) gather_offset[i]; + for (k = 0; k < NGS; k++) { - //if ( ( (unique_strides > NSTRIDES) || (outbounds > OUTTHRESH) ) && (gather_offset[i] > USTRIDES ) ){ - if (1) { + if (scatter_icnt[k] == 0) + continue; - //create a binary file - FILE *fp_bin; - char *bin_name; - bin_name = str_replace(argv[1], ".gz", ".sbin"); - printf("%s\n", bin_name); - fp_bin = fopen(bin_name, "w"); - if (fp_bin == NULL) { - printf("ERROR: Could not open %s!\n", bin_name); - exit(-1); + if (scatter_iaddrs[k] == 0) { + break; } - printf("GIADDR -- %p\n", gather_top[i]); - printf("SRCLINE -- %s\n", gather_srcline[gather_top_idx[i]]); - printf("GATHER %c -- %6.3f%c (512-bit chunks)\n", - '%', 100.0 * (double) gather_tot[i] / gather_cnt, '%'); - printf("NDISTS -- %ld\n", gather_offset[i]); + if (scatter_icnt[k] > bestcnt) { + bestcnt = scatter_icnt[k]; + best_iaddr = scatter_iaddrs[k]; + bestidx = k; + } + } - int64_t nlcnt = 0; - for (j = 0; j < gather_offset[i]; j++) { + if (best_iaddr == 0) { + break; - if (j < 39) { - printf("%10ld ", gather_patterns[i][j]); - fflush(stdout); - if ((++nlcnt % 13) == 0) - printf("\n"); + } else { - } else if (j >= (gather_offset[i] - 39)) { - printf("%10ld ", gather_patterns[i][j]); - fflush(stdout); - if ((++nlcnt % 13) == 0) - printf("\n"); + scatter_ntop++; + scatter_top[j] = best_iaddr; + scatter_top_idx[j] = bestidx; + scatter_tot[j] = scatter_icnt[bestidx]; + scatter_icnt[bestidx] = 0; + //printf("SIADDR -- %016lx: %16lu -- %s\n", + // scatter_top[j], scatter_tot[j], scatter_srcline[bestidx]); + } + } - } else if (j == 39) - printf("...\n"); - } - printf("\n"); - printf("DIST HISTOGRAM 
--\n"); + //Second Pass - for (j = 0; j < 1027; j++) { - if (n_stride[j] > 0) { - if (j == 0) - printf("%6s: %ld\n", "< -512", n_stride[j]); - else if (j == 1026) - printf("%6s: %ld\n", "> 512", n_stride[j]); - else - printf("%6d: %ld\n", j - 513, n_stride[j]); - } - } + //Open trace + fp_drtrace = gzopen(argv[1], "hrb"); + if (fp_drtrace == NULL) { + printf("ERROR: Could not open %s!\n", argv[1]); + exit(-1); + } - if (first_spatter) { - first_spatter = 0; - fprintf(fp, " {\"kernel\":\"Gather\", \"pattern\":["); + mcnt = 0; + iret = 0; + p_drtrace = NULL; + int breakout = 0; + printf("\nSecond pass to fill gather / scatter subtraces\n"); + fflush(stdout); + while (drline_read(fp_drtrace, drtrace, &p_drtrace, &iret) && !breakout) { - } else { - fprintf(fp, ",\n {\"kernel\":\"Gather\", \"pattern\":["); - } + //decode drtrace + drline = p_drtrace; - fwrite(gather_patterns[i], sizeof(uint64_t), gather_offset[i], fp_bin); - fclose(fp_bin); + /*****************************/ + /** INSTR 0xa-0x10 and 0x1e **/ + /*****************************/ + if (((drline->type >= 0xa) && (drline->type <= 0x10)) || (drline->type == 0x1e)) { - for (j = 0; j < gather_offset[i] - 1; j++) - fprintf(fp, "%ld,", gather_patterns[i][j]); - fprintf(fp, "%ld", gather_patterns[i][gather_offset[i] - 1]); + //iaddr + iaddr = drline->addr; - fprintf(fp, "], \"count\":1}"); - fprintf(fp2, "%s,G,%ld,%6.3f\n", - gather_srcline[gather_top_idx[i]], gather_offset[i], - 100.0 * (double) gather_tot[i] / gather_cnt); - } - printf("***************************************************************************************\n\n"); - } + /***********************/ + /** MEM 0x00 and 0x01 **/ + /***********************/ + } else if ((drline->type == 0x0) || (drline->type == 0x1)) { - printf("\n"); - for (i = 0; i < scatter_ntop; i++) { - printf("***************************************************************************************\n"); + maddr = drline->addr / drline->size; - unique_strides = 0; - for (j = 0; j < 
1027; j++) - n_stride[j] = 0; + if ((++mcnt % PERSAMPLE) == 0) { +#if SAMPLE + break; +#endif + printf("."); + fflush(stdout); + } - for (j = 1; j < scatter_offset[i]; j++) { - sidx = scatter_patterns[i][j] - scatter_patterns[i][j - 1] + 513; - sidx = (sidx < 1) ? 0 : sidx; - sidx = (sidx > 1025) ? 1026 : sidx; - n_stride[sidx]++; - } + //gather ? + if (drline->type == 0x0) { - for (j = 0; j < 1027; j++) { - if (n_stride[j] > 0) { - unique_strides++; - } - } + for (i = 0; i < gather_ntop; i++) { - outbounds = (double) (n_stride[0] + n_stride[1026]) / (double) scatter_offset[i]; + //found it + if (iaddr == gather_top[i]) { - //if (((unique_strides > NSTRIDES) | (outbounds > OUTTHRESH) ) && (scatter_offset[i] > USTRIDES) ){ - if (1) { + if (gather_base[i] == 0) + gather_base[i] = maddr; - //create a binary file - FILE *fp_bin; - char *bin_name; - bin_name = str_replace(argv[1], ".gz", ".sbin"); - printf("%s\n", bin_name); - fp_bin = fopen(bin_name, "w"); - if (fp_bin == NULL) { - printf("ERROR: Could not open %s!\n", bin_name); - exit(-1); - } + //Add index + if (gather_offset[i] >= PSIZE) { + printf("WARNING: Need to increase PSIZE. Truncating trace...\n"); + breakout = 1; + } + //printf("g -- %d % d\n", i, gather_offset[i]); fflush(stdout); + gather_patterns[i][gather_offset[i]++] = (int64_t) (maddr - gather_base[i]); - printf("SIADDR -- %p\n", scatter_top[i]); - printf("SRCLINE -- %s\n", scatter_srcline[scatter_top_idx[i]]); - printf("SCATTER %c -- %6.3f%c (512-bit chunks)\n", - '%', 100.0 * (double) scatter_tot[i] / scatter_cnt, '%'); - printf("NDISTS -- %ld\n", scatter_offset[i]); + break; + } + } - int64_t nlcnt = 0; - for (j = 0; j < scatter_offset[i]; j++) { + //scatter ? 
+ } else { - if (j < 39) { - printf("%10ld ", scatter_patterns[i][j]); - fflush(stdout); - if ((++nlcnt % 13) == 0) - printf("\n"); + for (i = 0; i < scatter_ntop; i++) { - } else if (j >= (scatter_offset[i] - 39)) { - printf("%10ld ", scatter_patterns[i][j]); - fflush(stdout); - if ((++nlcnt % 13) == 0) - printf("\n"); + //found it + if (iaddr == scatter_top[i]) { - } else if (j == 39) - printf("...\n"); - } - printf("\n"); - printf("DIST HISTOGRAM --\n"); - for (j = 0; j < 1027; j++) { - if (n_stride[j] > 0) { - if (j == 0) - printf("%6s: %ld\n", "< -512", n_stride[j]); - else if (j == 1026) - printf("%6s: %ld\n", "> 512", n_stride[j]); - else - printf("%6d: %ld\n", j - 513, n_stride[j]); + //set base + if (scatter_base[i] == 0) + scatter_base[i] = maddr; + + //Add index + if (scatter_offset[i] >= PSIZE) { + printf("WARNING: Need to increase PSIZE. Truncating trace...\n"); + breakout = 1; + } + scatter_patterns[i][scatter_offset[i]++] = (int64_t) (maddr - scatter_base[i]); + break; + } } } - if (first_spatter) { - first_spatter = 0; - fprintf(fp, " {\"kernel\":\"Scatter\", \"pattern\":["); + } //MEM - } else { - fprintf(fp, ", {\"kernel\":\"Scatter\", \"pattern\":["); - } + p_drtrace++; - fwrite(scatter_patterns[i], sizeof(uint64_t), scatter_offset[i], fp_bin); - fclose(fp_bin); + } //while drtrace - for (j = 0; j < scatter_offset[i] - 1; j++) - fprintf(fp, "%ld,", scatter_patterns[i][j]); - fprintf(fp, "%ld", scatter_patterns[i][scatter_offset[i] - 1]); - fprintf(fp, "], \"count\":1}"); + gzclose(fp_drtrace); - fprintf(fp2, "%s,S,%ld,%6.3f\n", - scatter_srcline[scatter_top_idx[i]], scatter_offset[i], - 100.0 * (double) scatter_tot[i] / scatter_cnt); - } - printf("***************************************************************************************\n\n"); - } + printf("\n"); - //Footer - fprintf(fp, " ]"); - fclose(fp); - fclose(fp2); + normalize_stats( gather_ntop, scatter_ntop, gather_offset, scatter_offset, gather_patterns, scatter_patterns); + 
create_spatter_file(argv[1], + gather_ntop, scatter_ntop, gather_offset, scatter_offset, gather_patterns, scatter_patterns, + gather_tot, scatter_tot, gather_top, gather_top_idx, scatter_top, scatter_top_idx, + gather_srcline, scatter_srcline, gather_cnt, scatter_cnt + ); for (i = 0; i < NTOP; i++) { free(gather_patterns[i]); From 3c2b5e9a9366784f6f74f83273062dae9d21f0f5 Mon Sep 17 00:00:00 2001 From: christopher Date: Thu, 28 Mar 2024 01:45:58 -0400 Subject: [PATCH 03/76] extracted get_top_target, update_source_lines & second_pass --- gs_patterns.c | 393 +++++++++++++++++++++++++------------------------- 1 file changed, 193 insertions(+), 200 deletions(-) diff --git a/gs_patterns.c b/gs_patterns.c index ee42d67..98a7ae1 100644 --- a/gs_patterns.c +++ b/gs_patterns.c @@ -473,8 +473,8 @@ void normalize_stats( int scatter_ntop, int* gather_offset, int* scatter_offset, - int64_t** gather_patterns, - int64_t** scatter_patterns + int64_t** gather_patterns, // updated + int64_t** scatter_patterns // updated ) { int i = 0; @@ -515,6 +515,190 @@ void normalize_stats( } } +double update_source_lines( + addr_t* target_iaddrs, + char** target_srcline, + int64_t* target_icnt, // updated + const char* binary_file_name) +{ + double scatter_cnt = 0.0; + + fflush(stdout); + //Check it is not a library + for (int k = 0; k < NGS; k++) { + + if (target_iaddrs[k] == 0) { + break; + } + translate_iaddr(binary_file_name, target_srcline[k], target_iaddrs[k]); + if (startswith(target_srcline[k], "?")) + target_icnt[k] = 0; + + scatter_cnt += target_icnt[k]; + } + printf("done.\n"); + + return scatter_cnt; +} + +void second_pass(gzFile fp_drtrace, + trace_entry_t* drtrace, + trace_entry_t* p_drtrace, + int gather_ntop, + int scatter_ntop, + int* gather_offset, + int* scatter_offset, + int64_t** gather_patterns, // updated + int64_t** scatter_patterns, // updated + addr_t* gather_base, // updated + addr_t* scatter_base, // updated + addr_t* gather_top, + addr_t* scatter_top +) +{ + uint64_t 
mcnt = 0; + int iret = 0; + trace_entry_t* drline; + addr_t iaddr; + int64_t maddr; + int i = 0; + + p_drtrace = NULL; + int breakout = 0; + printf("\nSecond pass to fill gather / scatter subtraces\n"); + fflush(stdout); + while (drline_read(fp_drtrace, drtrace, &p_drtrace, &iret) && !breakout) { + + //decode drtrace + drline = p_drtrace; + + /*****************************/ + /** INSTR 0xa-0x10 and 0x1e **/ + /*****************************/ + if (((drline->type >= 0xa) && (drline->type <= 0x10)) || (drline->type == 0x1e)) { + + //iaddr + iaddr = drline->addr; + + + /***********************/ + /** MEM 0x00 and 0x01 **/ + /***********************/ + } else if ((drline->type == 0x0) || (drline->type == 0x1)) { + + maddr = drline->addr / drline->size; + + if ((++mcnt % PERSAMPLE) == 0) { +#if SAMPLE + break; +#endif + printf("."); + fflush(stdout); + } + + //gather ? + if (drline->type == 0x0) { + + for (i = 0; i < gather_ntop; i++) { + + //found it + if (iaddr == gather_top[i]) { + + if (gather_base[i] == 0) + gather_base[i] = maddr; + + //Add index + if (gather_offset[i] >= PSIZE) { + printf("WARNING: Need to increase PSIZE. Truncating trace...\n"); + breakout = 1; + } + //printf("g -- %d % d\n", i, gather_offset[i]); fflush(stdout); + gather_patterns[i][gather_offset[i]++] = (int64_t) (maddr - gather_base[i]); + + break; + } + } + + //scatter ? + } else { + + for (i = 0; i < scatter_ntop; i++) { + + //found it + if (iaddr == scatter_top[i]) { + + //set base + if (scatter_base[i] == 0) + scatter_base[i] = maddr; + + //Add index + if (scatter_offset[i] >= PSIZE) { + printf("WARNING: Need to increase PSIZE. 
Truncating trace...\n"); + breakout = 1; + } + scatter_patterns[i][scatter_offset[i]++] = (int64_t) (maddr - scatter_base[i]); + break; + } + } + } + + } //MEM + + p_drtrace++; + + } //while drtrace +} + +int get_top_target( + const char* target_type, + char** target_srcline, + int64_t* target_icnt, // updated + addr_t* target_iaddrs, // updated + addr_t* target_top, // updated + addr_t* target_tot, // updated + addr_t* target_top_idx // updates +) +{ + int target_ntop = 0; + int bestcnt; + + for (int j = 0; j < NTOP; j++) { + + int bestcnt = 0; + addr_t best_iaddr = 0; + int bestidx = -1; + + for (int k = 0; k < NGS; k++) { + + if (target_icnt[k] == 0) + continue; + + if (target_iaddrs[k] == 0) { + break; + } + + if (target_icnt[k] > bestcnt) { + bestcnt = target_icnt[k]; + best_iaddr = target_iaddrs[k]; + bestidx = k; + } + } + + if (best_iaddr == 0) { + break; + } else { + target_ntop++; + target_top[j] = best_iaddr; + target_top_idx[j] = bestidx; + target_tot[j] = target_icnt[bestidx]; + target_icnt[bestidx] = 0; + + //printf("%s -- %016lx: %16lu -- %s\n", target_type, target_top[j], target_tot[j], target_srcline[bestidx]); + } + } + + return target_ntop; +} int main(int argc, char **argv) { @@ -839,10 +1023,6 @@ int main(int argc, char **argv) { } //while drtrace - - - - //metrics gather_occ_avg /= gather_cnt; scatter_occ_avg /= scatter_cnt; @@ -852,6 +1032,7 @@ int main(int argc, char **argv) { //close files gzclose(fp_drtrace); + printf("DRTRACE STATS\n"); printf("DRTRACE LINES: %16lu\n", drtrace_lines); printf("OPCODES: %16lu\n", opcodes); @@ -873,125 +1054,22 @@ int main(int argc, char **argv) { //Must have symbol printf("\nSymbol table lookup for gathers..."); fflush(stdout); - gather_cnt = 0.0; - for (k = 0; k < NGS; k++) { - - if (gather_iaddrs[k] == 0) - break; - - translate_iaddr(binary, gather_srcline[k], gather_iaddrs[k]); - - if (startswith(gather_srcline[k], "?")) - gather_icnt[k] = 0; - - gather_cnt += gather_icnt[k]; - } - printf("done.\n"); - 
//printf("\nTOP GATHERS\n"); + gather_cnt = update_source_lines(gather_iaddrs, gather_srcline, gather_icnt, binary); //Get top gathers - gather_ntop = 0; - for (j = 0; j < NTOP; j++) { - - bestcnt = 0; - best_iaddr = 0; - bestidx = -1; - - for (k = 0; k < NGS; k++) { - - if (gather_icnt[k] == 0) - continue; + gather_ntop = get_top_target("GIADDR", gather_srcline, gather_icnt, gather_iaddrs, gather_top, gather_tot, gather_top_idx); - if (gather_iaddrs[k] == 0) { - break; - } - - if (gather_icnt[k] > bestcnt) { - bestcnt = gather_icnt[k]; - best_iaddr = gather_iaddrs[k]; - bestidx = k; - } - - } - - if (best_iaddr == 0) { - break; - - } else { - - gather_ntop++; - //printf("GIADDR -- %016lx: %16lu -- %s\n", - // gather_iaddrs[bestidx], gather_icnt[bestidx], gather_srcline[bestidx]); - - gather_top[j] = best_iaddr; - gather_top_idx[j] = bestidx; - gather_tot[j] = gather_icnt[bestidx]; - gather_icnt[bestidx] = 0; - - } - } //Find source lines - scatter_cnt = 0.0; - printf("Symbol table lookup for scatters..."); - fflush(stdout); - //Check it is not a library - for (k = 0; k < NGS; k++) { - - if (scatter_iaddrs[k] == 0) { - break; - } - translate_iaddr(binary, scatter_srcline[k], scatter_iaddrs[k]); - if (startswith(scatter_srcline[k], "?")) - scatter_icnt[k] = 0; - - scatter_cnt += scatter_icnt[k]; - } - printf("done.\n"); + scatter_cnt = update_source_lines(scatter_iaddrs, scatter_srcline, scatter_icnt, binary); //Get top scatters //printf("\nTOP SCATTERS\n"); - scatter_ntop = 0; - for (j = 0; j < NTOP; j++) { - - bestcnt = 0; - best_iaddr = 0; - bestidx = -1; - - for (k = 0; k < NGS; k++) { - - if (scatter_icnt[k] == 0) - continue; - - if (scatter_iaddrs[k] == 0) { - break; - } - - if (scatter_icnt[k] > bestcnt) { - bestcnt = scatter_icnt[k]; - best_iaddr = scatter_iaddrs[k]; - bestidx = k; - } - } + scatter_ntop = get_top_target("SIADDR", scatter_srcline, scatter_icnt, scatter_iaddrs, scatter_top, scatter_tot, scatter_top_idx); - if (best_iaddr == 0) { - break; - - 
} else { - - scatter_ntop++; - scatter_top[j] = best_iaddr; - scatter_top_idx[j] = bestidx; - scatter_tot[j] = scatter_icnt[bestidx]; - scatter_icnt[bestidx] = 0; - //printf("SIADDR -- %016lx: %16lu -- %s\n", - // scatter_top[j], scatter_tot[j], scatter_srcline[bestidx]); - } - } //Second Pass - //Open trace fp_drtrace = gzopen(argv[1], "hrb"); if (fp_drtrace == NULL) { @@ -999,92 +1077,8 @@ int main(int argc, char **argv) { exit(-1); } - mcnt = 0; - iret = 0; - p_drtrace = NULL; - int breakout = 0; - printf("\nSecond pass to fill gather / scatter subtraces\n"); - fflush(stdout); - while (drline_read(fp_drtrace, drtrace, &p_drtrace, &iret) && !breakout) { - - //decode drtrace - drline = p_drtrace; - - /*****************************/ - /** INSTR 0xa-0x10 and 0x1e **/ - /*****************************/ - if (((drline->type >= 0xa) && (drline->type <= 0x10)) || (drline->type == 0x1e)) { - - //iaddr - iaddr = drline->addr; - - - /***********************/ - /** MEM 0x00 and 0x01 **/ - /***********************/ - } else if ((drline->type == 0x0) || (drline->type == 0x1)) { - - maddr = drline->addr / drline->size; - - if ((++mcnt % PERSAMPLE) == 0) { -#if SAMPLE - break; -#endif - printf("."); - fflush(stdout); - } - - //gather ? - if (drline->type == 0x0) { - - for (i = 0; i < gather_ntop; i++) { - - //found it - if (iaddr == gather_top[i]) { - - if (gather_base[i] == 0) - gather_base[i] = maddr; - - //Add index - if (gather_offset[i] >= PSIZE) { - printf("WARNING: Need to increase PSIZE. Truncating trace...\n"); - breakout = 1; - } - //printf("g -- %d % d\n", i, gather_offset[i]); fflush(stdout); - gather_patterns[i][gather_offset[i]++] = (int64_t) (maddr - gather_base[i]); - - break; - } - } - - //scatter ? - } else { - - for (i = 0; i < scatter_ntop; i++) { - - //found it - if (iaddr == scatter_top[i]) { - - //set base - if (scatter_base[i] == 0) - scatter_base[i] = maddr; - - //Add index - if (scatter_offset[i] >= PSIZE) { - printf("WARNING: Need to increase PSIZE. 
Truncating trace...\n"); - breakout = 1; - } - scatter_patterns[i][scatter_offset[i]++] = (int64_t) (maddr - scatter_base[i]); - break; - } - } - } - - } //MEM - - p_drtrace++; - - } //while drtrace + second_pass(fp_drtrace, drtrace, p_drtrace, gather_ntop, scatter_ntop, gather_offset, scatter_offset, + gather_patterns, scatter_patterns, gather_base, scatter_base, gather_top, scatter_top); gzclose(fp_drtrace); @@ -1103,7 +1097,6 @@ int main(int argc, char **argv) { free(scatter_patterns[i]); } - return 0; } From cffaf7bc4876780bf9377ef65683446d35a219bb Mon Sep 17 00:00:00 2001 From: christopher Date: Fri, 29 Mar 2024 22:35:47 -0400 Subject: [PATCH 04/76] Bug fixes and support trace file name handling improvements (doesnt require compressed trace file to end in .gz so we can use symlinks etc). Got rid of compiler warnings. --- gs_patterns.c | 67 ++++++++++++++++++++++++++++++--------------------- 1 file changed, 39 insertions(+), 28 deletions(-) diff --git a/gs_patterns.c b/gs_patterns.c index 98a7ae1..92b1932 100644 --- a/gs_patterns.c +++ b/gs_patterns.c @@ -90,7 +90,7 @@ int endswith(const char *a, const char *b) { } //https://stackoverflow.com/questions/779875/what-function-is-to-replace-a-substring-from-a-string-in-c -char *str_replace(char *orig, char *rep, char *with) { +const char *str_replace(const char *orig, const char *rep, const char *with) { char *result; // the return string char *ins; // the next insert point char *tmp; // varies @@ -114,7 +114,7 @@ char *str_replace(char *orig, char *rep, char *with) { len_with = strlen(with); // count the number of replacements needed - ins = orig; + ins = (char*)orig; for (count = 0; tmp = strstr(ins, rep); ++count) { ins = tmp + len_rep; } @@ -144,8 +144,8 @@ char *get_str(char *line, char *bparse, char *aparse) { char *sline; - sline = str_replace(line, bparse, ""); - sline = str_replace(sline, aparse, ""); + sline = (char*)str_replace(line, bparse, ""); + sline = (char*)str_replace(sline, aparse, ""); return 
sline; } @@ -161,7 +161,7 @@ int cnt_str(char *line, char c) { return cnt; } -void translate_iaddr(char *binary, char *source_line, addr_t iaddr) { +void translate_iaddr(const char *binary, char *source_line, addr_t iaddr) { int i = 0; int ntranslated = 0; @@ -248,13 +248,21 @@ void create_spatter_file( //Create spatter file FILE *fp, *fp2; char *json_name, *gs_info; - json_name = str_replace(trace_file_name, ".gz", ".json"); + json_name = (char*)str_replace(trace_file_name, ".gz", ".json"); + if (strstr(json_name, ".json") == 0) { + strncat(json_name, ".json", strlen(".json"+1)); + } + fp = fopen(json_name, "w"); if (fp == NULL) { printf("ERROR: Could not open %s!\n", json_name); exit(-1); } - gs_info = str_replace(trace_file_name, ".gz", ".txt"); + gs_info = (char*)str_replace(trace_file_name, ".gz", ".txt"); + if (strstr(gs_info, ".json") == 0) { + strncat(gs_info, ".txt", strlen(".txt")+1); + } + fp2 = fopen(gs_info, "w"); if (fp2 == NULL) { printf("ERROR: Could not open %s!\n", gs_info); @@ -294,7 +302,10 @@ void create_spatter_file( //create a binary file FILE *fp_bin; char *bin_name; - bin_name = str_replace(trace_file_name, ".gz", ".sbin"); + bin_name = (char*)str_replace(trace_file_name, ".gz", ".sbin"); + if (strstr(bin_name, ".sbin") == 0) { + strncat(bin_name, ".sbin", strlen(".sbin")+1); + } printf("%s\n", bin_name); fp_bin = fopen(bin_name, "w"); if (fp_bin == NULL) { @@ -302,11 +313,11 @@ void create_spatter_file( exit(-1); } - printf("GIADDR -- %p\n", gather_top[i]); + printf("GIADDR -- %p\n", (void*) gather_top[i]); printf("SRCLINE -- %s\n", gather_srcline[gather_top_idx[i]]); printf("GATHER %c -- %6.3f%c (512-bit chunks)\n", '%', 100.0 * (double) gather_tot[i] / gather_cnt, '%'); - printf("NDISTS -- %ld\n", gather_offset[i]); + printf("NDISTS -- %ld\n", (long int)gather_offset[i]); int64_t nlcnt = 0; for (j = 0; j < gather_offset[i]; j++) { @@ -358,7 +369,7 @@ void create_spatter_file( fprintf(fp, "], \"count\":1}"); fprintf(fp2, 
"%s,G,%ld,%6.3f\n", - gather_srcline[gather_top_idx[i]], gather_offset[i], + gather_srcline[gather_top_idx[i]], (long int)gather_offset[i], 100.0 * (double) gather_tot[i] / gather_cnt); } printf("***************************************************************************************\n\n"); @@ -393,7 +404,10 @@ void create_spatter_file( //create a binary file FILE *fp_bin; char *bin_name; - bin_name = str_replace(trace_file_name, ".gz", ".sbin"); + bin_name = (char*)str_replace(trace_file_name, ".gz", ".sbin"); + if (strstr(bin_name, ".sbin") == 0) { + strncat(bin_name, ".sbin", strlen(".sbin")+1); + } printf("%s\n", bin_name); fp_bin = fopen(bin_name, "w"); if (fp_bin == NULL) { @@ -401,11 +415,11 @@ void create_spatter_file( exit(-1); } - printf("SIADDR -- %p\n", scatter_top[i]); + printf("SIADDR -- %p\n", (void*)scatter_top[i]); printf("SRCLINE -- %s\n", scatter_srcline[scatter_top_idx[i]]); printf("SCATTER %c -- %6.3f%c (512-bit chunks)\n", '%', 100.0 * (double) scatter_tot[i] / scatter_cnt, '%'); - printf("NDISTS -- %ld\n", scatter_offset[i]); + printf("NDISTS -- %ld\n", (long int)scatter_offset[i]); int64_t nlcnt = 0; for (j = 0; j < scatter_offset[i]; j++) { @@ -455,7 +469,7 @@ void create_spatter_file( fprintf(fp, "], \"count\":1}"); fprintf(fp2, "%s,S,%ld,%6.3f\n", - scatter_srcline[scatter_top_idx[i]], scatter_offset[i], + scatter_srcline[scatter_top_idx[i]], (long int)scatter_offset[i], 100.0 * (double) scatter_tot[i] / scatter_cnt); } printf("***************************************************************************************\n\n"); @@ -477,39 +491,36 @@ void normalize_stats( int64_t** scatter_patterns // updated ) { - int i = 0; - int j = 0; - //Normalize int64_t smallest; - for (i = 0; i < gather_ntop; i++) { + for (int i = 0; i < gather_ntop; i++) { //Find smallest smallest = 0; - for (j = 0; j < gather_offset[i]; j++) { + for (int j = 0; j < gather_offset[i]; j++) { if (gather_patterns[i][j] < smallest) smallest = gather_patterns[i][j]; } smallest 
*= -1; //Normalize - for (j = 0; j < gather_offset[i]; j++) { + for (int j = 0; j < gather_offset[i]; j++) { gather_patterns[i][j] += smallest; } } - for (i = 0; i < scatter_ntop; i++) { + for (int i = 0; i < scatter_ntop; i++) { //Find smallest smallest = 0; - for (j = 0; j < scatter_offset[i]; j++) { + for (int j = 0; j < scatter_offset[i]; j++) { if (scatter_patterns[i][j] < smallest) smallest = scatter_patterns[i][j]; } smallest *= -1; //Normalize - for (j = 0; j < scatter_offset[i]; j++) { + for (int j = 0; j < scatter_offset[i]; j++) { scatter_patterns[i][j] += smallest; } } @@ -517,7 +528,7 @@ void normalize_stats( double update_source_lines( addr_t* target_iaddrs, - char** target_srcline, + char target_srcline[][1024], //was char** int64_t* target_icnt, // updated const char* binary_file_name) { @@ -1057,7 +1068,7 @@ int main(int argc, char **argv) { gather_cnt = update_source_lines(gather_iaddrs, gather_srcline, gather_icnt, binary); //Get top gathers - gather_ntop = get_top_target("GIADDR", gather_srcline, gather_icnt, gather_iaddrs, gather_top, gather_tot, gather_top_idx); + gather_ntop = get_top_target("GIADDR", (char**) gather_srcline, gather_icnt, gather_iaddrs, gather_top, gather_tot, gather_top_idx); //Find source lines @@ -1066,7 +1077,7 @@ int main(int argc, char **argv) { //Get top scatters //printf("\nTOP SCATTERS\n"); - scatter_ntop = get_top_target("SIADDR", scatter_srcline, scatter_icnt, scatter_iaddrs, scatter_top, scatter_tot, scatter_top_idx); + scatter_ntop = get_top_target("SIADDR", (char**)scatter_srcline, scatter_icnt, scatter_iaddrs, scatter_top, scatter_tot, scatter_top_idx); //Second Pass @@ -1089,7 +1100,7 @@ int main(int argc, char **argv) { create_spatter_file(argv[1], gather_ntop, scatter_ntop, gather_offset, scatter_offset, gather_patterns, scatter_patterns, gather_tot, scatter_tot, gather_top, gather_top_idx, scatter_top, scatter_top_idx, - gather_srcline, scatter_srcline, gather_cnt, scatter_cnt + (char**)gather_srcline, 
(char**)scatter_srcline, gather_cnt, scatter_cnt ); for (i = 0; i < NTOP; i++) { From 156fe23e4576ab187dcbab41ea56775bde4112cb Mon Sep 17 00:00:00 2001 From: christopher Date: Sat, 30 Mar 2024 17:08:00 -0400 Subject: [PATCH 05/76] Fix sourceline lookup, json filename bug, added max line length define -- Checkpoint prior to c+++ -- --- gs_patterns.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/gs_patterns.c b/gs_patterns.c index 92b1932..1248964 100644 --- a/gs_patterns.c +++ b/gs_patterns.c @@ -48,6 +48,8 @@ #define ADDREND (0xFFFFFFFFFFFFFFFFUL) #define ADDRUSYNC (0xFFFFFFFFFFFFFFFEUL) +#define MAX_LINE_LENGTH 1024 + typedef uintptr_t addr_t; //FROM DR SOURCE @@ -165,8 +167,8 @@ void translate_iaddr(const char *binary, char *source_line, addr_t iaddr) { int i = 0; int ntranslated = 0; - char path[1024]; - char cmd[1024]; + char path[MAX_LINE_LENGTH]; + char cmd[MAX_LINE_LENGTH]; FILE *fp; sprintf(cmd, "addr2line -e %s 0x%lx", binary, iaddr); @@ -227,8 +229,8 @@ void create_spatter_file( addr_t* scatter_top, addr_t* scatter_top_idx, /* */ - char** gather_srcline, - char** scatter_srcline, + char gather_srcline[][MAX_LINE_LENGTH], + char scatter_srcline[][MAX_LINE_LENGTH], double gather_cnt, double scatter_cnt ) @@ -250,7 +252,7 @@ void create_spatter_file( char *json_name, *gs_info; json_name = (char*)str_replace(trace_file_name, ".gz", ".json"); if (strstr(json_name, ".json") == 0) { - strncat(json_name, ".json", strlen(".json"+1)); + strncat(json_name, ".json", strlen(".json")+1); } fp = fopen(json_name, "w"); @@ -528,7 +530,7 @@ void normalize_stats( double update_source_lines( addr_t* target_iaddrs, - char target_srcline[][1024], //was char** + char target_srcline[][MAX_LINE_LENGTH], //was char** int64_t* target_icnt, // updated const char* binary_file_name) { @@ -557,8 +559,8 @@ void second_pass(gzFile fp_drtrace, trace_entry_t* p_drtrace, int gather_ntop, int scatter_ntop, - int* gather_offset, - int* 
scatter_offset, + int* gather_offset, // updated + int* scatter_offset, // updated int64_t** gather_patterns, // updated int64_t** scatter_patterns, // updated addr_t* gather_base, // updated @@ -726,7 +728,7 @@ int main(int argc, char **argv) { int64_t ngs = 0; char *eptr; char binary[1024]; - char srcline[1024]; + char srcline[MAX_LINE_LENGTH]; //dtrace vars int64_t drtrace_lines = 0; @@ -765,11 +767,11 @@ int main(int argc, char **argv) { static int64_t w_cnt[2][IWINDOW]; //First pass to find top gather / scatters - static char gather_srcline[NGS][1024]; + static char gather_srcline[NGS][MAX_LINE_LENGTH]; static addr_t gather_iaddrs[NGS] = {0}; static int64_t gather_icnt[NGS] = {0}; static int64_t gather_occ[NGS] = {0}; - static char scatter_srcline[NGS][1024]; + static char scatter_srcline[NGS][MAX_LINE_LENGTH]; static addr_t scatter_iaddrs[NGS] = {0}; static int64_t scatter_icnt[NGS] = {0}; static int64_t scatter_occ[NGS] = {0}; @@ -1100,7 +1102,7 @@ int main(int argc, char **argv) { create_spatter_file(argv[1], gather_ntop, scatter_ntop, gather_offset, scatter_offset, gather_patterns, scatter_patterns, gather_tot, scatter_tot, gather_top, gather_top_idx, scatter_top, scatter_top_idx, - (char**)gather_srcline, (char**)scatter_srcline, gather_cnt, scatter_cnt + gather_srcline, scatter_srcline, gather_cnt, scatter_cnt ); for (i = 0; i < NTOP; i++) { From 1f93c2fdfb159a901b850cbbcfd1ac5836f08ea4 Mon Sep 17 00:00:00 2001 From: christopher Date: Sat, 30 Mar 2024 17:18:37 -0400 Subject: [PATCH 06/76] Migrated to C++ --- CMakeLists.txt | 14 +++----------- gs_patterns.c => gs_patterns.cpp | 4 ++-- 2 files changed, 5 insertions(+), 13 deletions(-) rename gs_patterns.c => gs_patterns.cpp (99%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3c36ba9..bbc5bc5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,16 +2,8 @@ cmake_minimum_required(VERSION 3.1...3.25) set (CMAKE_VERBOSE_MAKEFILE "1") -project( - gs_patterns - VERSION 1.0 - LANGUAGES C) +project( 
gs_patterns VERSION 1.0 LANGUAGES CXX) -add_executable(gs_patterns gs_patterns.c) +add_executable(gs_patterns gs_patterns.cpp) -#set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -lm -lz") -#set(CMAKE_LINK_LIBRARY_FLAG "-lz -lm") -#set(CMAKE_C_LINK_LIBRARY_FLAG "-lm -lz") -set(CMAKE_C_STANDARD_LIBRARIES "-lm -lz ${CMAKE_C_STANDARD_LIBRARIES}") - -#set (CMAKE_C_FLAGS "-lm -lz") +set(CMAKE_CXX_STANDARD_LIBRARIES "-lm -lz ${CMAKE_CXX_STANDARD_LIBRARIES}") diff --git a/gs_patterns.c b/gs_patterns.cpp similarity index 99% rename from gs_patterns.c rename to gs_patterns.cpp index 1248964..384ccc2 100644 --- a/gs_patterns.c +++ b/gs_patterns.cpp @@ -121,7 +121,7 @@ const char *str_replace(const char *orig, const char *rep, const char *with) { ins = tmp + len_rep; } - tmp = result = malloc(strlen(orig) + (len_with - len_rep) * count + 1); + tmp = result = (char*)malloc(strlen(orig) + (len_with - len_rep) * count + 1); if (!result) return NULL; @@ -132,7 +132,7 @@ const char *str_replace(const char *orig, const char *rep, const char *with) { // ins points to the next occurrence of rep in orig // orig points to the remainder of orig after "end of rep" while (count--) { - ins = strstr(orig, rep); + ins = (char*)strstr(orig, rep); len_front = ins - orig; tmp = strncpy(tmp, orig, len_front) + len_front; tmp = strcpy(tmp, with) + len_with; From 35c1c6628b2c32b57a0f026bbf5c66f708cf4052 Mon Sep 17 00:00:00 2001 From: christopher Date: Sun, 31 Mar 2024 17:16:19 -0400 Subject: [PATCH 07/76] introduced Metrics class, partially used. tests pass. --- gs_patterns.cpp | 806 +++++++++++++++++++++--------------------------- 1 file changed, 359 insertions(+), 447 deletions(-) diff --git a/gs_patterns.cpp b/gs_patterns.cpp index 384ccc2..5290328 100644 --- a/gs_patterns.cpp +++ b/gs_patterns.cpp @@ -9,6 +9,10 @@ #include #include +#include +#include +#include + #define MAX(X, Y) (((X) < (Y)) ? Y : X) #define MIN(X, Y) (((X) > (Y)) ? Y : X) #define ABS(X) (((X) < 0) ? 
(-1) * (X) : X) @@ -64,6 +68,64 @@ struct _trace_entry_t { } __attribute__((packed)); typedef struct _trace_entry_t trace_entry_t; +class Metrics +{ +public: + typedef enum { GATHER=0, SCATTER } metrics_type; + + Metrics(metrics_type mType) : _mType(mType) + { + /// TODO: Convert to new/free + for (int j = 0; j < NTOP; j++) { + patterns[j] = (int64_t *) calloc(PSIZE, sizeof(int64_t)); + if (patterns[j] == NULL) { + printf("ERROR: Could not allocate gather_patterns!\n"); + throw std::runtime_error("Could not allocate patterns for " + type_as_string()); //exit(-1); + } + } + } + + ~Metrics() + { + /// TODO: Convert to new/free + for (int i = 0; i < NTOP; i++) { + free(patterns[i]); + } + } + + Metrics(const Metrics &) = delete; + Metrics & operator=(const Metrics & right) = delete; + + std::string type_as_string() { return !_mType ? "GATHER" : "SCATTER"; } + std::string getName() { return !_mType ? "Gather" : "Scatter"; } + std::string getShortName() { return !_mType ? "G" : "S"; } + + auto get_srcline() { return srcline[_mType]; } + +//private: + int ntop = 0; + double cnt = 0.0; + int offset[NTOP] = {0}; + + addr_t tot[NTOP] = {0}; + addr_t top[NTOP] = {0}; + addr_t top_idx[NTOP] = {0}; + + int64_t* patterns[NTOP] = {0}; + +private: + static char srcline[2][NGS][MAX_LINE_LENGTH]; // was static (may move out and have 1 per type) + + metrics_type _mType; +}; + +/* +class Address_Instr +{ +public: +}; +*/ + static inline int popcount(uint64_t x) { int c; @@ -213,40 +275,10 @@ int drline_read(gzFile fp, trace_entry_t *val, trace_entry_t **p_val, int *edx) return 1; } -void create_spatter_file( - const char* trace_file_name, - int gather_ntop, - int scatter_ntop, - int* gather_offset, - int* scatter_offset, - int64_t** gather_patterns, - int64_t** scatter_patterns, - /* */ - addr_t* gather_tot, - addr_t* scatter_tot, - addr_t* gather_top, - addr_t* gather_top_idx, - addr_t* scatter_top, - addr_t* scatter_top_idx, - /* */ - char gather_srcline[][MAX_LINE_LENGTH], - char 
scatter_srcline[][MAX_LINE_LENGTH], - double gather_cnt, - double scatter_cnt -) -{ - int i = 0; - int j = 0; - - //Create stride histogram and create spatter - int sidx; - int first_spatter = 1; - int unique_strides; - int64_t idx, pidx; - int64_t n_stride[1027]; - double outbounds; - //print +void create_metrics_file(FILE *fp, FILE *fp2, const char* trace_file_name, Metrics & target_metrics); +void create_spatter_file(const char* trace_file_name, Metrics & gather_metrics, Metrics & scatter_metrics) +{ //Create spatter file FILE *fp, *fp2; char *json_name, *gs_info; @@ -275,118 +307,39 @@ void create_spatter_file( fprintf(fp, "[ "); fprintf(fp2, "#sourceline, g/s, indices, percentage of g/s in trace\n"); - printf("\n"); - for (i = 0; i < gather_ntop; i++) { - printf("***************************************************************************************\n"); - - unique_strides = 0; - for (j = 0; j < 1027; j++) - n_stride[j] = 0; - - for (j = 1; j < gather_offset[i]; j++) { - sidx = gather_patterns[i][j] - gather_patterns[i][j - 1] + 513; - sidx = (sidx < 1) ? 0 : sidx; - sidx = (sidx > 1025) ? 
1026 : sidx; - n_stride[sidx]++; - } - - for (j = 0; j < 1027; j++) { - if (n_stride[j] > 0) { - unique_strides++; - } - } - - outbounds = (double) (n_stride[0] + n_stride[1026]) / (double) gather_offset[i]; + create_metrics_file(fp, fp2, trace_file_name, gather_metrics); - //if ( ( (unique_strides > NSTRIDES) || (outbounds > OUTTHRESH) ) && (gather_offset[i] > USTRIDES ) ){ - if (1) { + create_metrics_file(fp, fp2, trace_file_name, scatter_metrics); - //create a binary file - FILE *fp_bin; - char *bin_name; - bin_name = (char*)str_replace(trace_file_name, ".gz", ".sbin"); - if (strstr(bin_name, ".sbin") == 0) { - strncat(bin_name, ".sbin", strlen(".sbin")+1); - } - printf("%s\n", bin_name); - fp_bin = fopen(bin_name, "w"); - if (fp_bin == NULL) { - printf("ERROR: Could not open %s!\n", bin_name); - exit(-1); - } - - printf("GIADDR -- %p\n", (void*) gather_top[i]); - printf("SRCLINE -- %s\n", gather_srcline[gather_top_idx[i]]); - printf("GATHER %c -- %6.3f%c (512-bit chunks)\n", - '%', 100.0 * (double) gather_tot[i] / gather_cnt, '%'); - printf("NDISTS -- %ld\n", (long int)gather_offset[i]); - - int64_t nlcnt = 0; - for (j = 0; j < gather_offset[i]; j++) { - - if (j < 39) { - printf("%10ld ", gather_patterns[i][j]); - fflush(stdout); - if ((++nlcnt % 13) == 0) - printf("\n"); - - } else if (j >= (gather_offset[i] - 39)) { - printf("%10ld ", gather_patterns[i][j]); - fflush(stdout); - if ((++nlcnt % 13) == 0) - printf("\n"); - - } else if (j == 39) - printf("...\n"); - } - printf("\n"); - printf("DIST HISTOGRAM --\n"); - - for (j = 0; j < 1027; j++) { - if (n_stride[j] > 0) { - if (j == 0) - printf("%6s: %ld\n", "< -512", n_stride[j]); - else if (j == 1026) - printf("%6s: %ld\n", "> 512", n_stride[j]); - else - printf("%6d: %ld\n", j - 513, n_stride[j]); - } - } - - if (first_spatter) { - first_spatter = 0; - fprintf(fp, " {\"kernel\":\"Gather\", \"pattern\":["); - - } else { - fprintf(fp, ",\n {\"kernel\":\"Gather\", \"pattern\":["); - } - - 
fwrite(gather_patterns[i], sizeof(uint64_t), gather_offset[i], fp_bin); - fclose(fp_bin); - - for (j = 0; j < gather_offset[i] - 1; j++) - fprintf(fp, "%ld,", gather_patterns[i][j]); - fprintf(fp, "%ld", gather_patterns[i][gather_offset[i] - 1]); + //Footer + fprintf(fp, " ]"); + fclose(fp); + fclose(fp2); +} - fprintf(fp, "], \"count\":1}"); +void create_metrics_file(FILE *fp, FILE *fp2, const char* trace_file_name, Metrics & target_metrics) +{ + int i = 0; + int j = 0; - fprintf(fp2, "%s,G,%ld,%6.3f\n", - gather_srcline[gather_top_idx[i]], (long int)gather_offset[i], - 100.0 * (double) gather_tot[i] / gather_cnt); - } - printf("***************************************************************************************\n\n"); - } + //Create stride histogram and create spatter + int sidx; + static bool first_spatter = true; + int unique_strides; + int64_t idx, pidx; + int64_t n_stride[1027]; + double outbounds; printf("\n"); - for (i = 0; i < scatter_ntop; i++) { + for (i = 0; i < target_metrics.ntop; i++) { printf("***************************************************************************************\n"); unique_strides = 0; for (j = 0; j < 1027; j++) n_stride[j] = 0; - for (j = 1; j < scatter_offset[i]; j++) { - sidx = scatter_patterns[i][j] - scatter_patterns[i][j - 1] + 513; + for (j = 1; j < target_metrics.offset[i]; j++) { + sidx = target_metrics.patterns[i][j] - target_metrics.patterns[i][j - 1] + 513; sidx = (sidx < 1) ? 0 : sidx; sidx = (sidx > 1025) ? 
1026 : sidx; n_stride[sidx]++; @@ -398,9 +351,9 @@ void create_spatter_file( } } - outbounds = (double) (n_stride[0] + n_stride[1026]) / (double) scatter_offset[i]; + outbounds = (double) (n_stride[0] + n_stride[1026]) / (double) target_metrics.offset[i]; - //if (((unique_strides > NSTRIDES) | (outbounds > OUTTHRESH) ) && (scatter_offset[i] > USTRIDES) ){ + //if ( ( (unique_strides > NSTRIDES) || (outbounds > OUTTHRESH) ) && (gather_offset[i] > USTRIDES ) ){ if (1) { //create a binary file @@ -417,23 +370,23 @@ void create_spatter_file( exit(-1); } - printf("SIADDR -- %p\n", (void*)scatter_top[i]); - printf("SRCLINE -- %s\n", scatter_srcline[scatter_top_idx[i]]); - printf("SCATTER %c -- %6.3f%c (512-bit chunks)\n", - '%', 100.0 * (double) scatter_tot[i] / scatter_cnt, '%'); - printf("NDISTS -- %ld\n", (long int)scatter_offset[i]); + printf("%sIADDR -- %p\n", target_metrics.getShortName().c_str(), (void*) target_metrics.top[i]); + printf("SRCLINE -- %s\n", target_metrics.get_srcline()[target_metrics.top_idx[i]]); + printf("%s %c -- %6.3f%c (512-bit chunks)\n", target_metrics.type_as_string().c_str(), + '%', 100.0 * (double) target_metrics.tot[i] / target_metrics.cnt, '%'); + printf("NDISTS -- %ld\n", (long int)target_metrics.offset[i]); int64_t nlcnt = 0; - for (j = 0; j < scatter_offset[i]; j++) { + for (j = 0; j < target_metrics.offset[i]; j++) { if (j < 39) { - printf("%10ld ", scatter_patterns[i][j]); + printf("%10ld ", target_metrics.patterns[i][j]); fflush(stdout); if ((++nlcnt % 13) == 0) printf("\n"); - } else if (j >= (scatter_offset[i] - 39)) { - printf("%10ld ", scatter_patterns[i][j]); + } else if (j >= (target_metrics.offset[i] - 39)) { + printf("%10ld ", target_metrics.patterns[i][j]); fflush(stdout); if ((++nlcnt % 13) == 0) printf("\n"); @@ -443,6 +396,7 @@ void create_spatter_file( } printf("\n"); printf("DIST HISTOGRAM --\n"); + for (j = 0; j < 1027; j++) { if (n_stride[j] > 0) { if (j == 0) @@ -455,75 +409,47 @@ void create_spatter_file( } if 
(first_spatter) { - first_spatter = 0; - fprintf(fp, " {\"kernel\":\"Scatter\", \"pattern\":["); - + first_spatter = false; + fprintf(fp, " {\"kernel\":\"%s\", \"pattern\":[", target_metrics.getName().c_str()); } else { - fprintf(fp, ", {\"kernel\":\"Scatter\", \"pattern\":["); + fprintf(fp, ",\n {\"kernel\":\"%s\", \"pattern\":[", target_metrics.getName().c_str()); } - fwrite(scatter_patterns[i], sizeof(uint64_t), scatter_offset[i], fp_bin); + fwrite(target_metrics.patterns[i], sizeof(uint64_t), target_metrics.offset[i], fp_bin); fclose(fp_bin); - for (j = 0; j < scatter_offset[i] - 1; j++) - fprintf(fp, "%ld,", scatter_patterns[i][j]); - fprintf(fp, "%ld", scatter_patterns[i][scatter_offset[i] - 1]); + for (j = 0; j < target_metrics.offset[i] - 1; j++) + fprintf(fp, "%ld,", target_metrics.patterns[i][j]); + fprintf(fp, "%ld", target_metrics.patterns[i][target_metrics.offset[i] - 1]); fprintf(fp, "], \"count\":1}"); - fprintf(fp2, "%s,S,%ld,%6.3f\n", - scatter_srcline[scatter_top_idx[i]], (long int)scatter_offset[i], - 100.0 * (double) scatter_tot[i] / scatter_cnt); + fprintf(fp2, "%s,%s,%ld,%6.3f\n", + target_metrics.get_srcline()[target_metrics.top_idx[i]], target_metrics.getShortName().c_str(), + (long int)target_metrics.offset[i], + 100.0 * (double) target_metrics.tot[i] / target_metrics.cnt); } printf("***************************************************************************************\n\n"); } - - //Footer - fprintf(fp, " ]"); - fclose(fp); - fclose(fp2); - } -void normalize_stats( - int gather_ntop, - int scatter_ntop, - int* gather_offset, - int* scatter_offset, - int64_t** gather_patterns, // updated - int64_t** scatter_patterns // updated -) +void normalize_stats(Metrics & target_metrics) { //Normalize int64_t smallest; - for (int i = 0; i < gather_ntop; i++) { + for (int i = 0; i < target_metrics.ntop; i++) { //Find smallest smallest = 0; - for (int j = 0; j < gather_offset[i]; j++) { - if (gather_patterns[i][j] < smallest) - smallest = 
gather_patterns[i][j]; + for (int j = 0; j < target_metrics.offset[i]; j++) { + if (target_metrics.patterns[i][j] < smallest) + smallest = target_metrics.patterns[i][j]; } - smallest *= -1; - //Normalize - for (int j = 0; j < gather_offset[i]; j++) { - gather_patterns[i][j] += smallest; - } - } - - for (int i = 0; i < scatter_ntop; i++) { - - //Find smallest - smallest = 0; - for (int j = 0; j < scatter_offset[i]; j++) { - if (scatter_patterns[i][j] < smallest) - smallest = scatter_patterns[i][j]; - } smallest *= -1; //Normalize - for (int j = 0; j < scatter_offset[i]; j++) { - scatter_patterns[i][j] += smallest; + for (int j = 0; j < target_metrics.offset[i]; j++) { + target_metrics.patterns[i][j] += smallest; } } } @@ -554,20 +480,8 @@ double update_source_lines( return scatter_cnt; } -void second_pass(gzFile fp_drtrace, - trace_entry_t* drtrace, - trace_entry_t* p_drtrace, - int gather_ntop, - int scatter_ntop, - int* gather_offset, // updated - int* scatter_offset, // updated - int64_t** gather_patterns, // updated - int64_t** scatter_patterns, // updated - addr_t* gather_base, // updated - addr_t* scatter_base, // updated - addr_t* gather_top, - addr_t* scatter_top -) +void second_pass(gzFile fp_drtrace, trace_entry_t* drtrace, trace_entry_t* p_drtrace, + Metrics & gather_metrics, Metrics & scatter_metrics) { uint64_t mcnt = 0; int iret = 0; @@ -576,6 +490,9 @@ void second_pass(gzFile fp_drtrace, int64_t maddr; int i = 0; + static addr_t gather_base[NTOP] = {0}; + static addr_t scatter_base[NTOP] = {0}; + p_drtrace = NULL; int breakout = 0; printf("\nSecond pass to fill gather / scatter subtraces\n"); @@ -612,21 +529,21 @@ void second_pass(gzFile fp_drtrace, //gather ? 
if (drline->type == 0x0) { - for (i = 0; i < gather_ntop; i++) { + for (i = 0; i < gather_metrics.ntop; i++) { //found it - if (iaddr == gather_top[i]) { + if (iaddr == gather_metrics.top[i]) { if (gather_base[i] == 0) gather_base[i] = maddr; //Add index - if (gather_offset[i] >= PSIZE) { + if (gather_metrics.offset[i] >= PSIZE) { printf("WARNING: Need to increase PSIZE. Truncating trace...\n"); breakout = 1; } //printf("g -- %d % d\n", i, gather_offset[i]); fflush(stdout); - gather_patterns[i][gather_offset[i]++] = (int64_t) (maddr - gather_base[i]); + gather_metrics.patterns[i][gather_metrics.offset[i]++] = (int64_t) (maddr - gather_base[i]); break; } @@ -635,21 +552,21 @@ void second_pass(gzFile fp_drtrace, //scatter ? } else { - for (i = 0; i < scatter_ntop; i++) { + for (i = 0; i < scatter_metrics.ntop; i++) { //found it - if (iaddr == scatter_top[i]) { + if (iaddr == scatter_metrics.top[i]) { //set base if (scatter_base[i] == 0) scatter_base[i] = maddr; //Add index - if (scatter_offset[i] >= PSIZE) { + if (scatter_metrics.offset[i] >= PSIZE) { printf("WARNING: Need to increase PSIZE. 
Truncating trace...\n"); breakout = 1; } - scatter_patterns[i][scatter_offset[i]++] = (int64_t) (maddr - scatter_base[i]); + scatter_metrics.patterns[i][scatter_metrics.offset[i]++] = (int64_t) (maddr - scatter_base[i]); break; } } @@ -713,6 +630,8 @@ int get_top_target( return target_ntop; } +char Metrics::srcline[2][NGS][MAX_LINE_LENGTH]; + int main(int argc, char **argv) { //generic @@ -750,8 +669,8 @@ int main(int argc, char **argv) { int64_t mcl; int64_t gather_bytes_hist[100] = {0}; int64_t scatter_bytes_hist[100] = {0}; - double gather_cnt = 0.0; - double scatter_cnt = 0.0; + ///double gather_cnt = 0.0; + ///double scatter_cnt = 0.0; double other_cnt = 0.0; double gather_score = 0.0; double gather_occ_avg = 0.0; @@ -767,11 +686,11 @@ int main(int argc, char **argv) { static int64_t w_cnt[2][IWINDOW]; //First pass to find top gather / scatters - static char gather_srcline[NGS][MAX_LINE_LENGTH]; + ///static char gather_srcline[NGS][MAX_LINE_LENGTH]; static addr_t gather_iaddrs[NGS] = {0}; static int64_t gather_icnt[NGS] = {0}; static int64_t gather_occ[NGS] = {0}; - static char scatter_srcline[NGS][MAX_LINE_LENGTH]; + ///static char scatter_srcline[NGS][MAX_LINE_LENGTH]; static addr_t scatter_iaddrs[NGS] = {0}; static int64_t scatter_icnt[NGS] = {0}; static int64_t scatter_occ[NGS] = {0}; @@ -786,32 +705,16 @@ int main(int argc, char **argv) { static int scatter_offset[NTOP] = {0}; static addr_t best_iaddr; - static addr_t gather_tot[NTOP] = {0}; - static addr_t scatter_tot[NTOP] = {0}; - static addr_t gather_top[NTOP] = {0}; - static addr_t gather_top_idx[NTOP] = {0}; - static addr_t scatter_top[NTOP] = {0}; - static addr_t scatter_top_idx[NTOP] = {0}; + ///static addr_t gather_tot[NTOP] = {0}; + ///static addr_t scatter_tot[NTOP] = {0}; + ///static addr_t gather_top[NTOP] = {0}; + ///static addr_t gather_top_idx[NTOP] = {0}; + ///static addr_t scatter_top[NTOP] = {0}; + ///static addr_t scatter_top_idx[NTOP] = {0}; static addr_t gather_base[NTOP] = {0}; 
static addr_t scatter_base[NTOP] = {0}; - static int64_t *gather_patterns[NTOP] = {0}; - static int64_t *scatter_patterns[NTOP] = {0}; - - for (j = 0; j < NTOP; j++) { - gather_patterns[j] = (int64_t *) calloc(PSIZE, sizeof(int64_t)); - if (gather_patterns[j] == NULL) { - printf("ERROR: Could not allocate gather_patterns!\n"); - exit(-1); - } - } - - for (j = 0; j < NTOP; j++) { - scatter_patterns[j] = (int64_t *) calloc(PSIZE, sizeof(int64_t)); - if (scatter_patterns[j] == NULL) { - printf("ERROR: Could not allocate scatter_patterns!\n"); - exit(-1); - } - } + //static int64_t *gather_patterns[NTOP] = {0}; + //static int64_t *scatter_patterns[NTOP] = {0}; if (argc == 3) { @@ -829,285 +732,294 @@ int main(int argc, char **argv) { exit(-1); } - //init window arrays - for (w = 0; w < 2; w++) { - for (i = 0; i < IWINDOW; i++) { - w_iaddrs[w][i] = -1; - w_bytes[w][i] = 0; - w_cnt[w][i] = 0; - for (j = 0; j < VBYTES; j++) - w_maddr[w][i][j] = -1; - } - } + try { - uint64_t mcnt = 0; - uint64_t unique_iaddrs = 0; - int unsynced = 0; - uint64_t unsync_cnt = 0; - addr_t ciaddr; + //Metrics gather_metrics(Metrics::GATHER); + //Metrics scatter_metrics(Metrics::SCATTER); - printf("First pass to find top gather / scatter iaddresses\n"); - fflush(stdout); + Metrics* g = new Metrics(Metrics::GATHER); + Metrics* s = new Metrics(Metrics::SCATTER); + Metrics & gather_metrics = *g; + Metrics & scatter_metrics = *s; - //read dr trace entries instrs - //printf("%16s %16s %16s %16s %16s %16s\n", "iaddr", "rw", "byte", "bytes", "cnt", "maddr"); - while (drline_read(fp_drtrace, drtrace, &p_drtrace, &iret)) { + //init window arrays + for (w = 0; w < 2; w++) { + for (i = 0; i < IWINDOW; i++) { + w_iaddrs[w][i] = -1; + w_bytes[w][i] = 0; + w_cnt[w][i] = 0; + for (j = 0; j < VBYTES; j++) + w_maddr[w][i][j] = -1; + } + } - //decode drtrace - drline = p_drtrace; + uint64_t mcnt = 0; + uint64_t unique_iaddrs = 0; + int unsynced = 0; + uint64_t unsync_cnt = 0; + addr_t ciaddr; - 
/*****************************/ - /** INSTR 0xa-0x10 and 0x1e **/ - /*****************************/ - if (((drline->type >= 0xa) && (drline->type <= 0x10)) || (drline->type == 0x1e)) { + printf("First pass to find top gather / scatter iaddresses\n"); + fflush(stdout); - //iaddr - iaddr = drline->addr; + //read dr trace entries instrs + //printf("%16s %16s %16s %16s %16s %16s\n", "iaddr", "rw", "byte", "bytes", "cnt", "maddr"); + while (drline_read(fp_drtrace, drtrace, &p_drtrace, &iret)) { - //nops - opcodes++; - did_opcode = 1; + //decode drtrace + drline = p_drtrace; - /***********************/ - /** MEM 0x00 and 0x01 **/ - /***********************/ - } else if ((drline->type == 0x0) || (drline->type == 0x1)) { + /*****************************/ + /** INSTR 0xa-0x10 and 0x1e **/ + /*****************************/ + if (((drline->type >= 0xa) && (drline->type <= 0x10)) || (drline->type == 0x1e)) { - w_rw_idx = drline->type; + //iaddr + iaddr = drline->addr; - //printf("M DRTRACE -- iaddr: %016lx addr: %016lx cl_start: %d bytes: %d\n", - // iaddr, drline->addr, drline->addr % 64, drline->size); + //nops + opcodes++; + did_opcode = 1; - if ((++mcnt % PERSAMPLE) == 0) { -#if SAMPLE - break; -#endif - printf("."); - fflush(stdout); - } + /***********************/ + /** MEM 0x00 and 0x01 **/ + /***********************/ + } else if ((drline->type == 0x0) || (drline->type == 0x1)) { - //is iaddr in window - w_idx = -1; - for (i = 0; i < IWINDOW; i++) { + w_rw_idx = drline->type; - //new iaddr - if (w_iaddrs[w_rw_idx][i] == -1) { - w_idx = i; - break; + //printf("M DRTRACE -- iaddr: %016lx addr: %016lx cl_start: %d bytes: %d\n", + // iaddr, drline->addr, drline->addr % 64, drline->size); - //iaddr exists - } else if (w_iaddrs[w_rw_idx][i] == iaddr) { - w_idx = i; + if ((++mcnt % PERSAMPLE) == 0) { + #if SAMPLE break; + #endif + printf("."); + fflush(stdout); } - } - //new window - if ((w_idx == -1) || (w_bytes[w_rw_idx][w_idx] >= VBYTES) || - (w_cnt[w_rw_idx][w_idx] >= 
VBYTES)) { + //is iaddr in window + w_idx = -1; + for (i = 0; i < IWINDOW; i++) { + + //new iaddr + if (w_iaddrs[w_rw_idx][i] == -1) { + w_idx = i; + break; - /***************************/ - //do analysis - /***************************/ - //i = each window - for (w = 0; w < 2; w++) { // 2 + //iaddr exists + } else if (w_iaddrs[w_rw_idx][i] == iaddr) { + w_idx = i; + break; + } + } - for (i = 0; i < IWINDOW; i++) { // 1024 + //new window + if ((w_idx == -1) || (w_bytes[w_rw_idx][w_idx] >= VBYTES) || + (w_cnt[w_rw_idx][w_idx] >= VBYTES)) { - if (w_iaddrs[w][i] == -1) - break; + /***************************/ + //do analysis + /***************************/ + //i = each window + for (w = 0; w < 2; w++) { // 2 - byte = w_bytes[w][i] / w_cnt[w][i]; + for (i = 0; i < IWINDOW; i++) { // 1024 - //First pass - //Determine - //gather/scatter? - gs = -1; - for (j = 0; j < w_cnt[w][i]; j++) { + if (w_iaddrs[w][i] == -1) + break; - //address and cl - maddr = w_maddr[w][i][j]; - assert(maddr > -1); + byte = w_bytes[w][i] / w_cnt[w][i]; - //previous addr - if (j == 0) - maddr_prev = maddr - 1; + //First pass + //Determine + //gather/scatter? 
+ gs = -1; + for (j = 0; j < w_cnt[w][i]; j++) { - //gather / scatter - if (maddr != maddr_prev) { - if ((gs == -1) && (abs(maddr - maddr_prev) > 1)) - gs = w; - } - maddr_prev = maddr; - } + //address and cl + maddr = w_maddr[w][i][j]; + assert(maddr > -1); - for (j = 0; j < w_cnt[w][i]; j++) { + //previous addr + if (j == 0) + maddr_prev = maddr - 1; - if (gs == -1) { - other_cnt++; - continue; + //gather / scatter + if (maddr != maddr_prev) { + if ((gs == -1) && (abs(maddr - maddr_prev) > 1)) + gs = w; + } + maddr_prev = maddr; } - } - - if (gs == 0) { // GATHER - gather_occ_avg += w_cnt[w][i]; - gather_cnt += 1.0; + for (j = 0; j < w_cnt[w][i]; j++) { - for (k = 0; k < NGS; k++) { - if (gather_iaddrs[k] == 0) { - gather_iaddrs[k] = w_iaddrs[w][i]; - gather_icnt[k]++; - gather_occ[k] += w_cnt[w][i]; - break; + if (gs == -1) { + other_cnt++; + continue; } + } - if (gather_iaddrs[k] == w_iaddrs[w][i]) { - gather_icnt[k]++; - gather_occ[k] += w_cnt[w][i]; - break; - } + if (gs == 0) { // GATHER - } + gather_occ_avg += w_cnt[w][i]; + gather_metrics.cnt += 1.0; - } else if (gs == 1) { // SCATTER + for (k = 0; k < NGS; k++) { + if (gather_iaddrs[k] == 0) { + gather_iaddrs[k] = w_iaddrs[w][i]; + gather_icnt[k]++; + gather_occ[k] += w_cnt[w][i]; + break; + } - scatter_occ_avg += w_cnt[w][i]; - scatter_cnt += 1.0; + if (gather_iaddrs[k] == w_iaddrs[w][i]) { + gather_icnt[k]++; + gather_occ[k] += w_cnt[w][i]; + break; + } - for (k = 0; k < NGS; k++) { - if (scatter_iaddrs[k] == 0) { - scatter_iaddrs[k] = w_iaddrs[w][i]; - scatter_icnt[k]++; - scatter_occ[k] += w_cnt[w][i]; - break; } - if (scatter_iaddrs[k] == w_iaddrs[w][i]) { - scatter_icnt[k]++; - scatter_occ[k] += w_cnt[w][i]; - break; + } else if (gs == 1) { // SCATTER + + scatter_occ_avg += w_cnt[w][i]; + scatter_metrics.cnt += 1.0; + + for (k = 0; k < NGS; k++) { + if (scatter_iaddrs[k] == 0) { + scatter_iaddrs[k] = w_iaddrs[w][i]; + scatter_icnt[k]++; + scatter_occ[k] += w_cnt[w][i]; + break; + } + + if 
(scatter_iaddrs[k] == w_iaddrs[w][i]) { + scatter_icnt[k]++; + scatter_occ[k] += w_cnt[w][i]; + break; + } } } - } - } //WINDOW i + } //WINDOW i - w_idx = 0; + w_idx = 0; - //reset windows - for (i = 0; i < IWINDOW; i++) { - w_iaddrs[w][i] = -1; - w_bytes[w][i] = 0; - w_cnt[w][i] = 0; - for (j = 0; j < VBYTES; j++) - w_maddr[w][i][j] = -1; - } - } // rw w - } //analysis + //reset windows + for (i = 0; i < IWINDOW; i++) { + w_iaddrs[w][i] = -1; + w_bytes[w][i] = 0; + w_cnt[w][i] = 0; + for (j = 0; j < VBYTES; j++) + w_maddr[w][i][j] = -1; + } + } // rw w + } //analysis - //Set window values - w_iaddrs[w_rw_idx][w_idx] = iaddr; - w_maddr[w_rw_idx][w_idx][w_cnt[w_rw_idx][w_idx]] = drline->addr / drline->size; - w_bytes[w_rw_idx][w_idx] += drline->size; + //Set window values + w_iaddrs[w_rw_idx][w_idx] = iaddr; + w_maddr[w_rw_idx][w_idx][w_cnt[w_rw_idx][w_idx]] = drline->addr / drline->size; + w_bytes[w_rw_idx][w_idx] += drline->size; - //num access per iaddr in loop - w_cnt[w_rw_idx][w_idx]++; + //num access per iaddr in loop + w_cnt[w_rw_idx][w_idx]++; - if (did_opcode) { + if (did_opcode) { - opcodes_mem++; - addrs++; - did_opcode = 0; + opcodes_mem++; + addrs++; + did_opcode = 0; + } else { + addrs++; + } + + /***********************/ + /** SOMETHING ELSE **/ + /***********************/ } else { - addrs++; + other++; } - /***********************/ - /** SOMETHING ELSE **/ - /***********************/ - } else { - other++; - } - - p_drtrace++; - drtrace_lines++; - - } //while drtrace + p_drtrace++; + drtrace_lines++; + } //while drtrace - //metrics - gather_occ_avg /= gather_cnt; - scatter_occ_avg /= scatter_cnt; - printf("\n RESULTS \n"); + //metrics + gather_occ_avg /= gather_metrics.cnt; + scatter_occ_avg /= scatter_metrics.cnt; - //close files - gzclose(fp_drtrace); + printf("\n RESULTS \n"); + //close files + gzclose(fp_drtrace); - printf("DRTRACE STATS\n"); - printf("DRTRACE LINES: %16lu\n", drtrace_lines); - printf("OPCODES: %16lu\n", opcodes); - 
printf("MEMOPCODES: %16lu\n", opcodes_mem); - printf("LOAD/STORES: %16lu\n", addrs); - printf("OTHER: %16lu\n", other); - printf("\n"); + printf("DRTRACE STATS\n"); + printf("DRTRACE LINES: %16lu\n", drtrace_lines); + printf("OPCODES: %16lu\n", opcodes); + printf("MEMOPCODES: %16lu\n", opcodes_mem); + printf("LOAD/STORES: %16lu\n", addrs); + printf("OTHER: %16lu\n", other); - printf("GATHER/SCATTER STATS: \n"); - printf("LOADS per GATHER: %16.3f\n", gather_occ_avg); - printf("STORES per SCATTER: %16.3f\n", scatter_occ_avg); - printf("GATHER COUNT: %16.3f (log2)\n", log(gather_cnt) / log(2.0)); - printf("SCATTER COUNT: %16.3f (log2)\n", log(scatter_cnt) / log(2.0)); - printf("OTHER COUNT: %16.3f (log2)\n", log(other_cnt) / log(2.0)); + printf("\n"); - //Find source lines + printf("GATHER/SCATTER STATS: \n"); + printf("LOADS per GATHER: %16.3f\n", gather_occ_avg); + printf("STORES per SCATTER: %16.3f\n", scatter_occ_avg); + printf("GATHER COUNT: %16.3f (log2)\n", log(gather_metrics.cnt) / log(2.0)); + printf("SCATTER COUNT: %16.3f (log2)\n", log(scatter_metrics.cnt) / log(2.0)); + printf("OTHER COUNT: %16.3f (log2)\n", log(other_cnt) / log(2.0)); - //Must have symbol - printf("\nSymbol table lookup for gathers..."); - fflush(stdout); - gather_cnt = update_source_lines(gather_iaddrs, gather_srcline, gather_icnt, binary); + //Find source lines - //Get top gathers - gather_ntop = get_top_target("GIADDR", (char**) gather_srcline, gather_icnt, gather_iaddrs, gather_top, gather_tot, gather_top_idx); + //Must have symbol + printf("\nSymbol table lookup for gathers..."); + fflush(stdout); + gather_metrics.cnt = update_source_lines(gather_iaddrs, gather_metrics.get_srcline(), gather_icnt, binary); + //Get top gathers + gather_metrics.ntop = get_top_target("GIADDR", (char**) gather_metrics.get_srcline(), gather_icnt, gather_iaddrs, + gather_metrics.top, gather_metrics.tot, gather_metrics.top_idx); - //Find source lines - printf("Symbol table lookup for scatters..."); - 
scatter_cnt = update_source_lines(scatter_iaddrs, scatter_srcline, scatter_icnt, binary); - //Get top scatters - //printf("\nTOP SCATTERS\n"); - scatter_ntop = get_top_target("SIADDR", (char**)scatter_srcline, scatter_icnt, scatter_iaddrs, scatter_top, scatter_tot, scatter_top_idx); + //Find source lines + printf("Symbol table lookup for scatters..."); + scatter_metrics.cnt = update_source_lines(scatter_iaddrs, scatter_metrics.get_srcline(), scatter_icnt, binary); + //Get top scatters + //printf("\nTOP SCATTERS\n"); + scatter_metrics.ntop = get_top_target("SIADDR", (char**) scatter_metrics.get_srcline(), scatter_icnt, scatter_iaddrs, + scatter_metrics.top, scatter_metrics.tot, scatter_metrics.top_idx); - //Second Pass - //Open trace - fp_drtrace = gzopen(argv[1], "hrb"); - if (fp_drtrace == NULL) { - printf("ERROR: Could not open %s!\n", argv[1]); - exit(-1); - } + //Second Pass + //Open trace + fp_drtrace = gzopen(argv[1], "hrb"); + if (fp_drtrace == NULL) { + printf("ERROR: Could not open %s!\n", argv[1]); + exit(-1); + } - second_pass(fp_drtrace, drtrace, p_drtrace, gather_ntop, scatter_ntop, gather_offset, scatter_offset, - gather_patterns, scatter_patterns, gather_base, scatter_base, gather_top, scatter_top); + second_pass(fp_drtrace, drtrace, p_drtrace, gather_metrics, scatter_metrics); - gzclose(fp_drtrace); + gzclose(fp_drtrace); - printf("\n"); + printf("\n"); - normalize_stats( gather_ntop, scatter_ntop, gather_offset, scatter_offset, gather_patterns, scatter_patterns); + normalize_stats(gather_metrics); + normalize_stats(scatter_metrics); - create_spatter_file(argv[1], - gather_ntop, scatter_ntop, gather_offset, scatter_offset, gather_patterns, scatter_patterns, - gather_tot, scatter_tot, gather_top, gather_top_idx, scatter_top, scatter_top_idx, - gather_srcline, scatter_srcline, gather_cnt, scatter_cnt - ); + create_spatter_file(argv[1], gather_metrics, scatter_metrics); - for (i = 0; i < NTOP; i++) { - free(gather_patterns[i]); - 
free(scatter_patterns[i]); + } + catch (const std::exception & ex) + { + std::cerr << "Error: " << ex.what() << std::endl; + exit(-1); } return 0; From 7ee76f7c8acfbd1be449e514411765e43dd3c402 Mon Sep 17 00:00:00 2001 From: christopher Date: Sun, 31 Mar 2024 18:03:24 -0400 Subject: [PATCH 08/76] Clean-ups and move some defines and Metrics to gs_patterns.h. --- CMakeLists.txt | 3 +- gs_patterns.cpp | 183 ++++++------------------------------------------ gs_patterns.h | 110 +++++++++++++++++++++++++++++ 3 files changed, 135 insertions(+), 161 deletions(-) create mode 100644 gs_patterns.h diff --git a/CMakeLists.txt b/CMakeLists.txt index bbc5bc5..41599e8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,6 +4,7 @@ set (CMAKE_VERBOSE_MAKEFILE "1") project( gs_patterns VERSION 1.0 LANGUAGES CXX) -add_executable(gs_patterns gs_patterns.cpp) +add_executable(gs_patterns gs_patterns.cpp + gs_patterns.h) set(CMAKE_CXX_STANDARD_LIBRARIES "-lm -lz ${CMAKE_CXX_STANDARD_LIBRARIES}") diff --git a/gs_patterns.cpp b/gs_patterns.cpp index 5290328..e0e227e 100644 --- a/gs_patterns.cpp +++ b/gs_patterns.cpp @@ -13,32 +13,7 @@ #include #include -#define MAX(X, Y) (((X) < (Y)) ? Y : X) -#define MIN(X, Y) (((X) > (Y)) ? Y : X) -#define ABS(X) (((X) < 0) ? 
(-1) * (X) : X) - -//triggers -#define SAMPLE 0 -#define PERSAMPLE 10000000 -//#define PERSAMPLE 1000 - -//info -#define CLSIZE (64) -#define VBITS (512) -#define NBUFS (1LL<<10) -#define IWINDOW (1024) -#define NGS (8096) - -//patterns -#define USTRIDES 1024 //Threshold for number of accesses -#define NSTRIDES 15 //Threshold for number of unique distances -#define OUTTHRESH (0.5) //Threshold for percentage of distances at boundaries of histogram -#define NTOP (10) -#define PSIZE (1<<23) -//#define PSIZE (1<<18) - -//DONT CHANGE -#define VBYTES (VBITS/8) +#include "gs_patterns.h" //Terminal colors #define KNRM "\x1B[0m" @@ -48,84 +23,6 @@ #define KMAG "\x1B[35m" #define KCYN "\x1B[36m" -//address status -#define ADDREND (0xFFFFFFFFFFFFFFFFUL) -#define ADDRUSYNC (0xFFFFFFFFFFFFFFFEUL) - -#define MAX_LINE_LENGTH 1024 - -typedef uintptr_t addr_t; - -//FROM DR SOURCE -//DR trace -struct _trace_entry_t { - unsigned short type; // 2 bytes: trace_type_t - unsigned short size; - union { - addr_t addr; - unsigned char length[sizeof(addr_t)]; - }; -} __attribute__((packed)); -typedef struct _trace_entry_t trace_entry_t; - -class Metrics -{ -public: - typedef enum { GATHER=0, SCATTER } metrics_type; - - Metrics(metrics_type mType) : _mType(mType) - { - /// TODO: Convert to new/free - for (int j = 0; j < NTOP; j++) { - patterns[j] = (int64_t *) calloc(PSIZE, sizeof(int64_t)); - if (patterns[j] == NULL) { - printf("ERROR: Could not allocate gather_patterns!\n"); - throw std::runtime_error("Could not allocate patterns for " + type_as_string()); //exit(-1); - } - } - } - - ~Metrics() - { - /// TODO: Convert to new/free - for (int i = 0; i < NTOP; i++) { - free(patterns[i]); - } - } - - Metrics(const Metrics &) = delete; - Metrics & operator=(const Metrics & right) = delete; - - std::string type_as_string() { return !_mType ? "GATHER" : "SCATTER"; } - std::string getName() { return !_mType ? "Gather" : "Scatter"; } - std::string getShortName() { return !_mType ? 
"G" : "S"; } - - auto get_srcline() { return srcline[_mType]; } - -//private: - int ntop = 0; - double cnt = 0.0; - int offset[NTOP] = {0}; - - addr_t tot[NTOP] = {0}; - addr_t top[NTOP] = {0}; - addr_t top_idx[NTOP] = {0}; - - int64_t* patterns[NTOP] = {0}; - -private: - static char srcline[2][NGS][MAX_LINE_LENGTH]; // was static (may move out and have 1 per type) - - metrics_type _mType; -}; - -/* -class Address_Instr -{ -public: -}; -*/ - static inline int popcount(uint64_t x) { int c; @@ -456,8 +353,8 @@ void normalize_stats(Metrics & target_metrics) double update_source_lines( addr_t* target_iaddrs, - char target_srcline[][MAX_LINE_LENGTH], //was char** int64_t* target_icnt, // updated + Metrics & target_metrics, const char* binary_file_name) { double scatter_cnt = 0.0; @@ -469,8 +366,8 @@ double update_source_lines( if (target_iaddrs[k] == 0) { break; } - translate_iaddr(binary_file_name, target_srcline[k], target_iaddrs[k]); - if (startswith(target_srcline[k], "?")) + translate_iaddr(binary_file_name, target_metrics.get_srcline()[k], target_iaddrs[k]); + if (startswith(target_metrics.get_srcline()[k], "?")) target_icnt[k] = 0; scatter_cnt += target_icnt[k]; @@ -580,14 +477,9 @@ void second_pass(gzFile fp_drtrace, trace_entry_t* drtrace, trace_entry_t* p_drt } int get_top_target( - const char* target_type, - char** target_srcline, int64_t* target_icnt, // updated addr_t* target_iaddrs, // updated - addr_t* target_top, // updated - addr_t* target_tot, // updated - addr_t* target_top_idx // updates -) + Metrics & target_metrics) { int target_ntop = 0; int bestcnt; @@ -618,12 +510,12 @@ int get_top_target( break; } else { target_ntop++; - target_top[j] = best_iaddr; - target_top_idx[j] = bestidx; - target_tot[j] = target_icnt[bestidx]; + target_metrics.top[j] = best_iaddr; + target_metrics.top_idx[j] = bestidx; + target_metrics.tot[j] = target_icnt[bestidx]; target_icnt[bestidx] = 0; - //printf("%s -- %016lx: %16lu -- %s\n", target_type, target_top[j], 
target_tot[j], target_srcline[bestidx]); + //printf("%sIADDR -- %016lx: %16lu -- %s\n", target_metrics.getShortName().c_str(), target_metrics.top[j], target_metrics.tot[j], target_metrics.get_srcline()[bestidx]); } } @@ -669,8 +561,6 @@ int main(int argc, char **argv) { int64_t mcl; int64_t gather_bytes_hist[100] = {0}; int64_t scatter_bytes_hist[100] = {0}; - ///double gather_cnt = 0.0; - ///double scatter_cnt = 0.0; double other_cnt = 0.0; double gather_score = 0.0; double gather_occ_avg = 0.0; @@ -686,35 +576,16 @@ int main(int argc, char **argv) { static int64_t w_cnt[2][IWINDOW]; //First pass to find top gather / scatters - ///static char gather_srcline[NGS][MAX_LINE_LENGTH]; static addr_t gather_iaddrs[NGS] = {0}; static int64_t gather_icnt[NGS] = {0}; static int64_t gather_occ[NGS] = {0}; - ///static char scatter_srcline[NGS][MAX_LINE_LENGTH]; static addr_t scatter_iaddrs[NGS] = {0}; static int64_t scatter_icnt[NGS] = {0}; static int64_t scatter_occ[NGS] = {0}; - //Second Pass - int dotrace; - int bestcnt; - int bestidx; - int gather_ntop = 0; - int scatter_ntop = 0; - static int gather_offset[NTOP] = {0}; - static int scatter_offset[NTOP] = {0}; - static addr_t best_iaddr; - ///static addr_t gather_tot[NTOP] = {0}; - ///static addr_t scatter_tot[NTOP] = {0}; - ///static addr_t gather_top[NTOP] = {0}; - ///static addr_t gather_top_idx[NTOP] = {0}; - ///static addr_t scatter_top[NTOP] = {0}; - ///static addr_t scatter_top_idx[NTOP] = {0}; static addr_t gather_base[NTOP] = {0}; static addr_t scatter_base[NTOP] = {0}; - //static int64_t *gather_patterns[NTOP] = {0}; - //static int64_t *scatter_patterns[NTOP] = {0}; if (argc == 3) { @@ -734,13 +605,8 @@ int main(int argc, char **argv) { try { - //Metrics gather_metrics(Metrics::GATHER); - //Metrics scatter_metrics(Metrics::SCATTER); - - Metrics* g = new Metrics(Metrics::GATHER); - Metrics* s = new Metrics(Metrics::SCATTER); - Metrics & gather_metrics = *g; - Metrics & scatter_metrics = *s; + Metrics 
gather_metrics(Metrics::GATHER); + Metrics scatter_metrics(Metrics::SCATTER); //init window arrays for (w = 0; w < 2; w++) { @@ -958,7 +824,6 @@ int main(int argc, char **argv) { //close files gzclose(fp_drtrace); - printf("DRTRACE STATS\n"); printf("DRTRACE LINES: %16lu\n", drtrace_lines); printf("OPCODES: %16lu\n", opcodes); @@ -975,28 +840,23 @@ int main(int argc, char **argv) { printf("SCATTER COUNT: %16.3f (log2)\n", log(scatter_metrics.cnt) / log(2.0)); printf("OTHER COUNT: %16.3f (log2)\n", log(other_cnt) / log(2.0)); - //Find source lines - - //Must have symbol + // Find source lines for gathers - Must have symbol printf("\nSymbol table lookup for gathers..."); fflush(stdout); - gather_metrics.cnt = update_source_lines(gather_iaddrs, gather_metrics.get_srcline(), gather_icnt, binary); + gather_metrics.cnt = update_source_lines(gather_iaddrs, gather_icnt, gather_metrics, binary); //Get top gathers - gather_metrics.ntop = get_top_target("GIADDR", (char**) gather_metrics.get_srcline(), gather_icnt, gather_iaddrs, - gather_metrics.top, gather_metrics.tot, gather_metrics.top_idx); - + gather_metrics.ntop = get_top_target(gather_icnt, gather_iaddrs, gather_metrics); - //Find source lines + // Find source lines for scatters printf("Symbol table lookup for scatters..."); - scatter_metrics.cnt = update_source_lines(scatter_iaddrs, scatter_metrics.get_srcline(), scatter_icnt, binary); + scatter_metrics.cnt = update_source_lines(scatter_iaddrs, scatter_icnt, scatter_metrics, binary); //Get top scatters - //printf("\nTOP SCATTERS\n"); - scatter_metrics.ntop = get_top_target("SIADDR", (char**) scatter_metrics.get_srcline(), scatter_icnt, scatter_iaddrs, - scatter_metrics.top, scatter_metrics.tot, scatter_metrics.top_idx); + scatter_metrics.ntop = get_top_target(scatter_icnt, scatter_iaddrs, scatter_metrics); + + // ----------------- Second Pass ----------------- - //Second Pass //Open trace fp_drtrace = gzopen(argv[1], "hrb"); if (fp_drtrace == NULL) { @@ -1007,12 
+867,15 @@ int main(int argc, char **argv) { second_pass(fp_drtrace, drtrace, p_drtrace, gather_metrics, scatter_metrics); gzclose(fp_drtrace); - printf("\n"); + // ----------------- Normalize ----------------- + normalize_stats(gather_metrics); normalize_stats(scatter_metrics); + // ----------------- Create Spatter File ----------------- + create_spatter_file(argv[1], gather_metrics, scatter_metrics); } diff --git a/gs_patterns.h b/gs_patterns.h new file mode 100644 index 0000000..cd2f4b9 --- /dev/null +++ b/gs_patterns.h @@ -0,0 +1,110 @@ +// +// Created by christopher on 3/31/24. +// + +#pragma once + +#define MAX(X, Y) (((X) < (Y)) ? Y : X) +#define MIN(X, Y) (((X) > (Y)) ? Y : X) +#define ABS(X) (((X) < 0) ? (-1) * (X) : X) + +//triggers +#define SAMPLE 0 +#define PERSAMPLE 10000000 +//#define PERSAMPLE 1000 + +//info +#define CLSIZE (64) +#define VBITS (512) +#define NBUFS (1LL<<10) +#define IWINDOW (1024) +#define NGS (8096) + +//patterns +#define USTRIDES 1024 //Threshold for number of accesses +#define NSTRIDES 15 //Threshold for number of unique distances +#define OUTTHRESH (0.5) //Threshold for percentage of distances at boundaries of histogram +#define NTOP (10) +#define PSIZE (1<<23) +//#define PSIZE (1<<18) + +//DONT CHANGE +#define VBYTES (VBITS/8) + +//address status +#define ADDREND (0xFFFFFFFFFFFFFFFFUL) +#define ADDRUSYNC (0xFFFFFFFFFFFFFFFEUL) + +#define MAX_LINE_LENGTH 1024 + +typedef uintptr_t addr_t; + +//FROM DR SOURCE +//DR trace +struct _trace_entry_t { + unsigned short type; // 2 bytes: trace_type_t + unsigned short size; + union { + addr_t addr; + unsigned char length[sizeof(addr_t)]; + }; +} __attribute__((packed)); +typedef struct _trace_entry_t trace_entry_t; + +class Metrics +{ +public: + typedef enum { GATHER=0, SCATTER } metrics_type; + + Metrics(metrics_type mType) : _mType(mType) + { + /// TODO: Convert to new/free + for (int j = 0; j < NTOP; j++) { + patterns[j] = (int64_t *) calloc(PSIZE, sizeof(int64_t)); + if (patterns[j] == 
NULL) { + printf("ERROR: Could not allocate gather_patterns!\n"); + throw std::runtime_error("Could not allocate patterns for " + type_as_string()); //exit(-1); + } + } + } + + ~Metrics() + { + /// TODO: Convert to new/free + for (int i = 0; i < NTOP; i++) { + free(patterns[i]); + } + } + + Metrics(const Metrics &) = delete; + Metrics & operator=(const Metrics & right) = delete; + + std::string type_as_string() { return !_mType ? "GATHER" : "SCATTER"; } + std::string getName() { return !_mType ? "Gather" : "Scatter"; } + std::string getShortName() { return !_mType ? "G" : "S"; } + + auto get_srcline() { return srcline[_mType]; } + +//private: + int ntop = 0; + double cnt = 0.0; + int offset[NTOP] = {0}; + + addr_t tot[NTOP] = {0}; + addr_t top[NTOP] = {0}; + addr_t top_idx[NTOP] = {0}; + + int64_t* patterns[NTOP] = {0}; + +private: + static char srcline[2][NGS][MAX_LINE_LENGTH]; // was static (may move out and have 1 per type) + + metrics_type _mType; +}; + +/* +class Address_Instr +{ +public: +}; +*/ From 36b962d7fc13743799a07e227eccc144d857f61c Mon Sep 17 00:00:00 2001 From: christopher Date: Tue, 2 Apr 2024 01:01:23 -0400 Subject: [PATCH 09/76] Refatored first pass and some required interfaces required carrying state, remove some statics and unused variables. Tested. 
--- gs_patterns.cpp | 538 +++++++++++++++++++++++------------------------- gs_patterns.h | 91 +++++++- 2 files changed, 340 insertions(+), 289 deletions(-) diff --git a/gs_patterns.cpp b/gs_patterns.cpp index e0e227e..bc4507e 100644 --- a/gs_patterns.cpp +++ b/gs_patterns.cpp @@ -23,6 +23,19 @@ #define KMAG "\x1B[35m" #define KCYN "\x1B[36m" +// Class Static data initialization +char Metrics::srcline[2][NGS][MAX_LINE_LENGTH]; +addr_t InstrInfo::iaddrs[2][NGS]; +int64_t InstrInfo::icnt[2][NGS]; +int64_t InstrInfo::occ[2][NGS]; + +#if 0 +int64_t InstrWindow::w_iaddrs[2][IWINDOW]; +int64_t InstrWindow::w_bytes[2][IWINDOW]; +int64_t InstrWindow::w_maddr[2][IWINDOW][VBYTES]; +int64_t InstrWindow::w_cnt[2][IWINDOW]; +#endif + static inline int popcount(uint64_t x) { int c; @@ -351,11 +364,7 @@ void normalize_stats(Metrics & target_metrics) } } -double update_source_lines( - addr_t* target_iaddrs, - int64_t* target_icnt, // updated - Metrics & target_metrics, - const char* binary_file_name) +double update_source_lines(InstrInfo & target_iinfo, Metrics & target_metrics, const char* binary_file_name) { double scatter_cnt = 0.0; @@ -363,22 +372,21 @@ double update_source_lines( //Check it is not a library for (int k = 0; k < NGS; k++) { - if (target_iaddrs[k] == 0) { + if (target_iinfo.get_iaddrs()[k] == 0) { break; } - translate_iaddr(binary_file_name, target_metrics.get_srcline()[k], target_iaddrs[k]); + translate_iaddr(binary_file_name, target_metrics.get_srcline()[k], target_iinfo.get_iaddrs()[k]); if (startswith(target_metrics.get_srcline()[k], "?")) - target_icnt[k] = 0; + target_iinfo.get_icnt()[k] = 0; - scatter_cnt += target_icnt[k]; + scatter_cnt += target_iinfo.get_icnt()[k]; } printf("done.\n"); return scatter_cnt; } -void second_pass(gzFile fp_drtrace, trace_entry_t* drtrace, trace_entry_t* p_drtrace, - Metrics & gather_metrics, Metrics & scatter_metrics) +void second_pass(gzFile fp_drtrace, Metrics & gather_metrics, Metrics & scatter_metrics) { uint64_t mcnt = 
0; int iret = 0; @@ -387,13 +395,16 @@ void second_pass(gzFile fp_drtrace, trace_entry_t* drtrace, trace_entry_t* p_drt int64_t maddr; int i = 0; + // TODO: remove these statics static addr_t gather_base[NTOP] = {0}; static addr_t scatter_base[NTOP] = {0}; - p_drtrace = NULL; - int breakout = 0; + bool breakout = false; printf("\nSecond pass to fill gather / scatter subtraces\n"); fflush(stdout); + + trace_entry_t* p_drtrace = NULL; + static trace_entry_t drtrace[NBUFS]; while (drline_read(fp_drtrace, drtrace, &p_drtrace, &iret) && !breakout) { //decode drtrace @@ -437,7 +448,7 @@ void second_pass(gzFile fp_drtrace, trace_entry_t* drtrace, trace_entry_t* p_drt //Add index if (gather_metrics.offset[i] >= PSIZE) { printf("WARNING: Need to increase PSIZE. Truncating trace...\n"); - breakout = 1; + breakout = true; } //printf("g -- %d % d\n", i, gather_offset[i]); fflush(stdout); gather_metrics.patterns[i][gather_metrics.offset[i]++] = (int64_t) (maddr - gather_base[i]); @@ -461,7 +472,7 @@ void second_pass(gzFile fp_drtrace, trace_entry_t* drtrace, trace_entry_t* p_drt //Add index if (scatter_metrics.offset[i] >= PSIZE) { printf("WARNING: Need to increase PSIZE. 
Truncating trace...\n"); - breakout = 1; + breakout = true; } scatter_metrics.patterns[i][scatter_metrics.offset[i]++] = (int64_t) (maddr - scatter_base[i]); break; @@ -476,10 +487,7 @@ void second_pass(gzFile fp_drtrace, trace_entry_t* drtrace, trace_entry_t* p_drt } //while drtrace } -int get_top_target( - int64_t* target_icnt, // updated - addr_t* target_iaddrs, // updated - Metrics & target_metrics) +int get_top_target(InstrInfo & target_iinfo, Metrics & target_metrics) { int target_ntop = 0; int bestcnt; @@ -492,16 +500,16 @@ int get_top_target( for (int k = 0; k < NGS; k++) { - if (target_icnt[k] == 0) + if (target_iinfo.get_icnt()[k] == 0) continue; - if (target_iaddrs[k] == 0) { + if (target_iinfo.get_iaddrs()[k] == 0) { break; } - if (target_icnt[k] > bestcnt) { - bestcnt = target_icnt[k]; - best_iaddr = target_iaddrs[k]; + if (target_iinfo.get_icnt()[k] > bestcnt) { + bestcnt = target_iinfo.get_icnt()[k]; + best_iaddr = target_iinfo.get_iaddrs()[k]; bestidx = k; } } @@ -512,8 +520,8 @@ int get_top_target( target_ntop++; target_metrics.top[j] = best_iaddr; target_metrics.top_idx[j] = bestidx; - target_metrics.tot[j] = target_icnt[bestidx]; - target_icnt[bestidx] = 0; + target_metrics.tot[j] = target_iinfo.get_icnt()[bestidx]; + target_iinfo.get_icnt()[bestidx] = 0; //printf("%sIADDR -- %016lx: %16lu -- %s\n", target_metrics.getShortName().c_str(), target_metrics.top[j], target_metrics.tot[j], target_metrics.get_srcline()[bestidx]); } @@ -522,302 +530,264 @@ int get_top_target( return target_ntop; } -char Metrics::srcline[2][NGS][MAX_LINE_LENGTH]; +void handle_trace_entry( + trace_entry_t *drline, + TraceInfo & trace_info, + InstrInfo & gather_iinfo, + InstrInfo & scatter_iinfo, + Metrics & gather_metrics, + Metrics & scatter_metrics, + InstrWindow & iw) +{ + int i, j, k, w; + int w_rw_idx; + int w_idx; + int gs; -int main(int argc, char **argv) { + /*****************************/ + /** INSTR 0xa-0x10 and 0x1e **/ + /*****************************/ + if 
(((drline->type >= 0xa) && (drline->type <= 0x10)) || (drline->type == 0x1e)) { - //generic - int i, j, k, m, n, w; - int iwindow = 0; - int iret = 0; - int ret; - int did_opcode = 0; - int windowfull = 0; - int byte; - int do_gs_traces = 0; - int do_filter = 1; - int64_t ngs = 0; - char *eptr; - char binary[1024]; - char srcline[MAX_LINE_LENGTH]; + iw.iaddr = drline->addr; - //dtrace vars - int64_t drtrace_lines = 0; - trace_entry_t *drline; - trace_entry_t *drline2; - trace_entry_t *p_drtrace = NULL; - static trace_entry_t drtrace[NBUFS]; - gzFile fp_drtrace; - FILE *fp_gs; + //nops + trace_info.opcodes++; + trace_info.did_opcode = true; - //metrics - int gs; - uint64_t opcodes = 0; - uint64_t opcodes_mem = 0; - uint64_t addrs = 0; - uint64_t other = 0; - int64_t maddr_prev; - int64_t maddr; - int64_t mcl; - int64_t gather_bytes_hist[100] = {0}; - int64_t scatter_bytes_hist[100] = {0}; - double other_cnt = 0.0; - double gather_score = 0.0; - double gather_occ_avg = 0.0; - double scatter_occ_avg = 0.0; - - //windows - int w_rw_idx; - int w_idx; - addr_t iaddr; - static int64_t w_iaddrs[2][IWINDOW]; - static int64_t w_bytes[2][IWINDOW]; - static int64_t w_maddr[2][IWINDOW][VBYTES]; - static int64_t w_cnt[2][IWINDOW]; - - //First pass to find top gather / scatters - static addr_t gather_iaddrs[NGS] = {0}; - static int64_t gather_icnt[NGS] = {0}; - static int64_t gather_occ[NGS] = {0}; - static addr_t scatter_iaddrs[NGS] = {0}; - static int64_t scatter_icnt[NGS] = {0}; - static int64_t scatter_occ[NGS] = {0}; - - static addr_t best_iaddr; - static addr_t gather_base[NTOP] = {0}; - static addr_t scatter_base[NTOP] = {0}; + /***********************/ + /** MEM 0x00 and 0x01 **/ + /***********************/ + } else if ((drline->type == 0x0) || (drline->type == 0x1)) { - if (argc == 3) { + w_rw_idx = drline->type; - // 1 open dr trace - fp_drtrace = gzopen(argv[1], "hrb"); - if (fp_drtrace == NULL) { - printf("ERROR: Could not open %s!\n", argv[1]); - exit(-1); + 
//printf("M DRTRACE -- iaddr: %016lx addr: %016lx cl_start: %d bytes: %d\n", + // iaddr, drline->addr, drline->addr % 64, drline->size); + + if ((++trace_info.mcnt % PERSAMPLE) == 0) { +#if SAMPLE + break; +#endif + printf("."); + fflush(stdout); } - strcpy(binary, argv[2]); + //is iaddr in window + w_idx = -1; + for (i = 0; i < IWINDOW; i++) { - } else { - printf("ERROR: Invalid arguments, should be: trace.gz binary\n"); - exit(-1); - } - - try { + //new iaddr + if (iw.w_iaddrs[w_rw_idx][i] == -1) { + w_idx = i; + break; - Metrics gather_metrics(Metrics::GATHER); - Metrics scatter_metrics(Metrics::SCATTER); - - //init window arrays - for (w = 0; w < 2; w++) { - for (i = 0; i < IWINDOW; i++) { - w_iaddrs[w][i] = -1; - w_bytes[w][i] = 0; - w_cnt[w][i] = 0; - for (j = 0; j < VBYTES; j++) - w_maddr[w][i][j] = -1; + //iaddr exists + } else if (iw.w_iaddrs[w_rw_idx][i] == iw.iaddr) { + w_idx = i; + break; } } - uint64_t mcnt = 0; - uint64_t unique_iaddrs = 0; - int unsynced = 0; - uint64_t unsync_cnt = 0; - addr_t ciaddr; + //new window + if ((w_idx == -1) || (iw.w_bytes[w_rw_idx][w_idx] >= VBYTES) || + (iw.w_cnt[w_rw_idx][w_idx] >= VBYTES)) { - printf("First pass to find top gather / scatter iaddresses\n"); - fflush(stdout); + /***************************/ + //do analysis + /***************************/ + //i = each window + for (w = 0; w < 2; w++) { // 2 - //read dr trace entries instrs - //printf("%16s %16s %16s %16s %16s %16s\n", "iaddr", "rw", "byte", "bytes", "cnt", "maddr"); - while (drline_read(fp_drtrace, drtrace, &p_drtrace, &iret)) { + for (i = 0; i < IWINDOW; i++) { // 1024 - //decode drtrace - drline = p_drtrace; + if (iw.w_iaddrs[w][i] == -1) + break; - /*****************************/ - /** INSTR 0xa-0x10 and 0x1e **/ - /*****************************/ - if (((drline->type >= 0xa) && (drline->type <= 0x10)) || (drline->type == 0x1e)) { + int byte = iw.w_bytes[w][i] / iw.w_cnt[w][i]; - //iaddr - iaddr = drline->addr; + //First pass + //Determine + 
//gather/scatter? + gs = -1; + for (j = 0; j < iw.w_cnt[w][i]; j++) { - //nops - opcodes++; - did_opcode = 1; + //address and cl + iw.maddr = iw.w_maddr[w][i][j]; + assert(iw.maddr > -1); - /***********************/ - /** MEM 0x00 and 0x01 **/ - /***********************/ - } else if ((drline->type == 0x0) || (drline->type == 0x1)) { + //previous addr + if (j == 0) + iw.maddr_prev = iw.maddr - 1; - w_rw_idx = drline->type; + //gather / scatter + if (iw.maddr != iw.maddr_prev) { + if ((gs == -1) && (abs(iw.maddr - iw.maddr_prev) > 1)) + gs = w; + } + iw.maddr_prev = iw.maddr; + } - //printf("M DRTRACE -- iaddr: %016lx addr: %016lx cl_start: %d bytes: %d\n", - // iaddr, drline->addr, drline->addr % 64, drline->size); + for (j = 0; j < iw.w_cnt[w][i]; j++) { - if ((++mcnt % PERSAMPLE) == 0) { - #if SAMPLE - break; - #endif - printf("."); - fflush(stdout); - } + if (gs == -1) { + trace_info.other_cnt++; + continue; + } + } - //is iaddr in window - w_idx = -1; - for (i = 0; i < IWINDOW; i++) { + if (gs == 0) { // GATHER - //new iaddr - if (w_iaddrs[w_rw_idx][i] == -1) { - w_idx = i; - break; + trace_info.gather_occ_avg += iw.w_cnt[w][i]; + gather_metrics.cnt += 1.0; - //iaddr exists - } else if (w_iaddrs[w_rw_idx][i] == iaddr) { - w_idx = i; - break; + for (k = 0; k < NGS; k++) { + if (gather_iinfo.get_iaddrs()[k] == 0) { + gather_iinfo.get_iaddrs()[k] = iw.w_iaddrs[w][i]; + (gather_iinfo.get_icnt()[k])++; + gather_iinfo.get_occ()[k] += iw.w_cnt[w][i]; + break; + } + + if (gather_iinfo.get_iaddrs()[k] == iw.w_iaddrs[w][i]) { + (gather_iinfo.get_icnt()[k])++; + gather_iinfo.get_occ()[k] += iw.w_cnt[w][i]; + break; + } + + } + + } else if (gs == 1) { // SCATTER + + trace_info.scatter_occ_avg += iw.w_cnt[w][i]; + scatter_metrics.cnt += 1.0; + + for (k = 0; k < NGS; k++) { + if (scatter_iinfo.get_iaddrs()[k] == 0) { + scatter_iinfo.get_iaddrs()[k] = iw.w_iaddrs[w][i]; + (scatter_iinfo.get_icnt()[k])++; + scatter_iinfo.get_occ()[k] += iw.w_cnt[w][i]; + break; + } + + if 
(scatter_iinfo.get_iaddrs()[k] == iw.w_iaddrs[w][i]) { + (scatter_iinfo.get_icnt()[k])++; + scatter_iinfo.get_occ()[k] += iw.w_cnt[w][i]; + break; + } + } } + } //WINDOW i + + w_idx = 0; + + //reset windows + for (i = 0; i < IWINDOW; i++) { + iw.w_iaddrs[w][i] = -1; + iw.w_bytes[w][i] = 0; + iw.w_cnt[w][i] = 0; + for (j = 0; j < VBYTES; j++) + iw.w_maddr[w][i][j] = -1; } + } // rw w + } //analysis - //new window - if ((w_idx == -1) || (w_bytes[w_rw_idx][w_idx] >= VBYTES) || - (w_cnt[w_rw_idx][w_idx] >= VBYTES)) { + //Set window values + iw.w_iaddrs[w_rw_idx][w_idx] = iw.iaddr; + iw.w_maddr[w_rw_idx][w_idx][iw.w_cnt[w_rw_idx][w_idx]] = drline->addr / drline->size; + iw.w_bytes[w_rw_idx][w_idx] += drline->size; - /***************************/ - //do analysis - /***************************/ - //i = each window - for (w = 0; w < 2; w++) { // 2 + //num access per iaddr in loop + iw.w_cnt[w_rw_idx][w_idx]++; - for (i = 0; i < IWINDOW; i++) { // 1024 + if (trace_info.did_opcode) { - if (w_iaddrs[w][i] == -1) - break; + trace_info.opcodes_mem++; + trace_info.addrs++; + trace_info.did_opcode = false; - byte = w_bytes[w][i] / w_cnt[w][i]; + } else { + trace_info.addrs++; + } - //First pass - //Determine - //gather/scatter? 
- gs = -1; - for (j = 0; j < w_cnt[w][i]; j++) { + /***********************/ + /** SOMETHING ELSE **/ + /***********************/ + } else { + trace_info.other++; + } + +} - //address and cl - maddr = w_maddr[w][i][j]; - assert(maddr > -1); +void first_pass( + TraceInfo & trace_info, + InstrInfo & gather_iinfo, + InstrInfo & scatter_iinfo, + Metrics & gather_metrics, + Metrics & scatter_metrics, + gzFile & fp_drtrace +) +{ + int iret = 0; + trace_entry_t *drline; + InstrWindow iw; - //previous addr - if (j == 0) - maddr_prev = maddr - 1; + printf("First pass to find top gather / scatter iaddresses\n"); + fflush(stdout); - //gather / scatter - if (maddr != maddr_prev) { - if ((gs == -1) && (abs(maddr - maddr_prev) > 1)) - gs = w; - } - maddr_prev = maddr; - } + trace_entry_t *p_drtrace = NULL; + static trace_entry_t drtrace[NBUFS]; + while (drline_read(fp_drtrace, drtrace, &p_drtrace, &iret)) { + //decode drtrace + drline = p_drtrace; - for (j = 0; j < w_cnt[w][i]; j++) { + handle_trace_entry(drline, trace_info, gather_iinfo, scatter_iinfo, gather_metrics, scatter_metrics, iw); - if (gs == -1) { - other_cnt++; - continue; - } - } + p_drtrace++; + trace_info.drtrace_lines++; - if (gs == 0) { // GATHER - - gather_occ_avg += w_cnt[w][i]; - gather_metrics.cnt += 1.0; - - for (k = 0; k < NGS; k++) { - if (gather_iaddrs[k] == 0) { - gather_iaddrs[k] = w_iaddrs[w][i]; - gather_icnt[k]++; - gather_occ[k] += w_cnt[w][i]; - break; - } - - if (gather_iaddrs[k] == w_iaddrs[w][i]) { - gather_icnt[k]++; - gather_occ[k] += w_cnt[w][i]; - break; - } - - } - - } else if (gs == 1) { // SCATTER - - scatter_occ_avg += w_cnt[w][i]; - scatter_metrics.cnt += 1.0; - - for (k = 0; k < NGS; k++) { - if (scatter_iaddrs[k] == 0) { - scatter_iaddrs[k] = w_iaddrs[w][i]; - scatter_icnt[k]++; - scatter_occ[k] += w_cnt[w][i]; - break; - } - - if (scatter_iaddrs[k] == w_iaddrs[w][i]) { - scatter_icnt[k]++; - scatter_occ[k] += w_cnt[w][i]; - break; - } - } - } - } //WINDOW i + } //while drtrace +} - 
w_idx = 0; +int main(int argc, char **argv) { - //reset windows - for (i = 0; i < IWINDOW; i++) { - w_iaddrs[w][i] = -1; - w_bytes[w][i] = 0; - w_cnt[w][i] = 0; - for (j = 0; j < VBYTES; j++) - w_maddr[w][i][j] = -1; - } - } // rw w - } //analysis + int ret; + char binary[1024]; + gzFile fp_drtrace; + FILE *fp_gs; - //Set window values - w_iaddrs[w_rw_idx][w_idx] = iaddr; - w_maddr[w_rw_idx][w_idx][w_cnt[w_rw_idx][w_idx]] = drline->addr / drline->size; - w_bytes[w_rw_idx][w_idx] += drline->size; + if (argc == 3) { - //num access per iaddr in loop - w_cnt[w_rw_idx][w_idx]++; + // 1 open dr trace + fp_drtrace = gzopen(argv[1], "hrb"); + if (fp_drtrace == NULL) { + printf("ERROR: Could not open %s!\n", argv[1]); + exit(-1); + } - if (did_opcode) { + strcpy(binary, argv[2]); - opcodes_mem++; - addrs++; - did_opcode = 0; + } else { + printf("ERROR: Invalid arguments, should be: trace.gz binary\n"); + exit(-1); + } - } else { - addrs++; - } + try { + Metrics gather_metrics(GATHER); + Metrics scatter_metrics(SCATTER); - /***********************/ - /** SOMETHING ELSE **/ - /***********************/ - } else { - other++; - } + InstrInfo gather_iinfo(GATHER); + InstrInfo scatter_iinfo(SCATTER); - p_drtrace++; - drtrace_lines++; + TraceInfo trace_info; - } //while drtrace + // ----------------- First Pass ----------------- + first_pass(trace_info, gather_iinfo, scatter_iinfo, gather_metrics, scatter_metrics, fp_drtrace); //metrics - gather_occ_avg /= gather_metrics.cnt; - scatter_occ_avg /= scatter_metrics.cnt; + trace_info.gather_occ_avg /= gather_metrics.cnt; + trace_info.scatter_occ_avg /= scatter_metrics.cnt; printf("\n RESULTS \n"); @@ -825,35 +795,35 @@ int main(int argc, char **argv) { gzclose(fp_drtrace); printf("DRTRACE STATS\n"); - printf("DRTRACE LINES: %16lu\n", drtrace_lines); - printf("OPCODES: %16lu\n", opcodes); - printf("MEMOPCODES: %16lu\n", opcodes_mem); - printf("LOAD/STORES: %16lu\n", addrs); - printf("OTHER: %16lu\n", other); + printf("DRTRACE LINES: 
%16lu\n", trace_info.drtrace_lines); + printf("OPCODES: %16lu\n", trace_info.opcodes); + printf("MEMOPCODES: %16lu\n", trace_info.opcodes_mem); + printf("LOAD/STORES: %16lu\n", trace_info.addrs); + printf("OTHER: %16lu\n", trace_info.other); printf("\n"); printf("GATHER/SCATTER STATS: \n"); - printf("LOADS per GATHER: %16.3f\n", gather_occ_avg); - printf("STORES per SCATTER: %16.3f\n", scatter_occ_avg); + printf("LOADS per GATHER: %16.3f\n", trace_info.gather_occ_avg); + printf("STORES per SCATTER: %16.3f\n", trace_info.scatter_occ_avg); printf("GATHER COUNT: %16.3f (log2)\n", log(gather_metrics.cnt) / log(2.0)); printf("SCATTER COUNT: %16.3f (log2)\n", log(scatter_metrics.cnt) / log(2.0)); - printf("OTHER COUNT: %16.3f (log2)\n", log(other_cnt) / log(2.0)); + printf("OTHER COUNT: %16.3f (log2)\n", log(trace_info.other_cnt) / log(2.0)); // Find source lines for gathers - Must have symbol printf("\nSymbol table lookup for gathers..."); fflush(stdout); - gather_metrics.cnt = update_source_lines(gather_iaddrs, gather_icnt, gather_metrics, binary); + gather_metrics.cnt = update_source_lines(gather_iinfo, gather_metrics, binary); - //Get top gathers - gather_metrics.ntop = get_top_target(gather_icnt, gather_iaddrs, gather_metrics); + // Get top gathers + gather_metrics.ntop = get_top_target(gather_iinfo, gather_metrics); // Find source lines for scatters printf("Symbol table lookup for scatters..."); - scatter_metrics.cnt = update_source_lines(scatter_iaddrs, scatter_icnt, scatter_metrics, binary); + scatter_metrics.cnt = update_source_lines(scatter_iinfo, scatter_metrics, binary); - //Get top scatters - scatter_metrics.ntop = get_top_target(scatter_icnt, scatter_iaddrs, scatter_metrics); + // Get top scatters + scatter_metrics.ntop = get_top_target(scatter_iinfo, scatter_metrics); // ----------------- Second Pass ----------------- @@ -864,7 +834,7 @@ int main(int argc, char **argv) { exit(-1); } - second_pass(fp_drtrace, drtrace, p_drtrace, gather_metrics, 
scatter_metrics); + second_pass(fp_drtrace, gather_metrics, scatter_metrics); gzclose(fp_drtrace); printf("\n"); diff --git a/gs_patterns.h b/gs_patterns.h index cd2f4b9..56aa5bc 100644 --- a/gs_patterns.h +++ b/gs_patterns.h @@ -51,11 +51,11 @@ struct _trace_entry_t { } __attribute__((packed)); typedef struct _trace_entry_t trace_entry_t; +typedef enum { GATHER=0, SCATTER } metrics_type; + class Metrics { public: - typedef enum { GATHER=0, SCATTER } metrics_type; - Metrics(metrics_type mType) : _mType(mType) { /// TODO: Convert to new/free @@ -102,9 +102,90 @@ class Metrics metrics_type _mType; }; -/* -class Address_Instr + +class InstrInfo +{ +public: + InstrInfo(metrics_type mType) : _mType(mType) { } + ~InstrInfo() { } + + InstrInfo(const InstrInfo &) = delete; + InstrInfo & operator=(const InstrInfo & right) = delete; + + addr_t* get_iaddrs() { return iaddrs[_mType]; } + int64_t* get_icnt() { return icnt[_mType]; } + int64_t* get_occ() { return occ[_mType]; } + +private: + static addr_t iaddrs[2][NGS]; + static int64_t icnt[2][NGS]; + static int64_t occ[2][NGS]; + //addr_t base[2][NTOP]; + + metrics_type _mType; +}; + +class TraceInfo // Stats { public: + /// TODO: need an reset method to zero out counters + + uint64_t opcodes = 0; + uint64_t opcodes_mem = 0; + uint64_t addrs = 0; + uint64_t other = 0; + //int gs; // needed across calls? 
+ int64_t ngs = 0; + int64_t drtrace_lines = 0; + + bool did_opcode = false; // revist this --------------- + double other_cnt = 0.0; + double gather_score = 0.0; + double gather_occ_avg = 0.0; + double scatter_occ_avg = 0.0; + + uint64_t mcnt = 0; + }; -*/ + +class InstrWindow +{ +public: + InstrWindow() { + //init window arrays + for (int w = 0; w < 2; w++) { + for (int i = 0; i < IWINDOW; i++) { + w_iaddrs[w][i] = -1; + w_bytes[w][i] = 0; + w_cnt[w][i] = 0; + for (int j = 0; j < VBYTES; j++) + w_maddr[w][i][j] = -1; + } + } + } + + ~InstrWindow() { } + + InstrWindow(const InstrWindow &) = delete; + InstrWindow & operator=(const InstrWindow & right) = delete; + +#if 0 + static int64_t w_iaddrs[2][IWINDOW]; + static int64_t w_bytes[2][IWINDOW]; + static int64_t w_maddr[2][IWINDOW][VBYTES]; + static int64_t w_cnt[2][IWINDOW]; +#else + // moved from static storage to instance variables (watch out for stack overflow) + // Revisit and move to heap if an issue - estimate of 2k*3 + 128k + int64_t w_iaddrs[2][IWINDOW]; + int64_t w_bytes[2][IWINDOW]; + int64_t w_maddr[2][IWINDOW][VBYTES]; + int64_t w_cnt[2][IWINDOW]; +#endif + + // State which must be carried with each call to handle a trace + addr_t iaddr; + int64_t maddr_prev; + int64_t maddr; + +}; \ No newline at end of file From 74570b6a91313deb51d756a9ffab9d21e538795d Mon Sep 17 00:00:00 2001 From: christopher Date: Tue, 2 Apr 2024 11:01:32 -0400 Subject: [PATCH 10/76] Remove additional function statics for thread safety. 
--- gs_patterns.cpp | 23 ++++++++++++----------- gs_patterns.h | 2 -- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/gs_patterns.cpp b/gs_patterns.cpp index bc4507e..332e426 100644 --- a/gs_patterns.cpp +++ b/gs_patterns.cpp @@ -185,7 +185,7 @@ int drline_read(gzFile fp, trace_entry_t *val, trace_entry_t **p_val, int *edx) return 1; } -void create_metrics_file(FILE *fp, FILE *fp2, const char* trace_file_name, Metrics & target_metrics); +void create_metrics_file(FILE *fp, FILE *fp2, const char* trace_file_name, Metrics & target_metrics, bool & first_spatter); void create_spatter_file(const char* trace_file_name, Metrics & gather_metrics, Metrics & scatter_metrics) { @@ -217,9 +217,10 @@ void create_spatter_file(const char* trace_file_name, Metrics & gather_metrics, fprintf(fp, "[ "); fprintf(fp2, "#sourceline, g/s, indices, percentage of g/s in trace\n"); - create_metrics_file(fp, fp2, trace_file_name, gather_metrics); + bool first_spatter = true; + create_metrics_file(fp, fp2, trace_file_name, gather_metrics, first_spatter); - create_metrics_file(fp, fp2, trace_file_name, scatter_metrics); + create_metrics_file(fp, fp2, trace_file_name, scatter_metrics, first_spatter); //Footer fprintf(fp, " ]"); @@ -227,14 +228,13 @@ void create_spatter_file(const char* trace_file_name, Metrics & gather_metrics, fclose(fp2); } -void create_metrics_file(FILE *fp, FILE *fp2, const char* trace_file_name, Metrics & target_metrics) +void create_metrics_file(FILE *fp, FILE *fp2, const char* trace_file_name, Metrics & target_metrics, bool & first_spatter) { int i = 0; int j = 0; //Create stride histogram and create spatter int sidx; - static bool first_spatter = true; int unique_strides; int64_t idx, pidx; int64_t n_stride[1027]; @@ -388,23 +388,23 @@ double update_source_lines(InstrInfo & target_iinfo, Metrics & target_metrics, c void second_pass(gzFile fp_drtrace, Metrics & gather_metrics, Metrics & scatter_metrics) { - uint64_t mcnt = 0; + uint64_t mcnt = 0; // used 
our own local mcnt while iterating over file in this method. int iret = 0; trace_entry_t* drline; addr_t iaddr; int64_t maddr; int i = 0; - // TODO: remove these statics - static addr_t gather_base[NTOP] = {0}; - static addr_t scatter_base[NTOP] = {0}; + addr_t gather_base[NTOP] = {0}; + addr_t scatter_base[NTOP] = {0}; bool breakout = false; printf("\nSecond pass to fill gather / scatter subtraces\n"); fflush(stdout); trace_entry_t* p_drtrace = NULL; - static trace_entry_t drtrace[NBUFS]; + trace_entry_t drtrace[NBUFS]; // was static (1024 bytes) + while (drline_read(fp_drtrace, drtrace, &p_drtrace, &iret) && !breakout) { //decode drtrace @@ -736,7 +736,8 @@ void first_pass( fflush(stdout); trace_entry_t *p_drtrace = NULL; - static trace_entry_t drtrace[NBUFS]; + trace_entry_t drtrace[NBUFS]; // was static (1024 bytes) + while (drline_read(fp_drtrace, drtrace, &p_drtrace, &iret)) { //decode drtrace drline = p_drtrace; diff --git a/gs_patterns.h b/gs_patterns.h index 56aa5bc..b386e8b 100644 --- a/gs_patterns.h +++ b/gs_patterns.h @@ -120,7 +120,6 @@ class InstrInfo static addr_t iaddrs[2][NGS]; static int64_t icnt[2][NGS]; static int64_t occ[2][NGS]; - //addr_t base[2][NTOP]; metrics_type _mType; }; @@ -134,7 +133,6 @@ class TraceInfo // Stats uint64_t opcodes_mem = 0; uint64_t addrs = 0; uint64_t other = 0; - //int gs; // needed across calls? int64_t ngs = 0; int64_t drtrace_lines = 0; From aead2b45c05e7e162e7fefcb17258c65b2487e51 Mon Sep 17 00:00:00 2001 From: christopher Date: Tue, 2 Apr 2024 14:21:06 -0400 Subject: [PATCH 11/76] Renamed some functions, removed unused vars, reordered update of statistics and 2nd pass. 
--- gs_patterns.cpp | 230 ++++++++++++++++++++++++++---------------------- 1 file changed, 127 insertions(+), 103 deletions(-) diff --git a/gs_patterns.cpp b/gs_patterns.cpp index 332e426..97794c6 100644 --- a/gs_patterns.cpp +++ b/gs_patterns.cpp @@ -185,49 +185,6 @@ int drline_read(gzFile fp, trace_entry_t *val, trace_entry_t **p_val, int *edx) return 1; } -void create_metrics_file(FILE *fp, FILE *fp2, const char* trace_file_name, Metrics & target_metrics, bool & first_spatter); - -void create_spatter_file(const char* trace_file_name, Metrics & gather_metrics, Metrics & scatter_metrics) -{ - //Create spatter file - FILE *fp, *fp2; - char *json_name, *gs_info; - json_name = (char*)str_replace(trace_file_name, ".gz", ".json"); - if (strstr(json_name, ".json") == 0) { - strncat(json_name, ".json", strlen(".json")+1); - } - - fp = fopen(json_name, "w"); - if (fp == NULL) { - printf("ERROR: Could not open %s!\n", json_name); - exit(-1); - } - gs_info = (char*)str_replace(trace_file_name, ".gz", ".txt"); - if (strstr(gs_info, ".json") == 0) { - strncat(gs_info, ".txt", strlen(".txt")+1); - } - - fp2 = fopen(gs_info, "w"); - if (fp2 == NULL) { - printf("ERROR: Could not open %s!\n", gs_info); - exit(-1); - } - - //Header - fprintf(fp, "[ "); - fprintf(fp2, "#sourceline, g/s, indices, percentage of g/s in trace\n"); - - bool first_spatter = true; - create_metrics_file(fp, fp2, trace_file_name, gather_metrics, first_spatter); - - create_metrics_file(fp, fp2, trace_file_name, scatter_metrics, first_spatter); - - //Footer - fprintf(fp, " ]"); - fclose(fp); - fclose(fp2); -} - void create_metrics_file(FILE *fp, FILE *fp2, const char* trace_file_name, Metrics & target_metrics, bool & first_spatter) { int i = 0; @@ -342,6 +299,47 @@ void create_metrics_file(FILE *fp, FILE *fp2, const char* trace_file_name, Metri } } +void create_spatter_file(const char* trace_file_name, Metrics & gather_metrics, Metrics & scatter_metrics) +{ + //Create spatter file + FILE *fp, *fp2; + char 
*json_name, *gs_info; + json_name = (char*)str_replace(trace_file_name, ".gz", ".json"); + if (strstr(json_name, ".json") == 0) { + strncat(json_name, ".json", strlen(".json")+1); + } + + fp = fopen(json_name, "w"); + if (fp == NULL) { + printf("ERROR: Could not open %s!\n", json_name); + exit(-1); + } + gs_info = (char*)str_replace(trace_file_name, ".gz", ".txt"); + if (strstr(gs_info, ".json") == 0) { + strncat(gs_info, ".txt", strlen(".txt")+1); + } + + fp2 = fopen(gs_info, "w"); + if (fp2 == NULL) { + printf("ERROR: Could not open %s!\n", gs_info); + exit(-1); + } + + //Header + fprintf(fp, "[ "); + fprintf(fp2, "#sourceline, g/s, indices, percentage of g/s in trace\n"); + + bool first_spatter = true; + create_metrics_file(fp, fp2, trace_file_name, gather_metrics, first_spatter); + + create_metrics_file(fp, fp2, trace_file_name, scatter_metrics, first_spatter); + + //Footer + fprintf(fp, " ]"); + fclose(fp); + fclose(fp2); +} + void normalize_stats(Metrics & target_metrics) { //Normalize @@ -364,7 +362,7 @@ void normalize_stats(Metrics & target_metrics) } } -double update_source_lines(InstrInfo & target_iinfo, Metrics & target_metrics, const char* binary_file_name) +double update_source_lines_from_binary(InstrInfo & target_iinfo, Metrics & target_metrics, const char* binary_file_name) { double scatter_cnt = 0.0; @@ -386,6 +384,7 @@ double update_source_lines(InstrInfo & target_iinfo, Metrics & target_metrics, c return scatter_cnt; } +// Second Pass void second_pass(gzFile fp_drtrace, Metrics & gather_metrics, Metrics & scatter_metrics) { uint64_t mcnt = 0; // used our own local mcnt while iterating over file in this method. 
@@ -414,15 +413,13 @@ void second_pass(gzFile fp_drtrace, Metrics & gather_metrics, Metrics & scatter_ /** INSTR 0xa-0x10 and 0x1e **/ /*****************************/ if (((drline->type >= 0xa) && (drline->type <= 0x10)) || (drline->type == 0x1e)) { - - //iaddr iaddr = drline->addr; - /***********************/ /** MEM 0x00 and 0x01 **/ /***********************/ - } else if ((drline->type == 0x0) || (drline->type == 0x1)) { + } + else if ((drline->type == 0x0) || (drline->type == 0x1)) { maddr = drline->addr / drline->size; @@ -434,7 +431,7 @@ void second_pass(gzFile fp_drtrace, Metrics & gather_metrics, Metrics & scatter_ fflush(stdout); } - //gather ? + // gather ? if (drline->type == 0x0) { for (i = 0; i < gather_metrics.ntop; i++) { @@ -456,9 +453,9 @@ void second_pass(gzFile fp_drtrace, Metrics & gather_metrics, Metrics & scatter_ break; } } - - //scatter ? - } else { + } + // scatter ? + else { for (i = 0; i < scatter_metrics.ntop; i++) { @@ -479,8 +476,7 @@ void second_pass(gzFile fp_drtrace, Metrics & gather_metrics, Metrics & scatter_ } } } - - } //MEM + } // MEM p_drtrace++; @@ -716,17 +712,37 @@ void handle_trace_entry( } else { trace_info.other++; } +} +void display_stats(TraceInfo & trace_info, Metrics & gather_metrics, Metrics & scatter_metrics) +{ + printf("\n RESULTS \n"); + + printf("DRTRACE STATS\n"); + printf("DRTRACE LINES: %16lu\n", trace_info.drtrace_lines); + printf("OPCODES: %16lu\n", trace_info.opcodes); + printf("MEMOPCODES: %16lu\n", trace_info.opcodes_mem); + printf("LOAD/STORES: %16lu\n", trace_info.addrs); + printf("OTHER: %16lu\n", trace_info.other); + + printf("\n"); + + printf("GATHER/SCATTER STATS: \n"); + printf("LOADS per GATHER: %16.3f\n", trace_info.gather_occ_avg); + printf("STORES per SCATTER: %16.3f\n", trace_info.scatter_occ_avg); + printf("GATHER COUNT: %16.3f (log2)\n", log(gather_metrics.cnt) / log(2.0)); + printf("SCATTER COUNT: %16.3f (log2)\n", log(scatter_metrics.cnt) / log(2.0)); + printf("OTHER COUNT: %16.3f 
(log2)\n", log(trace_info.other_cnt) / log(2.0)); } -void first_pass( +// First Pass +void process_traces( TraceInfo & trace_info, InstrInfo & gather_iinfo, InstrInfo & scatter_iinfo, Metrics & gather_metrics, Metrics & scatter_metrics, - gzFile & fp_drtrace -) + gzFile & fp_drtrace) { int iret = 0; trace_entry_t *drline; @@ -748,14 +764,59 @@ void first_pass( trace_info.drtrace_lines++; } //while drtrace + + //metrics + trace_info.gather_occ_avg /= gather_metrics.cnt; + trace_info.scatter_occ_avg /= scatter_metrics.cnt; + + display_stats(trace_info, gather_metrics, scatter_metrics); + } +void update_source_lines( + InstrInfo & gather_iinfo, + InstrInfo & scatter_iinfo, + Metrics & gather_metrics, + Metrics & scatter_metrics, + const char * binary) +{ + // Find source lines for gathers - Must have symbol + printf("\nSymbol table lookup for gathers..."); + gather_metrics.cnt = update_source_lines_from_binary(gather_iinfo, gather_metrics, binary); + + // Find source lines for scatters + printf("Symbol table lookup for scatters..."); + scatter_metrics.cnt = update_source_lines_from_binary(scatter_iinfo, scatter_metrics, binary); +} + +void update_metrics( + InstrInfo & gather_iinfo, + InstrInfo & scatter_iinfo, + Metrics & gather_metrics, + Metrics & scatter_metrics, + gzFile & fp_drtrace) +{ + // Get top gathers + gather_metrics.ntop = get_top_target(gather_iinfo, gather_metrics); + + // Get top scatters + scatter_metrics.ntop = get_top_target(scatter_iinfo, scatter_metrics); + + // ----------------- Second Pass ----------------- + + second_pass(fp_drtrace, gather_metrics, scatter_metrics); + + // ----------------- Normalize ----------------- + + normalize_stats(gather_metrics); + normalize_stats(scatter_metrics); +} + + int main(int argc, char **argv) { - int ret; char binary[1024]; gzFile fp_drtrace; - FILE *fp_gs; if (argc == 3) { @@ -782,51 +843,17 @@ int main(int argc, char **argv) { TraceInfo trace_info; - // ----------------- First Pass ----------------- - - 
first_pass(trace_info, gather_iinfo, scatter_iinfo, gather_metrics, scatter_metrics, fp_drtrace); - - //metrics - trace_info.gather_occ_avg /= gather_metrics.cnt; - trace_info.scatter_occ_avg /= scatter_metrics.cnt; - - printf("\n RESULTS \n"); + // ----------------- Process Traces ----------------- + process_traces(trace_info, gather_iinfo, scatter_iinfo, gather_metrics, scatter_metrics, fp_drtrace); //close files gzclose(fp_drtrace); - printf("DRTRACE STATS\n"); - printf("DRTRACE LINES: %16lu\n", trace_info.drtrace_lines); - printf("OPCODES: %16lu\n", trace_info.opcodes); - printf("MEMOPCODES: %16lu\n", trace_info.opcodes_mem); - printf("LOAD/STORES: %16lu\n", trace_info.addrs); - printf("OTHER: %16lu\n", trace_info.other); - - printf("\n"); - - printf("GATHER/SCATTER STATS: \n"); - printf("LOADS per GATHER: %16.3f\n", trace_info.gather_occ_avg); - printf("STORES per SCATTER: %16.3f\n", trace_info.scatter_occ_avg); - printf("GATHER COUNT: %16.3f (log2)\n", log(gather_metrics.cnt) / log(2.0)); - printf("SCATTER COUNT: %16.3f (log2)\n", log(scatter_metrics.cnt) / log(2.0)); - printf("OTHER COUNT: %16.3f (log2)\n", log(trace_info.other_cnt) / log(2.0)); - - // Find source lines for gathers - Must have symbol - printf("\nSymbol table lookup for gathers..."); fflush(stdout); - gather_metrics.cnt = update_source_lines(gather_iinfo, gather_metrics, binary); - - // Get top gathers - gather_metrics.ntop = get_top_target(gather_iinfo, gather_metrics); - // Find source lines for scatters - printf("Symbol table lookup for scatters..."); - scatter_metrics.cnt = update_source_lines(scatter_iinfo, scatter_metrics, binary); + // ----------------- Update Source Lines ----------------- - // Get top scatters - scatter_metrics.ntop = get_top_target(scatter_iinfo, scatter_metrics); - - // ----------------- Second Pass ----------------- + update_source_lines(gather_iinfo, scatter_iinfo, gather_metrics, scatter_metrics, binary); //Open trace fp_drtrace = gzopen(argv[1], "hrb"); @@ 
-835,16 +862,13 @@ int main(int argc, char **argv) { exit(-1); } - second_pass(fp_drtrace, gather_metrics, scatter_metrics); + // ----------------- Update Metrics ----------------- + + update_metrics(gather_iinfo, scatter_iinfo, gather_metrics, scatter_metrics, fp_drtrace); gzclose(fp_drtrace); printf("\n"); - // ----------------- Normalize ----------------- - - normalize_stats(gather_metrics); - normalize_stats(scatter_metrics); - // ----------------- Create Spatter File ----------------- create_spatter_file(argv[1], gather_metrics, scatter_metrics); From 219fbe682812e8897864457b8d360647ef9e0c08 Mon Sep 17 00:00:00 2001 From: christopher Date: Tue, 2 Apr 2024 15:59:54 -0400 Subject: [PATCH 12/76] move file io to functions and fixup flush calls and error mesgs. --- gs_patterns.cpp | 71 ++++++++++++++++++++++++++----------------------- 1 file changed, 37 insertions(+), 34 deletions(-) diff --git a/gs_patterns.cpp b/gs_patterns.cpp index 97794c6..90efb1b 100644 --- a/gs_patterns.cpp +++ b/gs_patterns.cpp @@ -197,6 +197,8 @@ void create_metrics_file(FILE *fp, FILE *fp2, const char* trace_file_name, Metri int64_t n_stride[1027]; double outbounds; + if (first_spatter) printf("\n"); + printf("\n"); for (i = 0; i < target_metrics.ntop; i++) { printf("***************************************************************************************\n"); @@ -366,7 +368,6 @@ double update_source_lines_from_binary(InstrInfo & target_iinfo, Metrics & targe { double scatter_cnt = 0.0; - fflush(stdout); //Check it is not a library for (int k = 0; k < NGS; k++) { @@ -712,6 +713,8 @@ void handle_trace_entry( } else { trace_info.other++; } + + trace_info.drtrace_lines++; } void display_stats(TraceInfo & trace_info, Metrics & gather_metrics, Metrics & scatter_metrics) @@ -761,9 +764,7 @@ void process_traces( handle_trace_entry(drline, trace_info, gather_iinfo, scatter_iinfo, gather_metrics, scatter_metrics, iw); p_drtrace++; - trace_info.drtrace_lines++; - - } //while drtrace + } //metrics 
trace_info.gather_occ_avg /= gather_metrics.cnt; @@ -782,10 +783,14 @@ void update_source_lines( { // Find source lines for gathers - Must have symbol printf("\nSymbol table lookup for gathers..."); + fflush(stdout); + gather_metrics.cnt = update_source_lines_from_binary(gather_iinfo, gather_metrics, binary); // Find source lines for scatters printf("Symbol table lookup for scatters..."); + fflush(stdout); + scatter_metrics.cnt = update_source_lines_from_binary(scatter_iinfo, scatter_metrics, binary); } @@ -812,29 +817,37 @@ void update_metrics( normalize_stats(scatter_metrics); } +gzFile open_trace_file(const std::string & trace_file_name) +{ + gzFile fp; -int main(int argc, char **argv) { + fp = gzopen(trace_file_name.c_str(), "hrb"); + if (fp == NULL) { + throw std::runtime_error("Could not open " + trace_file_name + "!"); + } + return fp; +} +void close_trace_file (gzFile & fp) +{ + gzclose(fp); +} + +int main(int argc, char **argv) +{ char binary[1024]; gzFile fp_drtrace; - if (argc == 3) { - - // 1 open dr trace - fp_drtrace = gzopen(argv[1], "hrb"); - if (fp_drtrace == NULL) { - printf("ERROR: Could not open %s!\n", argv[1]); - exit(-1); + try + { + if (argc == 3) { + fp_drtrace = open_trace_file(std::string(argv[1])); + strcpy(binary, argv[2]); + } + else { + throw std::runtime_error("Invalid arguments, should be: trace.gz binary"); } - strcpy(binary, argv[2]); - - } else { - printf("ERROR: Invalid arguments, should be: trace.gz binary\n"); - exit(-1); - } - - try { Metrics gather_metrics(GATHER); Metrics scatter_metrics(SCATTER); @@ -846,28 +859,19 @@ int main(int argc, char **argv) { // ----------------- Process Traces ----------------- process_traces(trace_info, gather_iinfo, scatter_iinfo, gather_metrics, scatter_metrics, fp_drtrace); - //close files - gzclose(fp_drtrace); - fflush(stdout); + close_trace_file(fp_drtrace); // ----------------- Update Source Lines ----------------- update_source_lines(gather_iinfo, scatter_iinfo, gather_metrics, 
scatter_metrics, binary); - //Open trace - fp_drtrace = gzopen(argv[1], "hrb"); - if (fp_drtrace == NULL) { - printf("ERROR: Could not open %s!\n", argv[1]); - exit(-1); - } - // ----------------- Update Metrics ----------------- + fp_drtrace = open_trace_file(argv[1]); update_metrics(gather_iinfo, scatter_iinfo, gather_metrics, scatter_metrics, fp_drtrace); - gzclose(fp_drtrace); - printf("\n"); + close_trace_file(fp_drtrace); // ----------------- Create Spatter File ----------------- @@ -876,10 +880,9 @@ int main(int argc, char **argv) { } catch (const std::exception & ex) { - std::cerr << "Error: " << ex.what() << std::endl; + std::cerr << "ERROR: " << ex.what() << std::endl; exit(-1); } return 0; - } From d693277c858f359f18bbf5689bd6af10e6605cad Mon Sep 17 00:00:00 2001 From: christopher Date: Tue, 2 Apr 2024 17:59:32 -0400 Subject: [PATCH 13/76] Introduced GS exceptions --- gs_patterns.cpp | 68 ++++++++++++++++++++++++++++++++----------------- gs_patterns.h | 35 +++++++++++++++++++++++-- 2 files changed, 78 insertions(+), 25 deletions(-) diff --git a/gs_patterns.cpp b/gs_patterns.cpp index 90efb1b..8e378e3 100644 --- a/gs_patterns.cpp +++ b/gs_patterns.cpp @@ -11,6 +11,7 @@ #include #include +#include #include #include "gs_patterns.h" @@ -135,7 +136,7 @@ int cnt_str(char *line, char c) { return cnt; } -void translate_iaddr(const char *binary, char *source_line, addr_t iaddr) { +void translate_iaddr(const std::string & binary, char *source_line, addr_t iaddr) { int i = 0; int ntranslated = 0; @@ -143,13 +144,12 @@ void translate_iaddr(const char *binary, char *source_line, addr_t iaddr) { char cmd[MAX_LINE_LENGTH]; FILE *fp; - sprintf(cmd, "addr2line -e %s 0x%lx", binary, iaddr); + sprintf(cmd, "addr2line -e %s 0x%lx", binary.c_str(), iaddr); /* Open the command for reading. */ fp = popen(cmd, "r"); if (fp == NULL) { - printf("Failed to run command\n"); - exit(1); + throw GSError("Failed to run command"); } /* Read the output a line at a time - output it. 
*/ @@ -235,8 +235,7 @@ void create_metrics_file(FILE *fp, FILE *fp2, const char* trace_file_name, Metri printf("%s\n", bin_name); fp_bin = fopen(bin_name, "w"); if (fp_bin == NULL) { - printf("ERROR: Could not open %s!\n", bin_name); - exit(-1); + throw GSFileError("Could not open " + std::string(bin_name) + "!"); } printf("%sIADDR -- %p\n", target_metrics.getShortName().c_str(), (void*) target_metrics.top[i]); @@ -313,8 +312,7 @@ void create_spatter_file(const char* trace_file_name, Metrics & gather_metrics, fp = fopen(json_name, "w"); if (fp == NULL) { - printf("ERROR: Could not open %s!\n", json_name); - exit(-1); + throw GSFileError("Could not open " + std::string(json_name) + "!"); } gs_info = (char*)str_replace(trace_file_name, ".gz", ".txt"); if (strstr(gs_info, ".json") == 0) { @@ -323,8 +321,7 @@ void create_spatter_file(const char* trace_file_name, Metrics & gather_metrics, fp2 = fopen(gs_info, "w"); if (fp2 == NULL) { - printf("ERROR: Could not open %s!\n", gs_info); - exit(-1); + throw GSFileError("Could not open " + std::string(gs_info) + "!"); } //Header @@ -364,7 +361,7 @@ void normalize_stats(Metrics & target_metrics) } } -double update_source_lines_from_binary(InstrInfo & target_iinfo, Metrics & target_metrics, const char* binary_file_name) +double update_source_lines_from_binary(InstrInfo & target_iinfo, Metrics & target_metrics, const std::string & binary_file_name) { double scatter_cnt = 0.0; @@ -541,6 +538,12 @@ void handle_trace_entry( int w_idx; int gs; + if (drline->type == 0 && drline->size == 0) { + std::ostringstream os; + os << "Invalid trace entry: type: [" << drline->type << "] size: [" << drline->size << "]"; + throw GSDataError(os.str()); + } + /*****************************/ /** INSTR 0xa-0x10 and 0x1e **/ /*****************************/ @@ -779,7 +782,7 @@ void update_source_lines( InstrInfo & scatter_iinfo, Metrics & gather_metrics, Metrics & scatter_metrics, - const char * binary) + const std::string & binary) { // Find source 
lines for gathers - Must have symbol printf("\nSymbol table lookup for gathers..."); @@ -823,7 +826,7 @@ gzFile open_trace_file(const std::string & trace_file_name) fp = gzopen(trace_file_name.c_str(), "hrb"); if (fp == NULL) { - throw std::runtime_error("Could not open " + trace_file_name + "!"); + throw GSFileError("Could not open " + trace_file_name + "!"); } return fp; } @@ -835,19 +838,18 @@ void close_trace_file (gzFile & fp) int main(int argc, char **argv) { - char binary[1024]; - gzFile fp_drtrace; - try { - if (argc == 3) { - fp_drtrace = open_trace_file(std::string(argv[1])); - strcpy(binary, argv[2]); - } - else { - throw std::runtime_error("Invalid arguments, should be: trace.gz binary"); + if (argc != 3) { + throw GSError("Invalid arguments, should be: trace.gz binary_file_name"); } + gzFile fp_drtrace; + std::string trace_file_name(argv[1]); + std::string binary_file_name(argv[2]); + + fp_drtrace = open_trace_file(trace_file_name); + Metrics gather_metrics(GATHER); Metrics scatter_metrics(SCATTER); @@ -864,7 +866,7 @@ int main(int argc, char **argv) // ----------------- Update Source Lines ----------------- - update_source_lines(gather_iinfo, scatter_iinfo, gather_metrics, scatter_metrics, binary); + update_source_lines(gather_iinfo, scatter_iinfo, gather_metrics, scatter_metrics, binary_file_name); // ----------------- Update Metrics ----------------- fp_drtrace = open_trace_file(argv[1]); @@ -878,6 +880,26 @@ int main(int argc, char **argv) create_spatter_file(argv[1], gather_metrics, scatter_metrics); } + catch (const GSFileError & ex) + { + std::cerr << "ERROR: " << ex.what() << std::endl; + exit(-1); + } + catch (const GSAllocError & ex) + { + std::cerr << "ERROR: " << ex.what() << std::endl; + exit(-1); + } + catch (const GSDataError & ex) + { + std::cerr << "ERROR: " << ex.what() << std::endl; + exit(1); + } + catch (const GSError & ex) + { + std::cerr << "ERROR: " << ex.what() << std::endl; + exit(1); + } catch (const std::exception & ex) { 
std::cerr << "ERROR: " << ex.what() << std::endl; diff --git a/gs_patterns.h b/gs_patterns.h index b386e8b..aff02f3 100644 --- a/gs_patterns.h +++ b/gs_patterns.h @@ -53,6 +53,38 @@ typedef struct _trace_entry_t trace_entry_t; typedef enum { GATHER=0, SCATTER } metrics_type; +class GSError : public std::exception +{ +public: + GSError (const std::string & reason) : _reason(reason) { } + ~GSError() {} + + const char * what() const noexcept override { return _reason.c_str(); } +private: + std::string _reason; +}; + +class GSFileError : public GSError +{ +public: + GSFileError (const std::string & reason) : GSError(reason) { } + ~GSFileError() {} +}; + +class GSDataError : public GSError +{ +public: + GSDataError (const std::string & reason) : GSError(reason) { } + ~GSDataError() {} +}; + +class GSAllocError : public GSError +{ +public: + GSAllocError (const std::string & reason) : GSError(reason) { } + ~GSAllocError() {} +}; + class Metrics { public: @@ -62,8 +94,7 @@ class Metrics for (int j = 0; j < NTOP; j++) { patterns[j] = (int64_t *) calloc(PSIZE, sizeof(int64_t)); if (patterns[j] == NULL) { - printf("ERROR: Could not allocate gather_patterns!\n"); - throw std::runtime_error("Could not allocate patterns for " + type_as_string()); //exit(-1); + throw GSAllocError("Could not allocate patterns for " + type_as_string() + "!"); } } } From 37bca07d9fdb82f961d0adaac987ebbf3b171764 Mon Sep 17 00:00:00 2001 From: christopher Date: Tue, 2 Apr 2024 19:08:27 -0400 Subject: [PATCH 14/76] Split gs_patterns.cpp into a few headers and source files in prep for library separation. 
--- CMakeLists.txt | 11 +- gs_patterns.cpp | 715 +------------------------------------------ gs_patterns.h | 5 +- gs_patterns_core.cpp | 625 +++++++++++++++++++++++++++++++++++++ gs_patterns_core.h | 54 ++++ utils.cpp | 109 +++++++ utils.h | 20 ++ 7 files changed, 825 insertions(+), 714 deletions(-) create mode 100644 gs_patterns_core.cpp create mode 100644 gs_patterns_core.h create mode 100644 utils.cpp create mode 100644 utils.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 41599e8..d80f436 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,7 +4,14 @@ set (CMAKE_VERBOSE_MAKEFILE "1") project( gs_patterns VERSION 1.0 LANGUAGES CXX) -add_executable(gs_patterns gs_patterns.cpp - gs_patterns.h) +add_executable( + gs_patterns + utils.h + utils.cpp + gs_patterns.h + gs_patterns.cpp + gs_patterns_core.h + gs_patterns_core.cpp +) set(CMAKE_CXX_STANDARD_LIBRARIES "-lm -lz ${CMAKE_CXX_STANDARD_LIBRARIES}") diff --git a/gs_patterns.cpp b/gs_patterns.cpp index 8e378e3..7aa65a2 100644 --- a/gs_patterns.cpp +++ b/gs_patterns.cpp @@ -1,20 +1,17 @@ + + #include #include -#include -#include -#include -#include -#include -#include #include -#include #include #include #include +#include #include #include "gs_patterns.h" +#include "gs_patterns_core.h" //Terminal colors #define KNRM "\x1B[0m" @@ -37,330 +34,6 @@ int64_t InstrWindow::w_maddr[2][IWINDOW][VBYTES]; int64_t InstrWindow::w_cnt[2][IWINDOW]; #endif -static inline int popcount(uint64_t x) { - int c; - - for (c = 0; x != 0; x >>= 1) - if (x & 1) - c++; - return c; -} - -//string tools -int startswith(const char *a, const char *b) { - if (strncmp(b, a, strlen(b)) == 0) - return 1; - return 0; -} - -int endswith(const char *a, const char *b) { - int idx = strlen(a); - int preidx = strlen(b); - - if (preidx >= idx) - return 0; - if (strncmp(b, &a[idx - preidx], preidx) == 0) - return 1; - return 0; -} - -//https://stackoverflow.com/questions/779875/what-function-is-to-replace-a-substring-from-a-string-in-c -const char 
*str_replace(const char *orig, const char *rep, const char *with) { - char *result; // the return string - char *ins; // the next insert point - char *tmp; // varies - int len_rep; // length of rep (the string to remove) - int len_with; // length of with (the string to replace rep with) - int len_front; // distance between rep and end of last rep - int count; // number of replacements - - // sanity checks and initialization - if (!orig) - return NULL; - - if (!rep) - return orig; - - len_rep = strlen(rep); - if (len_rep == 0) - return NULL; // empty rep causes infinite loop during count - if (!with) - with = ""; - len_with = strlen(with); - - // count the number of replacements needed - ins = (char*)orig; - for (count = 0; tmp = strstr(ins, rep); ++count) { - ins = tmp + len_rep; - } - - tmp = result = (char*)malloc(strlen(orig) + (len_with - len_rep) * count + 1); - - if (!result) - return NULL; - - // first time through the loop, all the variable are set correctly - // from here on, - // tmp points to the end of the result string - // ins points to the next occurrence of rep in orig - // orig points to the remainder of orig after "end of rep" - while (count--) { - ins = (char*)strstr(orig, rep); - len_front = ins - orig; - tmp = strncpy(tmp, orig, len_front) + len_front; - tmp = strcpy(tmp, with) + len_with; - orig += len_front + len_rep; // move to next "end of rep" - } - strcpy(tmp, orig); - return result; -} - -char *get_str(char *line, char *bparse, char *aparse) { - - char *sline; - - sline = (char*)str_replace(line, bparse, ""); - sline = (char*)str_replace(sline, aparse, ""); - - return sline; -} - -int cnt_str(char *line, char c) { - - int cnt = 0; - for (int i = 0; line[i] != '\0'; i++) { - if (line[i] == c) - cnt++; - } - - return cnt; -} - -void translate_iaddr(const std::string & binary, char *source_line, addr_t iaddr) { - - int i = 0; - int ntranslated = 0; - char path[MAX_LINE_LENGTH]; - char cmd[MAX_LINE_LENGTH]; - FILE *fp; - - sprintf(cmd, 
"addr2line -e %s 0x%lx", binary.c_str(), iaddr); - - /* Open the command for reading. */ - fp = popen(cmd, "r"); - if (fp == NULL) { - throw GSError("Failed to run command"); - } - - /* Read the output a line at a time - output it. */ - while (fgets(path, sizeof(path), fp) != NULL) { - strcpy(source_line, path); - source_line[strcspn(source_line, "\n")] = 0; - } - - /* close */ - pclose(fp); - - return; -} - -int drline_read(gzFile fp, trace_entry_t *val, trace_entry_t **p_val, int *edx) { - - int idx; - - idx = (*edx) / sizeof(trace_entry_t); - //first read - if (*p_val == NULL) { - *edx = gzread(fp, val, sizeof(trace_entry_t) * NBUFS); - *p_val = val; - - } else if (*p_val == &val[idx]) { - *edx = gzread(fp, val, sizeof(trace_entry_t) * NBUFS); - *p_val = val; - } - - if (*edx == 0) - return 0; - - return 1; -} - -void create_metrics_file(FILE *fp, FILE *fp2, const char* trace_file_name, Metrics & target_metrics, bool & first_spatter) -{ - int i = 0; - int j = 0; - - //Create stride histogram and create spatter - int sidx; - int unique_strides; - int64_t idx, pidx; - int64_t n_stride[1027]; - double outbounds; - - if (first_spatter) printf("\n"); - - printf("\n"); - for (i = 0; i < target_metrics.ntop; i++) { - printf("***************************************************************************************\n"); - - unique_strides = 0; - for (j = 0; j < 1027; j++) - n_stride[j] = 0; - - for (j = 1; j < target_metrics.offset[i]; j++) { - sidx = target_metrics.patterns[i][j] - target_metrics.patterns[i][j - 1] + 513; - sidx = (sidx < 1) ? 0 : sidx; - sidx = (sidx > 1025) ? 
1026 : sidx; - n_stride[sidx]++; - } - - for (j = 0; j < 1027; j++) { - if (n_stride[j] > 0) { - unique_strides++; - } - } - - outbounds = (double) (n_stride[0] + n_stride[1026]) / (double) target_metrics.offset[i]; - - //if ( ( (unique_strides > NSTRIDES) || (outbounds > OUTTHRESH) ) && (gather_offset[i] > USTRIDES ) ){ - if (1) { - - //create a binary file - FILE *fp_bin; - char *bin_name; - bin_name = (char*)str_replace(trace_file_name, ".gz", ".sbin"); - if (strstr(bin_name, ".sbin") == 0) { - strncat(bin_name, ".sbin", strlen(".sbin")+1); - } - printf("%s\n", bin_name); - fp_bin = fopen(bin_name, "w"); - if (fp_bin == NULL) { - throw GSFileError("Could not open " + std::string(bin_name) + "!"); - } - - printf("%sIADDR -- %p\n", target_metrics.getShortName().c_str(), (void*) target_metrics.top[i]); - printf("SRCLINE -- %s\n", target_metrics.get_srcline()[target_metrics.top_idx[i]]); - printf("%s %c -- %6.3f%c (512-bit chunks)\n", target_metrics.type_as_string().c_str(), - '%', 100.0 * (double) target_metrics.tot[i] / target_metrics.cnt, '%'); - printf("NDISTS -- %ld\n", (long int)target_metrics.offset[i]); - - int64_t nlcnt = 0; - for (j = 0; j < target_metrics.offset[i]; j++) { - - if (j < 39) { - printf("%10ld ", target_metrics.patterns[i][j]); - fflush(stdout); - if ((++nlcnt % 13) == 0) - printf("\n"); - - } else if (j >= (target_metrics.offset[i] - 39)) { - printf("%10ld ", target_metrics.patterns[i][j]); - fflush(stdout); - if ((++nlcnt % 13) == 0) - printf("\n"); - - } else if (j == 39) - printf("...\n"); - } - printf("\n"); - printf("DIST HISTOGRAM --\n"); - - for (j = 0; j < 1027; j++) { - if (n_stride[j] > 0) { - if (j == 0) - printf("%6s: %ld\n", "< -512", n_stride[j]); - else if (j == 1026) - printf("%6s: %ld\n", "> 512", n_stride[j]); - else - printf("%6d: %ld\n", j - 513, n_stride[j]); - } - } - - if (first_spatter) { - first_spatter = false; - fprintf(fp, " {\"kernel\":\"%s\", \"pattern\":[", target_metrics.getName().c_str()); - } else { - 
fprintf(fp, ",\n {\"kernel\":\"%s\", \"pattern\":[", target_metrics.getName().c_str()); - } - - fwrite(target_metrics.patterns[i], sizeof(uint64_t), target_metrics.offset[i], fp_bin); - fclose(fp_bin); - - for (j = 0; j < target_metrics.offset[i] - 1; j++) - fprintf(fp, "%ld,", target_metrics.patterns[i][j]); - fprintf(fp, "%ld", target_metrics.patterns[i][target_metrics.offset[i] - 1]); - fprintf(fp, "], \"count\":1}"); - - fprintf(fp2, "%s,%s,%ld,%6.3f\n", - target_metrics.get_srcline()[target_metrics.top_idx[i]], target_metrics.getShortName().c_str(), - (long int)target_metrics.offset[i], - 100.0 * (double) target_metrics.tot[i] / target_metrics.cnt); - } - printf("***************************************************************************************\n\n"); - } -} - -void create_spatter_file(const char* trace_file_name, Metrics & gather_metrics, Metrics & scatter_metrics) -{ - //Create spatter file - FILE *fp, *fp2; - char *json_name, *gs_info; - json_name = (char*)str_replace(trace_file_name, ".gz", ".json"); - if (strstr(json_name, ".json") == 0) { - strncat(json_name, ".json", strlen(".json")+1); - } - - fp = fopen(json_name, "w"); - if (fp == NULL) { - throw GSFileError("Could not open " + std::string(json_name) + "!"); - } - gs_info = (char*)str_replace(trace_file_name, ".gz", ".txt"); - if (strstr(gs_info, ".json") == 0) { - strncat(gs_info, ".txt", strlen(".txt")+1); - } - - fp2 = fopen(gs_info, "w"); - if (fp2 == NULL) { - throw GSFileError("Could not open " + std::string(gs_info) + "!"); - } - - //Header - fprintf(fp, "[ "); - fprintf(fp2, "#sourceline, g/s, indices, percentage of g/s in trace\n"); - - bool first_spatter = true; - create_metrics_file(fp, fp2, trace_file_name, gather_metrics, first_spatter); - - create_metrics_file(fp, fp2, trace_file_name, scatter_metrics, first_spatter); - - //Footer - fprintf(fp, " ]"); - fclose(fp); - fclose(fp2); -} - -void normalize_stats(Metrics & target_metrics) -{ - //Normalize - int64_t smallest; - for (int i 
= 0; i < target_metrics.ntop; i++) { - - //Find smallest - smallest = 0; - for (int j = 0; j < target_metrics.offset[i]; j++) { - if (target_metrics.patterns[i][j] < smallest) - smallest = target_metrics.patterns[i][j]; - } - - smallest *= -1; - - //Normalize - for (int j = 0; j < target_metrics.offset[i]; j++) { - target_metrics.patterns[i][j] += smallest; - } - } -} - double update_source_lines_from_binary(InstrInfo & target_iinfo, Metrics & target_metrics, const std::string & binary_file_name) { double scatter_cnt = 0.0; @@ -382,364 +55,6 @@ double update_source_lines_from_binary(InstrInfo & target_iinfo, Metrics & targe return scatter_cnt; } -// Second Pass -void second_pass(gzFile fp_drtrace, Metrics & gather_metrics, Metrics & scatter_metrics) -{ - uint64_t mcnt = 0; // used our own local mcnt while iterating over file in this method. - int iret = 0; - trace_entry_t* drline; - addr_t iaddr; - int64_t maddr; - int i = 0; - - addr_t gather_base[NTOP] = {0}; - addr_t scatter_base[NTOP] = {0}; - - bool breakout = false; - printf("\nSecond pass to fill gather / scatter subtraces\n"); - fflush(stdout); - - trace_entry_t* p_drtrace = NULL; - trace_entry_t drtrace[NBUFS]; // was static (1024 bytes) - - while (drline_read(fp_drtrace, drtrace, &p_drtrace, &iret) && !breakout) { - - //decode drtrace - drline = p_drtrace; - - /*****************************/ - /** INSTR 0xa-0x10 and 0x1e **/ - /*****************************/ - if (((drline->type >= 0xa) && (drline->type <= 0x10)) || (drline->type == 0x1e)) { - iaddr = drline->addr; - - /***********************/ - /** MEM 0x00 and 0x01 **/ - /***********************/ - } - else if ((drline->type == 0x0) || (drline->type == 0x1)) { - - maddr = drline->addr / drline->size; - - if ((++mcnt % PERSAMPLE) == 0) { -#if SAMPLE - break; -#endif - printf("."); - fflush(stdout); - } - - // gather ? 
- if (drline->type == 0x0) { - - for (i = 0; i < gather_metrics.ntop; i++) { - - //found it - if (iaddr == gather_metrics.top[i]) { - - if (gather_base[i] == 0) - gather_base[i] = maddr; - - //Add index - if (gather_metrics.offset[i] >= PSIZE) { - printf("WARNING: Need to increase PSIZE. Truncating trace...\n"); - breakout = true; - } - //printf("g -- %d % d\n", i, gather_offset[i]); fflush(stdout); - gather_metrics.patterns[i][gather_metrics.offset[i]++] = (int64_t) (maddr - gather_base[i]); - - break; - } - } - } - // scatter ? - else { - - for (i = 0; i < scatter_metrics.ntop; i++) { - - //found it - if (iaddr == scatter_metrics.top[i]) { - - //set base - if (scatter_base[i] == 0) - scatter_base[i] = maddr; - - //Add index - if (scatter_metrics.offset[i] >= PSIZE) { - printf("WARNING: Need to increase PSIZE. Truncating trace...\n"); - breakout = true; - } - scatter_metrics.patterns[i][scatter_metrics.offset[i]++] = (int64_t) (maddr - scatter_base[i]); - break; - } - } - } - } // MEM - - p_drtrace++; - - } //while drtrace -} - -int get_top_target(InstrInfo & target_iinfo, Metrics & target_metrics) -{ - int target_ntop = 0; - int bestcnt; - - for (int j = 0; j < NTOP; j++) { - - int bestcnt = 0; - addr_t best_iaddr = 0; - int bestidx = -1; - - for (int k = 0; k < NGS; k++) { - - if (target_iinfo.get_icnt()[k] == 0) - continue; - - if (target_iinfo.get_iaddrs()[k] == 0) { - break; - } - - if (target_iinfo.get_icnt()[k] > bestcnt) { - bestcnt = target_iinfo.get_icnt()[k]; - best_iaddr = target_iinfo.get_iaddrs()[k]; - bestidx = k; - } - } - - if (best_iaddr == 0) { - break; - } else { - target_ntop++; - target_metrics.top[j] = best_iaddr; - target_metrics.top_idx[j] = bestidx; - target_metrics.tot[j] = target_iinfo.get_icnt()[bestidx]; - target_iinfo.get_icnt()[bestidx] = 0; - - //printf("%sIADDR -- %016lx: %16lu -- %s\n", target_metrics.getShortName().c_str(), target_metrics.top[j], target_metrics.tot[j], target_metrics.get_srcline()[bestidx]); - } - } - - return 
target_ntop; -} - -void handle_trace_entry( - trace_entry_t *drline, - TraceInfo & trace_info, - InstrInfo & gather_iinfo, - InstrInfo & scatter_iinfo, - Metrics & gather_metrics, - Metrics & scatter_metrics, - InstrWindow & iw) -{ - int i, j, k, w; - int w_rw_idx; - int w_idx; - int gs; - - if (drline->type == 0 && drline->size == 0) { - std::ostringstream os; - os << "Invalid trace entry: type: [" << drline->type << "] size: [" << drline->size << "]"; - throw GSDataError(os.str()); - } - - /*****************************/ - /** INSTR 0xa-0x10 and 0x1e **/ - /*****************************/ - if (((drline->type >= 0xa) && (drline->type <= 0x10)) || (drline->type == 0x1e)) { - - iw.iaddr = drline->addr; - - //nops - trace_info.opcodes++; - trace_info.did_opcode = true; - - /***********************/ - /** MEM 0x00 and 0x01 **/ - /***********************/ - } else if ((drline->type == 0x0) || (drline->type == 0x1)) { - - w_rw_idx = drline->type; - - //printf("M DRTRACE -- iaddr: %016lx addr: %016lx cl_start: %d bytes: %d\n", - // iaddr, drline->addr, drline->addr % 64, drline->size); - - if ((++trace_info.mcnt % PERSAMPLE) == 0) { -#if SAMPLE - break; -#endif - printf("."); - fflush(stdout); - } - - //is iaddr in window - w_idx = -1; - for (i = 0; i < IWINDOW; i++) { - - //new iaddr - if (iw.w_iaddrs[w_rw_idx][i] == -1) { - w_idx = i; - break; - - //iaddr exists - } else if (iw.w_iaddrs[w_rw_idx][i] == iw.iaddr) { - w_idx = i; - break; - } - } - - //new window - if ((w_idx == -1) || (iw.w_bytes[w_rw_idx][w_idx] >= VBYTES) || - (iw.w_cnt[w_rw_idx][w_idx] >= VBYTES)) { - - /***************************/ - //do analysis - /***************************/ - //i = each window - for (w = 0; w < 2; w++) { // 2 - - for (i = 0; i < IWINDOW; i++) { // 1024 - - if (iw.w_iaddrs[w][i] == -1) - break; - - int byte = iw.w_bytes[w][i] / iw.w_cnt[w][i]; - - //First pass - //Determine - //gather/scatter? 
- gs = -1; - for (j = 0; j < iw.w_cnt[w][i]; j++) { - - //address and cl - iw.maddr = iw.w_maddr[w][i][j]; - assert(iw.maddr > -1); - - //previous addr - if (j == 0) - iw.maddr_prev = iw.maddr - 1; - - //gather / scatter - if (iw.maddr != iw.maddr_prev) { - if ((gs == -1) && (abs(iw.maddr - iw.maddr_prev) > 1)) - gs = w; - } - iw.maddr_prev = iw.maddr; - } - - for (j = 0; j < iw.w_cnt[w][i]; j++) { - - if (gs == -1) { - trace_info.other_cnt++; - continue; - } - } - - if (gs == 0) { // GATHER - - trace_info.gather_occ_avg += iw.w_cnt[w][i]; - gather_metrics.cnt += 1.0; - - for (k = 0; k < NGS; k++) { - if (gather_iinfo.get_iaddrs()[k] == 0) { - gather_iinfo.get_iaddrs()[k] = iw.w_iaddrs[w][i]; - (gather_iinfo.get_icnt()[k])++; - gather_iinfo.get_occ()[k] += iw.w_cnt[w][i]; - break; - } - - if (gather_iinfo.get_iaddrs()[k] == iw.w_iaddrs[w][i]) { - (gather_iinfo.get_icnt()[k])++; - gather_iinfo.get_occ()[k] += iw.w_cnt[w][i]; - break; - } - - } - - } else if (gs == 1) { // SCATTER - - trace_info.scatter_occ_avg += iw.w_cnt[w][i]; - scatter_metrics.cnt += 1.0; - - for (k = 0; k < NGS; k++) { - if (scatter_iinfo.get_iaddrs()[k] == 0) { - scatter_iinfo.get_iaddrs()[k] = iw.w_iaddrs[w][i]; - (scatter_iinfo.get_icnt()[k])++; - scatter_iinfo.get_occ()[k] += iw.w_cnt[w][i]; - break; - } - - if (scatter_iinfo.get_iaddrs()[k] == iw.w_iaddrs[w][i]) { - (scatter_iinfo.get_icnt()[k])++; - scatter_iinfo.get_occ()[k] += iw.w_cnt[w][i]; - break; - } - } - } - } //WINDOW i - - w_idx = 0; - - //reset windows - for (i = 0; i < IWINDOW; i++) { - iw.w_iaddrs[w][i] = -1; - iw.w_bytes[w][i] = 0; - iw.w_cnt[w][i] = 0; - for (j = 0; j < VBYTES; j++) - iw.w_maddr[w][i][j] = -1; - } - } // rw w - } //analysis - - //Set window values - iw.w_iaddrs[w_rw_idx][w_idx] = iw.iaddr; - iw.w_maddr[w_rw_idx][w_idx][iw.w_cnt[w_rw_idx][w_idx]] = drline->addr / drline->size; - iw.w_bytes[w_rw_idx][w_idx] += drline->size; - - //num access per iaddr in loop - iw.w_cnt[w_rw_idx][w_idx]++; - - if 
(trace_info.did_opcode) { - - trace_info.opcodes_mem++; - trace_info.addrs++; - trace_info.did_opcode = false; - - } else { - trace_info.addrs++; - } - - /***********************/ - /** SOMETHING ELSE **/ - /***********************/ - } else { - trace_info.other++; - } - - trace_info.drtrace_lines++; -} - -void display_stats(TraceInfo & trace_info, Metrics & gather_metrics, Metrics & scatter_metrics) -{ - printf("\n RESULTS \n"); - - printf("DRTRACE STATS\n"); - printf("DRTRACE LINES: %16lu\n", trace_info.drtrace_lines); - printf("OPCODES: %16lu\n", trace_info.opcodes); - printf("MEMOPCODES: %16lu\n", trace_info.opcodes_mem); - printf("LOAD/STORES: %16lu\n", trace_info.addrs); - printf("OTHER: %16lu\n", trace_info.other); - - printf("\n"); - - printf("GATHER/SCATTER STATS: \n"); - printf("LOADS per GATHER: %16.3f\n", trace_info.gather_occ_avg); - printf("STORES per SCATTER: %16.3f\n", trace_info.scatter_occ_avg); - printf("GATHER COUNT: %16.3f (log2)\n", log(gather_metrics.cnt) / log(2.0)); - printf("SCATTER COUNT: %16.3f (log2)\n", log(scatter_metrics.cnt) / log(2.0)); - printf("OTHER COUNT: %16.3f (log2)\n", log(trace_info.other_cnt) / log(2.0)); -} // First Pass void process_traces( @@ -797,28 +112,6 @@ void update_source_lines( scatter_metrics.cnt = update_source_lines_from_binary(scatter_iinfo, scatter_metrics, binary); } -void update_metrics( - InstrInfo & gather_iinfo, - InstrInfo & scatter_iinfo, - Metrics & gather_metrics, - Metrics & scatter_metrics, - gzFile & fp_drtrace) -{ - // Get top gathers - gather_metrics.ntop = get_top_target(gather_iinfo, gather_metrics); - - // Get top scatters - scatter_metrics.ntop = get_top_target(scatter_iinfo, scatter_metrics); - - // ----------------- Second Pass ----------------- - - second_pass(fp_drtrace, gather_metrics, scatter_metrics); - - // ----------------- Normalize ----------------- - - normalize_stats(gather_metrics); - normalize_stats(scatter_metrics); -} gzFile open_trace_file(const std::string & 
trace_file_name) { diff --git a/gs_patterns.h b/gs_patterns.h index aff02f3..bdf9ab0 100644 --- a/gs_patterns.h +++ b/gs_patterns.h @@ -4,6 +4,9 @@ #pragma once +#include +#include + #define MAX(X, Y) (((X) < (Y)) ? Y : X) #define MIN(X, Y) (((X) > (Y)) ? Y : X) #define ABS(X) (((X) < 0) ? (-1) * (X) : X) @@ -158,7 +161,7 @@ class InstrInfo class TraceInfo // Stats { public: - /// TODO: need an reset method to zero out counters + /// TODO: need a reset method to zero out counters uint64_t opcodes = 0; uint64_t opcodes_mem = 0; diff --git a/gs_patterns_core.cpp b/gs_patterns_core.cpp new file mode 100644 index 0000000..106f411 --- /dev/null +++ b/gs_patterns_core.cpp @@ -0,0 +1,625 @@ +// +// Created by christopher on 4/2/24. +// + +#include /// TODO: use cassert instead +#include +#include + +#include +#include + +#include "utils.h" +#include "gs_patterns.h" + +int drline_read(gzFile fp, trace_entry_t *val, trace_entry_t **p_val, int *edx) { + + int idx; + + idx = (*edx) / sizeof(trace_entry_t); + //first read + if (*p_val == NULL) { + *edx = gzread(fp, val, sizeof(trace_entry_t) * NBUFS); + *p_val = val; + + } else if (*p_val == &val[idx]) { + *edx = gzread(fp, val, sizeof(trace_entry_t) * NBUFS); + *p_val = val; + } + + if (*edx == 0) + return 0; + + return 1; +} + +void translate_iaddr(const std::string & binary, char *source_line, addr_t iaddr) { + + int i = 0; + int ntranslated = 0; + char path[MAX_LINE_LENGTH]; + char cmd[MAX_LINE_LENGTH]; + FILE *fp; + + sprintf(cmd, "addr2line -e %s 0x%lx", binary.c_str(), iaddr); + + /* Open the command for reading. */ + fp = popen(cmd, "r"); + if (fp == NULL) { + throw GSError("Failed to run command"); + } + + /* Read the output a line at a time - output it. 
*/ + while (fgets(path, sizeof(path), fp) != NULL) { + strcpy(source_line, path); + source_line[strcspn(source_line, "\n")] = 0; + } + + /* close */ + pclose(fp); + + return; +} + + +void create_metrics_file(FILE *fp, FILE *fp2, const char* trace_file_name, Metrics & target_metrics, bool & first_spatter) +{ + int i = 0; + int j = 0; + + //Create stride histogram and create spatter + int sidx; + int unique_strides; + int64_t idx, pidx; + int64_t n_stride[1027]; + double outbounds; + + if (first_spatter) printf("\n"); + + printf("\n"); + for (i = 0; i < target_metrics.ntop; i++) { + printf("***************************************************************************************\n"); + + unique_strides = 0; + for (j = 0; j < 1027; j++) + n_stride[j] = 0; + + for (j = 1; j < target_metrics.offset[i]; j++) { + sidx = target_metrics.patterns[i][j] - target_metrics.patterns[i][j - 1] + 513; + sidx = (sidx < 1) ? 0 : sidx; + sidx = (sidx > 1025) ? 1026 : sidx; + n_stride[sidx]++; + } + + for (j = 0; j < 1027; j++) { + if (n_stride[j] > 0) { + unique_strides++; + } + } + + outbounds = (double) (n_stride[0] + n_stride[1026]) / (double) target_metrics.offset[i]; + + //if ( ( (unique_strides > NSTRIDES) || (outbounds > OUTTHRESH) ) && (gather_offset[i] > USTRIDES ) ){ + if (1) { + + //create a binary file + FILE *fp_bin; + char *bin_name; + bin_name = (char*)str_replace(trace_file_name, ".gz", ".sbin"); + if (strstr(bin_name, ".sbin") == 0) { + strncat(bin_name, ".sbin", strlen(".sbin")+1); + } + printf("%s\n", bin_name); + fp_bin = fopen(bin_name, "w"); + if (fp_bin == NULL) { + throw GSFileError("Could not open " + std::string(bin_name) + "!"); + } + + printf("%sIADDR -- %p\n", target_metrics.getShortName().c_str(), (void*) target_metrics.top[i]); + printf("SRCLINE -- %s\n", target_metrics.get_srcline()[target_metrics.top_idx[i]]); + printf("%s %c -- %6.3f%c (512-bit chunks)\n", target_metrics.type_as_string().c_str(), + '%', 100.0 * (double) target_metrics.tot[i] / 
target_metrics.cnt, '%'); + printf("NDISTS -- %ld\n", (long int)target_metrics.offset[i]); + + int64_t nlcnt = 0; + for (j = 0; j < target_metrics.offset[i]; j++) { + + if (j < 39) { + printf("%10ld ", target_metrics.patterns[i][j]); + fflush(stdout); + if ((++nlcnt % 13) == 0) + printf("\n"); + + } else if (j >= (target_metrics.offset[i] - 39)) { + printf("%10ld ", target_metrics.patterns[i][j]); + fflush(stdout); + if ((++nlcnt % 13) == 0) + printf("\n"); + + } else if (j == 39) + printf("...\n"); + } + printf("\n"); + printf("DIST HISTOGRAM --\n"); + + for (j = 0; j < 1027; j++) { + if (n_stride[j] > 0) { + if (j == 0) + printf("%6s: %ld\n", "< -512", n_stride[j]); + else if (j == 1026) + printf("%6s: %ld\n", "> 512", n_stride[j]); + else + printf("%6d: %ld\n", j - 513, n_stride[j]); + } + } + + if (first_spatter) { + first_spatter = false; + fprintf(fp, " {\"kernel\":\"%s\", \"pattern\":[", target_metrics.getName().c_str()); + } else { + fprintf(fp, ",\n {\"kernel\":\"%s\", \"pattern\":[", target_metrics.getName().c_str()); + } + + fwrite(target_metrics.patterns[i], sizeof(uint64_t), target_metrics.offset[i], fp_bin); + fclose(fp_bin); + + for (j = 0; j < target_metrics.offset[i] - 1; j++) + fprintf(fp, "%ld,", target_metrics.patterns[i][j]); + fprintf(fp, "%ld", target_metrics.patterns[i][target_metrics.offset[i] - 1]); + fprintf(fp, "], \"count\":1}"); + + fprintf(fp2, "%s,%s,%ld,%6.3f\n", + target_metrics.get_srcline()[target_metrics.top_idx[i]], target_metrics.getShortName().c_str(), + (long int)target_metrics.offset[i], + 100.0 * (double) target_metrics.tot[i] / target_metrics.cnt); + } + printf("***************************************************************************************\n\n"); + } +} + +void create_spatter_file(const char* trace_file_name, Metrics & gather_metrics, Metrics & scatter_metrics) +{ + //Create spatter file + FILE *fp, *fp2; + char *json_name, *gs_info; + json_name = (char*)str_replace(trace_file_name, ".gz", ".json"); + if 
(strstr(json_name, ".json") == 0) { + strncat(json_name, ".json", strlen(".json")+1); + } + + fp = fopen(json_name, "w"); + if (fp == NULL) { + throw GSFileError("Could not open " + std::string(json_name) + "!"); + } + gs_info = (char*)str_replace(trace_file_name, ".gz", ".txt"); + if (strstr(gs_info, ".json") == 0) { + strncat(gs_info, ".txt", strlen(".txt")+1); + } + + fp2 = fopen(gs_info, "w"); + if (fp2 == NULL) { + throw GSFileError("Could not open " + std::string(gs_info) + "!"); + } + + //Header + fprintf(fp, "[ "); + fprintf(fp2, "#sourceline, g/s, indices, percentage of g/s in trace\n"); + + bool first_spatter = true; + create_metrics_file(fp, fp2, trace_file_name, gather_metrics, first_spatter); + + create_metrics_file(fp, fp2, trace_file_name, scatter_metrics, first_spatter); + + //Footer + fprintf(fp, " ]"); + fclose(fp); + fclose(fp2); +} + +void normalize_stats(Metrics & target_metrics) +{ + //Normalize + int64_t smallest; + for (int i = 0; i < target_metrics.ntop; i++) { + + //Find smallest + smallest = 0; + for (int j = 0; j < target_metrics.offset[i]; j++) { + if (target_metrics.patterns[i][j] < smallest) + smallest = target_metrics.patterns[i][j]; + } + + smallest *= -1; + + //Normalize + for (int j = 0; j < target_metrics.offset[i]; j++) { + target_metrics.patterns[i][j] += smallest; + } + } +} + +void handle_trace_entry( + trace_entry_t *drline, + TraceInfo & trace_info, + InstrInfo & gather_iinfo, + InstrInfo & scatter_iinfo, + Metrics & gather_metrics, + Metrics & scatter_metrics, + InstrWindow & iw) +{ + int i, j, k, w; + int w_rw_idx; + int w_idx; + int gs; + + if (drline->type == 0 && drline->size == 0) { + std::ostringstream os; + os << "Invalid trace entry: type: [" << drline->type << "] size: [" << drline->size << "]"; + throw GSDataError(os.str()); + } + + /*****************************/ + /** INSTR 0xa-0x10 and 0x1e **/ + /*****************************/ + if (((drline->type >= 0xa) && (drline->type <= 0x10)) || (drline->type == 0x1e)) 
{ + + iw.iaddr = drline->addr; + + //nops + trace_info.opcodes++; + trace_info.did_opcode = true; + + /***********************/ + /** MEM 0x00 and 0x01 **/ + /***********************/ + } else if ((drline->type == 0x0) || (drline->type == 0x1)) { + + w_rw_idx = drline->type; + + //printf("M DRTRACE -- iaddr: %016lx addr: %016lx cl_start: %d bytes: %d\n", + // iaddr, drline->addr, drline->addr % 64, drline->size); + + if ((++trace_info.mcnt % PERSAMPLE) == 0) { +#if SAMPLE + break; +#endif + printf("."); + fflush(stdout); + } + + //is iaddr in window + w_idx = -1; + for (i = 0; i < IWINDOW; i++) { + + //new iaddr + if (iw.w_iaddrs[w_rw_idx][i] == -1) { + w_idx = i; + break; + + //iaddr exists + } else if (iw.w_iaddrs[w_rw_idx][i] == iw.iaddr) { + w_idx = i; + break; + } + } + + //new window + if ((w_idx == -1) || (iw.w_bytes[w_rw_idx][w_idx] >= VBYTES) || + (iw.w_cnt[w_rw_idx][w_idx] >= VBYTES)) { + + /***************************/ + //do analysis + /***************************/ + //i = each window + for (w = 0; w < 2; w++) { // 2 + + for (i = 0; i < IWINDOW; i++) { // 1024 + + if (iw.w_iaddrs[w][i] == -1) + break; + + int byte = iw.w_bytes[w][i] / iw.w_cnt[w][i]; + + //First pass + //Determine + //gather/scatter? 
+ gs = -1; + for (j = 0; j < iw.w_cnt[w][i]; j++) { + + //address and cl + iw.maddr = iw.w_maddr[w][i][j]; + assert(iw.maddr > -1); + + //previous addr + if (j == 0) + iw.maddr_prev = iw.maddr - 1; + + //gather / scatter + if (iw.maddr != iw.maddr_prev) { + if ((gs == -1) && (abs(iw.maddr - iw.maddr_prev) > 1)) + gs = w; + } + iw.maddr_prev = iw.maddr; + } + + for (j = 0; j < iw.w_cnt[w][i]; j++) { + + if (gs == -1) { + trace_info.other_cnt++; + continue; + } + } + + if (gs == 0) { // GATHER + + trace_info.gather_occ_avg += iw.w_cnt[w][i]; + gather_metrics.cnt += 1.0; + + for (k = 0; k < NGS; k++) { + if (gather_iinfo.get_iaddrs()[k] == 0) { + gather_iinfo.get_iaddrs()[k] = iw.w_iaddrs[w][i]; + (gather_iinfo.get_icnt()[k])++; + gather_iinfo.get_occ()[k] += iw.w_cnt[w][i]; + break; + } + + if (gather_iinfo.get_iaddrs()[k] == iw.w_iaddrs[w][i]) { + (gather_iinfo.get_icnt()[k])++; + gather_iinfo.get_occ()[k] += iw.w_cnt[w][i]; + break; + } + + } + + } else if (gs == 1) { // SCATTER + + trace_info.scatter_occ_avg += iw.w_cnt[w][i]; + scatter_metrics.cnt += 1.0; + + for (k = 0; k < NGS; k++) { + if (scatter_iinfo.get_iaddrs()[k] == 0) { + scatter_iinfo.get_iaddrs()[k] = iw.w_iaddrs[w][i]; + (scatter_iinfo.get_icnt()[k])++; + scatter_iinfo.get_occ()[k] += iw.w_cnt[w][i]; + break; + } + + if (scatter_iinfo.get_iaddrs()[k] == iw.w_iaddrs[w][i]) { + (scatter_iinfo.get_icnt()[k])++; + scatter_iinfo.get_occ()[k] += iw.w_cnt[w][i]; + break; + } + } + } + } //WINDOW i + + w_idx = 0; + + //reset windows + for (i = 0; i < IWINDOW; i++) { + iw.w_iaddrs[w][i] = -1; + iw.w_bytes[w][i] = 0; + iw.w_cnt[w][i] = 0; + for (j = 0; j < VBYTES; j++) + iw.w_maddr[w][i][j] = -1; + } + } // rw w + } //analysis + + //Set window values + iw.w_iaddrs[w_rw_idx][w_idx] = iw.iaddr; + iw.w_maddr[w_rw_idx][w_idx][iw.w_cnt[w_rw_idx][w_idx]] = drline->addr / drline->size; + iw.w_bytes[w_rw_idx][w_idx] += drline->size; + + //num access per iaddr in loop + iw.w_cnt[w_rw_idx][w_idx]++; + + if 
(trace_info.did_opcode) { + + trace_info.opcodes_mem++; + trace_info.addrs++; + trace_info.did_opcode = false; + + } else { + trace_info.addrs++; + } + + /***********************/ + /** SOMETHING ELSE **/ + /***********************/ + } else { + trace_info.other++; + } + + trace_info.drtrace_lines++; +} + +void display_stats(TraceInfo & trace_info, Metrics & gather_metrics, Metrics & scatter_metrics) +{ + printf("\n RESULTS \n"); + + printf("DRTRACE STATS\n"); + printf("DRTRACE LINES: %16lu\n", trace_info.drtrace_lines); + printf("OPCODES: %16lu\n", trace_info.opcodes); + printf("MEMOPCODES: %16lu\n", trace_info.opcodes_mem); + printf("LOAD/STORES: %16lu\n", trace_info.addrs); + printf("OTHER: %16lu\n", trace_info.other); + + printf("\n"); + + printf("GATHER/SCATTER STATS: \n"); + printf("LOADS per GATHER: %16.3f\n", trace_info.gather_occ_avg); + printf("STORES per SCATTER: %16.3f\n", trace_info.scatter_occ_avg); + printf("GATHER COUNT: %16.3f (log2)\n", log(gather_metrics.cnt) / log(2.0)); + printf("SCATTER COUNT: %16.3f (log2)\n", log(scatter_metrics.cnt) / log(2.0)); + printf("OTHER COUNT: %16.3f (log2)\n", log(trace_info.other_cnt) / log(2.0)); +} + + +int get_top_target(InstrInfo & target_iinfo, Metrics & target_metrics) +{ + int target_ntop = 0; + int bestcnt; + + for (int j = 0; j < NTOP; j++) { + + int bestcnt = 0; + addr_t best_iaddr = 0; + int bestidx = -1; + + for (int k = 0; k < NGS; k++) { + + if (target_iinfo.get_icnt()[k] == 0) + continue; + + if (target_iinfo.get_iaddrs()[k] == 0) { + break; + } + + if (target_iinfo.get_icnt()[k] > bestcnt) { + bestcnt = target_iinfo.get_icnt()[k]; + best_iaddr = target_iinfo.get_iaddrs()[k]; + bestidx = k; + } + } + + if (best_iaddr == 0) { + break; + } else { + target_ntop++; + target_metrics.top[j] = best_iaddr; + target_metrics.top_idx[j] = bestidx; + target_metrics.tot[j] = target_iinfo.get_icnt()[bestidx]; + target_iinfo.get_icnt()[bestidx] = 0; + + //printf("%sIADDR -- %016lx: %16lu -- %s\n", 
target_metrics.getShortName().c_str(), target_metrics.top[j], target_metrics.tot[j], target_metrics.get_srcline()[bestidx]); + } + } + + return target_ntop; +} + +// Second Pass +void second_pass(gzFile fp_drtrace, Metrics & gather_metrics, Metrics & scatter_metrics) +{ + uint64_t mcnt = 0; // used our own local mcnt while iterating over file in this method. + int iret = 0; + trace_entry_t* drline; + addr_t iaddr; + int64_t maddr; + int i = 0; + + addr_t gather_base[NTOP] = {0}; + addr_t scatter_base[NTOP] = {0}; + + bool breakout = false; + printf("\nSecond pass to fill gather / scatter subtraces\n"); + fflush(stdout); + + trace_entry_t* p_drtrace = NULL; + trace_entry_t drtrace[NBUFS]; // was static (1024 bytes) + + while (drline_read(fp_drtrace, drtrace, &p_drtrace, &iret) && !breakout) { + + //decode drtrace + drline = p_drtrace; + + /*****************************/ + /** INSTR 0xa-0x10 and 0x1e **/ + /*****************************/ + if (((drline->type >= 0xa) && (drline->type <= 0x10)) || (drline->type == 0x1e)) { + iaddr = drline->addr; + + /***********************/ + /** MEM 0x00 and 0x01 **/ + /***********************/ + } + else if ((drline->type == 0x0) || (drline->type == 0x1)) { + + maddr = drline->addr / drline->size; + + if ((++mcnt % PERSAMPLE) == 0) { +#if SAMPLE + break; +#endif + printf("."); + fflush(stdout); + } + + // gather ? + if (drline->type == 0x0) { + + for (i = 0; i < gather_metrics.ntop; i++) { + + //found it + if (iaddr == gather_metrics.top[i]) { + + if (gather_base[i] == 0) + gather_base[i] = maddr; + + //Add index + if (gather_metrics.offset[i] >= PSIZE) { + printf("WARNING: Need to increase PSIZE. Truncating trace...\n"); + breakout = true; + } + //printf("g -- %d % d\n", i, gather_offset[i]); fflush(stdout); + gather_metrics.patterns[i][gather_metrics.offset[i]++] = (int64_t) (maddr - gather_base[i]); + + break; + } + } + } + // scatter ? 
+ else { + + for (i = 0; i < scatter_metrics.ntop; i++) { + + //found it + if (iaddr == scatter_metrics.top[i]) { + + //set base + if (scatter_base[i] == 0) + scatter_base[i] = maddr; + + //Add index + if (scatter_metrics.offset[i] >= PSIZE) { + printf("WARNING: Need to increase PSIZE. Truncating trace...\n"); + breakout = true; + } + scatter_metrics.patterns[i][scatter_metrics.offset[i]++] = (int64_t) (maddr - scatter_base[i]); + break; + } + } + } + } // MEM + + p_drtrace++; + + } //while drtrace +} + +void update_metrics( + InstrInfo & gather_iinfo, + InstrInfo & scatter_iinfo, + Metrics & gather_metrics, + Metrics & scatter_metrics, + gzFile & fp_drtrace) +{ + // Get top gathers + gather_metrics.ntop = get_top_target(gather_iinfo, gather_metrics); + + // Get top scatters + scatter_metrics.ntop = get_top_target(scatter_iinfo, scatter_metrics); + + // ----------------- Second Pass ----------------- + + second_pass(fp_drtrace, gather_metrics, scatter_metrics); + + // ----------------- Normalize ----------------- + + normalize_stats(gather_metrics); + normalize_stats(scatter_metrics); +} + + + diff --git a/gs_patterns_core.h b/gs_patterns_core.h new file mode 100644 index 0000000..47c4ee3 --- /dev/null +++ b/gs_patterns_core.h @@ -0,0 +1,54 @@ +// +// Created by christopher on 4/2/24. 
+// + +#pragma once + +#include +#include +#include + +#include "utils.h" +#include "gs_patterns.h" + +int drline_read(gzFile fp, + trace_entry_t * val, + trace_entry_t ** p_val, + int * edx); + +void translate_iaddr(const std::string & binary, + char * source_line, + addr_t iaddr); + +void create_metrics_file(FILE * fp, + FILE * fp2, + const char* trace_file_name, + Metrics & target_metrics, + bool & first_spatter); + +void create_spatter_file(const char* trace_file_name, + Metrics & gather_metrics, + Metrics & scatter_metrics); + +void normalize_stats(Metrics & target_metrics); + +void handle_trace_entry(trace_entry_t *drline, + TraceInfo & trace_info, + InstrInfo & gather_iinfo, + InstrInfo & scatter_iinfo, + Metrics & gather_metrics, + Metrics & scatter_metrics, + InstrWindow & iw); + +void display_stats(TraceInfo & trace_info, + Metrics & gather_metrics, + Metrics & scatter_metrics); + +int get_top_target(InstrInfo & target_iinfo, + Metrics & target_metrics); + +void update_metrics(InstrInfo & gather_iinfo, + InstrInfo & scatter_iinfo, + Metrics & gather_metrics, + Metrics & scatter_metrics, + gzFile & fp_drtrace); diff --git a/utils.cpp b/utils.cpp new file mode 100644 index 0000000..69ba3c3 --- /dev/null +++ b/utils.cpp @@ -0,0 +1,109 @@ +// +// Created by christopher on 4/2/24. 
+// + +#include +#include +#include +#include + +#include "utils.h" + +static inline int popcount(uint64_t x) { + int c; + + for (c = 0; x != 0; x >>= 1) + if (x & 1) + c++; + return c; +} + +//string tools +int startswith(const char *a, const char *b) { + if (strncmp(b, a, strlen(b)) == 0) + return 1; + return 0; +} + +int endswith(const char *a, const char *b) { + int idx = strlen(a); + int preidx = strlen(b); + + if (preidx >= idx) + return 0; + if (strncmp(b, &a[idx - preidx], preidx) == 0) + return 1; + return 0; +} + +//https://stackoverflow.com/questions/779875/what-function-is-to-replace-a-substring-from-a-string-in-c +const char *str_replace(const char *orig, const char *rep, const char *with) { + char *result; // the return string + char *ins; // the next insert point + char *tmp; // varies + int len_rep; // length of rep (the string to remove) + int len_with; // length of with (the string to replace rep with) + int len_front; // distance between rep and end of last rep + int count; // number of replacements + + // sanity checks and initialization + if (!orig) + return NULL; + + if (!rep) + return orig; + + len_rep = strlen(rep); + if (len_rep == 0) + return NULL; // empty rep causes infinite loop during count + if (!with) + with = ""; + len_with = strlen(with); + + // count the number of replacements needed + ins = (char*)orig; + for (count = 0; tmp = strstr(ins, rep); ++count) { + ins = tmp + len_rep; + } + + tmp = result = (char*)malloc(strlen(orig) + (len_with - len_rep) * count + 1); + + if (!result) + return NULL; + + // first time through the loop, all the variable are set correctly + // from here on, + // tmp points to the end of the result string + // ins points to the next occurrence of rep in orig + // orig points to the remainder of orig after "end of rep" + while (count--) { + ins = (char*)strstr(orig, rep); + len_front = ins - orig; + tmp = strncpy(tmp, orig, len_front) + len_front; + tmp = strcpy(tmp, with) + len_with; + orig += len_front + 
len_rep; // move to next "end of rep" + } + strcpy(tmp, orig); + return result; +} + +char *get_str(char *line, char *bparse, char *aparse) { + + char *sline; + + sline = (char*)str_replace(line, bparse, ""); + sline = (char*)str_replace(sline, aparse, ""); + + return sline; +} + +int cnt_str(char *line, char c) { + + int cnt = 0; + for (int i = 0; line[i] != '\0'; i++) { + if (line[i] == c) + cnt++; + } + + return cnt; +} diff --git a/utils.h b/utils.h new file mode 100644 index 0000000..f7bffa8 --- /dev/null +++ b/utils.h @@ -0,0 +1,20 @@ +// +// Created by christopher on 4/2/24. +// + +#pragma once + +#include +#include + +static inline int popcount(uint64_t x); + +int startswith(const char *a, const char *b); + +int endswith(const char *a, const char *b); + +const char *str_replace(const char *orig, const char *rep, const char *with); + +char *get_str(char *line, char *bparse, char *aparse); + +int cnt_str(char *line, char c); \ No newline at end of file From 9fa0dc6829c1c795876b582253a2383a10c99b1d Mon Sep 17 00:00:00 2001 From: christopher Date: Wed, 3 Apr 2024 09:01:52 -0400 Subject: [PATCH 15/76] Split static lib and binary (libgs_patterns_core.a and binary gs_patterns) --- CMakeLists.txt | 10 +++++++--- gs_patterns.cpp | 1 - 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d80f436..e237407 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,14 +4,18 @@ set (CMAKE_VERBOSE_MAKEFILE "1") project( gs_patterns VERSION 1.0 LANGUAGES CXX) -add_executable( - gs_patterns +add_library(gs_patterns_core STATIC utils.h utils.cpp gs_patterns.h - gs_patterns.cpp gs_patterns_core.h gs_patterns_core.cpp ) +add_executable( gs_patterns + gs_patterns.cpp ) + +target_link_libraries(gs_patterns gs_patterns_core) + + set(CMAKE_CXX_STANDARD_LIBRARIES "-lm -lz ${CMAKE_CXX_STANDARD_LIBRARIES}") diff --git a/gs_patterns.cpp b/gs_patterns.cpp index 7aa65a2..9895258 100644 --- a/gs_patterns.cpp +++ b/gs_patterns.cpp @@ -1,5 +1,4 @@ 
- #include #include #include From f90b736cb68f6019c747766329983b3607fe6bc2 Mon Sep 17 00:00:00 2001 From: christopher Date: Wed, 3 Apr 2024 18:47:06 -0400 Subject: [PATCH 16/76] Implemented MemPatterns abstract base class and MemPatternsForPin for pin trace file pattern generation. Simplified signature of some library functions which now use MemPatterns interfaces. Simplified gs_patterns.cpp main to use the new MemPatternsForPin and interfaces. --- CMakeLists.txt | 4 + gs_patterns.cpp | 185 ++++++++++++++++++++++++++++--------------- gs_patterns.h | 23 ++++++ gs_patterns_core.cpp | 70 +++++----------- gs_patterns_core.h | 33 +++----- 5 files changed, 178 insertions(+), 137 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e237407..f2773ff 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,6 +4,10 @@ set (CMAKE_VERBOSE_MAKEFILE "1") project( gs_patterns VERSION 1.0 LANGUAGES CXX) +set(CMAKE_CXX_STANDARD 20) +set(CMAKE_CXX_STANDARD_REQUIRED On) +#set(CMAKE_CXX_EXTENSIONS Off) + add_library(gs_patterns_core STATIC utils.h utils.cpp diff --git a/gs_patterns.cpp b/gs_patterns.cpp index 9895258..03ec40f 100644 --- a/gs_patterns.cpp +++ b/gs_patterns.cpp @@ -33,6 +33,106 @@ int64_t InstrWindow::w_maddr[2][IWINDOW][VBYTES]; int64_t InstrWindow::w_cnt[2][IWINDOW]; #endif +gzFile open_trace_file(const std::string & trace_file_name) +{ + gzFile fp; + + fp = gzopen(trace_file_name.c_str(), "hrb"); + if (fp == NULL) { + throw GSFileError("Could not open " + trace_file_name + "!"); + } + return fp; +} + +void close_trace_file (gzFile & fp) +{ + gzclose(fp); +} + +class MemPatternsForPin : public MemPatterns +{ +public: + MemPatternsForPin() : _metrics(GATHER, SCATTER), + _iinfo(GATHER, SCATTER) + { + } + + virtual ~MemPatternsForPin() override { } + + void handle_trace_entry(const trace_entry_t * tentry) override; + void generate_patterns() override; + + Metrics & get_gather_metrics() override { return _metrics.first; } + Metrics & get_scatter_metrics() override 
{ return _metrics.second; } + InstrInfo & get_gather_iinfo () override { return _iinfo.first; } + InstrInfo & get_scatter_iinfo () override { return _iinfo.second; } + TraceInfo & get_trace_info() override { return _trace_info; } + InstrWindow & get_instr_window() override { return _iw; } + + void set_trace_file(const std::string & trace_file_name) { _trace_file_name = trace_file_name; } + const std::string & get_trace_file_name() { return _trace_file_name; } + + void set_binary_file(const std::string & binary_file_name) { _binary_file_name = binary_file_name; } + const std::string & get_binary_file_name() { return _binary_file_name; } + + void update_metrics(); + +private: + std::pair _metrics; + std::pair _iinfo; + TraceInfo _trace_info; + InstrWindow _iw; + + std::string _trace_file_name; + std::string _binary_file_name; +}; + +void update_source_lines(MemPatternsForPin & mp); + +void MemPatternsForPin::handle_trace_entry(const trace_entry_t *tentry) +{ + // Call libgs_patterns + ::handle_trace_entry(*this, tentry); +} + +void MemPatternsForPin::generate_patterns() +{ + // ----------------- Update Source Lines ----------------- + + ::update_source_lines(*this); + + // ----------------- Update Metrics ----------------- + + update_metrics(); + + // ----------------- Create Spatter File ----------------- + + ::create_spatter_file(*this, _trace_file_name.c_str()); + +} + +void MemPatternsForPin::update_metrics() +{ + gzFile fp_drtrace = ::open_trace_file(get_trace_file_name()); + + // Get top gathers + get_gather_metrics().ntop = get_top_target(get_gather_iinfo(), get_gather_metrics()); + + // Get top scatters + get_scatter_metrics().ntop = get_top_target(get_scatter_iinfo(), get_scatter_metrics()); + + // ----------------- Second Pass ----------------- + + ::second_pass(fp_drtrace, get_gather_metrics(), get_scatter_metrics()); + + // ----------------- Normalize ----------------- + + ::normalize_stats(get_gather_metrics()); + 
::normalize_stats(get_scatter_metrics()); + + close_trace_file(fp_drtrace); +} + double update_source_lines_from_binary(InstrInfo & target_iinfo, Metrics & target_metrics, const std::string & binary_file_name) { double scatter_cnt = 0.0; @@ -54,20 +154,15 @@ double update_source_lines_from_binary(InstrInfo & target_iinfo, Metrics & targe return scatter_cnt; } - // First Pass -void process_traces( - TraceInfo & trace_info, - InstrInfo & gather_iinfo, - InstrInfo & scatter_iinfo, - Metrics & gather_metrics, - Metrics & scatter_metrics, - gzFile & fp_drtrace) +void process_traces(MemPatternsForPin & mp) { int iret = 0; trace_entry_t *drline; InstrWindow iw; + gzFile fp_drtrace = open_trace_file(mp.get_trace_file_name()); + printf("First pass to find top gather / scatter iaddresses\n"); fflush(stdout); @@ -78,54 +173,35 @@ void process_traces( //decode drtrace drline = p_drtrace; - handle_trace_entry(drline, trace_info, gather_iinfo, scatter_iinfo, gather_metrics, scatter_metrics, iw); + //handle_trace_entry(drline, trace_info, gather_iinfo, scatter_iinfo, gather_metrics, scatter_metrics, iw); + mp.handle_trace_entry(drline); p_drtrace++; } + close_trace_file(fp_drtrace); + //metrics - trace_info.gather_occ_avg /= gather_metrics.cnt; - trace_info.scatter_occ_avg /= scatter_metrics.cnt; + mp.get_trace_info().gather_occ_avg /= mp.get_gather_metrics().cnt; + mp.get_trace_info().scatter_occ_avg /= mp.get_scatter_metrics().cnt; - display_stats(trace_info, gather_metrics, scatter_metrics); + display_stats(mp); } -void update_source_lines( - InstrInfo & gather_iinfo, - InstrInfo & scatter_iinfo, - Metrics & gather_metrics, - Metrics & scatter_metrics, - const std::string & binary) +void update_source_lines(MemPatternsForPin & mp) { // Find source lines for gathers - Must have symbol printf("\nSymbol table lookup for gathers..."); fflush(stdout); - gather_metrics.cnt = update_source_lines_from_binary(gather_iinfo, gather_metrics, binary); + mp.get_gather_metrics().cnt = 
update_source_lines_from_binary(mp.get_gather_iinfo(), mp.get_gather_metrics(), mp.get_binary_file_name()); // Find source lines for scatters printf("Symbol table lookup for scatters..."); fflush(stdout); - scatter_metrics.cnt = update_source_lines_from_binary(scatter_iinfo, scatter_metrics, binary); -} - - -gzFile open_trace_file(const std::string & trace_file_name) -{ - gzFile fp; - - fp = gzopen(trace_file_name.c_str(), "hrb"); - if (fp == NULL) { - throw GSFileError("Could not open " + trace_file_name + "!"); - } - return fp; -} - -void close_trace_file (gzFile & fp) -{ - gzclose(fp); + mp.get_scatter_metrics().cnt = update_source_lines_from_binary(mp.get_scatter_iinfo(), mp.get_scatter_metrics(), mp.get_binary_file_name()); } int main(int argc, char **argv) @@ -136,41 +212,18 @@ int main(int argc, char **argv) throw GSError("Invalid arguments, should be: trace.gz binary_file_name"); } - gzFile fp_drtrace; - std::string trace_file_name(argv[1]); - std::string binary_file_name(argv[2]); + MemPatternsForPin mp; - fp_drtrace = open_trace_file(trace_file_name); - - Metrics gather_metrics(GATHER); - Metrics scatter_metrics(SCATTER); - - InstrInfo gather_iinfo(GATHER); - InstrInfo scatter_iinfo(SCATTER); - - TraceInfo trace_info; + mp.set_trace_file(argv[1]); + mp.set_binary_file(argv[2]); // ----------------- Process Traces ----------------- - process_traces(trace_info, gather_iinfo, scatter_iinfo, gather_metrics, scatter_metrics, fp_drtrace); - - close_trace_file(fp_drtrace); - - // ----------------- Update Source Lines ----------------- - - update_source_lines(gather_iinfo, scatter_iinfo, gather_metrics, scatter_metrics, binary_file_name); - - // ----------------- Update Metrics ----------------- - fp_drtrace = open_trace_file(argv[1]); - - update_metrics(gather_iinfo, scatter_iinfo, gather_metrics, scatter_metrics, fp_drtrace); - - close_trace_file(fp_drtrace); - - // ----------------- Create Spatter File ----------------- + process_traces(mp); - 
create_spatter_file(argv[1], gather_metrics, scatter_metrics); + // ----------------- Generate Patterns ----------------- + mp.generate_patterns(); } catch (const GSFileError & ex) { diff --git a/gs_patterns.h b/gs_patterns.h index bdf9ab0..c6bc16f 100644 --- a/gs_patterns.h +++ b/gs_patterns.h @@ -219,5 +219,28 @@ class InstrWindow addr_t iaddr; int64_t maddr_prev; int64_t maddr; +}; + +class MemPatterns +{ +public: + MemPatterns() { } + virtual ~MemPatterns() { }; + + MemPatterns(const MemPatterns &) = delete; + MemPatterns & operator=(const MemPatterns &) = delete; + + virtual void handle_trace_entry(const trace_entry_t * te) = 0; + virtual void generate_patterns() = 0; + + //virtual void update_metrics(const std::string & binary_file_name) = 0; + //virtual void create_spatter_file(const std::string & file_prefix) = 0; + + virtual Metrics & get_gather_metrics() = 0; + virtual Metrics & get_scatter_metrics() = 0; + virtual InstrInfo & get_gather_iinfo() = 0; + virtual InstrInfo & get_scatter_iinfo() = 0; + virtual TraceInfo & get_trace_info() = 0; + virtual InstrWindow & get_instr_window() = 0; }; \ No newline at end of file diff --git a/gs_patterns_core.cpp b/gs_patterns_core.cpp index 106f411..36dc1bc 100644 --- a/gs_patterns_core.cpp +++ b/gs_patterns_core.cpp @@ -177,7 +177,7 @@ void create_metrics_file(FILE *fp, FILE *fp2, const char* trace_file_name, Metri } } -void create_spatter_file(const char* trace_file_name, Metrics & gather_metrics, Metrics & scatter_metrics) +void create_spatter_file(MemPatterns & mp, const char *trace_file_name) { //Create spatter file FILE *fp, *fp2; @@ -206,9 +206,9 @@ void create_spatter_file(const char* trace_file_name, Metrics & gather_metrics, fprintf(fp2, "#sourceline, g/s, indices, percentage of g/s in trace\n"); bool first_spatter = true; - create_metrics_file(fp, fp2, trace_file_name, gather_metrics, first_spatter); + create_metrics_file(fp, fp2, trace_file_name, mp.get_gather_metrics(), first_spatter); - 
create_metrics_file(fp, fp2, trace_file_name, scatter_metrics, first_spatter); + create_metrics_file(fp, fp2, trace_file_name, mp.get_scatter_metrics(), first_spatter); //Footer fprintf(fp, " ]"); @@ -238,20 +238,20 @@ void normalize_stats(Metrics & target_metrics) } } -void handle_trace_entry( - trace_entry_t *drline, - TraceInfo & trace_info, - InstrInfo & gather_iinfo, - InstrInfo & scatter_iinfo, - Metrics & gather_metrics, - Metrics & scatter_metrics, - InstrWindow & iw) +void handle_trace_entry(MemPatterns & mp, const trace_entry_t *drline) { int i, j, k, w; int w_rw_idx; int w_idx; int gs; + auto & trace_info = mp.get_trace_info(); + auto & gather_iinfo = mp.get_gather_iinfo(); + auto & scatter_iinfo = mp.get_scatter_iinfo(); + auto & gather_metrics = mp.get_gather_metrics(); + auto & scatter_metrics = mp.get_scatter_metrics(); + auto & iw = mp.get_instr_window(); + if (drline->type == 0 && drline->size == 0) { std::ostringstream os; os << "Invalid trace entry: type: [" << drline->type << "] size: [" << drline->size << "]"; @@ -434,25 +434,25 @@ void handle_trace_entry( trace_info.drtrace_lines++; } -void display_stats(TraceInfo & trace_info, Metrics & gather_metrics, Metrics & scatter_metrics) +void display_stats(MemPatterns & mp) { printf("\n RESULTS \n"); printf("DRTRACE STATS\n"); - printf("DRTRACE LINES: %16lu\n", trace_info.drtrace_lines); - printf("OPCODES: %16lu\n", trace_info.opcodes); - printf("MEMOPCODES: %16lu\n", trace_info.opcodes_mem); - printf("LOAD/STORES: %16lu\n", trace_info.addrs); - printf("OTHER: %16lu\n", trace_info.other); + printf("DRTRACE LINES: %16lu\n", mp.get_trace_info().drtrace_lines); + printf("OPCODES: %16lu\n", mp.get_trace_info().opcodes); + printf("MEMOPCODES: %16lu\n", mp.get_trace_info().opcodes_mem); + printf("LOAD/STORES: %16lu\n", mp.get_trace_info().addrs); + printf("OTHER: %16lu\n", mp.get_trace_info().other); printf("\n"); printf("GATHER/SCATTER STATS: \n"); - printf("LOADS per GATHER: %16.3f\n", 
trace_info.gather_occ_avg); - printf("STORES per SCATTER: %16.3f\n", trace_info.scatter_occ_avg); - printf("GATHER COUNT: %16.3f (log2)\n", log(gather_metrics.cnt) / log(2.0)); - printf("SCATTER COUNT: %16.3f (log2)\n", log(scatter_metrics.cnt) / log(2.0)); - printf("OTHER COUNT: %16.3f (log2)\n", log(trace_info.other_cnt) / log(2.0)); + printf("LOADS per GATHER: %16.3f\n", mp.get_trace_info().gather_occ_avg); + printf("STORES per SCATTER: %16.3f\n", mp.get_trace_info().scatter_occ_avg); + printf("GATHER COUNT: %16.3f (log2)\n", log(mp.get_gather_metrics().cnt) / log(2.0)); + printf("SCATTER COUNT: %16.3f (log2)\n", log(mp.get_scatter_metrics().cnt) / log(2.0)); + printf("OTHER COUNT: %16.3f (log2)\n", log(mp.get_trace_info().other_cnt) / log(2.0)); } @@ -597,29 +597,3 @@ void second_pass(gzFile fp_drtrace, Metrics & gather_metrics, Metrics & scatter_ } //while drtrace } - -void update_metrics( - InstrInfo & gather_iinfo, - InstrInfo & scatter_iinfo, - Metrics & gather_metrics, - Metrics & scatter_metrics, - gzFile & fp_drtrace) -{ - // Get top gathers - gather_metrics.ntop = get_top_target(gather_iinfo, gather_metrics); - - // Get top scatters - scatter_metrics.ntop = get_top_target(scatter_iinfo, scatter_metrics); - - // ----------------- Second Pass ----------------- - - second_pass(fp_drtrace, gather_metrics, scatter_metrics); - - // ----------------- Normalize ----------------- - - normalize_stats(gather_metrics); - normalize_stats(scatter_metrics); -} - - - diff --git a/gs_patterns_core.h b/gs_patterns_core.h index 47c4ee3..5e2eca9 100644 --- a/gs_patterns_core.h +++ b/gs_patterns_core.h @@ -26,29 +26,16 @@ void create_metrics_file(FILE * fp, Metrics & target_metrics, bool & first_spatter); -void create_spatter_file(const char* trace_file_name, - Metrics & gather_metrics, - Metrics & scatter_metrics); +void create_spatter_file(MemPatterns & mp, const char *trace_file_name); + +void handle_trace_entry(MemPatterns & mp, const trace_entry_t *drline); + +void 
display_stats(MemPatterns & mp); + +void update_metrics(MemPatterns & mp, gzFile & fp_drtrace); + +int get_top_target(InstrInfo & target_iinfo, Metrics & target_metrics); void normalize_stats(Metrics & target_metrics); -void handle_trace_entry(trace_entry_t *drline, - TraceInfo & trace_info, - InstrInfo & gather_iinfo, - InstrInfo & scatter_iinfo, - Metrics & gather_metrics, - Metrics & scatter_metrics, - InstrWindow & iw); - -void display_stats(TraceInfo & trace_info, - Metrics & gather_metrics, - Metrics & scatter_metrics); - -int get_top_target(InstrInfo & target_iinfo, - Metrics & target_metrics); - -void update_metrics(InstrInfo & gather_iinfo, - InstrInfo & scatter_iinfo, - Metrics & gather_metrics, - Metrics & scatter_metrics, - gzFile & fp_drtrace); +void second_pass(gzFile fp_drtrace, Metrics & gather_metrics, Metrics & scatter_metrics); \ No newline at end of file From ded14093c3a1d6f27d805971a3da94412479fd99 Mon Sep 17 00:00:00 2001 From: christopher Date: Wed, 3 Apr 2024 19:57:01 -0400 Subject: [PATCH 17/76] Removed dependency on gzip files in gs_patterns_core.cpp, moved that to gs_patterns.cpp which handles trace file based input. Refactored second_pass handling so it can be done with any code able to get a collection of trace_t's. 
--- gs_patterns.cpp | 63 ++++++++++++++++++- gs_patterns_core.cpp | 145 ++++++++++++++++--------------------------- gs_patterns_core.h | 5 +- 3 files changed, 117 insertions(+), 96 deletions(-) diff --git a/gs_patterns.cpp b/gs_patterns.cpp index 03ec40f..cea0ea9 100644 --- a/gs_patterns.cpp +++ b/gs_patterns.cpp @@ -33,6 +33,12 @@ int64_t InstrWindow::w_maddr[2][IWINDOW][VBYTES]; int64_t InstrWindow::w_cnt[2][IWINDOW]; #endif +// Forward declarations +class MemPatternsForPin; + +void update_source_lines(MemPatternsForPin & mp); +void process_second_pass(gzFile & fp_drtrace, MemPatternsForPin & mp); + gzFile open_trace_file(const std::string & trace_file_name) { gzFile fp; @@ -49,6 +55,27 @@ void close_trace_file (gzFile & fp) gzclose(fp); } +int drline_read(gzFile fp, trace_entry_t *val, trace_entry_t **p_val, int *edx) { + + int idx; + + idx = (*edx) / sizeof(trace_entry_t); + //first read + if (*p_val == NULL) { + *edx = gzread(fp, val, sizeof(trace_entry_t) * NBUFS); + *p_val = val; + + } else if (*p_val == &val[idx]) { + *edx = gzread(fp, val, sizeof(trace_entry_t) * NBUFS); + *p_val = val; + } + + if (*edx == 0) + return 0; + + return 1; +} + class MemPatternsForPin : public MemPatterns { public: @@ -87,8 +114,6 @@ class MemPatternsForPin : public MemPatterns std::string _binary_file_name; }; -void update_source_lines(MemPatternsForPin & mp); - void MemPatternsForPin::handle_trace_entry(const trace_entry_t *tentry) { // Call libgs_patterns @@ -123,7 +148,8 @@ void MemPatternsForPin::update_metrics() // ----------------- Second Pass ----------------- - ::second_pass(fp_drtrace, get_gather_metrics(), get_scatter_metrics()); + //::second_pass(fp_drtrace, get_gather_metrics(), get_scatter_metrics()); + ::process_second_pass(fp_drtrace, *this); // ----------------- Normalize ----------------- @@ -189,6 +215,37 @@ void process_traces(MemPatternsForPin & mp) } +void process_second_pass(gzFile & fp_drtrace, MemPatternsForPin & mp) +{ + uint64_t mcnt = 0; // used our 
own local mcnt while iterating over file in this method. + int iret = 0; + trace_entry_t *drline; + + // State carried thru + addr_t iaddr; + int64_t maddr; + addr_t gather_base[NTOP] = {0}; + addr_t scatter_base[NTOP] = {0}; + + bool breakout = false; + printf("\nSecond pass to fill gather / scatter subtraces\n"); + fflush(stdout); + + trace_entry_t *p_drtrace = NULL; + trace_entry_t drtrace[NBUFS]; // was static (1024 bytes) + + while (drline_read(fp_drtrace, drtrace, &p_drtrace, &iret) && !breakout) { + + //decode drtrace + drline = p_drtrace; + + breakout = ::handle_2nd_pass_trace_entry(drline, mp.get_gather_metrics(), mp.get_scatter_metrics(), + iaddr, maddr, mcnt, gather_base, scatter_base); + + p_drtrace++; + } //while drtrace +} + void update_source_lines(MemPatternsForPin & mp) { // Find source lines for gathers - Must have symbol diff --git a/gs_patterns_core.cpp b/gs_patterns_core.cpp index 36dc1bc..331f2b0 100644 --- a/gs_patterns_core.cpp +++ b/gs_patterns_core.cpp @@ -4,7 +4,6 @@ #include /// TODO: use cassert instead #include -#include #include #include @@ -12,27 +11,6 @@ #include "utils.h" #include "gs_patterns.h" -int drline_read(gzFile fp, trace_entry_t *val, trace_entry_t **p_val, int *edx) { - - int idx; - - idx = (*edx) / sizeof(trace_entry_t); - //first read - if (*p_val == NULL) { - *edx = gzread(fp, val, sizeof(trace_entry_t) * NBUFS); - *p_val = val; - - } else if (*p_val == &val[idx]) { - *edx = gzread(fp, val, sizeof(trace_entry_t) * NBUFS); - *p_val = val; - } - - if (*edx == 0) - return 0; - - return 1; -} - void translate_iaddr(const std::string & binary, char *source_line, addr_t iaddr) { int i = 0; @@ -499,101 +477,84 @@ int get_top_target(InstrInfo & target_iinfo, Metrics & target_metrics) return target_ntop; } -// Second Pass -void second_pass(gzFile fp_drtrace, Metrics & gather_metrics, Metrics & scatter_metrics) +bool handle_2nd_pass_trace_entry(trace_entry_t * drline, + Metrics & gather_metrics, Metrics & scatter_metrics, + 
addr_t & iaddr, int64_t & maddr, uint64_t & mcnt, + addr_t * gather_base, addr_t * scatter_base) { - uint64_t mcnt = 0; // used our own local mcnt while iterating over file in this method. int iret = 0; - trace_entry_t* drline; - addr_t iaddr; - int64_t maddr; int i = 0; - addr_t gather_base[NTOP] = {0}; - addr_t scatter_base[NTOP] = {0}; - bool breakout = false; - printf("\nSecond pass to fill gather / scatter subtraces\n"); - fflush(stdout); - trace_entry_t* p_drtrace = NULL; - trace_entry_t drtrace[NBUFS]; // was static (1024 bytes) - - while (drline_read(fp_drtrace, drtrace, &p_drtrace, &iret) && !breakout) { - - //decode drtrace - drline = p_drtrace; - - /*****************************/ - /** INSTR 0xa-0x10 and 0x1e **/ - /*****************************/ - if (((drline->type >= 0xa) && (drline->type <= 0x10)) || (drline->type == 0x1e)) { - iaddr = drline->addr; + /*****************************/ + /** INSTR 0xa-0x10 and 0x1e **/ + /*****************************/ + if (((drline->type >= 0xa) && (drline->type <= 0x10)) || (drline->type == 0x1e)) { + iaddr = drline->addr; - /***********************/ - /** MEM 0x00 and 0x01 **/ - /***********************/ - } - else if ((drline->type == 0x0) || (drline->type == 0x1)) { + /***********************/ + /** MEM 0x00 and 0x01 **/ + /***********************/ + } + else if ((drline->type == 0x0) || (drline->type == 0x1)) { - maddr = drline->addr / drline->size; + maddr = drline->addr / drline->size; - if ((++mcnt % PERSAMPLE) == 0) { + if ((++mcnt % PERSAMPLE) == 0) { #if SAMPLE - break; + break; #endif - printf("."); - fflush(stdout); - } - - // gather ? - if (drline->type == 0x0) { + printf("."); + fflush(stdout); + } - for (i = 0; i < gather_metrics.ntop; i++) { + // gather ? 
+ if (drline->type == 0x0) { - //found it - if (iaddr == gather_metrics.top[i]) { + for (i = 0; i < gather_metrics.ntop; i++) { - if (gather_base[i] == 0) - gather_base[i] = maddr; + //found it + if (iaddr == gather_metrics.top[i]) { - //Add index - if (gather_metrics.offset[i] >= PSIZE) { - printf("WARNING: Need to increase PSIZE. Truncating trace...\n"); - breakout = true; - } - //printf("g -- %d % d\n", i, gather_offset[i]); fflush(stdout); - gather_metrics.patterns[i][gather_metrics.offset[i]++] = (int64_t) (maddr - gather_base[i]); + if (gather_base[i] == 0) + gather_base[i] = maddr; - break; + //Add index + if (gather_metrics.offset[i] >= PSIZE) { + printf("WARNING: Need to increase PSIZE. Truncating trace...\n"); + breakout = true; } + //printf("g -- %d % d\n", i, gather_offset[i]); fflush(stdout); + gather_metrics.patterns[i][gather_metrics.offset[i]++] = (int64_t) (maddr - gather_base[i]); + + break; } } - // scatter ? - else { + } + // scatter ? + else { - for (i = 0; i < scatter_metrics.ntop; i++) { + for (i = 0; i < scatter_metrics.ntop; i++) { - //found it - if (iaddr == scatter_metrics.top[i]) { + //found it + if (iaddr == scatter_metrics.top[i]) { - //set base - if (scatter_base[i] == 0) - scatter_base[i] = maddr; + //set base + if (scatter_base[i] == 0) + scatter_base[i] = maddr; - //Add index - if (scatter_metrics.offset[i] >= PSIZE) { - printf("WARNING: Need to increase PSIZE. Truncating trace...\n"); - breakout = true; - } - scatter_metrics.patterns[i][scatter_metrics.offset[i]++] = (int64_t) (maddr - scatter_base[i]); - break; + //Add index + if (scatter_metrics.offset[i] >= PSIZE) { + printf("WARNING: Need to increase PSIZE. 
Truncating trace...\n"); + breakout = true; } + scatter_metrics.patterns[i][scatter_metrics.offset[i]++] = (int64_t) (maddr - scatter_base[i]); + break; } } - } // MEM - - p_drtrace++; + } + } // MEM - } //while drtrace + return breakout; } diff --git a/gs_patterns_core.h b/gs_patterns_core.h index 5e2eca9..c93ba8e 100644 --- a/gs_patterns_core.h +++ b/gs_patterns_core.h @@ -38,4 +38,7 @@ int get_top_target(InstrInfo & target_iinfo, Metrics & target_metrics); void normalize_stats(Metrics & target_metrics); -void second_pass(gzFile fp_drtrace, Metrics & gather_metrics, Metrics & scatter_metrics); \ No newline at end of file +bool handle_2nd_pass_trace_entry(trace_entry_t * drline, + Metrics & gather_metrics, Metrics & scatter_metrics, + addr_t & iaddr, int64_t & maddr, uint64_t & mcnt, + addr_t * gather_base, addr_t * scatter_base); From 29d76ef886534561e7b6937e39164f0480b6b687 Mon Sep 17 00:00:00 2001 From: christopher Date: Wed, 3 Apr 2024 22:43:59 -0400 Subject: [PATCH 18/76] Removed use of .gz tracefile from gs_patterns_core. Improved filename creation logic as well. 
--- gs_patterns.cpp | 23 +++++++++++++---- gs_patterns.h | 3 --- gs_patterns_core.cpp | 59 ++++++++++++++++++-------------------------- gs_patterns_core.h | 12 ++++----- 4 files changed, 48 insertions(+), 49 deletions(-) diff --git a/gs_patterns.cpp b/gs_patterns.cpp index cea0ea9..8dc86a2 100644 --- a/gs_patterns.cpp +++ b/gs_patterns.cpp @@ -44,7 +44,7 @@ gzFile open_trace_file(const std::string & trace_file_name) gzFile fp; fp = gzopen(trace_file_name.c_str(), "hrb"); - if (fp == NULL) { + if (NULL == fp) { throw GSFileError("Could not open " + trace_file_name + "!"); } return fp; @@ -61,7 +61,7 @@ int drline_read(gzFile fp, trace_entry_t *val, trace_entry_t **p_val, int *edx) idx = (*edx) / sizeof(trace_entry_t); //first read - if (*p_val == NULL) { + if (NULL == *p_val) { *edx = gzread(fp, val, sizeof(trace_entry_t) * NBUFS); *p_val = val; @@ -70,7 +70,7 @@ int drline_read(gzFile fp, trace_entry_t *val, trace_entry_t **p_val, int *edx) *p_val = val; } - if (*edx == 0) + if (0 == *edx) return 0; return 1; @@ -104,6 +104,8 @@ class MemPatternsForPin : public MemPatterns void update_metrics(); + std::string get_trace_file_prefix (); + private: std::pair _metrics; std::pair _iinfo; @@ -132,7 +134,7 @@ void MemPatternsForPin::generate_patterns() // ----------------- Create Spatter File ----------------- - ::create_spatter_file(*this, _trace_file_name.c_str()); + ::create_spatter_file(*this, get_trace_file_prefix()); } @@ -159,6 +161,17 @@ void MemPatternsForPin::update_metrics() close_trace_file(fp_drtrace); } +std::string MemPatternsForPin::get_trace_file_prefix() +{ + std::string prefix = _trace_file_name; + size_t pos = std::string::npos; + while (std::string::npos != (pos = prefix.find(".gz"))) + { + prefix.replace(pos, 3, ""); + } + return prefix; +} + double update_source_lines_from_binary(InstrInfo & target_iinfo, Metrics & target_metrics, const std::string & binary_file_name) { double scatter_cnt = 0.0; @@ -166,7 +179,7 @@ double 
update_source_lines_from_binary(InstrInfo & target_iinfo, Metrics & targe //Check it is not a library for (int k = 0; k < NGS; k++) { - if (target_iinfo.get_iaddrs()[k] == 0) { + if (0 == target_iinfo.get_iaddrs()[k]) { break; } translate_iaddr(binary_file_name, target_metrics.get_srcline()[k], target_iinfo.get_iaddrs()[k]); diff --git a/gs_patterns.h b/gs_patterns.h index c6bc16f..ba59160 100644 --- a/gs_patterns.h +++ b/gs_patterns.h @@ -233,9 +233,6 @@ class MemPatterns virtual void handle_trace_entry(const trace_entry_t * te) = 0; virtual void generate_patterns() = 0; - //virtual void update_metrics(const std::string & binary_file_name) = 0; - //virtual void create_spatter_file(const std::string & file_prefix) = 0; - virtual Metrics & get_gather_metrics() = 0; virtual Metrics & get_scatter_metrics() = 0; virtual InstrInfo & get_gather_iinfo() = 0; diff --git a/gs_patterns_core.cpp b/gs_patterns_core.cpp index 331f2b0..eeaaa8c 100644 --- a/gs_patterns_core.cpp +++ b/gs_patterns_core.cpp @@ -23,7 +23,7 @@ void translate_iaddr(const std::string & binary, char *source_line, addr_t iaddr /* Open the command for reading. 
*/ fp = popen(cmd, "r"); - if (fp == NULL) { + if (NULL == fp) { throw GSError("Failed to run command"); } @@ -40,7 +40,7 @@ void translate_iaddr(const std::string & binary, char *source_line, addr_t iaddr } -void create_metrics_file(FILE *fp, FILE *fp2, const char* trace_file_name, Metrics & target_metrics, bool & first_spatter) +void create_metrics_file(FILE *fp, FILE *fp2, const std::string & file_prefix, Metrics & target_metrics, bool & first_spatter) { int i = 0; int j = 0; @@ -82,14 +82,10 @@ void create_metrics_file(FILE *fp, FILE *fp2, const char* trace_file_name, Metri //create a binary file FILE *fp_bin; - char *bin_name; - bin_name = (char*)str_replace(trace_file_name, ".gz", ".sbin"); - if (strstr(bin_name, ".sbin") == 0) { - strncat(bin_name, ".sbin", strlen(".sbin")+1); - } - printf("%s\n", bin_name); - fp_bin = fopen(bin_name, "w"); - if (fp_bin == NULL) { + std::string bin_name = file_prefix + ".sbin"; + printf("%s\n", bin_name.c_str()); + fp_bin = fopen(bin_name.c_str(), "w"); + if (NULL == fp_bin) { throw GSFileError("Could not open " + std::string(bin_name) + "!"); } @@ -105,16 +101,16 @@ void create_metrics_file(FILE *fp, FILE *fp2, const char* trace_file_name, Metri if (j < 39) { printf("%10ld ", target_metrics.patterns[i][j]); fflush(stdout); - if ((++nlcnt % 13) == 0) + if (0 == (++nlcnt % 13)) printf("\n"); } else if (j >= (target_metrics.offset[i] - 39)) { printf("%10ld ", target_metrics.patterns[i][j]); fflush(stdout); - if ((++nlcnt % 13) == 0) + if (0 == (++nlcnt % 13)) printf("\n"); - } else if (j == 39) + } else if (39 == j) printf("...\n"); } printf("\n"); @@ -122,9 +118,9 @@ void create_metrics_file(FILE *fp, FILE *fp2, const char* trace_file_name, Metri for (j = 0; j < 1027; j++) { if (n_stride[j] > 0) { - if (j == 0) + if (0 == j) printf("%6s: %ld\n", "< -512", n_stride[j]); - else if (j == 1026) + else if (1026 == j) printf("%6s: %ld\n", "> 512", n_stride[j]); else printf("%6d: %ld\n", j - 513, n_stride[j]); @@ -155,28 +151,21 @@ 
void create_metrics_file(FILE *fp, FILE *fp2, const char* trace_file_name, Metri } } -void create_spatter_file(MemPatterns & mp, const char *trace_file_name) +void create_spatter_file(MemPatterns & mp, const std::string & file_prefix) { - //Create spatter file + // Create spatter file FILE *fp, *fp2; - char *json_name, *gs_info; - json_name = (char*)str_replace(trace_file_name, ".gz", ".json"); - if (strstr(json_name, ".json") == 0) { - strncat(json_name, ".json", strlen(".json")+1); - } - fp = fopen(json_name, "w"); - if (fp == NULL) { - throw GSFileError("Could not open " + std::string(json_name) + "!"); - } - gs_info = (char*)str_replace(trace_file_name, ".gz", ".txt"); - if (strstr(gs_info, ".json") == 0) { - strncat(gs_info, ".txt", strlen(".txt")+1); + std::string json_name = file_prefix + ".json"; + fp = fopen(json_name.c_str(), "w"); + if (NULL == fp) { + throw GSFileError("Could not open " + json_name + "!"); } - fp2 = fopen(gs_info, "w"); - if (fp2 == NULL) { - throw GSFileError("Could not open " + std::string(gs_info) + "!"); + std::string gs_info = file_prefix + ".txt"; + fp2 = fopen(gs_info.c_str(), "w"); + if (NULL == fp2) { + throw GSFileError("Could not open " + gs_info + "!"); } //Header @@ -184,9 +173,9 @@ void create_spatter_file(MemPatterns & mp, const char *trace_file_name) fprintf(fp2, "#sourceline, g/s, indices, percentage of g/s in trace\n"); bool first_spatter = true; - create_metrics_file(fp, fp2, trace_file_name, mp.get_gather_metrics(), first_spatter); + create_metrics_file(fp, fp2, file_prefix, mp.get_gather_metrics(), first_spatter); - create_metrics_file(fp, fp2, trace_file_name, mp.get_scatter_metrics(), first_spatter); + create_metrics_file(fp, fp2, file_prefix, mp.get_scatter_metrics(), first_spatter); //Footer fprintf(fp, " ]"); @@ -230,7 +219,7 @@ void handle_trace_entry(MemPatterns & mp, const trace_entry_t *drline) auto & scatter_metrics = mp.get_scatter_metrics(); auto & iw = mp.get_instr_window(); - if (drline->type == 0 && 
drline->size == 0) { + if (0 == drline->type && 0 == drline->size) { std::ostringstream os; os << "Invalid trace entry: type: [" << drline->type << "] size: [" << drline->size << "]"; throw GSDataError(os.str()); diff --git a/gs_patterns_core.h b/gs_patterns_core.h index c93ba8e..ccd64b5 100644 --- a/gs_patterns_core.h +++ b/gs_patterns_core.h @@ -20,13 +20,13 @@ void translate_iaddr(const std::string & binary, char * source_line, addr_t iaddr); -void create_metrics_file(FILE * fp, - FILE * fp2, - const char* trace_file_name, - Metrics & target_metrics, - bool & first_spatter); +void create_metrics_file(FILE * fp, + FILE * fp2, + const std::string & file_prefix, + Metrics & target_metrics, + bool & first_spatter); -void create_spatter_file(MemPatterns & mp, const char *trace_file_name); +void create_spatter_file(MemPatterns & mp, const std::string & file_prefix); void handle_trace_entry(MemPatterns & mp, const trace_entry_t *drline); From 284cd99d02cd085ba2fd8f69a889daf4c18168fb Mon Sep 17 00:00:00 2001 From: christopher Date: Wed, 3 Apr 2024 23:55:14 -0400 Subject: [PATCH 19/76] Converted remaining functions used by MemPatternsForPin into member methods. First cut of MemPatternsForNV (gsnv_patterns.cpp) -- no functional but compiles. 
--- CMakeLists.txt | 1 + gs_patterns.cpp | 77 ++++++++++++++++++++--------- gs_patterns.h | 15 +++--- gs_patterns_core.h | 1 - gsnv_patterns.cpp | 120 +++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 184 insertions(+), 30 deletions(-) create mode 100644 gsnv_patterns.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index f2773ff..ea83cfe 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -14,6 +14,7 @@ add_library(gs_patterns_core STATIC gs_patterns.h gs_patterns_core.h gs_patterns_core.cpp + gsnv_patterns.cpp ) add_executable( gs_patterns diff --git a/gs_patterns.cpp b/gs_patterns.cpp index 8dc86a2..3f5ad57 100644 --- a/gs_patterns.cpp +++ b/gs_patterns.cpp @@ -11,6 +11,7 @@ #include "gs_patterns.h" #include "gs_patterns_core.h" +#include "utils.h" //Terminal colors #define KNRM "\x1B[0m" @@ -33,12 +34,6 @@ int64_t InstrWindow::w_maddr[2][IWINDOW][VBYTES]; int64_t InstrWindow::w_cnt[2][IWINDOW]; #endif -// Forward declarations -class MemPatternsForPin; - -void update_source_lines(MemPatternsForPin & mp); -void process_second_pass(gzFile & fp_drtrace, MemPatternsForPin & mp); - gzFile open_trace_file(const std::string & trace_file_name) { gzFile fp; @@ -89,6 +84,9 @@ class MemPatternsForPin : public MemPatterns void handle_trace_entry(const trace_entry_t * tentry) override; void generate_patterns() override; + Metrics & get_metrics(metrics_type) override; + InstrInfo & get_iinfo(metrics_type) override; + Metrics & get_gather_metrics() override { return _metrics.first; } Metrics & get_scatter_metrics() override { return _metrics.second; } InstrInfo & get_gather_iinfo () override { return _iinfo.first; } @@ -106,6 +104,11 @@ class MemPatternsForPin : public MemPatterns std::string get_trace_file_prefix (); + void process_traces(); + void update_source_lines(); + double update_source_lines_from_binary(metrics_type); + void process_second_pass(gzFile & fp_drtrace); + private: std::pair _metrics; std::pair _iinfo; @@ -116,6 +119,32 @@ class 
MemPatternsForPin : public MemPatterns std::string _binary_file_name; }; +Metrics & MemPatternsForPin::get_metrics(metrics_type m) +{ + switch (m) + { + case GATHER : return _metrics.first; + break; + case SCATTER : return _metrics.second; + break; + default: + throw GSError("Unable to get Metrics - Invalid Metrics Type: " + m); + } +} + +InstrInfo & MemPatternsForPin::get_iinfo(metrics_type m) +{ + switch (m) + { + case GATHER : return _iinfo.first; + break; + case SCATTER : return _iinfo.second; + break; + default: + throw GSError("Unable to get InstrInfo - Invalid Metrics Type: " + m); + } +} + void MemPatternsForPin::handle_trace_entry(const trace_entry_t *tentry) { // Call libgs_patterns @@ -126,7 +155,7 @@ void MemPatternsForPin::generate_patterns() { // ----------------- Update Source Lines ----------------- - ::update_source_lines(*this); + update_source_lines(); // ----------------- Update Metrics ----------------- @@ -150,8 +179,7 @@ void MemPatternsForPin::update_metrics() // ----------------- Second Pass ----------------- - //::second_pass(fp_drtrace, get_gather_metrics(), get_scatter_metrics()); - ::process_second_pass(fp_drtrace, *this); + process_second_pass(fp_drtrace); // ----------------- Normalize ----------------- @@ -172,17 +200,20 @@ std::string MemPatternsForPin::get_trace_file_prefix() return prefix; } -double update_source_lines_from_binary(InstrInfo & target_iinfo, Metrics & target_metrics, const std::string & binary_file_name) +double MemPatternsForPin::update_source_lines_from_binary(metrics_type mType) { double scatter_cnt = 0.0; + InstrInfo & target_iinfo = get_iinfo(mType); + Metrics & target_metrics = get_metrics(mType); + //Check it is not a library for (int k = 0; k < NGS; k++) { if (0 == target_iinfo.get_iaddrs()[k]) { break; } - translate_iaddr(binary_file_name, target_metrics.get_srcline()[k], target_iinfo.get_iaddrs()[k]); + translate_iaddr(get_binary_file_name(), target_metrics.get_srcline()[k], target_iinfo.get_iaddrs()[k]); 
if (startswith(target_metrics.get_srcline()[k], "?")) target_iinfo.get_icnt()[k] = 0; @@ -194,13 +225,13 @@ double update_source_lines_from_binary(InstrInfo & target_iinfo, Metrics & targe } // First Pass -void process_traces(MemPatternsForPin & mp) +void MemPatternsForPin::process_traces() { int iret = 0; trace_entry_t *drline; InstrWindow iw; - gzFile fp_drtrace = open_trace_file(mp.get_trace_file_name()); + gzFile fp_drtrace = open_trace_file(get_trace_file_name()); printf("First pass to find top gather / scatter iaddresses\n"); fflush(stdout); @@ -213,7 +244,7 @@ void process_traces(MemPatternsForPin & mp) drline = p_drtrace; //handle_trace_entry(drline, trace_info, gather_iinfo, scatter_iinfo, gather_metrics, scatter_metrics, iw); - mp.handle_trace_entry(drline); + handle_trace_entry(drline); p_drtrace++; } @@ -221,14 +252,14 @@ void process_traces(MemPatternsForPin & mp) close_trace_file(fp_drtrace); //metrics - mp.get_trace_info().gather_occ_avg /= mp.get_gather_metrics().cnt; - mp.get_trace_info().scatter_occ_avg /= mp.get_scatter_metrics().cnt; + get_trace_info().gather_occ_avg /= get_gather_metrics().cnt; + get_trace_info().scatter_occ_avg /= get_scatter_metrics().cnt; - display_stats(mp); + display_stats(*this); } -void process_second_pass(gzFile & fp_drtrace, MemPatternsForPin & mp) +void MemPatternsForPin::process_second_pass(gzFile & fp_drtrace) { uint64_t mcnt = 0; // used our own local mcnt while iterating over file in this method. 
int iret = 0; @@ -252,26 +283,26 @@ void process_second_pass(gzFile & fp_drtrace, MemPatternsForPin & mp) //decode drtrace drline = p_drtrace; - breakout = ::handle_2nd_pass_trace_entry(drline, mp.get_gather_metrics(), mp.get_scatter_metrics(), + breakout = ::handle_2nd_pass_trace_entry(drline, get_gather_metrics(), get_scatter_metrics(), iaddr, maddr, mcnt, gather_base, scatter_base); p_drtrace++; } //while drtrace } -void update_source_lines(MemPatternsForPin & mp) +void MemPatternsForPin::update_source_lines() { // Find source lines for gathers - Must have symbol printf("\nSymbol table lookup for gathers..."); fflush(stdout); - mp.get_gather_metrics().cnt = update_source_lines_from_binary(mp.get_gather_iinfo(), mp.get_gather_metrics(), mp.get_binary_file_name()); + get_gather_metrics().cnt = update_source_lines_from_binary(GATHER); // Find source lines for scatters printf("Symbol table lookup for scatters..."); fflush(stdout); - mp.get_scatter_metrics().cnt = update_source_lines_from_binary(mp.get_scatter_iinfo(), mp.get_scatter_metrics(), mp.get_binary_file_name()); + get_scatter_metrics().cnt = update_source_lines_from_binary(SCATTER); } int main(int argc, char **argv) @@ -289,7 +320,7 @@ int main(int argc, char **argv) // ----------------- Process Traces ----------------- - process_traces(mp); + mp.process_traces(); // ----------------- Generate Patterns ----------------- diff --git a/gs_patterns.h b/gs_patterns.h index ba59160..39a004d 100644 --- a/gs_patterns.h +++ b/gs_patterns.h @@ -233,11 +233,14 @@ class MemPatterns virtual void handle_trace_entry(const trace_entry_t * te) = 0; virtual void generate_patterns() = 0; - virtual Metrics & get_gather_metrics() = 0; - virtual Metrics & get_scatter_metrics() = 0; - virtual InstrInfo & get_gather_iinfo() = 0; - virtual InstrInfo & get_scatter_iinfo() = 0; - virtual TraceInfo & get_trace_info() = 0; - virtual InstrWindow & get_instr_window() = 0; + virtual Metrics & get_metrics(metrics_type) = 0; + virtual 
InstrInfo & get_iinfo(metrics_type) = 0; + + virtual Metrics & get_gather_metrics() = 0; + virtual Metrics & get_scatter_metrics() = 0; + virtual InstrInfo & get_gather_iinfo() = 0; + virtual InstrInfo & get_scatter_iinfo() = 0; + virtual TraceInfo & get_trace_info() = 0; + virtual InstrWindow & get_instr_window() = 0; }; \ No newline at end of file diff --git a/gs_patterns_core.h b/gs_patterns_core.h index ccd64b5..e5f0cc5 100644 --- a/gs_patterns_core.h +++ b/gs_patterns_core.h @@ -8,7 +8,6 @@ #include #include -#include "utils.h" #include "gs_patterns.h" int drline_read(gzFile fp, diff --git a/gsnv_patterns.cpp b/gsnv_patterns.cpp new file mode 100644 index 0000000..3dec826 --- /dev/null +++ b/gsnv_patterns.cpp @@ -0,0 +1,120 @@ +// +// Created by christopher on 4/3/24. +// + +#include "gs_patterns.h" +#include "gs_patterns_core.h" + +class MemPatternsForNV : public MemPatterns +{ +public: + MemPatternsForNV(): _metrics(GATHER, SCATTER), + _iinfo(GATHER, SCATTER) + { } + + virtual ~MemPatternsForNV() override { } + + void handle_trace_entry(const trace_entry_t * tentry) override; + void generate_patterns() override; + + Metrics & get_metrics(metrics_type) override; + InstrInfo & get_iinfo(metrics_type) override; + + Metrics & get_gather_metrics() override { return _metrics.first; } + Metrics & get_scatter_metrics() override { return _metrics.second; } + InstrInfo & get_gather_iinfo () override { return _iinfo.first; } + InstrInfo & get_scatter_iinfo () override { return _iinfo.second; } + TraceInfo & get_trace_info() override { return _trace_info; } + InstrWindow & get_instr_window() override { return _iw; } + + //void set_binary_file(const std::string & binary_file_name) { _binary_file_name = binary_file_name; } + //const std::string & get_binary_file_name() { return _binary_file_name; } + + void set_file_prefix(const std::string & prefix) { _file_prefix = prefix; } + const std::string & get_file_prefix() { return _file_prefix; } + + void update_metrics(); + + 
void process_traces(); + void update_source_lines(); + double update_source_lines_from_binary(metrics_type); + void process_second_pass(); + +private: + std::pair _metrics; + std::pair _iinfo; + TraceInfo _trace_info; + InstrWindow _iw; + + std::string _binary_file_name; + std::string _file_prefix; +}; + + +Metrics & MemPatternsForNV::get_metrics(metrics_type m) +{ + switch (m) + { + case GATHER : return _metrics.first; + break; + case SCATTER : return _metrics.second; + break; + default: + throw GSError("Unable to get Metrics - Invalid Metrics Type: " + m); + } +} + +InstrInfo & MemPatternsForNV::get_iinfo(metrics_type m) +{ + switch (m) + { + case GATHER : return _iinfo.first; + break; + case SCATTER : return _iinfo.second; + break; + default: + throw GSError("Unable to get InstrInfo - Invalid Metrics Type: " + m); + } +} + +void MemPatternsForNV::handle_trace_entry(const trace_entry_t *tentry) +{ + // Call libgs_patterns + ::handle_trace_entry(*this, tentry); +} + +void MemPatternsForNV::generate_patterns() +{ + // ----------------- Update Source Lines ----------------- + + update_source_lines(); + + // ----------------- Update Metrics ----------------- + + update_metrics(); + + // ----------------- Create Spatter File ----------------- + + ::create_spatter_file(*this, get_file_prefix()); + +} + +void MemPatternsForNV::update_metrics() +{ + // Get top gathers + get_gather_metrics().ntop = get_top_target(get_gather_iinfo(), get_gather_metrics()); + + // Get top scatters + get_scatter_metrics().ntop = get_top_target(get_scatter_iinfo(), get_scatter_metrics()); + + // ----------------- Second Pass ----------------- + + process_second_pass(); + + // ----------------- Normalize ----------------- + + ::normalize_stats(get_gather_metrics()); + ::normalize_stats(get_scatter_metrics()); +} + + From 375e736a7194419b90f438d5aeb3fcd71feafb0a Mon Sep 17 00:00:00 2001 From: christopher Date: Thu, 4 Apr 2024 00:06:39 -0400 Subject: [PATCH 20/76] Implemented a proposed way to 
do second_pass. --- gsnv_patterns.cpp | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/gsnv_patterns.cpp b/gsnv_patterns.cpp index 3dec826..ae6a249 100644 --- a/gsnv_patterns.cpp +++ b/gsnv_patterns.cpp @@ -2,6 +2,8 @@ // Created by christopher on 4/3/24. // +#include + #include "gs_patterns.h" #include "gs_patterns_core.h" @@ -35,7 +37,7 @@ class MemPatternsForNV : public MemPatterns void update_metrics(); - void process_traces(); + //void process_traces(); void update_source_lines(); double update_source_lines_from_binary(metrics_type); void process_second_pass(); @@ -48,6 +50,8 @@ class MemPatternsForNV : public MemPatterns std::string _binary_file_name; std::string _file_prefix; + + std::vector _traces; }; @@ -81,6 +85,10 @@ void MemPatternsForNV::handle_trace_entry(const trace_entry_t *tentry) { // Call libgs_patterns ::handle_trace_entry(*this, tentry); + + _traces.push_back(*tentry); + + // TODO: Determine how to get source lines } void MemPatternsForNV::generate_patterns() @@ -118,3 +126,28 @@ void MemPatternsForNV::update_metrics() } +void MemPatternsForNV::process_second_pass() +{ + uint64_t mcnt = 0; // used our own local mcnt while iterating over file in this method. 
+ int iret = 0; + trace_entry_t *drline; + + // State carried thru + addr_t iaddr; + int64_t maddr; + addr_t gather_base[NTOP] = {0}; + addr_t scatter_base[NTOP] = {0}; + + bool breakout = false; + printf("\nSecond pass to fill gather / scatter subtraces\n"); + fflush(stdout); + + for (auto itr = _traces.begin(); itr != _traces.end(); ++itr) + { + trace_entry_t & drline = *itr; + + breakout = ::handle_2nd_pass_trace_entry(&drline, get_gather_metrics(), get_scatter_metrics(), + iaddr, maddr, mcnt, gather_base, scatter_base); + } +} + From c5c16e01f366138a9763201332b503ac611fbf22 Mon Sep 17 00:00:00 2001 From: christopher Date: Thu, 4 Apr 2024 23:43:30 -0400 Subject: [PATCH 21/76] Removed dependency on trace_entry_t in gs_patterns_core.cpp by providing an adapter. --- gs_patterns.cpp | 63 +++++++++++++++++++++++++++++++++----------- gs_patterns.h | 43 ++++++++++++++++++++++++------ gs_patterns_core.cpp | 49 ++++++++++++++++++++++------------ gs_patterns_core.h | 31 ++++++++-------------- gsnv_patterns.cpp | 52 ++++++++++++++++++++++++++++++------ 5 files changed, 170 insertions(+), 68 deletions(-) diff --git a/gs_patterns.cpp b/gs_patterns.cpp index 3f5ad57..fab39dc 100644 --- a/gs_patterns.cpp +++ b/gs_patterns.cpp @@ -71,21 +71,56 @@ int drline_read(gzFile fp, trace_entry_t *val, trace_entry_t **p_val, int *edx) return 1; } -class MemPatternsForPin : public MemPatterns +// An adapter for trace_entry_t +class InstrAddressInfoForPin : public InstrAddressInfo { public: - MemPatternsForPin() : _metrics(GATHER, SCATTER), - _iinfo(GATHER, SCATTER) + InstrAddressInfoForPin(const trace_entry_t * te) { + /// TODO: do we need to copy this, will we outlive trace_entry_t which is passed in ? 
+ _te.type = te->type; + _te.size = te->size; + _te.addr = te->addr; + } + InstrAddressInfoForPin(const trace_entry_t te) : _te(te) { } + + virtual ~InstrAddressInfoForPin() { } + + virtual bool is_valid() const override { return !(0 == _te.type && 0 == _te.size); } + virtual bool is_mem_instr() const override { return ((_te.type == 0x0) || (_te.type == 0x1)); } + virtual bool is_other_instr() const override { return ((_te.type >= 0xa) && (_te.type <= 0x10)) || (_te.type == 0x1e); } + + virtual mem_access_type get_mem_instr_type() const override { + if (!is_mem_instr()) throw GSDataError("Not a Memory Instruction - unable to determine Instruction"); + // Must be 0x0 or 0x1 + if (_te.type == 0x0) return GATHER; + else return SCATTER; + } + + virtual size_t get_size() const override { return _te.size; } // TODO: FIX conversion <---------------------------------------- + virtual addr_t get_address() const override { return _te.addr; }; + virtual unsigned short get_type() const override { return _te.type; } + + virtual void output(std::ostream & os) const override { + os << "InstrAddressInfoForPin: trace entry: type: [" << _te.type << "] size: [" << _te.size << "]"; } +private: + trace_entry_t _te; +}; + +class MemPatternsForPin : public MemPatterns +{ +public: + MemPatternsForPin() : _metrics(GATHER, SCATTER), + _iinfo(GATHER, SCATTER) { } virtual ~MemPatternsForPin() override { } void handle_trace_entry(const trace_entry_t * tentry) override; void generate_patterns() override; - Metrics & get_metrics(metrics_type) override; - InstrInfo & get_iinfo(metrics_type) override; + Metrics & get_metrics(mem_access_type) override; + InstrInfo & get_iinfo(mem_access_type) override; Metrics & get_gather_metrics() override { return _metrics.first; } Metrics & get_scatter_metrics() override { return _metrics.second; } @@ -106,7 +141,7 @@ class MemPatternsForPin : public MemPatterns void process_traces(); void update_source_lines(); - double 
update_source_lines_from_binary(metrics_type); + double update_source_lines_from_binary(mem_access_type); void process_second_pass(gzFile & fp_drtrace); private: @@ -119,7 +154,7 @@ class MemPatternsForPin : public MemPatterns std::string _binary_file_name; }; -Metrics & MemPatternsForPin::get_metrics(metrics_type m) +Metrics & MemPatternsForPin::get_metrics(mem_access_type m) { switch (m) { @@ -132,7 +167,7 @@ Metrics & MemPatternsForPin::get_metrics(metrics_type m) } } -InstrInfo & MemPatternsForPin::get_iinfo(metrics_type m) +InstrInfo & MemPatternsForPin::get_iinfo(mem_access_type m) { switch (m) { @@ -145,10 +180,10 @@ InstrInfo & MemPatternsForPin::get_iinfo(metrics_type m) } } -void MemPatternsForPin::handle_trace_entry(const trace_entry_t *tentry) +void MemPatternsForPin::handle_trace_entry(const trace_entry_t * tentry) { // Call libgs_patterns - ::handle_trace_entry(*this, tentry); + ::handle_trace_entry(*this, InstrAddressInfoForPin(tentry)); } void MemPatternsForPin::generate_patterns() @@ -200,7 +235,7 @@ std::string MemPatternsForPin::get_trace_file_prefix() return prefix; } -double MemPatternsForPin::update_source_lines_from_binary(metrics_type mType) +double MemPatternsForPin::update_source_lines_from_binary(mem_access_type mType) { double scatter_cnt = 0.0; @@ -243,7 +278,6 @@ void MemPatternsForPin::process_traces() //decode drtrace drline = p_drtrace; - //handle_trace_entry(drline, trace_info, gather_iinfo, scatter_iinfo, gather_metrics, scatter_metrics, iw); handle_trace_entry(drline); p_drtrace++; @@ -279,15 +313,14 @@ void MemPatternsForPin::process_second_pass(gzFile & fp_drtrace) trace_entry_t drtrace[NBUFS]; // was static (1024 bytes) while (drline_read(fp_drtrace, drtrace, &p_drtrace, &iret) && !breakout) { - //decode drtrace drline = p_drtrace; - breakout = ::handle_2nd_pass_trace_entry(drline, get_gather_metrics(), get_scatter_metrics(), + breakout = ::handle_2nd_pass_trace_entry(InstrAddressInfoForPin(drline), get_gather_metrics(), 
get_scatter_metrics(), iaddr, maddr, mcnt, gather_base, scatter_base); p_drtrace++; - } //while drtrace + } } void MemPatternsForPin::update_source_lines() diff --git a/gs_patterns.h b/gs_patterns.h index 39a004d..75b37d0 100644 --- a/gs_patterns.h +++ b/gs_patterns.h @@ -54,7 +54,7 @@ struct _trace_entry_t { } __attribute__((packed)); typedef struct _trace_entry_t trace_entry_t; -typedef enum { GATHER=0, SCATTER } metrics_type; +typedef enum { GATHER=0, SCATTER } mem_access_type; class GSError : public std::exception { @@ -88,10 +88,38 @@ class GSAllocError : public GSError ~GSAllocError() {} }; +class InstrAddressInfo +{ +public: + InstrAddressInfo() { } + virtual ~InstrAddressInfo() { } + + virtual bool is_valid() const = 0; + virtual bool is_mem_instr() const = 0; + virtual bool is_other_instr() const = 0; + virtual mem_access_type get_mem_instr_type() const = 0; + + virtual size_t get_size() const = 0; + virtual addr_t get_address() const = 0; + virtual unsigned short get_type() const = 0; + // multiple? + + virtual bool is_gather() const + { return (is_valid() && is_mem_instr() && GATHER == get_mem_instr_type()) ? true : false; } + + virtual bool is_scatter() const + { return (is_valid() && is_mem_instr() && SCATTER == get_mem_instr_type()) ? 
true : false; } + + virtual void output(std::ostream & os) const = 0; +}; + +std::ostream & operator<<(std::ostream & os, const InstrAddressInfo & ia); + + class Metrics { public: - Metrics(metrics_type mType) : _mType(mType) + Metrics(mem_access_type mType) : _mType(mType) { /// TODO: Convert to new/free for (int j = 0; j < NTOP; j++) { @@ -133,14 +161,14 @@ class Metrics private: static char srcline[2][NGS][MAX_LINE_LENGTH]; // was static (may move out and have 1 per type) - metrics_type _mType; + mem_access_type _mType; }; class InstrInfo { public: - InstrInfo(metrics_type mType) : _mType(mType) { } + InstrInfo(mem_access_type mType) : _mType(mType) { } ~InstrInfo() { } InstrInfo(const InstrInfo &) = delete; @@ -155,7 +183,7 @@ class InstrInfo static int64_t icnt[2][NGS]; static int64_t occ[2][NGS]; - metrics_type _mType; + mem_access_type _mType; }; class TraceInfo // Stats @@ -233,8 +261,8 @@ class MemPatterns virtual void handle_trace_entry(const trace_entry_t * te) = 0; virtual void generate_patterns() = 0; - virtual Metrics & get_metrics(metrics_type) = 0; - virtual InstrInfo & get_iinfo(metrics_type) = 0; + virtual Metrics & get_metrics(mem_access_type) = 0; + virtual InstrInfo & get_iinfo(mem_access_type) = 0; virtual Metrics & get_gather_metrics() = 0; virtual Metrics & get_scatter_metrics() = 0; @@ -242,5 +270,4 @@ class MemPatterns virtual InstrInfo & get_scatter_iinfo() = 0; virtual TraceInfo & get_trace_info() = 0; virtual InstrWindow & get_instr_window() = 0; - }; \ No newline at end of file diff --git a/gs_patterns_core.cpp b/gs_patterns_core.cpp index eeaaa8c..8a52fde 100644 --- a/gs_patterns_core.cpp +++ b/gs_patterns_core.cpp @@ -205,7 +205,7 @@ void normalize_stats(Metrics & target_metrics) } } -void handle_trace_entry(MemPatterns & mp, const trace_entry_t *drline) +void handle_trace_entry(MemPatterns & mp, const InstrAddressInfo & ia) { int i, j, k, w; int w_rw_idx; @@ -219,18 +219,18 @@ void handle_trace_entry(MemPatterns & mp, const 
trace_entry_t *drline) auto & scatter_metrics = mp.get_scatter_metrics(); auto & iw = mp.get_instr_window(); - if (0 == drline->type && 0 == drline->size) { + if (!ia.is_valid()) { std::ostringstream os; - os << "Invalid trace entry: type: [" << drline->type << "] size: [" << drline->size << "]"; + os << "Invalid " << ia; throw GSDataError(os.str()); } /*****************************/ /** INSTR 0xa-0x10 and 0x1e **/ /*****************************/ - if (((drline->type >= 0xa) && (drline->type <= 0x10)) || (drline->type == 0x1e)) { + if (ia.is_other_instr()) { - iw.iaddr = drline->addr; + iw.iaddr = ia.get_address(); //nops trace_info.opcodes++; @@ -239,9 +239,9 @@ void handle_trace_entry(MemPatterns & mp, const trace_entry_t *drline) /***********************/ /** MEM 0x00 and 0x01 **/ /***********************/ - } else if ((drline->type == 0x0) || (drline->type == 0x1)) { + } else if (ia.is_mem_instr()) { - w_rw_idx = drline->type; + w_rw_idx = ia.get_type(); //printf("M DRTRACE -- iaddr: %016lx addr: %016lx cl_start: %d bytes: %d\n", // iaddr, drline->addr, drline->addr % 64, drline->size); @@ -375,8 +375,8 @@ void handle_trace_entry(MemPatterns & mp, const trace_entry_t *drline) //Set window values iw.w_iaddrs[w_rw_idx][w_idx] = iw.iaddr; - iw.w_maddr[w_rw_idx][w_idx][iw.w_cnt[w_rw_idx][w_idx]] = drline->addr / drline->size; - iw.w_bytes[w_rw_idx][w_idx] += drline->size; + iw.w_maddr[w_rw_idx][w_idx][iw.w_cnt[w_rw_idx][w_idx]] = ia.get_address() / ia.get_size(); + iw.w_bytes[w_rw_idx][w_idx] += ia.get_size(); //num access per iaddr in loop iw.w_cnt[w_rw_idx][w_idx]++; @@ -466,7 +466,7 @@ int get_top_target(InstrInfo & target_iinfo, Metrics & target_metrics) return target_ntop; } -bool handle_2nd_pass_trace_entry(trace_entry_t * drline, +bool handle_2nd_pass_trace_entry(const InstrAddressInfo & ia, Metrics & gather_metrics, Metrics & scatter_metrics, addr_t & iaddr, int64_t & maddr, uint64_t & mcnt, addr_t * gather_base, addr_t * scatter_base) @@ -479,16 +479,22 @@ 
bool handle_2nd_pass_trace_entry(trace_entry_t * drline, /*****************************/ /** INSTR 0xa-0x10 and 0x1e **/ /*****************************/ - if (((drline->type >= 0xa) && (drline->type <= 0x10)) || (drline->type == 0x1e)) { - iaddr = drline->addr; + if (!ia.is_valid()) { + std::ostringstream os; + os << "Invalid " << ia; + throw GSDataError(os.str()); + } + + if (ia.is_other_instr()) { + iaddr = ia.get_address(); /***********************/ /** MEM 0x00 and 0x01 **/ /***********************/ } - else if ((drline->type == 0x0) || (drline->type == 0x1)) { + else if (ia.is_mem_instr()) { - maddr = drline->addr / drline->size; + maddr = ia.get_address() / ia.get_size(); if ((++mcnt % PERSAMPLE) == 0) { #if SAMPLE @@ -499,7 +505,7 @@ bool handle_2nd_pass_trace_entry(trace_entry_t * drline, } // gather ? - if (drline->type == 0x0) { + if (ia.get_mem_instr_type() == GATHER) { for (i = 0; i < gather_metrics.ntop; i++) { @@ -521,8 +527,8 @@ bool handle_2nd_pass_trace_entry(trace_entry_t * drline, } } } - // scatter ? - else { + // scatter ? 
+ else if (ia.get_mem_instr_type() == SCATTER) { for (i = 0; i < scatter_metrics.ntop; i++) { @@ -543,7 +549,16 @@ bool handle_2nd_pass_trace_entry(trace_entry_t * drline, } } } + else { // belt and suspenders, yep = but helps to validate correct logic in children of InstrAddresInfo + throw GSDataError("Unknown Memory Instruction Type: " + ia.get_mem_instr_type()); + } } // MEM return breakout; } + +std::ostream & operator<<(std::ostream & os, const InstrAddressInfo & ia) +{ + ia.output(os); + return os; +} diff --git a/gs_patterns_core.h b/gs_patterns_core.h index e5f0cc5..a90ec79 100644 --- a/gs_patterns_core.h +++ b/gs_patterns_core.h @@ -10,34 +10,25 @@ #include "gs_patterns.h" -int drline_read(gzFile fp, - trace_entry_t * val, - trace_entry_t ** p_val, - int * edx); +void translate_iaddr(const std::string & binary, char * source_line, addr_t iaddr); -void translate_iaddr(const std::string & binary, - char * source_line, - addr_t iaddr); - -void create_metrics_file(FILE * fp, - FILE * fp2, - const std::string & file_prefix, - Metrics & target_metrics, - bool & first_spatter); - -void create_spatter_file(MemPatterns & mp, const std::string & file_prefix); - -void handle_trace_entry(MemPatterns & mp, const trace_entry_t *drline); +void handle_trace_entry(MemPatterns & mp, const InstrAddressInfo & ia); void display_stats(MemPatterns & mp); -void update_metrics(MemPatterns & mp, gzFile & fp_drtrace); - int get_top_target(InstrInfo & target_iinfo, Metrics & target_metrics); void normalize_stats(Metrics & target_metrics); -bool handle_2nd_pass_trace_entry(trace_entry_t * drline, +bool handle_2nd_pass_trace_entry(const InstrAddressInfo & ia, Metrics & gather_metrics, Metrics & scatter_metrics, addr_t & iaddr, int64_t & maddr, uint64_t & mcnt, addr_t * gather_base, addr_t * scatter_base); + +void create_metrics_file(FILE * fp, + FILE * fp2, + const std::string & file_prefix, + Metrics & target_metrics, + bool & first_spatter); + +void create_spatter_file(MemPatterns & 
mp, const std::string & file_prefix); diff --git a/gsnv_patterns.cpp b/gsnv_patterns.cpp index ae6a249..52db178 100644 --- a/gsnv_patterns.cpp +++ b/gsnv_patterns.cpp @@ -3,10 +3,46 @@ // #include +#include #include "gs_patterns.h" #include "gs_patterns_core.h" +class InstrAddressInfoForNV : public InstrAddressInfo +{ +public: + InstrAddressInfoForNV(const trace_entry_t * te) + { + _te.type = te->type; + _te.size = te->size; + _te.addr = te->addr; + } + InstrAddressInfoForNV(const trace_entry_t te) : _te(te) { } + + virtual ~InstrAddressInfoForNV() { } + + virtual bool is_valid() const override { return false; } + virtual bool is_mem_instr() const override { return false; } + virtual bool is_other_instr() const override { return false; } + + + virtual mem_access_type get_mem_instr_type() const override + { + return GATHER; // UNSUPPORTED <=-=-------------------------------------- FIX ME + } + + virtual size_t get_size() const override { return _te.size; } // TODO: FIX conversion <---------------------------------------- + virtual addr_t get_address() const override { return _te.addr; }; + virtual unsigned short get_type() const override { return _te.type; } + + virtual void output(std::ostream & os) const override { + os << "InstrAddressInfoForNV: trace entry: type: [" << _te.type << "] size: [" << _te.size << "]"; + } + +private: + trace_entry_t _te; +}; + class MemPatternsForNV : public MemPatterns { public: @@ -19,8 +55,8 @@ class MemPatternsForNV : public MemPatterns void handle_trace_entry(const trace_entry_t * tentry) override; void generate_patterns() override; - Metrics & get_metrics(metrics_type) override; - InstrInfo & get_iinfo(metrics_type) override; + Metrics & get_metrics(mem_access_type) override; + InstrInfo & get_iinfo(mem_access_type) override; Metrics & get_gather_metrics() override { return _metrics.first; } Metrics & get_scatter_metrics() override { return _metrics.second; } @@ -39,7 +75,7 @@ class MemPatternsForNV : public MemPatterns //void 
process_traces(); void update_source_lines(); - double update_source_lines_from_binary(metrics_type); + double update_source_lines_from_binary(mem_access_type); void process_second_pass(); private: @@ -55,7 +91,7 @@ class MemPatternsForNV : public MemPatterns }; -Metrics & MemPatternsForNV::get_metrics(metrics_type m) +Metrics & MemPatternsForNV::get_metrics(mem_access_type m) { switch (m) { @@ -68,7 +104,7 @@ Metrics & MemPatternsForNV::get_metrics(metrics_type m) } } -InstrInfo & MemPatternsForNV::get_iinfo(metrics_type m) +InstrInfo & MemPatternsForNV::get_iinfo(mem_access_type m) { switch (m) { @@ -81,10 +117,10 @@ InstrInfo & MemPatternsForNV::get_iinfo(metrics_type m) } } -void MemPatternsForNV::handle_trace_entry(const trace_entry_t *tentry) +void MemPatternsForNV::handle_trace_entry(const trace_entry_t * tentry) { // Call libgs_patterns - ::handle_trace_entry(*this, tentry); + ::handle_trace_entry(*this, InstrAddressInfoForNV(tentry)); _traces.push_back(*tentry); @@ -146,7 +182,7 @@ void MemPatternsForNV::process_second_pass() { trace_entry_t & drline = *itr; - breakout = ::handle_2nd_pass_trace_entry(&drline, get_gather_metrics(), get_scatter_metrics(), + breakout = ::handle_2nd_pass_trace_entry(InstrAddressInfoForNV(&drline), get_gather_metrics(), get_scatter_metrics(), iaddr, maddr, mcnt, gather_base, scatter_base); } } From 8ca5c83e13cee34239be336d04a677cd9094a197 Mon Sep 17 00:00:00 2001 From: christopher Date: Fri, 5 Apr 2024 00:13:04 -0400 Subject: [PATCH 22/76] Removed remaining references to trace_entry_t out of libgs_patterns. 
--- gs_patterns.cpp | 20 ++++++++++++++++---- gs_patterns.h | 14 +------------- gsnv_patterns.cpp | 38 +++++++++++++++++++++++++------------- 3 files changed, 42 insertions(+), 30 deletions(-) diff --git a/gs_patterns.cpp b/gs_patterns.cpp index fab39dc..f615c8e 100644 --- a/gs_patterns.cpp +++ b/gs_patterns.cpp @@ -34,6 +34,18 @@ int64_t InstrWindow::w_maddr[2][IWINDOW][VBYTES]; int64_t InstrWindow::w_cnt[2][IWINDOW]; #endif +//FROM DR SOURCE +//DR trace +struct _trace_entry_t { + unsigned short type; // 2 bytes: trace_type_t + unsigned short size; + union { + addr_t addr; + unsigned char length[sizeof(addr_t)]; + }; +} __attribute__((packed)); +typedef struct _trace_entry_t trace_entry_t; + gzFile open_trace_file(const std::string & trace_file_name) { gzFile fp; @@ -116,7 +128,7 @@ class MemPatternsForPin : public MemPatterns _iinfo(GATHER, SCATTER) { } virtual ~MemPatternsForPin() override { } - void handle_trace_entry(const trace_entry_t * tentry) override; + void handle_trace_entry(const InstrAddressInfo & ia) override; void generate_patterns() override; Metrics & get_metrics(mem_access_type) override; @@ -180,10 +192,10 @@ InstrInfo & MemPatternsForPin::get_iinfo(mem_access_type m) } } -void MemPatternsForPin::handle_trace_entry(const trace_entry_t * tentry) +void MemPatternsForPin::handle_trace_entry(const InstrAddressInfo & ia) { // Call libgs_patterns - ::handle_trace_entry(*this, InstrAddressInfoForPin(tentry)); + ::handle_trace_entry(*this, ia); } void MemPatternsForPin::generate_patterns() @@ -278,7 +290,7 @@ void MemPatternsForPin::process_traces() //decode drtrace drline = p_drtrace; - handle_trace_entry(drline); + handle_trace_entry(InstrAddressInfoForPin(drline)); p_drtrace++; } diff --git a/gs_patterns.h b/gs_patterns.h index 75b37d0..fd1fa61 100644 --- a/gs_patterns.h +++ b/gs_patterns.h @@ -42,18 +42,6 @@ typedef uintptr_t addr_t; -//FROM DR SOURCE -//DR trace -struct _trace_entry_t { - unsigned short type; // 2 bytes: trace_type_t - unsigned 
short size; - union { - addr_t addr; - unsigned char length[sizeof(addr_t)]; - }; -} __attribute__((packed)); -typedef struct _trace_entry_t trace_entry_t; - typedef enum { GATHER=0, SCATTER } mem_access_type; class GSError : public std::exception @@ -258,7 +246,7 @@ class MemPatterns MemPatterns(const MemPatterns &) = delete; MemPatterns & operator=(const MemPatterns &) = delete; - virtual void handle_trace_entry(const trace_entry_t * te) = 0; + virtual void handle_trace_entry(const InstrAddressInfo & ia) = 0; virtual void generate_patterns() = 0; virtual Metrics & get_metrics(mem_access_type) = 0; diff --git a/gsnv_patterns.cpp b/gsnv_patterns.cpp index 52db178..cc4516f 100644 --- a/gsnv_patterns.cpp +++ b/gsnv_patterns.cpp @@ -8,6 +8,17 @@ #include "gs_patterns.h" #include "gs_patterns_core.h" +struct _trace_entry_t { + unsigned short type; // 2 bytes: trace_type_t + unsigned short size; + union { + addr_t addr; + unsigned char length[sizeof(addr_t)]; + }; +} __attribute__((packed)); +typedef struct _trace_entry_t trace_entry_t; + +// An adapter for trace_entry_t (temporaritly untl replaced with nvbit memory detail type) class InstrAddressInfoForNV : public InstrAddressInfo { public: @@ -52,7 +63,7 @@ class MemPatternsForNV : public MemPatterns virtual ~MemPatternsForNV() override { } - void handle_trace_entry(const trace_entry_t * tentry) override; + void handle_trace_entry(const InstrAddressInfo & ia) override; void generate_patterns() override; Metrics & get_metrics(mem_access_type) override; @@ -79,15 +90,15 @@ class MemPatternsForNV : public MemPatterns void process_second_pass(); private: - std::pair _metrics; - std::pair _iinfo; - TraceInfo _trace_info; - InstrWindow _iw; + std::pair _metrics; + std::pair _iinfo; + TraceInfo _trace_info; + InstrWindow _iw; - std::string _binary_file_name; - std::string _file_prefix; + std::string _binary_file_name; + std::string _file_prefix; - std::vector _traces; + std::vector _traces; }; @@ -117,12 +128,13 @@ InstrInfo 
& MemPatternsForNV::get_iinfo(mem_access_type m) } } -void MemPatternsForNV::handle_trace_entry(const trace_entry_t * tentry) +void MemPatternsForNV::handle_trace_entry(const InstrAddressInfo & ia) { // Call libgs_patterns - ::handle_trace_entry(*this, InstrAddressInfoForNV(tentry)); + ::handle_trace_entry(*this, ia); - _traces.push_back(*tentry); + const InstrAddressInfoForNV & ianv = dynamic_cast (ia); + _traces.push_back(ianv); // TODO: Determine how to get source lines } @@ -180,9 +192,9 @@ void MemPatternsForNV::process_second_pass() for (auto itr = _traces.begin(); itr != _traces.end(); ++itr) { - trace_entry_t & drline = *itr; + InstrAddressInfo & ia = *itr; - breakout = ::handle_2nd_pass_trace_entry(InstrAddressInfoForNV(&drline), get_gather_metrics(), get_scatter_metrics(), + breakout = ::handle_2nd_pass_trace_entry(ia, get_gather_metrics(), get_scatter_metrics(), iaddr, maddr, mcnt, gather_base, scatter_base); } } From 32c28d37b5cd237991cac7998fa7621ea2a7134d Mon Sep 17 00:00:00 2001 From: christopher Date: Fri, 5 Apr 2024 00:19:23 -0400 Subject: [PATCH 23/76] cleanup --- gs_patterns.cpp | 4 ++++ gs_patterns.h | 9 +-------- gs_patterns_core.h | 1 - 3 files changed, 5 insertions(+), 9 deletions(-) diff --git a/gs_patterns.cpp b/gs_patterns.cpp index f615c8e..755403c 100644 --- a/gs_patterns.cpp +++ b/gs_patterns.cpp @@ -21,6 +21,10 @@ #define KMAG "\x1B[35m" #define KCYN "\x1B[36m" +//address status +#define ADDREND (0xFFFFFFFFFFFFFFFFUL) +#define ADDRUSYNC (0xFFFFFFFFFFFFFFFEUL) + // Class Static data initialization char Metrics::srcline[2][NGS][MAX_LINE_LENGTH]; addr_t InstrInfo::iaddrs[2][NGS]; diff --git a/gs_patterns.h b/gs_patterns.h index fd1fa61..8be02a3 100644 --- a/gs_patterns.h +++ b/gs_patterns.h @@ -34,14 +34,9 @@ //DONT CHANGE #define VBYTES (VBITS/8) -//address status -#define ADDREND (0xFFFFFFFFFFFFFFFFUL) -#define ADDRUSYNC (0xFFFFFFFFFFFFFFFEUL) - #define MAX_LINE_LENGTH 1024 typedef uintptr_t addr_t; - typedef enum { GATHER=0, SCATTER } 
mem_access_type; class GSError : public std::exception @@ -135,7 +130,6 @@ class Metrics auto get_srcline() { return srcline[_mType]; } -//private: int ntop = 0; double cnt = 0.0; int offset[NTOP] = {0}; @@ -193,7 +187,6 @@ class TraceInfo // Stats double scatter_occ_avg = 0.0; uint64_t mcnt = 0; - }; class InstrWindow @@ -258,4 +251,4 @@ class MemPatterns virtual InstrInfo & get_scatter_iinfo() = 0; virtual TraceInfo & get_trace_info() = 0; virtual InstrWindow & get_instr_window() = 0; -}; \ No newline at end of file +}; diff --git a/gs_patterns_core.h b/gs_patterns_core.h index a90ec79..f837a66 100644 --- a/gs_patterns_core.h +++ b/gs_patterns_core.h @@ -5,7 +5,6 @@ #pragma once #include -#include #include #include "gs_patterns.h" From 7fb401f4215edc44baff4d29bdf3cbbd900c6e2e Mon Sep 17 00:00:00 2001 From: christopher Date: Fri, 5 Apr 2024 12:24:01 -0400 Subject: [PATCH 24/76] Renamed a few classes/files to improve readability. --- CMakeLists.txt | 2 +- gs_patterns.h | 10 +++++----- gs_patterns_core.cpp | 6 +++--- gs_patterns_core.h | 4 ++-- gsnv_patterns.cpp | 20 ++++++++++---------- gs_patterns.cpp => gspin_patterns.cpp | 20 ++++++++++---------- 6 files changed, 31 insertions(+), 31 deletions(-) rename gs_patterns.cpp => gspin_patterns.cpp (93%) diff --git a/CMakeLists.txt b/CMakeLists.txt index ea83cfe..3ba957a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,7 +18,7 @@ add_library(gs_patterns_core STATIC ) add_executable( gs_patterns - gs_patterns.cpp ) + gspin_patterns.cpp) target_link_libraries(gs_patterns gs_patterns_core) diff --git a/gs_patterns.h b/gs_patterns.h index 8be02a3..66a024a 100644 --- a/gs_patterns.h +++ b/gs_patterns.h @@ -71,11 +71,11 @@ class GSAllocError : public GSError ~GSAllocError() {} }; -class InstrAddressInfo +class InstrAddrAdapter { public: - InstrAddressInfo() { } - virtual ~InstrAddressInfo() { } + InstrAddrAdapter() { } + virtual ~InstrAddrAdapter() { } virtual bool is_valid() const = 0; virtual bool is_mem_instr() 
const = 0; @@ -96,7 +96,7 @@ class InstrAddressInfo virtual void output(std::ostream & os) const = 0; }; -std::ostream & operator<<(std::ostream & os, const InstrAddressInfo & ia); +std::ostream & operator<<(std::ostream & os, const InstrAddrAdapter & ia); class Metrics @@ -239,7 +239,7 @@ class MemPatterns MemPatterns(const MemPatterns &) = delete; MemPatterns & operator=(const MemPatterns &) = delete; - virtual void handle_trace_entry(const InstrAddressInfo & ia) = 0; + virtual void handle_trace_entry(const InstrAddrAdapter & ia) = 0; virtual void generate_patterns() = 0; virtual Metrics & get_metrics(mem_access_type) = 0; diff --git a/gs_patterns_core.cpp b/gs_patterns_core.cpp index 8a52fde..ff6ed41 100644 --- a/gs_patterns_core.cpp +++ b/gs_patterns_core.cpp @@ -205,7 +205,7 @@ void normalize_stats(Metrics & target_metrics) } } -void handle_trace_entry(MemPatterns & mp, const InstrAddressInfo & ia) +void handle_trace_entry(MemPatterns & mp, const InstrAddrAdapter & ia) { int i, j, k, w; int w_rw_idx; @@ -466,7 +466,7 @@ int get_top_target(InstrInfo & target_iinfo, Metrics & target_metrics) return target_ntop; } -bool handle_2nd_pass_trace_entry(const InstrAddressInfo & ia, +bool handle_2nd_pass_trace_entry(const InstrAddrAdapter & ia, Metrics & gather_metrics, Metrics & scatter_metrics, addr_t & iaddr, int64_t & maddr, uint64_t & mcnt, addr_t * gather_base, addr_t * scatter_base) @@ -557,7 +557,7 @@ bool handle_2nd_pass_trace_entry(const InstrAddressInfo & ia, return breakout; } -std::ostream & operator<<(std::ostream & os, const InstrAddressInfo & ia) +std::ostream & operator<<(std::ostream & os, const InstrAddrAdapter & ia) { ia.output(os); return os; diff --git a/gs_patterns_core.h b/gs_patterns_core.h index f837a66..82555e1 100644 --- a/gs_patterns_core.h +++ b/gs_patterns_core.h @@ -11,7 +11,7 @@ void translate_iaddr(const std::string & binary, char * source_line, addr_t iaddr); -void handle_trace_entry(MemPatterns & mp, const InstrAddressInfo & ia); 
+void handle_trace_entry(MemPatterns & mp, const InstrAddrAdapter & ia); void display_stats(MemPatterns & mp); @@ -19,7 +19,7 @@ int get_top_target(InstrInfo & target_iinfo, Metrics & target_metrics); void normalize_stats(Metrics & target_metrics); -bool handle_2nd_pass_trace_entry(const InstrAddressInfo & ia, +bool handle_2nd_pass_trace_entry(const InstrAddrAdapter & ia, Metrics & gather_metrics, Metrics & scatter_metrics, addr_t & iaddr, int64_t & maddr, uint64_t & mcnt, addr_t * gather_base, addr_t * scatter_base); diff --git a/gsnv_patterns.cpp b/gsnv_patterns.cpp index cc4516f..4fdab87 100644 --- a/gsnv_patterns.cpp +++ b/gsnv_patterns.cpp @@ -19,18 +19,18 @@ struct _trace_entry_t { typedef struct _trace_entry_t trace_entry_t; // An adapter for trace_entry_t (temporaritly untl replaced with nvbit memory detail type) -class InstrAddressInfoForNV : public InstrAddressInfo +class InstrAddrAdapterForNV : public InstrAddrAdapter { public: - InstrAddressInfoForNV(const trace_entry_t * te) + InstrAddrAdapterForNV(const trace_entry_t * te) { _te.type = te->type; _te.size = te->size; _te.addr = te->addr; } - InstrAddressInfoForNV(const trace_entry_t te) : _te(te) { } + InstrAddrAdapterForNV(const trace_entry_t te) : _te(te) { } - virtual ~InstrAddressInfoForNV() { } + virtual ~InstrAddrAdapterForNV() { } virtual bool is_valid() const override { return false; } virtual bool is_mem_instr() const override { return false; } @@ -47,7 +47,7 @@ class InstrAddressInfoForNV : public InstrAddressInfo virtual unsigned short get_type() const override { return _te.type; } virtual void output(std::ostream & os) const override { - os << "InstrAddressInfoForNV: trace entry: type: [" << _te.type << "] size: [" << _te.size << "]"; + os << "InstrAddrAdapterForNV: trace entry: type: [" << _te.type << "] size: [" << _te.size << "]"; } private: @@ -63,7 +63,7 @@ class MemPatternsForNV : public MemPatterns virtual ~MemPatternsForNV() override { } - void handle_trace_entry(const 
InstrAddressInfo & ia) override; + void handle_trace_entry(const InstrAddrAdapter & ia) override; void generate_patterns() override; Metrics & get_metrics(mem_access_type) override; @@ -98,7 +98,7 @@ class MemPatternsForNV : public MemPatterns std::string _binary_file_name; std::string _file_prefix; - std::vector _traces; + std::vector _traces; }; @@ -128,12 +128,12 @@ InstrInfo & MemPatternsForNV::get_iinfo(mem_access_type m) } } -void MemPatternsForNV::handle_trace_entry(const InstrAddressInfo & ia) +void MemPatternsForNV::handle_trace_entry(const InstrAddrAdapter & ia) { // Call libgs_patterns ::handle_trace_entry(*this, ia); - const InstrAddressInfoForNV & ianv = dynamic_cast (ia); + const InstrAddrAdapterForNV & ianv = dynamic_cast (ia); _traces.push_back(ianv); // TODO: Determine how to get source lines @@ -192,7 +192,7 @@ void MemPatternsForNV::process_second_pass() for (auto itr = _traces.begin(); itr != _traces.end(); ++itr) { - InstrAddressInfo & ia = *itr; + InstrAddrAdapter & ia = *itr; breakout = ::handle_2nd_pass_trace_entry(ia, get_gather_metrics(), get_scatter_metrics(), iaddr, maddr, mcnt, gather_base, scatter_base); diff --git a/gs_patterns.cpp b/gspin_patterns.cpp similarity index 93% rename from gs_patterns.cpp rename to gspin_patterns.cpp index 755403c..f3bceec 100644 --- a/gs_patterns.cpp +++ b/gspin_patterns.cpp @@ -88,19 +88,19 @@ int drline_read(gzFile fp, trace_entry_t *val, trace_entry_t **p_val, int *edx) } // An adapter for trace_entry_t -class InstrAddressInfoForPin : public InstrAddressInfo +class InstrAddrAdapterForPin : public InstrAddrAdapter { public: - InstrAddressInfoForPin(const trace_entry_t * te) + InstrAddrAdapterForPin(const trace_entry_t * te) { /// TODO: do we need to copy this, will we outlive trace_entry_t which is passed in ? 
_te.type = te->type; _te.size = te->size; _te.addr = te->addr; } - InstrAddressInfoForPin(const trace_entry_t te) : _te(te) { } + InstrAddrAdapterForPin(const trace_entry_t te) : _te(te) { } - virtual ~InstrAddressInfoForPin() { } + virtual ~InstrAddrAdapterForPin() { } virtual bool is_valid() const override { return !(0 == _te.type && 0 == _te.size); } virtual bool is_mem_instr() const override { return ((_te.type == 0x0) || (_te.type == 0x1)); } @@ -118,7 +118,7 @@ class InstrAddressInfoForPin : public InstrAddressInfo virtual unsigned short get_type() const override { return _te.type; } virtual void output(std::ostream & os) const override { - os << "InstrAddressInfoForPin: trace entry: type: [" << _te.type << "] size: [" << _te.size << "]"; + os << "InstrAddrAdapterForPin: trace entry: type: [" << _te.type << "] size: [" << _te.size << "]"; } private: @@ -132,7 +132,7 @@ class MemPatternsForPin : public MemPatterns _iinfo(GATHER, SCATTER) { } virtual ~MemPatternsForPin() override { } - void handle_trace_entry(const InstrAddressInfo & ia) override; + void handle_trace_entry(const InstrAddrAdapter & ia) override; void generate_patterns() override; Metrics & get_metrics(mem_access_type) override; @@ -196,7 +196,7 @@ InstrInfo & MemPatternsForPin::get_iinfo(mem_access_type m) } } -void MemPatternsForPin::handle_trace_entry(const InstrAddressInfo & ia) +void MemPatternsForPin::handle_trace_entry(const InstrAddrAdapter & ia) { // Call libgs_patterns ::handle_trace_entry(*this, ia); @@ -294,7 +294,7 @@ void MemPatternsForPin::process_traces() //decode drtrace drline = p_drtrace; - handle_trace_entry(InstrAddressInfoForPin(drline)); + handle_trace_entry(InstrAddrAdapterForPin(drline)); p_drtrace++; } @@ -332,8 +332,8 @@ void MemPatternsForPin::process_second_pass(gzFile & fp_drtrace) //decode drtrace drline = p_drtrace; - breakout = ::handle_2nd_pass_trace_entry(InstrAddressInfoForPin(drline), get_gather_metrics(), get_scatter_metrics(), - iaddr, maddr, mcnt, 
gather_base, scatter_base); + breakout = ::handle_2nd_pass_trace_entry(InstrAddrAdapterForPin(drline), get_gather_metrics(), get_scatter_metrics(), + iaddr, maddr, mcnt, gather_base, scatter_base); p_drtrace++; } From e5bd585e504a811178bf15adb0d707e023d1a5b3 Mon Sep 17 00:00:00 2001 From: christopher Date: Fri, 5 Apr 2024 19:27:27 -0400 Subject: [PATCH 25/76] Removed remaining statics. Build libgs_patterns_core as shared lib - required for nvbit linkage. gsnv_patterns converted to header file to simplify nvbit build. Some missing default implemetations for gsnv_patterns. --- CMakeLists.txt | 6 +++--- gs_patterns.h | 26 +++++++++++--------------- gs_patterns_core.cpp | 3 --- gs_patterns_core.h | 3 --- gsnv_patterns.cpp => gsnv_patterns.h | 10 ++++------ gspin_patterns.cpp | 13 ------------- 6 files changed, 18 insertions(+), 43 deletions(-) rename gsnv_patterns.cpp => gsnv_patterns.h (97%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3ba957a..427c4f0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,17 +4,17 @@ set (CMAKE_VERBOSE_MAKEFILE "1") project( gs_patterns VERSION 1.0 LANGUAGES CXX) -set(CMAKE_CXX_STANDARD 20) +set(CMAKE_CXX_STANDARD 17) # was 20 set(CMAKE_CXX_STANDARD_REQUIRED On) #set(CMAKE_CXX_EXTENSIONS Off) -add_library(gs_patterns_core STATIC +add_library(gs_patterns_core SHARED utils.h utils.cpp gs_patterns.h gs_patterns_core.h gs_patterns_core.cpp - gsnv_patterns.cpp + gsnv_patterns.h ) add_executable( gs_patterns diff --git a/gs_patterns.h b/gs_patterns.h index 66a024a..19ff61c 100644 --- a/gs_patterns.h +++ b/gs_patterns.h @@ -1,6 +1,3 @@ -// -// Created by christopher on 3/31/24. 
-// #pragma once @@ -119,6 +116,8 @@ class Metrics for (int i = 0; i < NTOP; i++) { free(patterns[i]); } + + delete [] srcline; } Metrics(const Metrics &) = delete; @@ -141,7 +140,7 @@ class Metrics int64_t* patterns[NTOP] = {0}; private: - static char srcline[2][NGS][MAX_LINE_LENGTH]; // was static (may move out and have 1 per type) + char (*srcline)[NGS][MAX_LINE_LENGTH] = new char[2][NGS][MAX_LINE_LENGTH]; mem_access_type _mType; }; @@ -151,7 +150,11 @@ class InstrInfo { public: InstrInfo(mem_access_type mType) : _mType(mType) { } - ~InstrInfo() { } + ~InstrInfo() { + delete [] iaddrs; + delete [] icnt; + delete [] occ; + } InstrInfo(const InstrInfo &) = delete; InstrInfo & operator=(const InstrInfo & right) = delete; @@ -161,9 +164,9 @@ class InstrInfo int64_t* get_occ() { return occ[_mType]; } private: - static addr_t iaddrs[2][NGS]; - static int64_t icnt[2][NGS]; - static int64_t occ[2][NGS]; + addr_t (*iaddrs)[NGS] = new addr_t[2][NGS]; + int64_t (*icnt)[NGS] = new int64_t[2][NGS]; + int64_t (*occ)[NGS] = new int64_t[2][NGS]; mem_access_type _mType; }; @@ -210,19 +213,12 @@ class InstrWindow InstrWindow(const InstrWindow &) = delete; InstrWindow & operator=(const InstrWindow & right) = delete; -#if 0 - static int64_t w_iaddrs[2][IWINDOW]; - static int64_t w_bytes[2][IWINDOW]; - static int64_t w_maddr[2][IWINDOW][VBYTES]; - static int64_t w_cnt[2][IWINDOW]; -#else // moved from static storage to instance variables (watch out for stack overflow) // Revisit and move to heap if an issue - estimate of 2k*3 + 128k int64_t w_iaddrs[2][IWINDOW]; int64_t w_bytes[2][IWINDOW]; int64_t w_maddr[2][IWINDOW][VBYTES]; int64_t w_cnt[2][IWINDOW]; -#endif // State which must be carried with each call to handle a trace addr_t iaddr; diff --git a/gs_patterns_core.cpp b/gs_patterns_core.cpp index ff6ed41..4e483b5 100644 --- a/gs_patterns_core.cpp +++ b/gs_patterns_core.cpp @@ -1,6 +1,3 @@ -// -// Created by christopher on 4/2/24. 
-// #include /// TODO: use cassert instead #include diff --git a/gs_patterns_core.h b/gs_patterns_core.h index 82555e1..b833e23 100644 --- a/gs_patterns_core.h +++ b/gs_patterns_core.h @@ -1,6 +1,3 @@ -// -// Created by christopher on 4/2/24. -// #pragma once diff --git a/gsnv_patterns.cpp b/gsnv_patterns.h similarity index 97% rename from gsnv_patterns.cpp rename to gsnv_patterns.h index 4fdab87..29d6921 100644 --- a/gsnv_patterns.cpp +++ b/gsnv_patterns.h @@ -1,6 +1,5 @@ -// -// Created by christopher on 4/3/24. -// + +#pragma once #include #include @@ -85,8 +84,8 @@ class MemPatternsForNV : public MemPatterns void update_metrics(); //void process_traces(); - void update_source_lines(); - double update_source_lines_from_binary(mem_access_type); + void update_source_lines() { } + double update_source_lines_from_binary(mem_access_type) { return 0.0; } void process_second_pass(); private: @@ -198,4 +197,3 @@ void MemPatternsForNV::process_second_pass() iaddr, maddr, mcnt, gather_base, scatter_base); } } - diff --git a/gspin_patterns.cpp b/gspin_patterns.cpp index f3bceec..6b12019 100644 --- a/gspin_patterns.cpp +++ b/gspin_patterns.cpp @@ -25,19 +25,6 @@ #define ADDREND (0xFFFFFFFFFFFFFFFFUL) #define ADDRUSYNC (0xFFFFFFFFFFFFFFFEUL) -// Class Static data initialization -char Metrics::srcline[2][NGS][MAX_LINE_LENGTH]; -addr_t InstrInfo::iaddrs[2][NGS]; -int64_t InstrInfo::icnt[2][NGS]; -int64_t InstrInfo::occ[2][NGS]; - -#if 0 -int64_t InstrWindow::w_iaddrs[2][IWINDOW]; -int64_t InstrWindow::w_bytes[2][IWINDOW]; -int64_t InstrWindow::w_maddr[2][IWINDOW][VBYTES]; -int64_t InstrWindow::w_cnt[2][IWINDOW]; -#endif - //FROM DR SOURCE //DR trace struct _trace_entry_t { From 88d7849d89ab1c20da658ddce0750c2620a02dec Mon Sep 17 00:00:00 2001 From: christopher Date: Wed, 10 Apr 2024 23:16:04 -0400 Subject: [PATCH 26/76] Simply some logic in libgs_patterns handle_trace_entry() Instroduced a file runner for gsnv_patterns which can write and read an nvbit trace binary output file 
checkpoint for nvbit --- CMakeLists.txt | 4 + gs_patterns.h | 3 +- gs_patterns_core.cpp | 68 +++----- gsnv_patterns.h | 389 +++++++++++++++++++++++++++++++++++++++---- gsnv_test.cpp | 59 +++++++ gspin_patterns.cpp | 12 +- 6 files changed, 458 insertions(+), 77 deletions(-) create mode 100644 gsnv_test.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 427c4f0..3ae8f1e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,12 +15,16 @@ add_library(gs_patterns_core SHARED gs_patterns_core.h gs_patterns_core.cpp gsnv_patterns.h + gsnv_test.cpp ) add_executable( gs_patterns gspin_patterns.cpp) +add_executable(test gsnv_test.cpp) + target_link_libraries(gs_patterns gs_patterns_core) +target_link_libraries(test gs_patterns_core) set(CMAKE_CXX_STANDARD_LIBRARIES "-lm -lz ${CMAKE_CXX_STANDARD_LIBRARIES}") diff --git a/gs_patterns.h b/gs_patterns.h index 19ff61c..3a7f8bc 100644 --- a/gs_patterns.h +++ b/gs_patterns.h @@ -81,7 +81,7 @@ class InstrAddrAdapter virtual size_t get_size() const = 0; virtual addr_t get_address() const = 0; - virtual unsigned short get_type() const = 0; + virtual unsigned short get_type() const = 0; // must be 0 for GATHER, 1 for SCATTER !! // multiple? 
virtual bool is_gather() const @@ -215,6 +215,7 @@ class InstrWindow // moved from static storage to instance variables (watch out for stack overflow) // Revisit and move to heap if an issue - estimate of 2k*3 + 128k + // First dimension is 0=GATHER/1=SCATTER int64_t w_iaddrs[2][IWINDOW]; int64_t w_bytes[2][IWINDOW]; int64_t w_maddr[2][IWINDOW][VBYTES]; diff --git a/gs_patterns_core.cpp b/gs_patterns_core.cpp index 4e483b5..9aa0e3a 100644 --- a/gs_patterns_core.cpp +++ b/gs_patterns_core.cpp @@ -37,7 +37,7 @@ void translate_iaddr(const std::string & binary, char *source_line, addr_t iaddr } -void create_metrics_file(FILE *fp, FILE *fp2, const std::string & file_prefix, Metrics & target_metrics, bool & first_spatter) +static void create_metrics_file(FILE *fp, FILE *fp2, const std::string & file_prefix, Metrics & target_metrics, bool & first_spatter) { int i = 0; int j = 0; @@ -49,6 +49,8 @@ void create_metrics_file(FILE *fp, FILE *fp2, const std::string & file_prefix, M int64_t n_stride[1027]; double outbounds; + if (file_prefix.empty()) throw GSFileError ("Empty file prefix provided."); + if (first_spatter) printf("\n"); printf("\n"); @@ -153,6 +155,8 @@ void create_spatter_file(MemPatterns & mp, const std::string & file_prefix) // Create spatter file FILE *fp, *fp2; + if (file_prefix.empty()) throw GSFileError ("Empty file prefix provided."); + std::string json_name = file_prefix + ".json"; fp = fopen(json_name.c_str(), "w"); if (NULL == fp) { @@ -204,8 +208,8 @@ void normalize_stats(Metrics & target_metrics) void handle_trace_entry(MemPatterns & mp, const InstrAddrAdapter & ia) { - int i, j, k, w; - int w_rw_idx; + int i, j, k, w = 0; + int w_rw_idx; // Index into instruction window first dimension (RW: 0=Gather(R) or 1=Scatter(W)) int w_idx; int gs; @@ -300,57 +304,39 @@ void handle_trace_entry(MemPatterns & mp, const InstrAddrAdapter & ia) //gather / scatter if (iw.maddr != iw.maddr_prev) { - if ((gs == -1) && (abs(iw.maddr - iw.maddr_prev) > 1)) + if ((gs == 
-1) && (abs(iw.maddr - iw.maddr_prev) > 1)) // ? > 1 stride (non-contiguous) <-------------------- gs = w; } iw.maddr_prev = iw.maddr; } - for (j = 0; j < iw.w_cnt[w][i]; j++) { + // Update other_cnt + if (gs == -1) trace_info.other_cnt += iw.w_cnt[w][i]; - if (gs == -1) { - trace_info.other_cnt++; - continue; - } - } - - if (gs == 0) { // GATHER - - trace_info.gather_occ_avg += iw.w_cnt[w][i]; - gather_metrics.cnt += 1.0; - - for (k = 0; k < NGS; k++) { - if (gather_iinfo.get_iaddrs()[k] == 0) { - gather_iinfo.get_iaddrs()[k] = iw.w_iaddrs[w][i]; - (gather_iinfo.get_icnt()[k])++; - gather_iinfo.get_occ()[k] += iw.w_cnt[w][i]; - break; - } - - if (gather_iinfo.get_iaddrs()[k] == iw.w_iaddrs[w][i]) { - (gather_iinfo.get_icnt()[k])++; - gather_iinfo.get_occ()[k] += iw.w_cnt[w][i]; - break; - } + // GATHER or SCATTER handling + if (gs == 0 || gs == 1) { + InstrInfo & target_iinfo = (gs == 0) ? gather_iinfo : scatter_iinfo; + if (gs == 0) { + trace_info.gather_occ_avg += iw.w_cnt[w][i]; + gather_metrics.cnt += 1.0; + } + else { + trace_info.scatter_occ_avg += iw.w_cnt[w][i]; + scatter_metrics.cnt += 1.0; } - - } else if (gs == 1) { // SCATTER - - trace_info.scatter_occ_avg += iw.w_cnt[w][i]; - scatter_metrics.cnt += 1.0; for (k = 0; k < NGS; k++) { - if (scatter_iinfo.get_iaddrs()[k] == 0) { - scatter_iinfo.get_iaddrs()[k] = iw.w_iaddrs[w][i]; - (scatter_iinfo.get_icnt()[k])++; - scatter_iinfo.get_occ()[k] += iw.w_cnt[w][i]; + if (target_iinfo.get_iaddrs()[k] == 0) { + target_iinfo.get_iaddrs()[k] = iw.w_iaddrs[w][i]; + (target_iinfo.get_icnt()[k])++; + target_iinfo.get_occ()[k] += iw.w_cnt[w][i]; break; } - if (scatter_iinfo.get_iaddrs()[k] == iw.w_iaddrs[w][i]) { - (scatter_iinfo.get_icnt()[k])++; - scatter_iinfo.get_occ()[k] += iw.w_cnt[w][i]; + if (target_iinfo.get_iaddrs()[k] == iw.w_iaddrs[w][i]) { + (target_iinfo.get_icnt()[k])++; + target_iinfo.get_occ()[k] += iw.w_cnt[w][i]; break; } } diff --git a/gsnv_patterns.h b/gsnv_patterns.h index 29d6921..30518c8 
100644 --- a/gsnv_patterns.h +++ b/gsnv_patterns.h @@ -2,10 +2,35 @@ #pragma once #include -#include +#include +#include +#include +#include +#include +#include +#include #include "gs_patterns.h" #include "gs_patterns_core.h" +#include "utils.h" + +#define HEX(x) \ + "0x" << std::setfill('0') << std::setw(16) << std::hex << (uint64_t)x \ + << std::dec + +//#include "common.h" +// Copying to redudce dependency on nvgs_patterns +#if 1 +typedef struct { + uint64_t grid_launch_id; + int cta_id_x; + int cta_id_y; + int cta_id_z; + int warp_id; + int opcode_id; + uint64_t addrs[32]; +} mem_access_t; +#endif struct _trace_entry_t { unsigned short type; // 2 bytes: trace_type_t @@ -17,50 +42,91 @@ struct _trace_entry_t { } __attribute__((packed)); typedef struct _trace_entry_t trace_entry_t; +gzFile open_trace_file(const std::string & trace_file_name) +{ + gzFile fp; + + fp = gzopen(trace_file_name.c_str(), "hrb"); + if (NULL == fp) { + throw GSFileError("Could not open " + trace_file_name + "!"); + } + return fp; +} + +void close_trace_file (gzFile & fp) +{ + gzclose(fp); +} + +int tline_read(gzFile fp, mem_access_t * val, mem_access_t **p_val, int *edx) { + + int idx; + + idx = (*edx) / sizeof(trace_entry_t); + //first read + if (NULL == *p_val) { + *edx = gzread(fp, val, sizeof(mem_access_t) * NBUFS); + *p_val = val; + + } else if (*p_val == &val[idx]) { + *edx = gzread(fp, val, sizeof(mem_access_t) * NBUFS); + *p_val = val; + } + + if (0 == *edx) + return 0; + + return 1; +} + // An adapter for trace_entry_t (temporaritly untl replaced with nvbit memory detail type) class InstrAddrAdapterForNV : public InstrAddrAdapter { public: - InstrAddrAdapterForNV(const trace_entry_t * te) - { - _te.type = te->type; - _te.size = te->size; - _te.addr = te->addr; - } + InstrAddrAdapterForNV(const trace_entry_t * te) : _te(*te) { } InstrAddrAdapterForNV(const trace_entry_t te) : _te(te) { } virtual ~InstrAddrAdapterForNV() { } - virtual bool is_valid() const override { return false; 
} - virtual bool is_mem_instr() const override { return false; } + virtual bool is_valid() const override { return true; } + virtual bool is_mem_instr() const override { return true; } virtual bool is_other_instr() const override { return false; } + virtual mem_access_type get_mem_instr_type() const override { return (_te.type == 0) ? GATHER : SCATTER; } - virtual mem_access_type get_mem_instr_type() const override - { - return GATHER; // UNSUPPORTED <=-=-------------------------------------- FIX ME - } + virtual size_t get_size() const override { return _te.size / 8; } // in bytes + virtual addr_t get_address() const override { return _te.addr; } + virtual unsigned short get_type() const override { return _te.type; } // must be 0 for GATHER, 1 for SCATTER !! - virtual size_t get_size() const override { return _te.size; } // TODO: FIX conversion <---------------------------------------- - virtual addr_t get_address() const override { return _te.addr; }; - virtual unsigned short get_type() const override { return _te.type; } - - virtual void output(std::ostream & os) const override { - os << "InstrAddrAdapterForNV: trace entry: type: [" << _te.type << "] size: [" << _te.size << "]"; - } + virtual void output(std::ostream & os) const override { os << "InstrAddrAdapterForNV: trace entry: type: [" << _te.type << "] size: [" << _te.size << "]";} private: trace_entry_t _te; + //mem_access_t _ma; }; class MemPatternsForNV : public MemPatterns { public: MemPatternsForNV(): _metrics(GATHER, SCATTER), - _iinfo(GATHER, SCATTER) + _iinfo(GATHER, SCATTER), + _ofs() { } - virtual ~MemPatternsForNV() override { } + virtual ~MemPatternsForNV() override { + if (_write_trace_file) { + _ofs.flush(); + _ofs.close(); + } + + /// TODO: COMPRESS trace_file on exit +#if 1 + std::cout << "-- OPCODE_ID to OPCODE MAPPING -- " << std::endl; + for (auto itr = id_to_opcode_map.begin(); itr != id_to_opcode_map.end(); itr++) { + std::cout << "OPCODE: " << itr->first << " -> " << itr->second << 
std::endl; + } +#endif + } void handle_trace_entry(const InstrAddrAdapter & ia) override; void generate_patterns() override; @@ -75,29 +141,143 @@ class MemPatternsForNV : public MemPatterns TraceInfo & get_trace_info() override { return _trace_info; } InstrWindow & get_instr_window() override { return _iw; } - //void set_binary_file(const std::string & binary_file_name) { _binary_file_name = binary_file_name; } - //const std::string & get_binary_file_name() { return _binary_file_name; } + void set_trace_file(const std::string & trace_file_name) { _trace_file_name = trace_file_name; } + const std::string & get_trace_file_name() { return _trace_file_name; } + + void set_binary_file(const std::string & binary_file_name) { _binary_file_name = binary_file_name; } + const std::string & get_binary_file_name() { return _binary_file_name; } void set_file_prefix(const std::string & prefix) { _file_prefix = prefix; } - const std::string & get_file_prefix() { return _file_prefix; } + std::string get_file_prefix(); void update_metrics(); - //void process_traces(); - void update_source_lines() { } - double update_source_lines_from_binary(mem_access_type) { return 0.0; } + std::string get_trace_file_prefix (); + + void process_traces(); + void update_source_lines(); + double update_source_lines_from_binary(mem_access_type); void process_second_pass(); + void set_trace_out_file(const std::string & trace_file_name) { + _trace_out_file_name = trace_file_name; + + try + { + _ofs.open(trace_file_name, std::ios::binary); + if (_ofs.is_open()) _write_trace_file = true; + } + catch (...) 
+ { + throw GSFileError("Unable to open " + trace_file_name + " for writing"); + } + } + + // Handle an nvbit CTA memory update + void handle_cta_memory_access(const mem_access_t * ma); + // Validate cta stride is within minimum + bool valid_gs_stride(const std::vector & te_list, const uint32_t min_stride); + + // store opcode mappings + bool add_or_update_opcode(int opcode_id, const std::string & opcode) { + auto it = id_to_opcode_map.find(opcode_id); + if (it == id_to_opcode_map.end()) { + id_to_opcode_map[opcode_id] = opcode; + //std::cout << "OPCODE: " << opcode_id << " -> " << opcode << std::endl; + return true; + } + return false; + } + // retreive opcode mapping by opcode_id + const std::string & get_opcode(int opcode_id) { + auto result = id_to_opcode_map.find(opcode_id); + if (result != id_to_opcode_map.end()) { + return result->second; + } + std::stringstream ss; + ss << "Unknown opcode_id: " << opcode_id; + throw GSDataError(ss.str()); + } + + std::vector convert_to_trace_entry(const mem_access_t & ma) + { + // opcode : forms LD.E.64, ST.E.64 + + std::string mem_type; + std::string mem_attr; + uint16_t mem_size = 0; + int count = 0; + uint16_t mem_type_code = 0; + uint16_t mem_attr_code = 0; + + //const char * m = reinterpret_cast(&ma.opcode); + //const std::string opcode(m, 8); + std::string opcode = get_opcode(ma.opcode_id); + + size_t start=0, pos = 0; + while (std::string::npos != (pos = opcode.find(".", start))) + { + count++; + std::string token = opcode.substr(start, pos-start); + uint64_t s; + switch (count) + { + case 1: mem_type = token; + if ("LD" == mem_type) { mem_type_code = 0; } + else if ("ST" == mem_type) { mem_type_code = 1; } + else throw GSDataError ("Invalid mem_type must be LD(1) or ST(1)"); + break; + + case 2: mem_attr = token; + if ("E" == mem_attr) { mem_attr_code = 1; } + else { mem_attr_code = 2; } + break; + + default: + throw GSDataError("Unsupported opcode: " + opcode); + } + start = pos+1; + } + // Snag the rest as 
mem_size + if (start < opcode.length()) { + std::string token = opcode.substr(start, opcode.length()); + int s = atoi(token.c_str()); + mem_size = (uint16_t) s; + } + else { + throw GSDataError("Unsupported opcode: " + opcode); + } + + std::vector te_list; + for (int i = 0; i < 32; i++) + { + if (ma.addrs[i] != 0) + { + trace_entry_t te { mem_type_code, mem_size, ma.addrs[i] }; + te_list.push_back(te); + } + } + return std::move(te_list); + } + private: + std::pair _metrics; std::pair _iinfo; TraceInfo _trace_info; InstrWindow _iw; + std::string _trace_file_name; std::string _binary_file_name; std::string _file_prefix; + std::string _trace_out_file_name; + bool _write_trace_file = false; + std::ofstream _ofs; std::vector _traces; + + //std::map opcode_to_id_map; + std::map id_to_opcode_map; }; @@ -172,12 +352,111 @@ void MemPatternsForNV::update_metrics() ::normalize_stats(get_scatter_metrics()); } +std::string MemPatternsForNV::get_file_prefix() +{ + if (!_file_prefix.empty()) return _file_prefix; + + // If no file_prefix was set try extracting one from trace_file + std::string prefix = _trace_file_name; + size_t pos = std::string::npos; + while (std::string::npos != (pos = prefix.find(".gz"))) + { + prefix.replace(pos, 3, ""); + } + return prefix; +} + +// First Pass +void MemPatternsForNV::process_traces() +{ + int iret = 0; + mem_access_t * t_line; + InstrWindow iw; + + gzFile fp_trace = open_trace_file(get_trace_file_name()); + + printf("First pass to find top gather / scatter iaddresses\n"); + fflush(stdout); + + mem_access_t * p_trace = NULL; + mem_access_t trace_buff[NBUFS]; // was static (1024 bytes) + + while (tline_read(fp_trace, trace_buff, &p_trace, &iret)) { + //decode drtrace + t_line = p_trace; + + if (-1 == t_line->cta_id_x) { break; } + + try + { + handle_cta_memory_access(t_line); + + p_trace++; + } + catch (const GSError & ex) { + std::cerr << "ERROR: " << ex.what() << std::endl; + throw; + } + } + + close_trace_file(fp_trace); + + //metrics + 
get_trace_info().gather_occ_avg /= get_gather_metrics().cnt; + get_trace_info().scatter_occ_avg /= get_scatter_metrics().cnt; + + display_stats(*this); + +} + + +// TRY +void MemPatternsForNV::update_source_lines() +{ + // Find source lines for gathers - Must have symbol + printf("\nSymbol table lookup for gathers..."); + fflush(stdout); + + get_gather_metrics().cnt = update_source_lines_from_binary(GATHER); + + // Find source lines for scatters + printf("Symbol table lookup for scatters..."); + fflush(stdout); + + get_scatter_metrics().cnt = update_source_lines_from_binary(SCATTER); +} + +// TRY +double MemPatternsForNV::update_source_lines_from_binary(mem_access_type mType) +{ + double scatter_cnt = 0.0; + + InstrInfo & target_iinfo = get_iinfo(mType); + Metrics & target_metrics = get_metrics(mType); + + //Check it is not a library + for (int k = 0; k < NGS; k++) { + + if (0 == target_iinfo.get_iaddrs()[k]) { + break; + } + translate_iaddr(get_binary_file_name(), target_metrics.get_srcline()[k], target_iinfo.get_iaddrs()[k]); + if (startswith(target_metrics.get_srcline()[k], "?")) + target_iinfo.get_icnt()[k] = 0; + + scatter_cnt += target_iinfo.get_icnt()[k]; + } + printf("done.\n"); + + return scatter_cnt; + +} void MemPatternsForNV::process_second_pass() { uint64_t mcnt = 0; // used our own local mcnt while iterating over file in this method. 
int iret = 0; - trace_entry_t *drline; +// trace_entry_t *drline; // State carried thru addr_t iaddr; @@ -197,3 +476,55 @@ void MemPatternsForNV::process_second_pass() iaddr, maddr, mcnt, gather_base, scatter_base); } } + +void MemPatternsForNV::handle_cta_memory_access(const mem_access_t * ma) +{ + if (_write_trace_file && _ofs.is_open()) { + // Write entry to trace_output file + _ofs.write(reinterpret_cast(ma), sizeof *ma); + } +#if 0 + std::stringstream ss; + //ss << "CTX " << HEX(ctx) << " - grid_launch_id " + ss << "GSNV_PATTERNS: CTX " << " - grid_launch_id " + << ma->grid_launch_id << " - CTA " << ma->cta_id_x << "," << ma->cta_id_y << "," << ma->cta_id_z + << " - warp " << ma->warp_id << " - " << get_opcode(ma->opcode_id) << " - "; + for (int i = 0; i < 32; i++) { + ss << HEX(ma->addrs[i]) << " "; + } + std::cout << ss.str() << std::endl; +#endif + + std::vector te_list = convert_to_trace_entry(*ma); + uint64_t min_size = !te_list.empty() ? (te_list[0].size / 8) + 1 : 0; + if (valid_gs_stride(te_list, min_size)) + { + for (auto it = te_list.begin(); it != te_list.end(); it++) + { + handle_trace_entry(InstrAddrAdapterForNV(*it)); + } + } +} + +bool MemPatternsForNV::valid_gs_stride(const std::vector & te_list, const uint32_t min_stride) +{ + bool valid_stride = false; + uint32_t min_stride_found = INT32_MAX; + uint64_t last_addr = 0; + bool first = true; + for (auto it = te_list.begin(); it != te_list.end(); it++) + { + const trace_entry_t & te = *it; + if (first) { + first = false; + last_addr = te.addr; + continue; + } + + uint64_t diff = std::labs (last_addr - (uint64_t)te.addr); + if (diff < min_stride_found) + min_stride_found = diff; + } + + return min_stride_found >= min_stride; +} \ No newline at end of file diff --git a/gsnv_test.cpp b/gsnv_test.cpp new file mode 100644 index 0000000..e4aa390 --- /dev/null +++ b/gsnv_test.cpp @@ -0,0 +1,59 @@ + +#include "gs_patterns.h" +#include "gsnv_patterns.h" + +int main(int argc, char **argv) +{ + try + { + if 
(argc != 3) { + throw GSError("Invalid arguments, should be: trace.gz binary_file_name"); + } + + MemPatternsForNV mp; + + mp.set_trace_file(argv[1]); + mp.set_binary_file(argv[2]); + + // ----------------- Process Traces ----------------- + + mp.add_or_update_opcode(0, "LD.E.64"); + mp.add_or_update_opcode(1, "ST.E.64"); + + mp.process_traces(); + + // ----------------- Generate Patterns ----------------- + + mp.generate_patterns(); + } + catch (const GSFileError & ex) + { + std::cerr << "ERROR: " << ex.what() << std::endl; + exit(-1); + } + catch (const GSAllocError & ex) + { + std::cerr << "ERROR: " << ex.what() << std::endl; + exit(-1); + } + catch (const GSDataError & ex) + { + std::cerr << "ERROR: " << ex.what() << std::endl; + exit(1); + } + catch (const GSError & ex) + { + std::cerr << "ERROR: " << ex.what() << std::endl; + exit(1); + } + catch (const std::exception & ex) + { + std::cerr << "ERROR: " << ex.what() << std::endl; + exit(-1); + } + + return 0; +} + + + diff --git a/gspin_patterns.cpp b/gspin_patterns.cpp index 6b12019..b7fe439 100644 --- a/gspin_patterns.cpp +++ b/gspin_patterns.cpp @@ -100,9 +100,9 @@ class InstrAddrAdapterForPin : public InstrAddrAdapter else return SCATTER; } - virtual size_t get_size() const override { return _te.size; } // TODO: FIX conversion <---------------------------------------- - virtual addr_t get_address() const override { return _te.addr; }; - virtual unsigned short get_type() const override { return _te.type; } + virtual size_t get_size() const override { return _te.size; } + virtual addr_t get_address() const override { return _te.addr; } + virtual unsigned short get_type() const override { return _te.type; } // must be 0 for GATHER, 1 for SCATTER !! 
virtual void output(std::ostream & os) const override { os << "InstrAddrAdapterForPin: trace entry: type: [" << _te.type << "] size: [" << _te.size << "]"; @@ -140,7 +140,7 @@ class MemPatternsForPin : public MemPatterns void update_metrics(); - std::string get_trace_file_prefix (); + std::string get_file_prefix (); void process_traces(); void update_source_lines(); @@ -201,7 +201,7 @@ void MemPatternsForPin::generate_patterns() // ----------------- Create Spatter File ----------------- - ::create_spatter_file(*this, get_trace_file_prefix()); + ::create_spatter_file(*this, get_file_prefix()); } @@ -227,7 +227,7 @@ void MemPatternsForPin::update_metrics() close_trace_file(fp_drtrace); } -std::string MemPatternsForPin::get_trace_file_prefix() +std::string MemPatternsForPin::get_file_prefix() { std::string prefix = _trace_file_name; size_t pos = std::string::npos; From eb59f7af6219bde21d83c47844a9127d87d9c6f1 Mon Sep 17 00:00:00 2001 From: christopher Date: Wed, 10 Apr 2024 23:27:53 -0400 Subject: [PATCH 27/76] mem_trace initial version --- nvbit_tracing/nvgs_patterns/Makefile | 40 +++ nvbit_tracing/nvgs_patterns/common.h | 40 +++ nvbit_tracing/nvgs_patterns/inject_funcs.cu | 72 ++++ nvbit_tracing/nvgs_patterns/mem_trace.cu | 350 ++++++++++++++++++++ 4 files changed, 502 insertions(+) create mode 100644 nvbit_tracing/nvgs_patterns/Makefile create mode 100644 nvbit_tracing/nvgs_patterns/common.h create mode 100644 nvbit_tracing/nvgs_patterns/inject_funcs.cu create mode 100644 nvbit_tracing/nvgs_patterns/mem_trace.cu diff --git a/nvbit_tracing/nvgs_patterns/Makefile b/nvbit_tracing/nvgs_patterns/Makefile new file mode 100644 index 0000000..bee52fb --- /dev/null +++ b/nvbit_tracing/nvgs_patterns/Makefile @@ -0,0 +1,40 @@ +NVCC=nvcc -ccbin=$(CXX) -D_FORCE_INLINES + +NVCC_VER_REQ=10.1 +NVCC_VER=$(shell $(NVCC) --version | grep release | cut -f2 -d, | cut -f3 -d' ') +NVCC_VER_CHECK=$(shell echo "${NVCC_VER} >= $(NVCC_VER_REQ)" | bc) + +ifeq ($(NVCC_VER_CHECK),0) +$(error 
ERROR: nvcc version >= $(NVCC_VER_REQ) required to compile an nvbit tool! Instrumented applications can still use lower versions of nvcc.) +endif + +NVBIT_PATH=../../core +INCLUDES=-I$(NVBIT_PATH) + +LIBS=-L$(NVBIT_PATH) -lnvbit +NVCC_PATH=-L $(subst bin/nvcc,lib64,$(shell which nvcc | tr -s /)) + +SOURCES=$(wildcard *.cu) + +OBJECTS=$(SOURCES:.cu=.o) +ARCH?=35 + +mkfile_path := $(abspath $(lastword $(MAKEFILE_LIST))) +current_dir := $(notdir $(patsubst %/,%,$(dir $(mkfile_path)))) + +NVBIT_TOOL=$(current_dir).so + +all: $(NVBIT_TOOL) + +$(NVBIT_TOOL): $(OBJECTS) $(NVBIT_PATH)/libnvbit.a + $(NVCC) -arch=sm_$(ARCH) -O3 $(OBJECTS) $(LIBS) $(NVCC_PATH) -lcuda -lcudart_static -shared -o $@ + +%.o: %.cu + $(NVCC) -dc -c -std=c++11 $(INCLUDES) -Xptxas -cloning=no -Xcompiler -Wall -arch=sm_$(ARCH) -O3 -Xcompiler -fPIC $< -o $@ + +inject_funcs.o: inject_funcs.cu + #$(NVCC) $(INCLUDES) -maxrregcount=24 -Xptxas -astoolspatch --keep-device-functions -arch=sm_$(ARCH) -Xcompiler -Wall -Xcompiler -fPIC -c $< -o $@ + $(NVCC) $(INCLUDES) -maxrregcount=16 -Xptxas -astoolspatch --keep-device-functions -arch=sm_$(ARCH) -Xcompiler -Wall -Xcompiler -fPIC -c $< -o $@ + +clean: + rm -f *.so *.o diff --git a/nvbit_tracing/nvgs_patterns/common.h b/nvbit_tracing/nvgs_patterns/common.h new file mode 100644 index 0000000..42bca3d --- /dev/null +++ b/nvbit_tracing/nvgs_patterns/common.h @@ -0,0 +1,40 @@ +/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of NVIDIA CORPORATION nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +/* information collected in the instrumentation function and passed + * on the channel from the GPU to the CPU */ +typedef struct { + uint64_t grid_launch_id; + int cta_id_x; + int cta_id_y; + int cta_id_z; + int warp_id; + int opcode_id; + uint64_t addrs[32]; +} mem_access_t; diff --git a/nvbit_tracing/nvgs_patterns/inject_funcs.cu b/nvbit_tracing/nvgs_patterns/inject_funcs.cu new file mode 100644 index 0000000..895ee75 --- /dev/null +++ b/nvbit_tracing/nvgs_patterns/inject_funcs.cu @@ -0,0 +1,72 @@ +/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of NVIDIA CORPORATION nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include + +#include "utils/utils.h" + +/* for channel */ +#include "utils/channel.hpp" + +/* contains definition of the mem_access_t structure */ +#include "common.h" + +extern "C" __device__ __noinline__ void instrument_mem(int pred, int opcode_id, + uint64_t addr, + uint64_t grid_launch_id, + uint64_t pchannel_dev) { + /* if thread is predicated off, return */ + if (!pred) { + return; + } + + int active_mask = __ballot_sync(__activemask(), 1); + const int laneid = get_laneid(); + const int first_laneid = __ffs(active_mask) - 1; + + mem_access_t ma; + + /* collect memory address information from other threads */ + for (int i = 0; i < 32; i++) { + ma.addrs[i] = __shfl_sync(active_mask, addr, i); + } + + int4 cta = get_ctaid(); + ma.grid_launch_id = grid_launch_id; + ma.cta_id_x = cta.x; + ma.cta_id_y = cta.y; + ma.cta_id_z = cta.z; + ma.warp_id = get_warpid(); + ma.opcode_id = opcode_id; + + /* first active lane pushes information on the channel */ + if (first_laneid == laneid) { + ChannelDev* channel_dev = (ChannelDev*)pchannel_dev; + channel_dev->push(&ma, sizeof(mem_access_t)); + } +} diff --git a/nvbit_tracing/nvgs_patterns/mem_trace.cu b/nvbit_tracing/nvgs_patterns/mem_trace.cu new file mode 100644 index 0000000..f20f782 --- /dev/null +++ b/nvbit_tracing/nvgs_patterns/mem_trace.cu @@ -0,0 +1,350 @@ +/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of NVIDIA CORPORATION nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* every tool needs to include this once */ +#include "nvbit_tool.h" + +/* nvbit interface file */ +#include "nvbit.h" + +/* for channel */ +#include "utils/channel.hpp" + +/* contains definition of the mem_access_t structure */ +#include "common.h" + +#define HEX(x) \ + "0x" << std::setfill('0') << std::setw(16) << std::hex << (uint64_t)x \ + << std::dec + +#define CHANNEL_SIZE (1l << 20) + +struct CTXstate { + /* context id */ + int id; + + /* Channel used to communicate from GPU to CPU receiving thread */ + ChannelDev* channel_dev; + ChannelHost channel_host; +}; + +/* lock */ +pthread_mutex_t mutex; + +/* map to store context state */ +std::unordered_map ctx_state_map; + +/* skip flag used to avoid re-entry on the nvbit_callback when issuing + * flush_channel kernel call */ +bool skip_callback_flag = false; + +/* global control variables for this tool */ +uint32_t instr_begin_interval = 0; 
+uint32_t instr_end_interval = UINT32_MAX; +int verbose = 0; + +/* opcode to id map and reverse map */ +std::map opcode_to_id_map; +std::map id_to_opcode_map; + +/* grid launch id, incremented at every launch */ +uint64_t grid_launch_id = 0; + +void nvbit_at_init() { + setenv("CUDA_MANAGED_FORCE_DEVICE_ALLOC", "1", 1); + GET_VAR_INT( + instr_begin_interval, "INSTR_BEGIN", 0, + "Beginning of the instruction interval where to apply instrumentation"); + GET_VAR_INT( + instr_end_interval, "INSTR_END", UINT32_MAX, + "End of the instruction interval where to apply instrumentation"); + GET_VAR_INT(verbose, "TOOL_VERBOSE", 0, "Enable verbosity inside the tool"); + std::string pad(100, '-'); + printf("%s\n", pad.c_str()); + + /* set mutex as recursive */ + pthread_mutexattr_t attr; + pthread_mutexattr_init(&attr); + pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE); + pthread_mutex_init(&mutex, &attr); +} + +/* Set used to avoid re-instrumenting the same functions multiple times */ +std::unordered_set already_instrumented; + +void instrument_function_if_needed(CUcontext ctx, CUfunction func) { + assert(ctx_state_map.find(ctx) != ctx_state_map.end()); + CTXstate* ctx_state = ctx_state_map[ctx]; + + /* Get related functions of the kernel (device function that can be + * called by the kernel) */ + std::vector related_functions = + nvbit_get_related_functions(ctx, func); + + /* add kernel itself to the related function vector */ + related_functions.push_back(func); + + /* iterate on function */ + for (auto f : related_functions) { + /* "recording" function was instrumented, if set insertion failed + * we have already encountered this function */ + if (!already_instrumented.insert(f).second) { + continue; + } + + /* get vector of instructions of function "f" */ + const std::vector& instrs = nvbit_get_instrs(ctx, f); + + if (verbose) { + printf( + "MEMTRACE: CTX %p, Inspecting CUfunction %p name %s at address " + "0x%lx\n", + ctx, f, nvbit_get_func_name(ctx, f), 
nvbit_get_func_addr(f)); + } + + uint32_t cnt = 0; + /* iterate on all the static instructions in the function */ + for (auto instr : instrs) { + if (cnt < instr_begin_interval || cnt >= instr_end_interval || + instr->getMemorySpace() == InstrType::MemorySpace::NONE || + instr->getMemorySpace() == InstrType::MemorySpace::CONSTANT) { + cnt++; + continue; + } + if (verbose) { + instr->printDecoded(); + } + + if (opcode_to_id_map.find(instr->getOpcode()) == + opcode_to_id_map.end()) { + int opcode_id = opcode_to_id_map.size(); + opcode_to_id_map[instr->getOpcode()] = opcode_id; + id_to_opcode_map[opcode_id] = std::string(instr->getOpcode()); + } + + int opcode_id = opcode_to_id_map[instr->getOpcode()]; + int mref_idx = 0; + /* iterate on the operands */ + for (int i = 0; i < instr->getNumOperands(); i++) { + /* get the operand "i" */ + const InstrType::operand_t* op = instr->getOperand(i); + + if (op->type == InstrType::OperandType::MREF) { + /* insert call to the instrumentation function with its + * arguments */ + nvbit_insert_call(instr, "instrument_mem", IPOINT_BEFORE); + /* predicate value */ + nvbit_add_call_arg_guard_pred_val(instr); + /* opcode id */ + nvbit_add_call_arg_const_val32(instr, opcode_id); + /* memory reference 64 bit address */ + nvbit_add_call_arg_mref_addr64(instr, mref_idx); + /* add "space" for kernel function pointer that will be set + * at launch time (64 bit value at offset 0 of the dynamic + * arguments)*/ + nvbit_add_call_arg_launch_val64(instr, 0); + /* add pointer to channel_dev*/ + nvbit_add_call_arg_const_val64( + instr, (uint64_t)ctx_state->channel_dev); + mref_idx++; + } + } + cnt++; + } + } +} + +__global__ void flush_channel(ChannelDev* ch_dev) { + /* set a CTA id = -1 to indicate communication thread that this is the + * termination flag */ + mem_access_t ma; + ma.cta_id_x = -1; + ch_dev->push(&ma, sizeof(mem_access_t)); + /* flush channel */ + ch_dev->flush(); +} + +void nvbit_at_cuda_event(CUcontext ctx, int is_exit, 
nvbit_api_cuda_t cbid, + const char* name, void* params, CUresult* pStatus) { + pthread_mutex_lock(&mutex); + + /* we prevent re-entry on this callback when issuing CUDA functions inside + * this function */ + if (skip_callback_flag) { + pthread_mutex_unlock(&mutex); + return; + } + skip_callback_flag = true; + + assert(ctx_state_map.find(ctx) != ctx_state_map.end()); + CTXstate* ctx_state = ctx_state_map[ctx]; + + if (cbid == API_CUDA_cuLaunchKernel_ptsz || + cbid == API_CUDA_cuLaunchKernel) { + cuLaunchKernel_params* p = (cuLaunchKernel_params*)params; + + /* Make sure GPU is idle */ + cudaDeviceSynchronize(); + assert(cudaGetLastError() == cudaSuccess); + + if (!is_exit) { + /* instrument */ + instrument_function_if_needed(ctx, p->f); + + int nregs = 0; + CUDA_SAFECALL( + cuFuncGetAttribute(&nregs, CU_FUNC_ATTRIBUTE_NUM_REGS, p->f)); + + int shmem_static_nbytes = 0; + CUDA_SAFECALL( + cuFuncGetAttribute(&shmem_static_nbytes, + CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, p->f)); + + /* get function name and pc */ + const char* func_name = nvbit_get_func_name(ctx, p->f); + uint64_t pc = nvbit_get_func_addr(p->f); + + /* set grid launch id at launch time */ + nvbit_set_at_launch(ctx, p->f, &grid_launch_id, sizeof(uint64_t)); + /* increment grid launch id for next launch */ + grid_launch_id++; + + /* enable instrumented code to run */ + nvbit_enable_instrumented(ctx, p->f, true); + + printf( + "MEMTRACE: CTX 0x%016lx - LAUNCH - Kernel pc 0x%016lx - Kernel " + "name %s - grid launch id %ld - grid size %d,%d,%d - block " + "size %d,%d,%d - nregs %d - shmem %d - cuda stream id %ld\n", + (uint64_t)ctx, pc, func_name, grid_launch_id, p->gridDimX, + p->gridDimY, p->gridDimZ, p->blockDimX, p->blockDimY, + p->blockDimZ, nregs, shmem_static_nbytes + p->sharedMemBytes, + (uint64_t)p->hStream); + } + } + skip_callback_flag = false; + pthread_mutex_unlock(&mutex); +} + +void* recv_thread_fun(void* args) { + CUcontext ctx = (CUcontext)args; + + pthread_mutex_lock(&mutex); + /* get 
context state from map */ + assert(ctx_state_map.find(ctx) != ctx_state_map.end()); + CTXstate* ctx_state = ctx_state_map[ctx]; + + ChannelHost* ch_host = &ctx_state->channel_host; + pthread_mutex_unlock(&mutex); + char* recv_buffer = (char*)malloc(CHANNEL_SIZE); + + bool done = false; + while (!done) { + /* receive buffer from channel */ + uint32_t num_recv_bytes = ch_host->recv(recv_buffer, CHANNEL_SIZE); + if (num_recv_bytes > 0) { + uint32_t num_processed_bytes = 0; + while (num_processed_bytes < num_recv_bytes) { + mem_access_t* ma = + (mem_access_t*)&recv_buffer[num_processed_bytes]; + + /* when we receive a CTA_id_x it means all the kernels + * completed, this is the special token we receive from the + * flush channel kernel that is issues at the end of the + * context */ + if (ma->cta_id_x == -1) { + done = true; + break; + } + + std::stringstream ss; + ss << "CTX " << HEX(ctx) << " - grid_launch_id " + << ma->grid_launch_id << " - CTA " << ma->cta_id_x << "," + << ma->cta_id_y << "," << ma->cta_id_z << " - warp " + << ma->warp_id << " - " << id_to_opcode_map[ma->opcode_id] + << " - "; + + for (int i = 0; i < 32; i++) { + ss << HEX(ma->addrs[i]) << " "; + } + + printf("MEMTRACE: %s\n", ss.str().c_str()); + num_processed_bytes += sizeof(mem_access_t); + } + } + } + free(recv_buffer); + return NULL; +} + +void nvbit_at_ctx_init(CUcontext ctx) { + pthread_mutex_lock(&mutex); + if (verbose) { + printf("MEMTRACE: STARTING CONTEXT %p\n", ctx); + } + CTXstate* ctx_state = new CTXstate; + assert(ctx_state_map.find(ctx) == ctx_state_map.end()); + ctx_state_map[ctx] = ctx_state; + cudaMallocManaged(&ctx_state->channel_dev, sizeof(ChannelDev)); + ctx_state->channel_host.init((int)ctx_state_map.size() - 1, CHANNEL_SIZE, + ctx_state->channel_dev, recv_thread_fun, ctx); + nvbit_set_tool_pthread(ctx_state->channel_host.get_thread()); + pthread_mutex_unlock(&mutex); +} + +void nvbit_at_ctx_term(CUcontext ctx) { + pthread_mutex_lock(&mutex); + skip_callback_flag = true; + 
if (verbose) { + printf("MEMTRACE: TERMINATING CONTEXT %p\n", ctx); + } + /* get context state from map */ + assert(ctx_state_map.find(ctx) != ctx_state_map.end()); + CTXstate* ctx_state = ctx_state_map[ctx]; + + /* flush channel */ + flush_channel<<<1, 1>>>(ctx_state->channel_dev); + /* Make sure flush of channel is complete */ + cudaDeviceSynchronize(); + assert(cudaGetLastError() == cudaSuccess); + + ctx_state->channel_host.destroy(false); + cudaFree(ctx_state->channel_dev); + skip_callback_flag = false; + delete ctx_state; + pthread_mutex_unlock(&mutex); +} From 8983418f3cad64b50e21877166a6e2ae20bce7f8 Mon Sep 17 00:00:00 2001 From: christopher Date: Wed, 10 Apr 2024 23:30:25 -0400 Subject: [PATCH 28/76] Link with libgs_patterns Call nvgs_patterns on each mem_trace callback --- nvbit_tracing/nvgs_patterns/Makefile | 12 ++++-- nvbit_tracing/nvgs_patterns/mem_trace.cu | 52 ++++++++++++++++++++++-- 2 files changed, 56 insertions(+), 8 deletions(-) diff --git a/nvbit_tracing/nvgs_patterns/Makefile b/nvbit_tracing/nvgs_patterns/Makefile index bee52fb..5971afa 100644 --- a/nvbit_tracing/nvgs_patterns/Makefile +++ b/nvbit_tracing/nvgs_patterns/Makefile @@ -9,9 +9,11 @@ $(error ERROR: nvcc version >= $(NVCC_VER_REQ) required to compile an nvbit tool endif NVBIT_PATH=../../core -INCLUDES=-I$(NVBIT_PATH) +GSPATTERNS_CORE_PATH=../../../gs_patterns +INCLUDES=-I$(NVBIT_PATH) -I$(GSPATTERNS_CORE_PATH) -LIBS=-L$(NVBIT_PATH) -lnvbit + +LIBS=-L$(NVBIT_PATH) -lnvbit -L$(GSPATTERNS_CORE_PATH)/build -lgs_patterns_core NVCC_PATH=-L $(subst bin/nvcc,lib64,$(shell which nvcc | tr -s /)) SOURCES=$(wildcard *.cu) @@ -28,13 +30,15 @@ all: $(NVBIT_TOOL) $(NVBIT_TOOL): $(OBJECTS) $(NVBIT_PATH)/libnvbit.a $(NVCC) -arch=sm_$(ARCH) -O3 $(OBJECTS) $(LIBS) $(NVCC_PATH) -lcuda -lcudart_static -shared -o $@ +# $(NVCC) -arch=sm_$(ARCH) --device-debug -O3 -g $(OBJECTS) $(LIBS) $(NVCC_PATH) -lcuda -lcudart_static -shared -o $@ %.o: %.cu - $(NVCC) -dc -c -std=c++11 $(INCLUDES) -Xptxas -cloning=no 
-Xcompiler -Wall -arch=sm_$(ARCH) -O3 -Xcompiler -fPIC $< -o $@ + $(NVCC) -dc -c -std=c++17 $(INCLUDES) -Xptxas -cloning=no -Xcompiler -Wall -arch=sm_$(ARCH) -O3 -Xcompiler -fPIC $< -o $@ +# $(NVCC) -dc -c -std=c++17 $(INCLUDES) -Xptxas -cloning=no -Xcompiler -Wall -arch=sm_$(ARCH) -O3 -Xcompiler -fPIC $< -o $@ inject_funcs.o: inject_funcs.cu - #$(NVCC) $(INCLUDES) -maxrregcount=24 -Xptxas -astoolspatch --keep-device-functions -arch=sm_$(ARCH) -Xcompiler -Wall -Xcompiler -fPIC -c $< -o $@ $(NVCC) $(INCLUDES) -maxrregcount=16 -Xptxas -astoolspatch --keep-device-functions -arch=sm_$(ARCH) -Xcompiler -Wall -Xcompiler -fPIC -c $< -o $@ +# $(NVCC) $(INCLUDES) -maxrregcount=24 -Xptxas -astoolspatch --keep-device-functions -arch=sm_$(ARCH) -Xcompiler -Wall -Xcompiler -fPIC -c $< -o $@ clean: rm -f *.so *.o diff --git a/nvbit_tracing/nvgs_patterns/mem_trace.cu b/nvbit_tracing/nvgs_patterns/mem_trace.cu index f20f782..bda59d5 100644 --- a/nvbit_tracing/nvgs_patterns/mem_trace.cu +++ b/nvbit_tracing/nvgs_patterns/mem_trace.cu @@ -45,7 +45,12 @@ #include "utils/channel.hpp" /* contains definition of the mem_access_t structure */ -#include "common.h" +//#include "common.h" + +#include +#include +#include + #define HEX(x) \ "0x" << std::setfill('0') << std::setw(16) << std::hex << (uint64_t)x \ @@ -81,6 +86,11 @@ int verbose = 0; std::map opcode_to_id_map; std::map id_to_opcode_map; +// Instantiate GSPatterns for NVBit +std::unique_ptr mp(new MemPatternsForNV); + + + /* grid launch id, incremented at every launch */ uint64_t grid_launch_id = 0; @@ -101,6 +111,8 @@ void nvbit_at_init() { pthread_mutexattr_init(&attr); pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE); pthread_mutex_init(&mutex, &attr); + + // -- init #1 } /* Set used to avoid re-instrumenting the same functions multiple times */ @@ -149,14 +161,16 @@ void instrument_function_if_needed(CUcontext ctx, CUfunction func) { instr->printDecoded(); } - if (opcode_to_id_map.find(instr->getOpcode()) == - 
opcode_to_id_map.end()) { + if (opcode_to_id_map.find(instr->getOpcode()) == opcode_to_id_map.end()) { int opcode_id = opcode_to_id_map.size(); opcode_to_id_map[instr->getOpcode()] = opcode_id; id_to_opcode_map[opcode_id] = std::string(instr->getOpcode()); } int opcode_id = opcode_to_id_map[instr->getOpcode()]; + + mp->add_or_update_opcode(opcode_id, instr->getOpcode()); + int mref_idx = 0; /* iterate on the operands */ for (int i = 0; i < instr->getNumOperands(); i++) { @@ -254,6 +268,11 @@ void nvbit_at_cuda_event(CUcontext ctx, int is_exit, nvbit_api_cuda_t cbid, p->gridDimY, p->gridDimZ, p->blockDimX, p->blockDimY, p->blockDimZ, nregs, shmem_static_nbytes + p->sharedMemBytes, (uint64_t)p->hStream); + + // Dont add NVBit here + //trace_entry_t te { 1, 0, 0 }; + //mp->handle_trace_entry(InstrAddrAdapterForNV( te )); + } } skip_callback_flag = false; @@ -302,8 +321,22 @@ void* recv_thread_fun(void* args) { ss << HEX(ma->addrs[i]) << " "; } - printf("MEMTRACE: %s\n", ss.str().c_str()); + //printf("MEMTRACE: %s\n", ss.str().c_str()); num_processed_bytes += sizeof(mem_access_t); + + try + { + // Handle trace update here >> --- + mp->add_or_update_opcode(ma->opcode_id, id_to_opcode_map[ma->opcode_id]); + mp->handle_cta_memory_access(ma); + } + catch (std::exception & ex) + { + std::cerr << "ERROR: " << ex.what() << std::endl; + } + + // << ---------------------------- + } } } @@ -324,6 +357,13 @@ void nvbit_at_ctx_init(CUcontext ctx) { ctx_state->channel_dev, recv_thread_fun, ctx); nvbit_set_tool_pthread(ctx_state->channel_host.get_thread()); pthread_mutex_unlock(&mutex); + + // -- init #2 - whats the difference + /// TODO: pull from env variables and set + if (1) + { + mp->set_trace_out_file("./trace_file.nvbit"); + } } void nvbit_at_ctx_term(CUcontext ctx) { @@ -347,4 +387,8 @@ void nvbit_at_ctx_term(CUcontext ctx) { skip_callback_flag = false; delete ctx_state; pthread_mutex_unlock(&mutex); + + // Generate GS Pattern output fle + mp->generate_patterns(); + } From 
833970741a8c1fe09b96897449c2ecf88ebc3836 Mon Sep 17 00:00:00 2001 From: christopher Date: Thu, 11 Apr 2024 11:10:22 -0400 Subject: [PATCH 29/76] Improved naming to beter descrive nvbit trace vs nvgs_patterns --- nvbit_tracing/{nvgs_patterns => nvgs_trace}/Makefile | 0 nvbit_tracing/{nvgs_patterns => nvgs_trace}/common.h | 0 nvbit_tracing/{nvgs_patterns => nvgs_trace}/inject_funcs.cu | 0 .../{nvgs_patterns/mem_trace.cu => nvgs_trace/nvgs_trace.cu} | 0 4 files changed, 0 insertions(+), 0 deletions(-) rename nvbit_tracing/{nvgs_patterns => nvgs_trace}/Makefile (100%) rename nvbit_tracing/{nvgs_patterns => nvgs_trace}/common.h (100%) rename nvbit_tracing/{nvgs_patterns => nvgs_trace}/inject_funcs.cu (100%) rename nvbit_tracing/{nvgs_patterns/mem_trace.cu => nvgs_trace/nvgs_trace.cu} (100%) diff --git a/nvbit_tracing/nvgs_patterns/Makefile b/nvbit_tracing/nvgs_trace/Makefile similarity index 100% rename from nvbit_tracing/nvgs_patterns/Makefile rename to nvbit_tracing/nvgs_trace/Makefile diff --git a/nvbit_tracing/nvgs_patterns/common.h b/nvbit_tracing/nvgs_trace/common.h similarity index 100% rename from nvbit_tracing/nvgs_patterns/common.h rename to nvbit_tracing/nvgs_trace/common.h diff --git a/nvbit_tracing/nvgs_patterns/inject_funcs.cu b/nvbit_tracing/nvgs_trace/inject_funcs.cu similarity index 100% rename from nvbit_tracing/nvgs_patterns/inject_funcs.cu rename to nvbit_tracing/nvgs_trace/inject_funcs.cu diff --git a/nvbit_tracing/nvgs_patterns/mem_trace.cu b/nvbit_tracing/nvgs_trace/nvgs_trace.cu similarity index 100% rename from nvbit_tracing/nvgs_patterns/mem_trace.cu rename to nvbit_tracing/nvgs_trace/nvgs_trace.cu From 92af6f5692c8a3e1085c6e4de9d756a1887e9ebc Mon Sep 17 00:00:00 2001 From: christopher Date: Thu, 11 Apr 2024 13:47:07 -0400 Subject: [PATCH 30/76] Add support for opcode_short_id, is_load, is_store and size from nvbit --- CMakeLists.txt | 1 + gsnv_patterns.h | 101 ++++++++++++++++++++++- nvbit_tracing/nvgs_trace/common.h | 4 + 
nvbit_tracing/nvgs_trace/inject_funcs.cu | 12 ++- nvbit_tracing/nvgs_trace/nvgs_trace.cu | 21 ++++- 5 files changed, 133 insertions(+), 6 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3ae8f1e..be1234a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,6 +16,7 @@ add_library(gs_patterns_core SHARED gs_patterns_core.cpp gsnv_patterns.h gsnv_test.cpp +# nv_opcodes.h ) add_executable( gs_patterns diff --git a/gsnv_patterns.h b/gsnv_patterns.h index 30518c8..9e530a2 100644 --- a/gsnv_patterns.h +++ b/gsnv_patterns.h @@ -18,9 +18,9 @@ "0x" << std::setfill('0') << std::setw(16) << std::hex << (uint64_t)x \ << std::dec -//#include "common.h" +#include "nvbit_tracing/nvgs_trace/common.h" // Copying to redudce dependency on nvgs_patterns -#if 1 +#if 0 typedef struct { uint64_t grid_launch_id; int cta_id_x; @@ -28,6 +28,10 @@ typedef struct { int cta_id_z; int warp_id; int opcode_id; + int opcode_short_id; + int is_load; + int is_store; + int size; uint64_t addrs[32]; } mem_access_t; #endif @@ -108,6 +112,8 @@ class InstrAddrAdapterForNV : public InstrAddrAdapter class MemPatternsForNV : public MemPatterns { public: + static const uint8_t CTA_LENGTH = 32; + MemPatternsForNV(): _metrics(GATHER, SCATTER), _iinfo(GATHER, SCATTER), _ofs() @@ -199,6 +205,27 @@ class MemPatternsForNV : public MemPatterns throw GSDataError(ss.str()); } + // store opcode_short mappings + bool add_or_update_opcode_short(int opcode_short_id, const std::string & opcode_short) { + auto it = id_to_opcode_short_map.find(opcode_short_id); + if (it == id_to_opcode_short_map.end()) { + id_to_opcode_short_map[opcode_short_id] = opcode_short; + //std::cout << "OPCODE: " << opcode_id << " -> " << opcode << std::endl; + return true; + } + return false; + } + // retreive opcode_short mapping by opcode_short_id + const std::string & get_opcode_short(int opcode_short_id) { + auto result = id_to_opcode_short_map.find(opcode_short_id); + if (result != id_to_opcode_short_map.end()) { + return 
result->second; + } + std::stringstream ss; + ss << "Unknown opcode_short_id: " << opcode_short_id; + throw GSDataError(ss.str()); + } + std::vector convert_to_trace_entry(const mem_access_t & ma) { // opcode : forms LD.E.64, ST.E.64 @@ -248,8 +275,73 @@ class MemPatternsForNV : public MemPatterns throw GSDataError("Unsupported opcode: " + opcode); } + // TODO: This is a SLOW way of doing this + std::vector te_list; + te_list.reserve(MemPatternsForNV::CTA_LENGTH); + for (int i = 0; i < MemPatternsForNV::CTA_LENGTH; i++) + { + if (ma.addrs[i] != 0) + { + trace_entry_t te { mem_type_code, mem_size, ma.addrs[i] }; + te_list.push_back(te); + } + } + return std::move(te_list); + } + + std::vector convert_to_trace_entry_2(const mem_access_t & ma) + { + // opcode : forms LD.E.64, ST.E.64 + + std::string mem_type; + std::string mem_attr; + uint16_t mem_size = 0; + int count = 0; + uint16_t mem_type_code = 0; + uint16_t mem_attr_code = 0; + + //const char * m = reinterpret_cast(&ma.opcode); + //const std::string opcode(m, 8); + std::string opcode = get_opcode(ma.opcode_id); + + size_t start=0, pos = 0; + while (std::string::npos != (pos = opcode.find(".", start))) + { + count++; + std::string token = opcode.substr(start, pos-start); + uint64_t s; + switch (count) + { + case 1: mem_type = token; + if ("LD" == mem_type) { mem_type_code = 0; } + else if ("ST" == mem_type) { mem_type_code = 1; } + else throw GSDataError ("Invalid mem_type must be LD(1) or ST(1)"); + break; + + case 2: mem_attr = token; + if ("E" == mem_attr) { mem_attr_code = 1; } + else { mem_attr_code = 2; } + break; + + default: + throw GSDataError("Unsupported opcode: " + opcode); + } + start = pos+1; + } + // Snag the rest as mem_size + if (start < opcode.length()) { + std::string token = opcode.substr(start, opcode.length()); + int s = atoi(token.c_str()); + mem_size = (uint16_t) s; + } + else { + throw GSDataError("Unsupported opcode: " + opcode); + } + + // TODO: This is a SLOW way of doing this 
std::vector te_list; - for (int i = 0; i < 32; i++) + te_list.reserve(MemPatternsForNV::CTA_LENGTH); + for (int i = 0; i < MemPatternsForNV::CTA_LENGTH; i++) { if (ma.addrs[i] != 0) { @@ -278,6 +370,7 @@ class MemPatternsForNV : public MemPatterns //std::map opcode_to_id_map; std::map id_to_opcode_map; + std::map id_to_opcode_short_map; }; @@ -489,7 +582,7 @@ void MemPatternsForNV::handle_cta_memory_access(const mem_access_t * ma) ss << "GSNV_PATTERNS: CTX " << " - grid_launch_id " << ma->grid_launch_id << " - CTA " << ma->cta_id_x << "," << ma->cta_id_y << "," << ma->cta_id_z << " - warp " << ma->warp_id << " - " << get_opcode(ma->opcode_id) << " - "; - for (int i = 0; i < 32; i++) { + for (int i = 0; i < MemPatternsForNV::CTA_LENGTH; i++) { ss << HEX(ma->addrs[i]) << " "; } std::cout << ss.str() << std::endl; diff --git a/nvbit_tracing/nvgs_trace/common.h b/nvbit_tracing/nvgs_trace/common.h index 42bca3d..ec267b9 100644 --- a/nvbit_tracing/nvgs_trace/common.h +++ b/nvbit_tracing/nvgs_trace/common.h @@ -36,5 +36,9 @@ typedef struct { int cta_id_z; int warp_id; int opcode_id; + int opcode_short_id; + int is_load; + int is_store; + int size; uint64_t addrs[32]; } mem_access_t; diff --git a/nvbit_tracing/nvgs_trace/inject_funcs.cu b/nvbit_tracing/nvgs_trace/inject_funcs.cu index 895ee75..0477a9f 100644 --- a/nvbit_tracing/nvgs_trace/inject_funcs.cu +++ b/nvbit_tracing/nvgs_trace/inject_funcs.cu @@ -36,7 +36,12 @@ /* contains definition of the mem_access_t structure */ #include "common.h" -extern "C" __device__ __noinline__ void instrument_mem(int pred, int opcode_id, +extern "C" __device__ __noinline__ void instrument_mem(int pred, + int opcode_id, + int opcode_short_id, + int is_load, + int is_store, + int size, uint64_t addr, uint64_t grid_launch_id, uint64_t pchannel_dev) { @@ -63,6 +68,11 @@ extern "C" __device__ __noinline__ void instrument_mem(int pred, int opcode_id, ma.cta_id_z = cta.z; ma.warp_id = get_warpid(); ma.opcode_id = opcode_id; + ma.opcode_short_id 
= opcode_short_id; + ma.is_load = is_load; + ma.is_store = is_store; + ma.size = size; + /* first active lane pushes information on the channel */ if (first_laneid == laneid) { diff --git a/nvbit_tracing/nvgs_trace/nvgs_trace.cu b/nvbit_tracing/nvgs_trace/nvgs_trace.cu index bda59d5..8c6d3ce 100644 --- a/nvbit_tracing/nvgs_trace/nvgs_trace.cu +++ b/nvbit_tracing/nvgs_trace/nvgs_trace.cu @@ -85,6 +85,7 @@ int verbose = 0; /* opcode to id map and reverse map */ std::map opcode_to_id_map; std::map id_to_opcode_map; +std::map opcode_short_to_id_map; // Instantiate GSPatterns for NVBit std::unique_ptr mp(new MemPatternsForNV); @@ -169,7 +170,15 @@ void instrument_function_if_needed(CUcontext ctx, CUfunction func) { int opcode_id = opcode_to_id_map[instr->getOpcode()]; + if (opcode_short_to_id_map.find(instr->getOpcodeShort()) == opcode_short_to_id_map.end()) { + int opcode_short_id = opcode_short_to_id_map.size(); + opcode_short_to_id_map[instr->getOpcodeShort()] = opcode_short_id; + //id_to_opcode_map[opcode_id] = std::string(instr->getOpcode()); + } + int opcode_short_id = opcode_short_to_id_map[instr->getOpcodeShort()]; + mp->add_or_update_opcode(opcode_id, instr->getOpcode()); + mp->add_or_update_opcode_short(opcode_short_id, instr->getOpcodeShort()); int mref_idx = 0; /* iterate on the operands */ @@ -185,6 +194,16 @@ void instrument_function_if_needed(CUcontext ctx, CUfunction func) { nvbit_add_call_arg_guard_pred_val(instr); /* opcode id */ nvbit_add_call_arg_const_val32(instr, opcode_id); + + /* opcode short id */ + nvbit_add_call_arg_const_val32(instr, opcode_short_id); + /* isLoad */ + nvbit_add_call_arg_const_val32(instr, instr->isLoad()); + /* isStore */ + nvbit_add_call_arg_const_val32(instr, instr->isStore()); + /* size */ + nvbit_add_call_arg_const_val32(instr, instr->getSize()); + /* memory reference 64 bit address */ nvbit_add_call_arg_mref_addr64(instr, mref_idx); /* add "space" for kernel function pointer that will be set @@ -327,7 +346,7 @@ void* 
recv_thread_fun(void* args) { try { // Handle trace update here >> --- - mp->add_or_update_opcode(ma->opcode_id, id_to_opcode_map[ma->opcode_id]); + //mp->add_or_update_opcode(ma->opcode_id, id_to_opcode_map[ma->opcode_id]); mp->handle_cta_memory_access(ma); } catch (std::exception & ex) From 40136b098e7666b7eeb28123ee68c8c1faee0564 Mon Sep 17 00:00:00 2001 From: christopher Date: Thu, 11 Apr 2024 15:04:20 -0400 Subject: [PATCH 31/76] Use size, opcode_short_id, is_load() & is_store() rather than parsing opcode. Fixes for size calculation and valid_gs_stride(). --- gsnv_patterns.h | 86 ++++++++------------------ gsnv_test.cpp | 3 + nvbit_tracing/nvgs_trace/nvgs_trace.cu | 1 - 3 files changed, 28 insertions(+), 62 deletions(-) diff --git a/gsnv_patterns.h b/gsnv_patterns.h index 9e530a2..22b544d 100644 --- a/gsnv_patterns.h +++ b/gsnv_patterns.h @@ -19,22 +19,6 @@ << std::dec #include "nvbit_tracing/nvgs_trace/common.h" -// Copying to redudce dependency on nvgs_patterns -#if 0 -typedef struct { - uint64_t grid_launch_id; - int cta_id_x; - int cta_id_y; - int cta_id_z; - int warp_id; - int opcode_id; - int opcode_short_id; - int is_load; - int is_store; - int size; - uint64_t addrs[32]; -} mem_access_t; -#endif struct _trace_entry_t { unsigned short type; // 2 bytes: trace_type_t @@ -98,7 +82,7 @@ class InstrAddrAdapterForNV : public InstrAddrAdapter virtual mem_access_type get_mem_instr_type() const override { return (_te.type == 0) ? GATHER : SCATTER; } - virtual size_t get_size() const override { return _te.size / 8; } // in bytes + virtual size_t get_size() const override { return _te.size; } // in bytes virtual addr_t get_address() const override { return _te.addr; } virtual unsigned short get_type() const override { return _te.type; } // must be 0 for GATHER, 1 for SCATTER !! 
@@ -131,6 +115,11 @@ class MemPatternsForNV : public MemPatterns for (auto itr = id_to_opcode_map.begin(); itr != id_to_opcode_map.end(); itr++) { std::cout << "OPCODE: " << itr->first << " -> " << itr->second << std::endl; } + + std::cout << "-- OPCODE_SHORT_ID to OPCODE_SHORT MAPPING -- " << std::endl; + for (auto itr = id_to_opcode_short_map.begin(); itr != id_to_opcode_short_map.end(); itr++) { + std::cout << "OPCODE_SHORT: " << itr->first << " -> " << itr->second << std::endl; + } #endif } @@ -226,6 +215,7 @@ class MemPatternsForNV : public MemPatterns throw GSDataError(ss.str()); } +#if 0 std::vector convert_to_trace_entry(const mem_access_t & ma) { // opcode : forms LD.E.64, ST.E.64 @@ -252,7 +242,7 @@ class MemPatternsForNV : public MemPatterns case 1: mem_type = token; if ("LD" == mem_type) { mem_type_code = 0; } else if ("ST" == mem_type) { mem_type_code = 1; } - else throw GSDataError ("Invalid mem_type must be LD(1) or ST(1)"); + else throw GSDataError ("Invalid mem_type must be LD(0) or ST(1)"); break; case 2: mem_attr = token; @@ -288,55 +278,27 @@ class MemPatternsForNV : public MemPatterns } return std::move(te_list); } +#endif - std::vector convert_to_trace_entry_2(const mem_access_t & ma) + std::vector convert_to_trace_entry(const mem_access_t & ma) { // opcode : forms LD.E.64, ST.E.64 - - std::string mem_type; - std::string mem_attr; - uint16_t mem_size = 0; - int count = 0; - uint16_t mem_type_code = 0; - uint16_t mem_attr_code = 0; + //std::string mem_type; + uint16_t mem_size = ma.size; + uint16_t mem_type_code; + //uint16_t mem_attr_code = 0; + + if (ma.is_load) + mem_type_code = GATHER; + else if (ma.is_store) + mem_type_code = SCATTER; + else + throw GSDataError ("Invalid mem_type must be LD(0) or ST(1)"); //const char * m = reinterpret_cast(&ma.opcode); //const std::string opcode(m, 8); std::string opcode = get_opcode(ma.opcode_id); - - size_t start=0, pos = 0; - while (std::string::npos != (pos = opcode.find(".", start))) - { - count++; - 
std::string token = opcode.substr(start, pos-start); - uint64_t s; - switch (count) - { - case 1: mem_type = token; - if ("LD" == mem_type) { mem_type_code = 0; } - else if ("ST" == mem_type) { mem_type_code = 1; } - else throw GSDataError ("Invalid mem_type must be LD(1) or ST(1)"); - break; - - case 2: mem_attr = token; - if ("E" == mem_attr) { mem_attr_code = 1; } - else { mem_attr_code = 2; } - break; - - default: - throw GSDataError("Unsupported opcode: " + opcode); - } - start = pos+1; - } - // Snag the rest as mem_size - if (start < opcode.length()) { - std::string token = opcode.substr(start, opcode.length()); - int s = atoi(token.c_str()); - mem_size = (uint16_t) s; - } - else { - throw GSDataError("Unsupported opcode: " + opcode); - } + std::string opcode_short = get_opcode_short(ma.opcode_short_id); // TODO: This is a SLOW way of doing this std::vector te_list; @@ -589,7 +551,7 @@ void MemPatternsForNV::handle_cta_memory_access(const mem_access_t * ma) #endif std::vector te_list = convert_to_trace_entry(*ma); - uint64_t min_size = !te_list.empty() ? (te_list[0].size / 8) + 1 : 0; + uint64_t min_size = !te_list.empty() ? 
(te_list[0].size) + 1 : 0; if (valid_gs_stride(te_list, min_size)) { for (auto it = te_list.begin(); it != te_list.end(); it++) @@ -617,6 +579,8 @@ bool MemPatternsForNV::valid_gs_stride(const std::vector & te_lis uint64_t diff = std::labs (last_addr - (uint64_t)te.addr); if (diff < min_stride_found) min_stride_found = diff; + + last_addr = te.addr; } return min_stride_found >= min_stride; diff --git a/gsnv_test.cpp b/gsnv_test.cpp index e4aa390..5bb0578 100644 --- a/gsnv_test.cpp +++ b/gsnv_test.cpp @@ -20,6 +20,9 @@ int main(int argc, char **argv) mp.add_or_update_opcode(0, "LD.E.64"); mp.add_or_update_opcode(1, "ST.E.64"); + mp.add_or_update_opcode_short(0, "LD"); + mp.add_or_update_opcode_short(1, "ST"); + mp.process_traces(); // ----------------- Generate Patterns ----------------- diff --git a/nvbit_tracing/nvgs_trace/nvgs_trace.cu b/nvbit_tracing/nvgs_trace/nvgs_trace.cu index 8c6d3ce..f8136bf 100644 --- a/nvbit_tracing/nvgs_trace/nvgs_trace.cu +++ b/nvbit_tracing/nvgs_trace/nvgs_trace.cu @@ -91,7 +91,6 @@ std::map opcode_short_to_id_map; std::unique_ptr mp(new MemPatternsForNV); - /* grid launch id, incremented at every launch */ uint64_t grid_launch_id = 0; From abddae8c5f806267e61aa018f9ca10c747f14b73 Mon Sep 17 00:00:00 2001 From: christopher Date: Thu, 11 Apr 2024 18:48:51 -0400 Subject: [PATCH 32/76] Ignore memory accesses w/ partial warps --- CMakeLists.txt | 4 ++-- gsnv_patterns.h | 19 ++++++++++++++----- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index be1234a..9c3e908 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -22,10 +22,10 @@ add_library(gs_patterns_core SHARED add_executable( gs_patterns gspin_patterns.cpp) -add_executable(test gsnv_test.cpp) +add_executable(gsnv_test gsnv_test.cpp) target_link_libraries(gs_patterns gs_patterns_core) -target_link_libraries(test gs_patterns_core) +target_link_libraries(gsnv_test gs_patterns_core) set(CMAKE_CXX_STANDARD_LIBRARIES "-lm -lz 
${CMAKE_CXX_STANDARD_LIBRARIES}") diff --git a/gsnv_patterns.h b/gsnv_patterns.h index 22b544d..5009568 100644 --- a/gsnv_patterns.h +++ b/gsnv_patterns.h @@ -280,7 +280,7 @@ class MemPatternsForNV : public MemPatterns } #endif - std::vector convert_to_trace_entry(const mem_access_t & ma) + std::vector convert_to_trace_entry(const mem_access_t & ma, bool ignore_partial_warps) { // opcode : forms LD.E.64, ST.E.64 //std::string mem_type; @@ -310,6 +310,11 @@ class MemPatternsForNV : public MemPatterns trace_entry_t te { mem_type_code, mem_size, ma.addrs[i] }; te_list.push_back(te); } + else if (ignore_partial_warps) + { + // Ignore memory_accesses which have less than MemPatternsForNV::CTA_LENGTH + return std::vector(); + } } return std::move(te_list); } @@ -541,18 +546,22 @@ void MemPatternsForNV::handle_cta_memory_access(const mem_access_t * ma) #if 0 std::stringstream ss; //ss << "CTX " << HEX(ctx) << " - grid_launch_id " - ss << "GSNV_PATTERNS: CTX " << " - grid_launch_id " + ss << "GSNV_TRACE: CTX " << " - grid_launch_id " << ma->grid_launch_id << " - CTA " << ma->cta_id_x << "," << ma->cta_id_y << "," << ma->cta_id_z - << " - warp " << ma->warp_id << " - " << get_opcode(ma->opcode_id) << " - "; + << " - warp " << ma->warp_id << " - " << get_opcode(ma->opcode_id) + << " - shortOpcode: " << ma->opcode_short_id + << " isLoad: " << ma->is_load << " isStore: " << ma->is_store << " - "; + for (int i = 0; i < MemPatternsForNV::CTA_LENGTH; i++) { ss << HEX(ma->addrs[i]) << " "; } std::cout << ss.str() << std::endl; #endif - std::vector te_list = convert_to_trace_entry(*ma); + // Convert to vector of trace_entry_t if full warp. ignore partial warps. + std::vector te_list = convert_to_trace_entry(*ma, true); uint64_t min_size = !te_list.empty() ? 
(te_list[0].size) + 1 : 0; - if (valid_gs_stride(te_list, min_size)) + if (min_size > 0 && valid_gs_stride(te_list, min_size)) { for (auto it = te_list.begin(); it != te_list.end(); it++) { From bdb3798ae6fd78e94f58f9df42261822f4c66238 Mon Sep 17 00:00:00 2001 From: christopher Date: Fri, 12 Apr 2024 18:05:52 -0400 Subject: [PATCH 33/76] Exception handling around instrumental calls, log size --- gsnv_patterns.h | 3 +- nvbit_tracing/nvgs_trace/nvgs_trace.cu | 38 ++++++++++++++------------ 2 files changed, 23 insertions(+), 18 deletions(-) diff --git a/gsnv_patterns.h b/gsnv_patterns.h index 5009568..ac296bf 100644 --- a/gsnv_patterns.h +++ b/gsnv_patterns.h @@ -550,7 +550,8 @@ void MemPatternsForNV::handle_cta_memory_access(const mem_access_t * ma) << ma->grid_launch_id << " - CTA " << ma->cta_id_x << "," << ma->cta_id_y << "," << ma->cta_id_z << " - warp " << ma->warp_id << " - " << get_opcode(ma->opcode_id) << " - shortOpcode: " << ma->opcode_short_id - << " isLoad: " << ma->is_load << " isStore: " << ma->is_store << " - "; + << " isLoad: " << ma->is_load << " isStore: " << ma->is_store + << " size: " << ma->size << " - "; for (int i = 0; i < MemPatternsForNV::CTA_LENGTH; i++) { ss << HEX(ma->addrs[i]) << " "; diff --git a/nvbit_tracing/nvgs_trace/nvgs_trace.cu b/nvbit_tracing/nvgs_trace/nvgs_trace.cu index f8136bf..d8346a7 100644 --- a/nvbit_tracing/nvgs_trace/nvgs_trace.cu +++ b/nvbit_tracing/nvgs_trace/nvgs_trace.cu @@ -286,11 +286,6 @@ void nvbit_at_cuda_event(CUcontext ctx, int is_exit, nvbit_api_cuda_t cbid, p->gridDimY, p->gridDimZ, p->blockDimX, p->blockDimY, p->blockDimZ, nregs, shmem_static_nbytes + p->sharedMemBytes, (uint64_t)p->hStream); - - // Dont add NVBit here - //trace_entry_t te { 1, 0, 0 }; - //mp->handle_trace_entry(InstrAddrAdapterForNV( te )); - } } skip_callback_flag = false; @@ -344,17 +339,13 @@ void* recv_thread_fun(void* args) { try { - // Handle trace update here >> --- - //mp->add_or_update_opcode(ma->opcode_id, 
id_to_opcode_map[ma->opcode_id]); + // Handle trace update here mp->handle_cta_memory_access(ma); } - catch (std::exception & ex) + catch (const std::exception & ex) { std::cerr << "ERROR: " << ex.what() << std::endl; } - - // << ---------------------------- - } } } @@ -377,10 +368,17 @@ void nvbit_at_ctx_init(CUcontext ctx) { pthread_mutex_unlock(&mutex); // -- init #2 - whats the difference - /// TODO: pull from env variables and set - if (1) + try + { + /// TODO: pull from env variables and set + if (1) { + mp->set_trace_out_file("./trace_file.nvbit"); + } + mp->set_file_prefix("prog_bin"); + } + catch (const exception & ex) { - mp->set_trace_out_file("./trace_file.nvbit"); + cerr << "ERROR: " << ex.what() << endl; } } @@ -406,7 +404,13 @@ void nvbit_at_ctx_term(CUcontext ctx) { delete ctx_state; pthread_mutex_unlock(&mutex); - // Generate GS Pattern output fle - mp->generate_patterns(); - + try + { + // Generate GS Pattern output fle + mp->generate_patterns(); + } + catch (const exception & ex) + { + std::cerr << "ERROR: " << ex.what() << std::endl; + } } From 6ce7e1fc969d01d7ce28d08f7986bcec12f22116 Mon Sep 17 00:00:00 2001 From: christopher Date: Thu, 11 Apr 2024 22:27:35 -0400 Subject: [PATCH 34/76] Fix duplicate printf --- gs_patterns.h | 12 ++++++------ gs_patterns_core.cpp | 4 ++-- gsnv_patterns.h | 17 ++++++++++++----- nvbit_tracing/nvgs_trace/nvgs_trace.cu | 10 +++++----- 4 files changed, 25 insertions(+), 18 deletions(-) diff --git a/gs_patterns.h b/gs_patterns.h index 3a7f8bc..51994c9 100644 --- a/gs_patterns.h +++ b/gs_patterns.h @@ -176,12 +176,12 @@ class TraceInfo // Stats public: /// TODO: need a reset method to zero out counters - uint64_t opcodes = 0; - uint64_t opcodes_mem = 0; - uint64_t addrs = 0; - uint64_t other = 0; - int64_t ngs = 0; - int64_t drtrace_lines = 0; + uint64_t opcodes = 0; + uint64_t opcodes_mem = 0; + uint64_t addrs = 0; + uint64_t other = 0; + int64_t ngs = 0; + int64_t trace_lines = 0; bool did_opcode = false; // revist 
this --------------- double other_cnt = 0.0; diff --git a/gs_patterns_core.cpp b/gs_patterns_core.cpp index 9aa0e3a..3ee633f 100644 --- a/gs_patterns_core.cpp +++ b/gs_patterns_core.cpp @@ -381,7 +381,7 @@ void handle_trace_entry(MemPatterns & mp, const InstrAddrAdapter & ia) trace_info.other++; } - trace_info.drtrace_lines++; + trace_info.trace_lines++; } void display_stats(MemPatterns & mp) @@ -389,7 +389,7 @@ void display_stats(MemPatterns & mp) printf("\n RESULTS \n"); printf("DRTRACE STATS\n"); - printf("DRTRACE LINES: %16lu\n", mp.get_trace_info().drtrace_lines); + printf("DRTRACE LINES: %16lu\n", mp.get_trace_info().trace_lines); printf("OPCODES: %16lu\n", mp.get_trace_info().opcodes); printf("MEMOPCODES: %16lu\n", mp.get_trace_info().opcodes_mem); printf("LOAD/STORES: %16lu\n", mp.get_trace_info().addrs); diff --git a/gsnv_patterns.h b/gsnv_patterns.h index ac296bf..3fda1e8 100644 --- a/gsnv_patterns.h +++ b/gsnv_patterns.h @@ -276,7 +276,7 @@ class MemPatternsForNV : public MemPatterns te_list.push_back(te); } } - return std::move(te_list); + return te_list; } #endif @@ -316,7 +316,7 @@ class MemPatternsForNV : public MemPatterns return std::vector(); } } - return std::move(te_list); + return te_list; } private: @@ -332,6 +332,7 @@ class MemPatternsForNV : public MemPatterns std::string _trace_out_file_name; bool _write_trace_file = false; + bool _first_access = true; std::ofstream _ofs; std::vector _traces; @@ -426,7 +427,7 @@ std::string MemPatternsForNV::get_file_prefix() return prefix; } -// First Pass +// First Pass - Used by gsnv_test using a trace file void MemPatternsForNV::process_traces() { int iret = 0; @@ -435,8 +436,8 @@ void MemPatternsForNV::process_traces() gzFile fp_trace = open_trace_file(get_trace_file_name()); - printf("First pass to find top gather / scatter iaddresses\n"); - fflush(stdout); + //printf("First pass to find top gather / scatter iaddresses\n"); + //fflush(stdout); mem_access_t * p_trace = NULL; mem_access_t 
trace_buff[NBUFS]; // was static (1024 bytes) @@ -539,6 +540,12 @@ void MemPatternsForNV::process_second_pass() void MemPatternsForNV::handle_cta_memory_access(const mem_access_t * ma) { + if (_first_access) { + _first_access = false; + printf("First pass to find top gather / scatter iaddresses\n"); + fflush(stdout); + } + if (_write_trace_file && _ofs.is_open()) { // Write entry to trace_output file _ofs.write(reinterpret_cast(ma), sizeof *ma); diff --git a/nvbit_tracing/nvgs_trace/nvgs_trace.cu b/nvbit_tracing/nvgs_trace/nvgs_trace.cu index d8346a7..ae79db6 100644 --- a/nvbit_tracing/nvgs_trace/nvgs_trace.cu +++ b/nvbit_tracing/nvgs_trace/nvgs_trace.cu @@ -143,7 +143,7 @@ void instrument_function_if_needed(CUcontext ctx, CUfunction func) { if (verbose) { printf( - "MEMTRACE: CTX %p, Inspecting CUfunction %p name %s at address " + "NVGS_TRACE: CTX %p, Inspecting CUfunction %p name %s at address " "0x%lx\n", ctx, f, nvbit_get_func_name(ctx, f), nvbit_get_func_addr(f)); } @@ -279,7 +279,7 @@ void nvbit_at_cuda_event(CUcontext ctx, int is_exit, nvbit_api_cuda_t cbid, nvbit_enable_instrumented(ctx, p->f, true); printf( - "MEMTRACE: CTX 0x%016lx - LAUNCH - Kernel pc 0x%016lx - Kernel " + "NVGS_TRACE: CTX 0x%016lx - LAUNCH - Kernel pc 0x%016lx - Kernel " "name %s - grid launch id %ld - grid size %d,%d,%d - block " "size %d,%d,%d - nregs %d - shmem %d - cuda stream id %ld\n", (uint64_t)ctx, pc, func_name, grid_launch_id, p->gridDimX, @@ -334,7 +334,7 @@ void* recv_thread_fun(void* args) { ss << HEX(ma->addrs[i]) << " "; } - //printf("MEMTRACE: %s\n", ss.str().c_str()); + //printf("NVGS_TRACE: %s\n", ss.str().c_str()); num_processed_bytes += sizeof(mem_access_t); try @@ -356,7 +356,7 @@ void* recv_thread_fun(void* args) { void nvbit_at_ctx_init(CUcontext ctx) { pthread_mutex_lock(&mutex); if (verbose) { - printf("MEMTRACE: STARTING CONTEXT %p\n", ctx); + printf("NVGS_TRACE: STARTING CONTEXT %p\n", ctx); } CTXstate* ctx_state = new CTXstate; assert(ctx_state_map.find(ctx) 
== ctx_state_map.end()); @@ -386,7 +386,7 @@ void nvbit_at_ctx_term(CUcontext ctx) { pthread_mutex_lock(&mutex); skip_callback_flag = true; if (verbose) { - printf("MEMTRACE: TERMINATING CONTEXT %p\n", ctx); + printf("NVGS_TRACE: TERMINATING CONTEXT %p\n", ctx); } /* get context state from map */ assert(ctx_state_map.find(ctx) != ctx_state_map.end()); From 00abeed88ed7bc11322f60939218cabc959ea278 Mon Sep 17 00:00:00 2001 From: christopher Date: Sat, 13 Apr 2024 00:34:31 -0400 Subject: [PATCH 35/76] Support for reading and writing out trace files that includes headers. The headers provides opcode_id to opcode mapping. Can be extended for other types of mappings numerical to string mappings. --- gsnv_patterns.h | 495 ++++++++++++++++++++++++++++++------------------ gsnv_test.cpp | 5 + 2 files changed, 316 insertions(+), 184 deletions(-) diff --git a/gsnv_patterns.h b/gsnv_patterns.h index 3fda1e8..1576d3b 100644 --- a/gsnv_patterns.h +++ b/gsnv_patterns.h @@ -1,14 +1,17 @@ #pragma once +#include #include #include #include #include -#include #include + +#include #include #include +#include #include "gs_patterns.h" #include "gs_patterns_core.h" @@ -30,6 +33,25 @@ struct _trace_entry_t { } __attribute__((packed)); typedef struct _trace_entry_t trace_entry_t; +#define MAP_NAME_SIZE 24 +#define MAP_VALUE_SIZE 22 + +struct _trace_map_entry_t +{ + // 32 bytes total + char map_name[MAP_NAME_SIZE]; + uint16_t id; + char val[MAP_VALUE_SIZE]; +}; +typedef struct _trace_map_entry_t trace_map_entry_t; + +struct _trace_header_t { + uint64_t num_map_entires; + uint64_t num_maps; +}; +typedef struct _trace_header_t trace_header_t; + + gzFile open_trace_file(const std::string & trace_file_name) { gzFile fp; @@ -46,6 +68,48 @@ void close_trace_file (gzFile & fp) gzclose(fp); } +int tline_read_header(gzFile fp, trace_header_t * val, trace_header_t **p_val, int *edx) { + + int idx; + + idx = (*edx) / sizeof(trace_entry_t); + //first read + if (NULL == *p_val) { + *edx = gzread(fp, val, 
sizeof(trace_header_t)); + *p_val = val; + } + else if (*p_val == &val[idx]) { + *edx = gzread(fp, val, sizeof(trace_header_t)); + *p_val = val; + } + + if (0 == *edx) + return 0; + + return 1; +} + +int tline_read_maps(gzFile fp, trace_map_entry_t * val, trace_map_entry_t **p_val, int *edx) { + + int idx; + + idx = (*edx) / sizeof(trace_map_entry_t); + //first read + if (NULL == *p_val) { + *edx = gzread(fp, val, sizeof(trace_map_entry_t)); + *p_val = val; + } + else if (*p_val == &val[idx]) { + *edx = gzread(fp, val, sizeof(trace_map_entry_t)); + *p_val = val; + } + + if (0 == *edx) + return 0; + + return 1; +} + int tline_read(gzFile fp, mem_access_t * val, mem_access_t **p_val, int *edx) { int idx; @@ -97,31 +161,14 @@ class MemPatternsForNV : public MemPatterns { public: static const uint8_t CTA_LENGTH = 32; + static constexpr const char * ID_TO_OPCODE = "ID_TO_OPCODE"; + static constexpr const char * ID_TO_OPCODE_SHORT = "ID_TO_OPCODE_SHORT"; MemPatternsForNV(): _metrics(GATHER, SCATTER), _iinfo(GATHER, SCATTER), - _ofs() - { } + _ofs_tmp() { } - virtual ~MemPatternsForNV() override { - if (_write_trace_file) { - _ofs.flush(); - _ofs.close(); - } - - /// TODO: COMPRESS trace_file on exit -#if 1 - std::cout << "-- OPCODE_ID to OPCODE MAPPING -- " << std::endl; - for (auto itr = id_to_opcode_map.begin(); itr != id_to_opcode_map.end(); itr++) { - std::cout << "OPCODE: " << itr->first << " -> " << itr->second << std::endl; - } - - std::cout << "-- OPCODE_SHORT_ID to OPCODE_SHORT MAPPING -- " << std::endl; - for (auto itr = id_to_opcode_short_map.begin(); itr != id_to_opcode_short_map.end(); itr++) { - std::cout << "OPCODE_SHORT: " << itr->first << " -> " << itr->second << std::endl; - } -#endif - } + virtual ~MemPatternsForNV() override ; void handle_trace_entry(const InstrAddrAdapter & ia) override; void generate_patterns() override; @@ -136,7 +183,7 @@ class MemPatternsForNV : public MemPatterns TraceInfo & get_trace_info() override { return _trace_info; } 
InstrWindow & get_instr_window() override { return _iw; } - void set_trace_file(const std::string & trace_file_name) { _trace_file_name = trace_file_name; } + void set_trace_file(const std::string & trace_file_name); const std::string & get_trace_file_name() { return _trace_file_name; } void set_binary_file(const std::string & binary_file_name) { _binary_file_name = binary_file_name; } @@ -154,19 +201,8 @@ class MemPatternsForNV : public MemPatterns double update_source_lines_from_binary(mem_access_type); void process_second_pass(); - void set_trace_out_file(const std::string & trace_file_name) { - _trace_out_file_name = trace_file_name; - - try - { - _ofs.open(trace_file_name, std::ios::binary); - if (_ofs.is_open()) _write_trace_file = true; - } - catch (...) - { - throw GSFileError("Unable to open " + trace_file_name + " for writing"); - } - } + void set_trace_out_file(const std::string & trace_file_name); + void write_trace_out_file(); // Handle an nvbit CTA memory update void handle_cta_memory_access(const mem_access_t * ma); @@ -174,150 +210,15 @@ class MemPatternsForNV : public MemPatterns bool valid_gs_stride(const std::vector & te_list, const uint32_t min_stride); // store opcode mappings - bool add_or_update_opcode(int opcode_id, const std::string & opcode) { - auto it = id_to_opcode_map.find(opcode_id); - if (it == id_to_opcode_map.end()) { - id_to_opcode_map[opcode_id] = opcode; - //std::cout << "OPCODE: " << opcode_id << " -> " << opcode << std::endl; - return true; - } - return false; - } + bool add_or_update_opcode(int opcode_id, const std::string & opcode); // retreive opcode mapping by opcode_id - const std::string & get_opcode(int opcode_id) { - auto result = id_to_opcode_map.find(opcode_id); - if (result != id_to_opcode_map.end()) { - return result->second; - } - std::stringstream ss; - ss << "Unknown opcode_id: " << opcode_id; - throw GSDataError(ss.str()); - } - + const std::string & get_opcode(int opcode_id); // store opcode_short mappings - 
bool add_or_update_opcode_short(int opcode_short_id, const std::string & opcode_short) { - auto it = id_to_opcode_short_map.find(opcode_short_id); - if (it == id_to_opcode_short_map.end()) { - id_to_opcode_short_map[opcode_short_id] = opcode_short; - //std::cout << "OPCODE: " << opcode_id << " -> " << opcode << std::endl; - return true; - } - return false; - } + bool add_or_update_opcode_short(int opcode_short_id, const std::string & opcode_short); // retreive opcode_short mapping by opcode_short_id - const std::string & get_opcode_short(int opcode_short_id) { - auto result = id_to_opcode_short_map.find(opcode_short_id); - if (result != id_to_opcode_short_map.end()) { - return result->second; - } - std::stringstream ss; - ss << "Unknown opcode_short_id: " << opcode_short_id; - throw GSDataError(ss.str()); - } - -#if 0 - std::vector convert_to_trace_entry(const mem_access_t & ma) - { - // opcode : forms LD.E.64, ST.E.64 - - std::string mem_type; - std::string mem_attr; - uint16_t mem_size = 0; - int count = 0; - uint16_t mem_type_code = 0; - uint16_t mem_attr_code = 0; - - //const char * m = reinterpret_cast(&ma.opcode); - //const std::string opcode(m, 8); - std::string opcode = get_opcode(ma.opcode_id); - - size_t start=0, pos = 0; - while (std::string::npos != (pos = opcode.find(".", start))) - { - count++; - std::string token = opcode.substr(start, pos-start); - uint64_t s; - switch (count) - { - case 1: mem_type = token; - if ("LD" == mem_type) { mem_type_code = 0; } - else if ("ST" == mem_type) { mem_type_code = 1; } - else throw GSDataError ("Invalid mem_type must be LD(0) or ST(1)"); - break; - - case 2: mem_attr = token; - if ("E" == mem_attr) { mem_attr_code = 1; } - else { mem_attr_code = 2; } - break; - - default: - throw GSDataError("Unsupported opcode: " + opcode); - } - start = pos+1; - } - // Snag the rest as mem_size - if (start < opcode.length()) { - std::string token = opcode.substr(start, opcode.length()); - int s = atoi(token.c_str()); - mem_size 
= (uint16_t) s; - } - else { - throw GSDataError("Unsupported opcode: " + opcode); - } - - // TODO: This is a SLOW way of doing this - std::vector te_list; - te_list.reserve(MemPatternsForNV::CTA_LENGTH); - for (int i = 0; i < MemPatternsForNV::CTA_LENGTH; i++) - { - if (ma.addrs[i] != 0) - { - trace_entry_t te { mem_type_code, mem_size, ma.addrs[i] }; - te_list.push_back(te); - } - } - return te_list; - } -#endif + const std::string & get_opcode_short(int opcode_short_id); - std::vector convert_to_trace_entry(const mem_access_t & ma, bool ignore_partial_warps) - { - // opcode : forms LD.E.64, ST.E.64 - //std::string mem_type; - uint16_t mem_size = ma.size; - uint16_t mem_type_code; - //uint16_t mem_attr_code = 0; - - if (ma.is_load) - mem_type_code = GATHER; - else if (ma.is_store) - mem_type_code = SCATTER; - else - throw GSDataError ("Invalid mem_type must be LD(0) or ST(1)"); - - //const char * m = reinterpret_cast(&ma.opcode); - //const std::string opcode(m, 8); - std::string opcode = get_opcode(ma.opcode_id); - std::string opcode_short = get_opcode_short(ma.opcode_short_id); - - // TODO: This is a SLOW way of doing this - std::vector te_list; - te_list.reserve(MemPatternsForNV::CTA_LENGTH); - for (int i = 0; i < MemPatternsForNV::CTA_LENGTH; i++) - { - if (ma.addrs[i] != 0) - { - trace_entry_t te { mem_type_code, mem_size, ma.addrs[i] }; - te_list.push_back(te); - } - else if (ignore_partial_warps) - { - // Ignore memory_accesses which have less than MemPatternsForNV::CTA_LENGTH - return std::vector(); - } - } - return te_list; - } + std::vector convert_to_trace_entry(const mem_access_t & ma, bool ignore_partial_warps); private: @@ -331,16 +232,26 @@ class MemPatternsForNV : public MemPatterns std::string _file_prefix; std::string _trace_out_file_name; + std::string _tmp_trace_out_file_name; + bool _write_trace_file = false; bool _first_access = true; + std::ofstream _ofs_tmp; std::ofstream _ofs; std::vector _traces; - //std::map opcode_to_id_map; std::map 
id_to_opcode_map; std::map id_to_opcode_short_map; }; +MemPatternsForNV::~MemPatternsForNV() +{ + if (_write_trace_file) + { + write_trace_out_file(); + /// TODO: COMPRESS trace_file on exit + } +} Metrics & MemPatternsForNV::get_metrics(mem_access_type m) { @@ -427,7 +338,54 @@ std::string MemPatternsForNV::get_file_prefix() return prefix; } -// First Pass - Used by gsnv_test using a trace file +// store opcode mappings +bool MemPatternsForNV::add_or_update_opcode(int opcode_id, const std::string & opcode) { + auto it = id_to_opcode_map.find(opcode_id); + if (it == id_to_opcode_map.end()) { + id_to_opcode_map[opcode_id] = opcode; + //std::cout << "OPCODE: " << opcode_id << " -> " << opcode << std::endl; + return true; + } + return false; +} + +// retreive opcode mapping by opcode_id +const std::string & MemPatternsForNV::get_opcode(int opcode_id) { + auto result = id_to_opcode_map.find(opcode_id); + if (result != id_to_opcode_map.end()) { + return result->second; + } + std::stringstream ss; + ss << "Unknown opcode_id: " << opcode_id; + throw GSDataError(ss.str()); +} + +// store opcode_short mappings +bool MemPatternsForNV::add_or_update_opcode_short(int opcode_short_id, const std::string & opcode_short) { + auto it = id_to_opcode_short_map.find(opcode_short_id); + if (it == id_to_opcode_short_map.end()) { + id_to_opcode_short_map[opcode_short_id] = opcode_short; + //std::cout << "OPCODE: " << opcode_id << " -> " << opcode << std::endl; + return true; + } + return false; +} + +// retreive opcode_short mapping by opcode_short_id +const std::string & MemPatternsForNV::get_opcode_short(int opcode_short_id) { + auto result = id_to_opcode_short_map.find(opcode_short_id); + if (result != id_to_opcode_short_map.end()) { + return result->second; + } + std::stringstream ss; + ss << "Unknown opcode_short_id: " << opcode_short_id; + throw GSDataError(ss.str()); +} + +/* + * Read traces from a nvbit trace file. 
Includes header which describes opcode mappings used in trace data. + * Used by test runner (gsnv_test) to simulate nvbit execution. + */ void MemPatternsForNV::process_traces() { int iret = 0; @@ -436,13 +394,37 @@ void MemPatternsForNV::process_traces() gzFile fp_trace = open_trace_file(get_trace_file_name()); - //printf("First pass to find top gather / scatter iaddresses\n"); - //fflush(stdout); + // Read header ** + trace_header_t * p_header = NULL; + trace_header_t header[1]; + tline_read_header(fp_trace, header, &p_header, &iret); + uint32_t count = 0; + trace_map_entry_t * p_map_entry = NULL; + trace_map_entry_t map_entry[1]; + while (count < p_header->num_map_entires && tline_read_maps(fp_trace, map_entry, &p_map_entry, &iret) ) + { + // std::cout << "MAP ENTRY: " << p_map_entry -> map_name << " " << p_map_entry->id << " -> " << p_map_entry->val << std::endl; + if (std::string(p_map_entry->map_name) == ID_TO_OPCODE) { + id_to_opcode_map[p_map_entry->id] = p_map_entry->val; + } + else if (std::string(p_map_entry->map_name) == ID_TO_OPCODE_SHORT) { + id_to_opcode_short_map[p_map_entry->id] = p_map_entry->val; + } + else { + std::cerr << "Unsupported Map: " << p_map_entry->map_name << " found in trace, ignoring ..." 
+ << p_map_entry->id << " -> " << p_map_entry->val << std::endl; + } + + count++; + p_map_entry++; + } + + // Read Traces ** mem_access_t * p_trace = NULL; mem_access_t trace_buff[NBUFS]; // was static (1024 bytes) - - while (tline_read(fp_trace, trace_buff, &p_trace, &iret)) { + while (tline_read(fp_trace, trace_buff, &p_trace, &iret)) + { //decode drtrace t_line = p_trace; @@ -538,6 +520,45 @@ void MemPatternsForNV::process_second_pass() } } +std::vector MemPatternsForNV::convert_to_trace_entry(const mem_access_t & ma, bool ignore_partial_warps) +{ + // opcode : forms LD.E.64, ST.E.64 + //std::string mem_type; + uint16_t mem_size = ma.size; + uint16_t mem_type_code; + //uint16_t mem_attr_code = 0; + + if (ma.is_load) + mem_type_code = GATHER; + else if (ma.is_store) + mem_type_code = SCATTER; + else + throw GSDataError ("Invalid mem_type must be LD(0) or ST(1)"); + + //const char * m = reinterpret_cast(&ma.opcode); + //const std::string opcode(m, 8); + std::string opcode = get_opcode(ma.opcode_id); + std::string opcode_short = get_opcode_short(ma.opcode_short_id); + + // TODO: This is a SLOW way of doing this + std::vector te_list; + te_list.reserve(MemPatternsForNV::CTA_LENGTH); + for (int i = 0; i < MemPatternsForNV::CTA_LENGTH; i++) + { + if (ma.addrs[i] != 0) + { + trace_entry_t te { mem_type_code, mem_size, ma.addrs[i] }; + te_list.push_back(te); + } + else if (ignore_partial_warps) + { + // Ignore memory_accesses which have less than MemPatternsForNV::CTA_LENGTH + return std::vector(); + } + } + return te_list; +} + void MemPatternsForNV::handle_cta_memory_access(const mem_access_t * ma) { if (_first_access) { @@ -546,9 +567,9 @@ void MemPatternsForNV::handle_cta_memory_access(const mem_access_t * ma) fflush(stdout); } - if (_write_trace_file && _ofs.is_open()) { + if (_write_trace_file && _ofs_tmp.is_open()) { // Write entry to trace_output file - _ofs.write(reinterpret_cast(ma), sizeof *ma); + _ofs_tmp.write(reinterpret_cast(ma), sizeof *ma); } #if 0 
std::stringstream ss; @@ -601,4 +622,110 @@ bool MemPatternsForNV::valid_gs_stride(const std::vector & te_lis } return min_stride_found >= min_stride; +} + + +void MemPatternsForNV::set_trace_file(const std::string & trace_file_name) +{ + if (trace_file_name == _trace_out_file_name) { + throw GSError ("Cannot set trace input file to same name as trace output file [" + trace_file_name + "]."); + } + + _trace_file_name = trace_file_name; +} + +void MemPatternsForNV::set_trace_out_file(const std::string & trace_out_file_name) +{ + try + { + if (trace_out_file_name == _trace_file_name) { + throw GSError ("Cannot set trace output file to same name as trace input file [" + trace_out_file_name + "]."); + } + + _trace_out_file_name = trace_out_file_name; + _tmp_trace_out_file_name = _trace_out_file_name + ".tmp"; + + // Open a temp file for writing data + _ofs_tmp.open(_tmp_trace_out_file_name, std::ios::binary | std::ios::trunc | std::ios::in); + if (!_ofs_tmp.is_open()) { + throw GSFileError("Unable to open " + _tmp_trace_out_file_name + " for writing"); + } + + // Open a ouput file for writing data header and appending data + _ofs.open(_trace_out_file_name, std::ios::binary | std::ios::trunc); + if (!_ofs.is_open()) { + throw GSFileError("Unable to open " + _trace_out_file_name + " for writing"); + } + _write_trace_file = true; + } + catch (const std::exception & ex) + { + std::cerr << "ERROR: " << ex.what() << std::endl; + throw; + } +} + +void MemPatternsForNV:: write_trace_out_file() +{ + if (!_write_trace_file) return; + + try + { + _ofs_tmp.flush(); + + // Write header + trace_header_t header; + header.num_maps = 2; + header.num_map_entires = id_to_opcode_map.size() + id_to_opcode_short_map.size(); + _ofs.write(reinterpret_cast(&header), sizeof header); + + // Write Maps + trace_map_entry_t m_entry; + strncpy(m_entry.map_name, "ID_TO_OPCODE", MAP_NAME_SIZE); + for (auto itr = id_to_opcode_map.begin(); itr != id_to_opcode_map.end(); itr++) + { + m_entry.id = 
itr->first; + strncpy(m_entry.val, itr->second.c_str(), MAP_VALUE_SIZE); // write 22 chars + _ofs.write(reinterpret_cast(&m_entry), sizeof m_entry); + } + + strncpy(m_entry.map_name, "ID_TO_OPCODE_SHORT", MAP_NAME_SIZE); + //uint64_t opcode_short_len = id_to_opcode_short_map.size(); + for (auto itr = id_to_opcode_short_map.begin(); itr != id_to_opcode_short_map.end(); itr++) + { + m_entry.id = itr->first; + strncpy(m_entry.val, itr->second.c_str(), MAP_VALUE_SIZE); // write 22 chars + _ofs.write(reinterpret_cast(&m_entry), sizeof m_entry); + } + + // Write file contents + _ofs_tmp.close(); + std::ifstream ifs(_tmp_trace_out_file_name); + if (!ifs.is_open()) { + throw GSFileError("Unable to open " + _tmp_trace_out_file_name + " for reading"); + } + + _ofs.flush(); + _ofs << ifs.rdbuf(); + _ofs.flush(); + _ofs.close(); + ifs.close(); + + std::remove(_tmp_trace_out_file_name.c_str()); + + std::cout << "-- OPCODE_ID to OPCODE MAPPING -- " << std::endl; + for (auto itr = id_to_opcode_map.begin(); itr != id_to_opcode_map.end(); itr++) { + std::cout << itr->first << " -> " << itr->second << std::endl; + } + + std::cout << "-- OPCODE_SHORT_ID to OPCODE_SHORT MAPPING -- " << std::endl; + for (auto itr = id_to_opcode_short_map.begin(); itr != id_to_opcode_short_map.end(); itr++) { + std::cout << itr->first << " -> " << itr->second << std::endl; + } + } + catch (const std::exception & ex) + { + std::cerr << "ERROR: failed to write trace file: " << _trace_file_name << std::endl; + throw; + } } \ No newline at end of file diff --git a/gsnv_test.cpp b/gsnv_test.cpp index 5bb0578..3bacd05 100644 --- a/gsnv_test.cpp +++ b/gsnv_test.cpp @@ -12,9 +12,14 @@ int main(int argc, char **argv) MemPatternsForNV mp; + // nvbit trace file with memory access traces mp.set_trace_file(argv[1]); + // Executable file built with debug symbols mp.set_binary_file(argv[2]); + // File to save nvbit memory accessses to + //mp.set_trace_out_file(mp.get_file_prefix() + ".nvbit.bin"); + // 
----------------- Process Traces ----------------- mp.add_or_update_opcode(0, "LD.E.64"); From de76408e06fe5a481500db3cd5e4914c9be17b9e Mon Sep 17 00:00:00 2001 From: christopher Date: Sat, 13 Apr 2024 02:34:02 -0400 Subject: [PATCH 36/76] fixes. --- nvbit_tracing/nvgs_trace/nvgs_trace.cu | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/nvbit_tracing/nvgs_trace/nvgs_trace.cu b/nvbit_tracing/nvgs_trace/nvgs_trace.cu index ae79db6..83105f8 100644 --- a/nvbit_tracing/nvgs_trace/nvgs_trace.cu +++ b/nvbit_tracing/nvgs_trace/nvgs_trace.cu @@ -376,9 +376,9 @@ void nvbit_at_ctx_init(CUcontext ctx) { } mp->set_file_prefix("prog_bin"); } - catch (const exception & ex) + catch (const std::exception & ex) { - cerr << "ERROR: " << ex.what() << endl; + std::cerr << "ERROR: " << ex.what() << std::endl; } } @@ -409,7 +409,7 @@ void nvbit_at_ctx_term(CUcontext ctx) { // Generate GS Pattern output fle mp->generate_patterns(); } - catch (const exception & ex) + catch (const std::exception & ex) { std::cerr << "ERROR: " << ex.what() << std::endl; } From d9a89e7faffcff378d1d382b1cb26a1473bb41b9 Mon Sep 17 00:00:00 2001 From: christopher Date: Sat, 13 Apr 2024 17:12:56 -0400 Subject: [PATCH 37/76] Config file can be used to set kernel, trace file and program binary, use ful when run under nvbit. 
NVBit tool now uses MemPattersForNV::should_instrument() to determine which kernel to instrument (does all if none specified in config) --- gsnv_patterns.h | 79 +++++++++++++++++++++++--- gsnv_test.cpp | 13 +++-- nvbit_tracing/nvgs_trace/nvgs_trace.cu | 34 ++++++----- 3 files changed, 97 insertions(+), 29 deletions(-) diff --git a/gsnv_patterns.h b/gsnv_patterns.h index 1576d3b..2ce94ec 100644 --- a/gsnv_patterns.h +++ b/gsnv_patterns.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -161,8 +162,13 @@ class MemPatternsForNV : public MemPatterns { public: static const uint8_t CTA_LENGTH = 32; - static constexpr const char * ID_TO_OPCODE = "ID_TO_OPCODE"; - static constexpr const char * ID_TO_OPCODE_SHORT = "ID_TO_OPCODE_SHORT"; + + static constexpr const char * ID_TO_OPCODE = "ID_TO_OPCODE"; + static constexpr const char * ID_TO_OPCODE_SHORT = "ID_TO_OPCODE_SHORT"; + + static constexpr const char * NVGS_TARGET_KERNEL = "NVGS_TARGET_KERNEL"; + static constexpr const char * NVGS_TRACE_OUT_FILE = "NVGS_TRACE_OUT_FILE"; + static constexpr const char * NVGS_PROGRAM_BINARY = "NVGS_PROGRAM_BINARY"; MemPatternsForNV(): _metrics(GATHER, SCATTER), _iinfo(GATHER, SCATTER), @@ -192,6 +198,8 @@ class MemPatternsForNV : public MemPatterns void set_file_prefix(const std::string & prefix) { _file_prefix = prefix; } std::string get_file_prefix(); + void set_config_file (const std::string & config_file); + void update_metrics(); std::string get_trace_file_prefix (); @@ -218,6 +226,8 @@ class MemPatternsForNV : public MemPatterns // retreive opcode_short mapping by opcode_short_id const std::string & get_opcode_short(int opcode_short_id); + bool should_instrument(const std::string & kernel_name); + std::vector convert_to_trace_entry(const mem_access_t & ma, bool ignore_partial_warps); private: @@ -230,9 +240,10 @@ class MemPatternsForNV : public MemPatterns std::string _trace_file_name; std::string _binary_file_name; std::string _file_prefix; - std::string 
_trace_out_file_name; std::string _tmp_trace_out_file_name; + std::string _config_file_name; + std::set _target_kernels; bool _write_trace_file = false; bool _first_access = true; @@ -404,7 +415,7 @@ void MemPatternsForNV::process_traces() trace_map_entry_t map_entry[1]; while (count < p_header->num_map_entires && tline_read_maps(fp_trace, map_entry, &p_map_entry, &iret) ) { - // std::cout << "MAP ENTRY: " << p_map_entry -> map_name << " " << p_map_entry->id << " -> " << p_map_entry->val << std::endl; + std::cout << "MAP ENTRY: " << p_map_entry -> map_name << " " << p_map_entry->id << " -> " << p_map_entry->val << std::endl; if (std::string(p_map_entry->map_name) == ID_TO_OPCODE) { id_to_opcode_map[p_map_entry->id] = p_map_entry->val; } @@ -499,7 +510,6 @@ void MemPatternsForNV::process_second_pass() { uint64_t mcnt = 0; // used our own local mcnt while iterating over file in this method. int iret = 0; -// trace_entry_t *drline; // State carried thru addr_t iaddr; @@ -681,7 +691,7 @@ void MemPatternsForNV:: write_trace_out_file() // Write Maps trace_map_entry_t m_entry; - strncpy(m_entry.map_name, "ID_TO_OPCODE", MAP_NAME_SIZE); + strncpy(m_entry.map_name, ID_TO_OPCODE, MAP_NAME_SIZE); for (auto itr = id_to_opcode_map.begin(); itr != id_to_opcode_map.end(); itr++) { m_entry.id = itr->first; @@ -689,7 +699,7 @@ void MemPatternsForNV:: write_trace_out_file() _ofs.write(reinterpret_cast(&m_entry), sizeof m_entry); } - strncpy(m_entry.map_name, "ID_TO_OPCODE_SHORT", MAP_NAME_SIZE); + strncpy(m_entry.map_name, ID_TO_OPCODE_SHORT, MAP_NAME_SIZE); //uint64_t opcode_short_len = id_to_opcode_short_map.size(); for (auto itr = id_to_opcode_short_map.begin(); itr != id_to_opcode_short_map.end(); itr++) { @@ -728,4 +738,57 @@ void MemPatternsForNV:: write_trace_out_file() std::cerr << "ERROR: failed to write trace file: " << _trace_file_name << std::endl; throw; } -} \ No newline at end of file +} + +void MemPatternsForNV::set_config_file(const std::string & config_file) +{ + 
_config_file_name = config_file; + std::ifstream ifs; + ifs.open(_config_file_name); + if (!ifs.is_open()) + throw GSFileError("Unable to open config file: " + _config_file_name); + + while (!ifs.eof()) + { + std::string name; + std::string value; + ifs >> name >> value; + if (name.empty() || value.empty()) + continue; + + std::cout << "CONFIG: name: " << name << " value: " << value << std::endl; + + if (NVGS_TARGET_KERNEL == name) { + _target_kernels.insert(value); + } + else if (NVGS_TRACE_OUT_FILE == name) + { + set_trace_out_file(value); + } + else if (NVGS_PROGRAM_BINARY == name) { + set_binary_file(value); + } + else { + std::cerr << "Unknown setting <" << name << "> with value <" << value << "> " + << "specified in config file: " << _config_file_name << " ignoring ..." << std::endl; + } + } +} + +bool MemPatternsForNV::should_instrument(const std::string & kernel_name) +{ + // Instrument all if none specified + if (_target_kernels.size() == 0) { + std::cout << "Instrumenting all : " << kernel_name << std::endl; + return true; + } + + auto itr = _target_kernels.find (kernel_name); + if ( itr != _target_kernels.end()) // Hard code for now + { + std::cout << "Instrumenting: " << kernel_name << std::endl; + return true; + } + + return false; +} diff --git a/gsnv_test.cpp b/gsnv_test.cpp index 3bacd05..b6d0931 100644 --- a/gsnv_test.cpp +++ b/gsnv_test.cpp @@ -1,4 +1,6 @@ +#include + #include "gs_patterns.h" #include "gsnv_patterns.h" @@ -17,17 +19,16 @@ int main(int argc, char **argv) // Executable file built with debug symbols mp.set_binary_file(argv[2]); + const char * config_file = std::getenv("NVGS_CONFIG_FILE"); + if (config_file) { + mp.set_config_file(config_file); + } + // File to save nvbit memory accessses to //mp.set_trace_out_file(mp.get_file_prefix() + ".nvbit.bin"); // ----------------- Process Traces ----------------- - mp.add_or_update_opcode(0, "LD.E.64"); - mp.add_or_update_opcode(1, "ST.E.64"); - - mp.add_or_update_opcode_short(0, "LD"); - 
mp.add_or_update_opcode_short(1, "ST"); - mp.process_traces(); // ----------------- Generate Patterns ----------------- diff --git a/nvbit_tracing/nvgs_trace/nvgs_trace.cu b/nvbit_tracing/nvgs_trace/nvgs_trace.cu index ae79db6..9715a2a 100644 --- a/nvbit_tracing/nvgs_trace/nvgs_trace.cu +++ b/nvbit_tracing/nvgs_trace/nvgs_trace.cu @@ -81,6 +81,7 @@ bool skip_callback_flag = false; uint32_t instr_begin_interval = 0; uint32_t instr_end_interval = UINT32_MAX; int verbose = 0; +std::string nvgs_config_file; /* opcode to id map and reverse map */ std::map opcode_to_id_map; @@ -103,6 +104,9 @@ void nvbit_at_init() { instr_end_interval, "INSTR_END", UINT32_MAX, "End of the instruction interval where to apply instrumentation"); GET_VAR_INT(verbose, "TOOL_VERBOSE", 0, "Enable verbosity inside the tool"); + + GET_VAR_STR(nvgs_config_file, "NVGS_CONFIG_FILE", "Specify a NVGS config file"); + std::string pad(100, '-'); printf("%s\n", pad.c_str()); @@ -253,7 +257,8 @@ void nvbit_at_cuda_event(CUcontext ctx, int is_exit, nvbit_api_cuda_t cbid, cudaDeviceSynchronize(); assert(cudaGetLastError() == cudaSuccess); - if (!is_exit) { + if (!is_exit && mp->should_instrument(nvbit_get_func_name(ctx, p->f))) + { /* instrument */ instrument_function_if_needed(ctx, p->f); @@ -322,7 +327,7 @@ void* recv_thread_fun(void* args) { done = true; break; } - +#if 0 std::stringstream ss; ss << "CTX " << HEX(ctx) << " - grid_launch_id " << ma->grid_launch_id << " - CTA " << ma->cta_id_x << "," @@ -334,7 +339,8 @@ void* recv_thread_fun(void* args) { ss << HEX(ma->addrs[i]) << " "; } - //printf("NVGS_TRACE: %s\n", ss.str().c_str()); + printf("NVGS_TRACE: %s\n", ss.str().c_str()); +#endif num_processed_bytes += sizeof(mem_access_t); try @@ -355,7 +361,8 @@ void* recv_thread_fun(void* args) { void nvbit_at_ctx_init(CUcontext ctx) { pthread_mutex_lock(&mutex); - if (verbose) { + //if (verbose) { + if (1) { printf("NVGS_TRACE: STARTING CONTEXT %p\n", ctx); } CTXstate* ctx_state = new CTXstate; @@ -368,24 
+375,21 @@ void nvbit_at_ctx_init(CUcontext ctx) { pthread_mutex_unlock(&mutex); // -- init #2 - whats the difference - try - { - /// TODO: pull from env variables and set - if (1) { - mp->set_trace_out_file("./trace_file.nvbit"); + try { + if (!nvgs_config_file.empty()) { + mp->set_config_file(nvgs_config_file); } - mp->set_file_prefix("prog_bin"); } - catch (const exception & ex) - { - cerr << "ERROR: " << ex.what() << endl; + catch (const std::exception & ex) { + std::cerr << "ERROR: " << ex.what() << std::endl; } } void nvbit_at_ctx_term(CUcontext ctx) { pthread_mutex_lock(&mutex); skip_callback_flag = true; - if (verbose) { + //if (verbose) { + if (1) { printf("NVGS_TRACE: TERMINATING CONTEXT %p\n", ctx); } /* get context state from map */ @@ -409,7 +413,7 @@ void nvbit_at_ctx_term(CUcontext ctx) { // Generate GS Pattern output fle mp->generate_patterns(); } - catch (const exception & ex) + catch (const std::exception & ex) { std::cerr << "ERROR: " << ex.what() << std::endl; } From 0cfa257c6c48f86dd8e052a55aa33bb141f681b1 Mon Sep 17 00:00:00 2001 From: christopher Date: Mon, 15 Apr 2024 12:46:39 -0400 Subject: [PATCH 38/76] Added a config setting for maximum trace count and setting a file prefix. 
--- gsnv_patterns.h | 54 ++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 45 insertions(+), 9 deletions(-) diff --git a/gsnv_patterns.h b/gsnv_patterns.h index 2ce94ec..4d2e2f6 100644 --- a/gsnv_patterns.h +++ b/gsnv_patterns.h @@ -163,12 +163,15 @@ class MemPatternsForNV : public MemPatterns public: static const uint8_t CTA_LENGTH = 32; - static constexpr const char * ID_TO_OPCODE = "ID_TO_OPCODE"; - static constexpr const char * ID_TO_OPCODE_SHORT = "ID_TO_OPCODE_SHORT"; + static constexpr const char * ID_TO_OPCODE = "ID_TO_OPCODE"; + static constexpr const char * ID_TO_OPCODE_SHORT = "ID_TO_OPCODE_SHORT"; + + static constexpr const char * NVGS_TARGET_KERNEL = "NVGS_TARGET_KERNEL"; + static constexpr const char * NVGS_TRACE_OUT_FILE = "NVGS_TRACE_OUT_FILE"; + static constexpr const char * NVGS_PROGRAM_BINARY = "NVGS_PROGRAM_BINARY"; + static constexpr const char * NVGS_FILE_PREFIX = "NVGS_FILE_PREFIX"; + static constexpr const char * NVGS_MAX_TRACE_COUNT = "NVGS_MAX_TRACE_COUNT"; - static constexpr const char * NVGS_TARGET_KERNEL = "NVGS_TARGET_KERNEL"; - static constexpr const char * NVGS_TRACE_OUT_FILE = "NVGS_TRACE_OUT_FILE"; - static constexpr const char * NVGS_PROGRAM_BINARY = "NVGS_PROGRAM_BINARY"; MemPatternsForNV(): _metrics(GATHER, SCATTER), _iinfo(GATHER, SCATTER), @@ -198,11 +201,19 @@ class MemPatternsForNV : public MemPatterns void set_file_prefix(const std::string & prefix) { _file_prefix = prefix; } std::string get_file_prefix(); + void set_max_trace_count(const std::string & max_count_str); + bool exceed_max_count() { + if (_limit_trace_count && (_trace_info.trace_lines >= _max_trace_count)){ + return true; + } + return false; + } + + // Mainly Called by nvbit kernel void set_config_file (const std::string & config_file); - void update_metrics(); - std::string get_trace_file_prefix (); + void update_metrics(); void process_traces(); void update_source_lines(); @@ -244,6 +255,8 @@ class MemPatternsForNV : public MemPatterns 
std::string _tmp_trace_out_file_name; std::string _config_file_name; std::set _target_kernels; + bool _limit_trace_count = false; + uint64_t _max_trace_count = 0; bool _write_trace_file = false; bool _first_access = true; @@ -571,6 +584,10 @@ std::vector MemPatternsForNV::convert_to_trace_entry(const mem_ac void MemPatternsForNV::handle_cta_memory_access(const mem_access_t * ma) { + if (exceed_max_count()) { + return; + } + if (_first_access) { _first_access = false; printf("First pass to find top gather / scatter iaddresses\n"); @@ -740,6 +757,18 @@ void MemPatternsForNV:: write_trace_out_file() } } +void MemPatternsForNV::set_max_trace_count(const std::string & max_trace_count_str) +{ + try { + _max_trace_count = std::stol(max_trace_count_str); + _limit_trace_count = true; + std::cout << "Max Trace Count set to: " << _max_trace_count << std::endl; + } + catch (...) { + std::cerr << "Failed to set Max Trace Count from value: " << max_trace_count_str << std::endl; + } +} + void MemPatternsForNV::set_config_file(const std::string & config_file) { _config_file_name = config_file; @@ -761,13 +790,18 @@ void MemPatternsForNV::set_config_file(const std::string & config_file) if (NVGS_TARGET_KERNEL == name) { _target_kernels.insert(value); } - else if (NVGS_TRACE_OUT_FILE == name) - { + else if (NVGS_TRACE_OUT_FILE == name) { set_trace_out_file(value); } else if (NVGS_PROGRAM_BINARY == name) { set_binary_file(value); } + else if (NVGS_FILE_PREFIX == name) { + set_file_prefix(value); + } + else if (NVGS_MAX_TRACE_COUNT == name) { + set_max_trace_count(value); + } else { std::cerr << "Unknown setting <" << name << "> with value <" << value << "> " << "specified in config file: " << _config_file_name << " ignoring ..." 
<< std::endl; @@ -777,6 +811,8 @@ void MemPatternsForNV::set_config_file(const std::string & config_file) bool MemPatternsForNV::should_instrument(const std::string & kernel_name) { + if (exceed_max_count()) return false; + // Instrument all if none specified if (_target_kernels.size() == 0) { std::cout << "Instrumenting all : " << kernel_name << std::endl; From 2c1e43b0c8c5f1559b12b935a1e69c0b5c24fc81 Mon Sep 17 00:00:00 2001 From: christopher Date: Mon, 15 Apr 2024 16:57:23 -0400 Subject: [PATCH 39/76] small fixes to handle max_trace_count --- gsnv_patterns.h | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/gsnv_patterns.h b/gsnv_patterns.h index 4d2e2f6..8286bbb 100644 --- a/gsnv_patterns.h +++ b/gsnv_patterns.h @@ -193,17 +193,17 @@ class MemPatternsForNV : public MemPatterns InstrWindow & get_instr_window() override { return _iw; } void set_trace_file(const std::string & trace_file_name); - const std::string & get_trace_file_name() { return _trace_file_name; } + inline const std::string & get_trace_file_name() { return _trace_file_name; } - void set_binary_file(const std::string & binary_file_name) { _binary_file_name = binary_file_name; } - const std::string & get_binary_file_name() { return _binary_file_name; } + inline void set_binary_file(const std::string & binary_file_name) { _binary_file_name = binary_file_name; } + inline const std::string & get_binary_file_name() { return _binary_file_name; } - void set_file_prefix(const std::string & prefix) { _file_prefix = prefix; } + inline void set_file_prefix(const std::string & prefix) { _file_prefix = prefix; } std::string get_file_prefix(); - void set_max_trace_count(const std::string & max_count_str); - bool exceed_max_count() { - if (_limit_trace_count && (_trace_info.trace_lines >= _max_trace_count)){ + void set_max_trace_count(const std::string & max_trace_count_str); + inline bool exceed_max_count() const { + if (_limit_trace_count && 
(_trace_info.trace_lines >= _max_trace_count)) { return true; } return false; @@ -256,7 +256,7 @@ class MemPatternsForNV : public MemPatterns std::string _config_file_name; std::set _target_kernels; bool _limit_trace_count = false; - uint64_t _max_trace_count = 0; + int64_t _max_trace_count = 0; bool _write_trace_file = false; bool _first_access = true; @@ -584,9 +584,7 @@ std::vector MemPatternsForNV::convert_to_trace_entry(const mem_ac void MemPatternsForNV::handle_cta_memory_access(const mem_access_t * ma) { - if (exceed_max_count()) { - return; - } + if (exceed_max_count()) { return; } if (_first_access) { _first_access = false; @@ -760,12 +758,16 @@ void MemPatternsForNV:: write_trace_out_file() void MemPatternsForNV::set_max_trace_count(const std::string & max_trace_count_str) { try { - _max_trace_count = std::stol(max_trace_count_str); + _max_trace_count = (int64_t) std::stoi(max_trace_count_str); + if (_max_trace_count < 0) { + throw GSError("Max Trace count must be greater than 0"); + } _limit_trace_count = true; std::cout << "Max Trace Count set to: " << _max_trace_count << std::endl; } - catch (...) { - std::cerr << "Failed to set Max Trace Count from value: " << max_trace_count_str << std::endl; + catch (const std::exception & ex) { + std::cerr << "Failed to set Max Trace Count from value: " << max_trace_count_str + << " with error: " << ex.what() << std::endl; } } @@ -811,7 +813,7 @@ void MemPatternsForNV::set_config_file(const std::string & config_file) bool MemPatternsForNV::should_instrument(const std::string & kernel_name) { - if (exceed_max_count()) return false; + if (exceed_max_count()) { return false; } // Instrument all if none specified if (_target_kernels.size() == 0) { From bba948ab2c68c428b81f0e8878e759b0a6aea3da Mon Sep 17 00:00:00 2001 From: christopher Date: Mon, 15 Apr 2024 23:44:13 -0400 Subject: [PATCH 40/76] Add support for source lines in nvbit traces(both reading and writing). 
--- gsnv_patterns.h | 94 +++++++++++++++++++----- nvbit_tracing/nvgs_trace/common.h | 1 + nvbit_tracing/nvgs_trace/inject_funcs.cu | 2 + nvbit_tracing/nvgs_trace/nvgs_trace.cu | 29 +++++++- 4 files changed, 108 insertions(+), 18 deletions(-) diff --git a/gsnv_patterns.h b/gsnv_patterns.h index 8286bbb..439d227 100644 --- a/gsnv_patterns.h +++ b/gsnv_patterns.h @@ -36,13 +36,15 @@ typedef struct _trace_entry_t trace_entry_t; #define MAP_NAME_SIZE 24 #define MAP_VALUE_SIZE 22 +#define MAP_VALUE_LONG_SIZE 94 +#define NUM_MAPS 3 struct _trace_map_entry_t { // 32 bytes total char map_name[MAP_NAME_SIZE]; uint16_t id; - char val[MAP_VALUE_SIZE]; + char val[MAP_VALUE_LONG_SIZE]; }; typedef struct _trace_map_entry_t trace_map_entry_t; @@ -165,6 +167,7 @@ class MemPatternsForNV : public MemPatterns static constexpr const char * ID_TO_OPCODE = "ID_TO_OPCODE"; static constexpr const char * ID_TO_OPCODE_SHORT = "ID_TO_OPCODE_SHORT"; + static constexpr const char * ID_TO_LINE = "ID_TO_LINE"; static constexpr const char * NVGS_TARGET_KERNEL = "NVGS_TARGET_KERNEL"; static constexpr const char * NVGS_TRACE_OUT_FILE = "NVGS_TRACE_OUT_FILE"; @@ -228,15 +231,26 @@ class MemPatternsForNV : public MemPatterns // Validate cta stride is within minimum bool valid_gs_stride(const std::vector & te_list, const uint32_t min_stride); - // store opcode mappings + // TODO: Migrate these to template functions ! 
+ // ----------------------------------------------------------------- + + // Store opcode mappings bool add_or_update_opcode(int opcode_id, const std::string & opcode); - // retreive opcode mapping by opcode_id + // Retrieve opcode mapping by opcode_id const std::string & get_opcode(int opcode_id); - // store opcode_short mappings + + // Store opcode_short mappings bool add_or_update_opcode_short(int opcode_short_id, const std::string & opcode_short); - // retreive opcode_short mapping by opcode_short_id + // Retrieve opcode_short mapping by opcode_short_id const std::string & get_opcode_short(int opcode_short_id); + // Store line mappings + bool add_or_update_line(int line_id, const std::string & line); + // Retrieve line number mapping by line_id + const std::string & get_line(int line_id); + + // ----------------------------------------------------------------- + bool should_instrument(const std::string & kernel_name); std::vector convert_to_trace_entry(const mem_access_t & ma, bool ignore_partial_warps); @@ -266,11 +280,12 @@ class MemPatternsForNV : public MemPatterns std::map id_to_opcode_map; std::map id_to_opcode_short_map; + std::map id_to_line_map; }; MemPatternsForNV::~MemPatternsForNV() { - if (_write_trace_file) + if (_write_trace_file && !_first_access) { write_trace_out_file(); /// TODO: COMPRESS trace_file on exit @@ -406,6 +421,28 @@ const std::string & MemPatternsForNV::get_opcode_short(int opcode_short_id) { throw GSDataError(ss.str()); } +// Store line mappings +bool MemPatternsForNV::add_or_update_line(int line_id, const std::string & line) { + auto it = id_to_line_map.find(line_id); + if (it == id_to_line_map.end()) { + id_to_line_map[line_id] = line; + //std::cout << "LINE: " << line_id << " -> " << line << std::endl; + return true; + } + return false; +} + +// Retrieve line number mapping by line_id +const std::string & MemPatternsForNV::get_line(int line_id) { + auto result = id_to_line_map.find(line_id); + if (result != 
id_to_line_map.end()) { + return result->second; + } + std::stringstream ss; + ss << "Unknown line_id: " << line_id; + throw GSDataError(ss.str()); +} + /* * Read traces from a nvbit trace file. Includes header which describes opcode mappings used in trace data. * Used by test runner (gsnv_test) to simulate nvbit execution. @@ -428,13 +465,17 @@ void MemPatternsForNV::process_traces() trace_map_entry_t map_entry[1]; while (count < p_header->num_map_entires && tline_read_maps(fp_trace, map_entry, &p_map_entry, &iret) ) { - std::cout << "MAP ENTRY: " << p_map_entry -> map_name << " " << p_map_entry->id << " -> " << p_map_entry->val << std::endl; + std::cout << "MAP: " << p_map_entry -> map_name << " entry [" << p_map_entry->id << "] -> [" << p_map_entry->val << "]" << std::endl; + if (std::string(p_map_entry->map_name) == ID_TO_OPCODE) { id_to_opcode_map[p_map_entry->id] = p_map_entry->val; } else if (std::string(p_map_entry->map_name) == ID_TO_OPCODE_SHORT) { id_to_opcode_short_map[p_map_entry->id] = p_map_entry->val; } + else if (std::string(p_map_entry->map_name) == ID_TO_LINE) { + id_to_line_map[p_map_entry->id] = p_map_entry->val; + } else { std::cerr << "Unsupported Map: " << p_map_entry->map_name << " found in trace, ignoring ..." << p_map_entry->id << " -> " << p_map_entry->val << std::endl; @@ -560,8 +601,9 @@ std::vector MemPatternsForNV::convert_to_trace_entry(const mem_ac //const char * m = reinterpret_cast(&ma.opcode); //const std::string opcode(m, 8); - std::string opcode = get_opcode(ma.opcode_id); - std::string opcode_short = get_opcode_short(ma.opcode_short_id); + //std::string opcode = get_opcode(ma.opcode_id); + //std::string opcode_short = get_opcode_short(ma.opcode_short_id); + //std::string line = get_line(ma.line_id); // ??? 
neeeded why // TODO: This is a SLOW way of doing this std::vector te_list; @@ -696,30 +738,43 @@ void MemPatternsForNV:: write_trace_out_file() try { + std::cout << "Writing trace file" << std::endl; + _ofs_tmp.flush(); // Write header trace_header_t header; - header.num_maps = 2; - header.num_map_entires = id_to_opcode_map.size() + id_to_opcode_short_map.size(); + header.num_maps = NUM_MAPS; + header.num_map_entires = id_to_opcode_map.size() + + id_to_opcode_short_map.size() + + id_to_line_map.size(); _ofs.write(reinterpret_cast(&header), sizeof header); // Write Maps trace_map_entry_t m_entry; - strncpy(m_entry.map_name, ID_TO_OPCODE, MAP_NAME_SIZE); + strncpy(m_entry.map_name, ID_TO_OPCODE, MAP_NAME_SIZE-1); for (auto itr = id_to_opcode_map.begin(); itr != id_to_opcode_map.end(); itr++) { m_entry.id = itr->first; - strncpy(m_entry.val, itr->second.c_str(), MAP_VALUE_SIZE); // write 22 chars + strncpy(m_entry.val, itr->second.c_str(), MAP_VALUE_LONG_SIZE-1); _ofs.write(reinterpret_cast(&m_entry), sizeof m_entry); } - strncpy(m_entry.map_name, ID_TO_OPCODE_SHORT, MAP_NAME_SIZE); - //uint64_t opcode_short_len = id_to_opcode_short_map.size(); + strncpy(m_entry.map_name, ID_TO_OPCODE_SHORT, MAP_NAME_SIZE-1); + //uint64_t opcode_short_map_len = id_to_opcode_short_map.size(); for (auto itr = id_to_opcode_short_map.begin(); itr != id_to_opcode_short_map.end(); itr++) { m_entry.id = itr->first; - strncpy(m_entry.val, itr->second.c_str(), MAP_VALUE_SIZE); // write 22 chars + strncpy(m_entry.val, itr->second.c_str(), MAP_VALUE_LONG_SIZE-1); + _ofs.write(reinterpret_cast(&m_entry), sizeof m_entry); + } + + strncpy(m_entry.map_name, ID_TO_LINE, MAP_NAME_SIZE-1); + //uint64_t line_map_len = id_to_line_map.size(); + for (auto itr = id_to_line_map.begin(); itr != id_to_line_map.end(); itr++) + { + m_entry.id = itr->first; + strncpy(m_entry.val, itr->second.c_str(), MAP_VALUE_LONG_SIZE-1); _ofs.write(reinterpret_cast(&m_entry), sizeof m_entry); } @@ -747,6 +802,11 @@ void 
MemPatternsForNV:: write_trace_out_file() for (auto itr = id_to_opcode_short_map.begin(); itr != id_to_opcode_short_map.end(); itr++) { std::cout << itr->first << " -> " << itr->second << std::endl; } + + std::cout << "-- LINE_ID to LINE MAPPING -- " << std::endl; + for (auto itr = id_to_line_map.begin(); itr != id_to_line_map.end(); itr++) { + std::cout << itr->first << " -> " << itr->second << std::endl; + } } catch (const std::exception & ex) { @@ -784,7 +844,7 @@ void MemPatternsForNV::set_config_file(const std::string & config_file) std::string name; std::string value; ifs >> name >> value; - if (name.empty() || value.empty()) + if (name.empty() || value.empty() || name[0] == '#') continue; std::cout << "CONFIG: name: " << name << " value: " << value << std::endl; diff --git a/nvbit_tracing/nvgs_trace/common.h b/nvbit_tracing/nvgs_trace/common.h index ec267b9..2945f19 100644 --- a/nvbit_tracing/nvgs_trace/common.h +++ b/nvbit_tracing/nvgs_trace/common.h @@ -40,5 +40,6 @@ typedef struct { int is_load; int is_store; int size; + int line_id; uint64_t addrs[32]; } mem_access_t; diff --git a/nvbit_tracing/nvgs_trace/inject_funcs.cu b/nvbit_tracing/nvgs_trace/inject_funcs.cu index 0477a9f..35b375b 100644 --- a/nvbit_tracing/nvgs_trace/inject_funcs.cu +++ b/nvbit_tracing/nvgs_trace/inject_funcs.cu @@ -42,6 +42,7 @@ extern "C" __device__ __noinline__ void instrument_mem(int pred, int is_load, int is_store, int size, + int line_id, uint64_t addr, uint64_t grid_launch_id, uint64_t pchannel_dev) { @@ -72,6 +73,7 @@ extern "C" __device__ __noinline__ void instrument_mem(int pred, ma.is_load = is_load; ma.is_store = is_store; ma.size = size; + ma.line_id = line_id; /* first active lane pushes information on the channel */ diff --git a/nvbit_tracing/nvgs_trace/nvgs_trace.cu b/nvbit_tracing/nvgs_trace/nvgs_trace.cu index 9715a2a..ef20388 100644 --- a/nvbit_tracing/nvgs_trace/nvgs_trace.cu +++ b/nvbit_tracing/nvgs_trace/nvgs_trace.cu @@ -87,6 +87,7 @@ std::string 
nvgs_config_file; std::map opcode_to_id_map; std::map id_to_opcode_map; std::map opcode_short_to_id_map; +std::map line_to_id_map; // Instantiate GSPatterns for NVBit std::unique_ptr mp(new MemPatternsForNV); @@ -165,14 +166,15 @@ void instrument_function_if_needed(CUcontext ctx, CUfunction func) { instr->printDecoded(); } + // Opcode to OpCodeID if (opcode_to_id_map.find(instr->getOpcode()) == opcode_to_id_map.end()) { int opcode_id = opcode_to_id_map.size(); opcode_to_id_map[instr->getOpcode()] = opcode_id; id_to_opcode_map[opcode_id] = std::string(instr->getOpcode()); } - int opcode_id = opcode_to_id_map[instr->getOpcode()]; + // Opcode_Short to OpCode_Short_ID if (opcode_short_to_id_map.find(instr->getOpcodeShort()) == opcode_short_to_id_map.end()) { int opcode_short_id = opcode_short_to_id_map.size(); opcode_short_to_id_map[instr->getOpcodeShort()] = opcode_short_id; @@ -180,8 +182,31 @@ void instrument_function_if_needed(CUcontext ctx, CUfunction func) { } int opcode_short_id = opcode_short_to_id_map[instr->getOpcodeShort()]; + //Line to Line_ID + /* Get line information for a particular instruction offset if available, */ + /* binary must be compiled with --generate-line-info (-lineinfo) */ + char *line_str; + char *dir_str; + uint32_t line_num; + bool status = nvbit_get_line_info(ctx, func, instr->getOffset(), &line_str, &dir_str, &line_num); + + std::stringstream ss; + ss << dir_str << line_str << ":" << line_num; + std::string line (ss.str()); + + if (line_to_id_map.find(line) == line_to_id_map.end() ) { + int line_id = line_to_id_map.size(); + line_to_id_map[line] = line_id; + } + int line_id = line_to_id_map[line]; + + //std::cout << "Creating a mapping from: " << line << " to line_id: " << line_id << std::endl; + + // Let MemPatternsForNV know about the mapping mp->add_or_update_opcode(opcode_id, instr->getOpcode()); mp->add_or_update_opcode_short(opcode_short_id, instr->getOpcodeShort()); + mp->add_or_update_line(line_id, line); + int mref_idx = 0; /* 
iterate on the operands */ @@ -206,6 +231,8 @@ void instrument_function_if_needed(CUcontext ctx, CUfunction func) { nvbit_add_call_arg_const_val32(instr, instr->isStore()); /* size */ nvbit_add_call_arg_const_val32(instr, instr->getSize()); + /* line number id */ + nvbit_add_call_arg_const_val32(instr, line_id); /* memory reference 64 bit address */ nvbit_add_call_arg_mref_addr64(instr, mref_idx); From da046cdc4813f4292fc21532443d8e981ec49377 Mon Sep 17 00:00:00 2001 From: christopher Date: Tue, 16 Apr 2024 01:23:02 -0400 Subject: [PATCH 41/76] Handle cases where --generate-line-info was not used to build kernel. --- nvbit_tracing/nvgs_trace/nvgs_trace.cu | 27 ++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/nvbit_tracing/nvgs_trace/nvgs_trace.cu b/nvbit_tracing/nvgs_trace/nvgs_trace.cu index ef20388..998557c 100644 --- a/nvbit_tracing/nvgs_trace/nvgs_trace.cu +++ b/nvbit_tracing/nvgs_trace/nvgs_trace.cu @@ -182,30 +182,33 @@ void instrument_function_if_needed(CUcontext ctx, CUfunction func) { } int opcode_short_id = opcode_short_to_id_map[instr->getOpcodeShort()]; - //Line to Line_ID + // Line to Line_ID /* Get line information for a particular instruction offset if available, */ /* binary must be compiled with --generate-line-info (-lineinfo) */ char *line_str; char *dir_str; uint32_t line_num; - bool status = nvbit_get_line_info(ctx, func, instr->getOffset(), &line_str, &dir_str, &line_num); + bool status = nvbit_get_line_info(ctx, func, instr->getOffset(), &line_str, &dir_str, &line_num); - std::stringstream ss; - ss << dir_str << line_str << ":" << line_num; - std::string line (ss.str()); + std::string line; + int line_id = -1; + if (status) { + std::stringstream ss; + ss << dir_str << line_str << ":" << line_num; + line = ss.str(); - if (line_to_id_map.find(line) == line_to_id_map.end() ) { - int line_id = line_to_id_map.size(); - line_to_id_map[line] = line_id; + if (line_to_id_map.find(line) == line_to_id_map.end()) 
{ + line_id = line_to_id_map.size(); + line_to_id_map[line] = line_id; + } + line_id = line_to_id_map[line]; + //std::cout << "Creating a mapping from: " << line << " to line_id: " << line_id << std::endl; } - int line_id = line_to_id_map[line]; - - //std::cout << "Creating a mapping from: " << line << " to line_id: " << line_id << std::endl; // Let MemPatternsForNV know about the mapping mp->add_or_update_opcode(opcode_id, instr->getOpcode()); mp->add_or_update_opcode_short(opcode_short_id, instr->getOpcodeShort()); - mp->add_or_update_line(line_id, line); + if (status) { mp->add_or_update_line(line_id, line); } int mref_idx = 0; From 2d555f92e6dd787c5c85f53a4fd2db6a54b204cf Mon Sep 17 00:00:00 2001 From: christopher Date: Wed, 17 Apr 2024 00:51:06 -0400 Subject: [PATCH 42/76] Fixed trace file read. Trace lines out now match trace lines read in and match max when limited. Added support for use of source lines in gs bucketing. Added base_addr of warp addresses in bucketing. Address to line now uses base_addr. Added logging of debug lines read and written as well as handled. Renamed some maps. 
--- gs_patterns.h | 14 ++-- gs_patterns_core.cpp | 12 ++-- gsnv_patterns.h | 165 ++++++++++++++++++++++++++----------------- gspin_patterns.cpp | 25 ++++--- 4 files changed, 130 insertions(+), 86 deletions(-) diff --git a/gs_patterns.h b/gs_patterns.h index 51994c9..5ff8ef8 100644 --- a/gs_patterns.h +++ b/gs_patterns.h @@ -74,14 +74,16 @@ class InstrAddrAdapter InstrAddrAdapter() { } virtual ~InstrAddrAdapter() { } - virtual bool is_valid() const = 0; - virtual bool is_mem_instr() const = 0; - virtual bool is_other_instr() const = 0; + virtual bool is_valid() const = 0; + virtual bool is_mem_instr() const = 0; + virtual bool is_other_instr() const = 0; virtual mem_access_type get_mem_instr_type() const = 0; - virtual size_t get_size() const = 0; - virtual addr_t get_address() const = 0; - virtual unsigned short get_type() const = 0; // must be 0 for GATHER, 1 for SCATTER !! + virtual size_t get_size() const = 0; + virtual addr_t get_address() const = 0; + virtual unsigned short get_type() const = 0; // must be 0 for GATHER, 1 for SCATTER !! + virtual addr_t get_iaddr() const = 0; + virtual int64_t min_size() const = 0; // multiple? virtual bool is_gather() const diff --git a/gs_patterns_core.cpp b/gs_patterns_core.cpp index 3ee633f..0bf02fb 100644 --- a/gs_patterns_core.cpp +++ b/gs_patterns_core.cpp @@ -231,7 +231,7 @@ void handle_trace_entry(MemPatterns & mp, const InstrAddrAdapter & ia) /*****************************/ if (ia.is_other_instr()) { - iw.iaddr = ia.get_address(); + iw.iaddr = ia.get_iaddr(); //nops trace_info.opcodes++; @@ -242,10 +242,12 @@ void handle_trace_entry(MemPatterns & mp, const InstrAddrAdapter & ia) /***********************/ } else if (ia.is_mem_instr()) { + iw.iaddr = ia.get_iaddr() != ia.get_address() ? 
ia.get_iaddr() : iw.iaddr; + w_rw_idx = ia.get_type(); //printf("M DRTRACE -- iaddr: %016lx addr: %016lx cl_start: %d bytes: %d\n", - // iaddr, drline->addr, drline->addr % 64, drline->size); + // iw.iaddr, ia.get_address(), ia.get_address() % 64, ia.get_size()); if ((++trace_info.mcnt % PERSAMPLE) == 0) { #if SAMPLE @@ -272,8 +274,8 @@ void handle_trace_entry(MemPatterns & mp, const InstrAddrAdapter & ia) } //new window - if ((w_idx == -1) || (iw.w_bytes[w_rw_idx][w_idx] >= VBYTES) || - (iw.w_cnt[w_rw_idx][w_idx] >= VBYTES)) { + if ((w_idx == -1) || (iw.w_bytes[w_rw_idx][w_idx] >= ia.min_size()) || // was >= VBYTES + (iw.w_cnt[w_rw_idx][w_idx] >= ia.min_size())) { // was >= VBYTES /***************************/ //do analysis @@ -479,6 +481,8 @@ bool handle_2nd_pass_trace_entry(const InstrAddrAdapter & ia, maddr = ia.get_address() / ia.get_size(); + iaddr = ia.get_iaddr() != ia.get_address() ? ia.get_iaddr() : iaddr; + if ((++mcnt % PERSAMPLE) == 0) { #if SAMPLE break; diff --git a/gsnv_patterns.h b/gsnv_patterns.h index 439d227..ea6cf90 100644 --- a/gsnv_patterns.h +++ b/gsnv_patterns.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -75,7 +76,7 @@ int tline_read_header(gzFile fp, trace_header_t * val, trace_header_t **p_val, i int idx; - idx = (*edx) / sizeof(trace_entry_t); + idx = (*edx) / sizeof(trace_header_t); //first read if (NULL == *p_val) { *edx = gzread(fp, val, sizeof(trace_header_t)); @@ -117,7 +118,7 @@ int tline_read(gzFile fp, mem_access_t * val, mem_access_t **p_val, int *edx) { int idx; - idx = (*edx) / sizeof(trace_entry_t); + idx = (*edx) / sizeof(mem_access_t); //first read if (NULL == *p_val) { *edx = gzread(fp, val, sizeof(mem_access_t) * NBUFS); @@ -138,25 +139,28 @@ int tline_read(gzFile fp, mem_access_t * val, mem_access_t **p_val, int *edx) { class InstrAddrAdapterForNV : public InstrAddrAdapter { public: - InstrAddrAdapterForNV(const trace_entry_t * te) : _te(*te) { } - InstrAddrAdapterForNV(const trace_entry_t 
te) : _te(te) { } + InstrAddrAdapterForNV(const trace_entry_t * te, addr_t base_addr) : _te(*te), _base_addr(base_addr) { } + InstrAddrAdapterForNV(const trace_entry_t te, addr_t base_addr) : _te(te), _base_addr(base_addr) { } virtual ~InstrAddrAdapterForNV() { } - virtual bool is_valid() const override { return true; } - virtual bool is_mem_instr() const override { return true; } - virtual bool is_other_instr() const override { return false; } - + virtual bool is_valid() const override { return true; } + virtual bool is_mem_instr() const override { return true; } + virtual bool is_other_instr() const override { return false; } virtual mem_access_type get_mem_instr_type() const override { return (_te.type == 0) ? GATHER : SCATTER; } + virtual addr_t get_iaddr () const override { return _base_addr; } + virtual int64_t min_size() const { return 8; } - virtual size_t get_size() const override { return _te.size; } // in bytes - virtual addr_t get_address() const override { return _te.addr; } - virtual unsigned short get_type() const override { return _te.type; } // must be 0 for GATHER, 1 for SCATTER !! + virtual size_t get_size() const override { return _te.size; } // in bytes + virtual addr_t get_address() const override { return _te.addr; } + virtual unsigned short get_type() const override { return _te.type; } // must be 0 for GATHER, 1 for SCATTER !! 
- virtual void output(std::ostream & os) const override { os << "InstrAddrAdapterForNV: trace entry: type: [" << _te.type << "] size: [" << _te.size << "]";} + virtual void output(std::ostream & os) const override { os << "InstrAddrAdapterForNV: trace entry: type: [" + << _te.type << "] size: [" << _te.size << "]"; } private: trace_entry_t _te; + addr_t _base_addr; //mem_access_t _ma; }; @@ -223,6 +227,18 @@ class MemPatternsForNV : public MemPatterns double update_source_lines_from_binary(mem_access_type); void process_second_pass(); + std::string addr_to_line(addr_t addr) + { + auto itr = _addr_to_line_id.find(addr); + if (itr != _addr_to_line_id.end()) { + auto it2 = _id_to_line_map.find(itr->second); + if (it2 != _id_to_line_map.end()) { + return it2->second; + } + } + return std::string(); + } + void set_trace_out_file(const std::string & trace_file_name); void write_trace_out_file(); @@ -253,7 +269,7 @@ class MemPatternsForNV : public MemPatterns bool should_instrument(const std::string & kernel_name); - std::vector convert_to_trace_entry(const mem_access_t & ma, bool ignore_partial_warps); + std::pair> convert_to_trace_entry(const mem_access_t & ma, bool ignore_partial_warps); private: @@ -271,6 +287,8 @@ class MemPatternsForNV : public MemPatterns std::set _target_kernels; bool _limit_trace_count = false; int64_t _max_trace_count = 0; + uint64_t _traces_written = 0; + uint64_t _traces_handled = 0; bool _write_trace_file = false; bool _first_access = true; @@ -278,9 +296,11 @@ class MemPatternsForNV : public MemPatterns std::ofstream _ofs; std::vector _traces; - std::map id_to_opcode_map; - std::map id_to_opcode_short_map; - std::map id_to_line_map; + // rename these _ + std::map _id_to_opcode_map; + std::map _id_to_opcode_short_map; + std::map _id_to_line_map; + std::unordered_map _addr_to_line_id; }; MemPatternsForNV::~MemPatternsForNV() @@ -379,9 +399,9 @@ std::string MemPatternsForNV::get_file_prefix() // store opcode mappings bool 
MemPatternsForNV::add_or_update_opcode(int opcode_id, const std::string & opcode) { - auto it = id_to_opcode_map.find(opcode_id); - if (it == id_to_opcode_map.end()) { - id_to_opcode_map[opcode_id] = opcode; + auto it = _id_to_opcode_map.find(opcode_id); + if (it == _id_to_opcode_map.end()) { + _id_to_opcode_map[opcode_id] = opcode; //std::cout << "OPCODE: " << opcode_id << " -> " << opcode << std::endl; return true; } @@ -390,8 +410,8 @@ bool MemPatternsForNV::add_or_update_opcode(int opcode_id, const std::string & o // retreive opcode mapping by opcode_id const std::string & MemPatternsForNV::get_opcode(int opcode_id) { - auto result = id_to_opcode_map.find(opcode_id); - if (result != id_to_opcode_map.end()) { + auto result = _id_to_opcode_map.find(opcode_id); + if (result != _id_to_opcode_map.end()) { return result->second; } std::stringstream ss; @@ -401,9 +421,9 @@ const std::string & MemPatternsForNV::get_opcode(int opcode_id) { // store opcode_short mappings bool MemPatternsForNV::add_or_update_opcode_short(int opcode_short_id, const std::string & opcode_short) { - auto it = id_to_opcode_short_map.find(opcode_short_id); - if (it == id_to_opcode_short_map.end()) { - id_to_opcode_short_map[opcode_short_id] = opcode_short; + auto it = _id_to_opcode_short_map.find(opcode_short_id); + if (it == _id_to_opcode_short_map.end()) { + _id_to_opcode_short_map[opcode_short_id] = opcode_short; //std::cout << "OPCODE: " << opcode_id << " -> " << opcode << std::endl; return true; } @@ -412,8 +432,8 @@ bool MemPatternsForNV::add_or_update_opcode_short(int opcode_short_id, const std // retreive opcode_short mapping by opcode_short_id const std::string & MemPatternsForNV::get_opcode_short(int opcode_short_id) { - auto result = id_to_opcode_short_map.find(opcode_short_id); - if (result != id_to_opcode_short_map.end()) { + auto result = _id_to_opcode_short_map.find(opcode_short_id); + if (result != _id_to_opcode_short_map.end()) { return result->second; } std::stringstream ss; 
@@ -423,9 +443,9 @@ const std::string & MemPatternsForNV::get_opcode_short(int opcode_short_id) { // Store line mappings bool MemPatternsForNV::add_or_update_line(int line_id, const std::string & line) { - auto it = id_to_line_map.find(line_id); - if (it == id_to_line_map.end()) { - id_to_line_map[line_id] = line; + auto it = _id_to_line_map.find(line_id); + if (it == _id_to_line_map.end()) { + _id_to_line_map[line_id] = line; //std::cout << "LINE: " << line_id << " -> " << line << std::endl; return true; } @@ -434,8 +454,8 @@ bool MemPatternsForNV::add_or_update_line(int line_id, const std::string & line) // Retrieve line number mapping by line_id const std::string & MemPatternsForNV::get_line(int line_id) { - auto result = id_to_line_map.find(line_id); - if (result != id_to_line_map.end()) { + auto result = _id_to_line_map.find(line_id); + if (result != _id_to_line_map.end()) { return result->second; } std::stringstream ss; @@ -468,13 +488,13 @@ void MemPatternsForNV::process_traces() std::cout << "MAP: " << p_map_entry -> map_name << " entry [" << p_map_entry->id << "] -> [" << p_map_entry->val << "]" << std::endl; if (std::string(p_map_entry->map_name) == ID_TO_OPCODE) { - id_to_opcode_map[p_map_entry->id] = p_map_entry->val; + _id_to_opcode_map[p_map_entry->id] = p_map_entry->val; } else if (std::string(p_map_entry->map_name) == ID_TO_OPCODE_SHORT) { - id_to_opcode_short_map[p_map_entry->id] = p_map_entry->val; + _id_to_opcode_short_map[p_map_entry->id] = p_map_entry->val; } else if (std::string(p_map_entry->map_name) == ID_TO_LINE) { - id_to_line_map[p_map_entry->id] = p_map_entry->val; + _id_to_line_map[p_map_entry->id] = p_map_entry->val; } else { std::cerr << "Unsupported Map: " << p_map_entry->map_name << " found in trace, ignoring ..." 
@@ -486,6 +506,8 @@ void MemPatternsForNV::process_traces() } // Read Traces ** + iret = 0; + uint64_t lines_read = 0; mem_access_t * p_trace = NULL; mem_access_t trace_buff[NBUFS]; // was static (1024 bytes) while (tline_read(fp_trace, trace_buff, &p_trace, &iret)) @@ -493,20 +515,24 @@ void MemPatternsForNV::process_traces() //decode drtrace t_line = p_trace; - if (-1 == t_line->cta_id_x) { break; } + if (-1 == t_line->cta_id_x) { continue; } try { handle_cta_memory_access(t_line); p_trace++; + lines_read++; } catch (const GSError & ex) { std::cerr << "ERROR: " << ex.what() << std::endl; + close_trace_file(fp_trace); throw; } } + std::cout << "Lines Read: " << lines_read << std::endl; + close_trace_file(fp_trace); //metrics @@ -537,7 +563,7 @@ void MemPatternsForNV::update_source_lines() // TRY double MemPatternsForNV::update_source_lines_from_binary(mem_access_type mType) { - double scatter_cnt = 0.0; + double target_cnt = 0.0; InstrInfo & target_iinfo = get_iinfo(mType); Metrics & target_metrics = get_metrics(mType); @@ -548,15 +574,19 @@ double MemPatternsForNV::update_source_lines_from_binary(mem_access_type mType) if (0 == target_iinfo.get_iaddrs()[k]) { break; } - translate_iaddr(get_binary_file_name(), target_metrics.get_srcline()[k], target_iinfo.get_iaddrs()[k]); - if (startswith(target_metrics.get_srcline()[k], "?")) + + std::string line; + line = addr_to_line(target_iinfo.get_iaddrs()[k]); + strncpy(target_metrics.get_srcline()[k], line.c_str(), MAX_LINE_LENGTH-1); + + if (std::string(target_metrics.get_srcline()[k]).empty()) target_iinfo.get_icnt()[k] = 0; - scatter_cnt += target_iinfo.get_icnt()[k]; + target_cnt += target_iinfo.get_icnt()[k]; } printf("done.\n"); - return scatter_cnt; + return target_cnt; } @@ -584,13 +614,10 @@ void MemPatternsForNV::process_second_pass() } } -std::vector MemPatternsForNV::convert_to_trace_entry(const mem_access_t & ma, bool ignore_partial_warps) +std::pair> MemPatternsForNV::convert_to_trace_entry(const 
mem_access_t & ma, bool ignore_partial_warps) { - // opcode : forms LD.E.64, ST.E.64 - //std::string mem_type; uint16_t mem_size = ma.size; uint16_t mem_type_code; - //uint16_t mem_attr_code = 0; if (ma.is_load) mem_type_code = GATHER; @@ -599,14 +626,9 @@ std::vector MemPatternsForNV::convert_to_trace_entry(const mem_ac else throw GSDataError ("Invalid mem_type must be LD(0) or ST(1)"); - //const char * m = reinterpret_cast(&ma.opcode); - //const std::string opcode(m, 8); - //std::string opcode = get_opcode(ma.opcode_id); - //std::string opcode_short = get_opcode_short(ma.opcode_short_id); - //std::string line = get_line(ma.line_id); // ??? neeeded why - // TODO: This is a SLOW way of doing this std::vector te_list; + addr_t base_addr = ma.addrs[0]; te_list.reserve(MemPatternsForNV::CTA_LENGTH); for (int i = 0; i < MemPatternsForNV::CTA_LENGTH; i++) { @@ -614,14 +636,18 @@ std::vector MemPatternsForNV::convert_to_trace_entry(const mem_ac { trace_entry_t te { mem_type_code, mem_size, ma.addrs[i] }; te_list.push_back(te); + + if (_addr_to_line_id.find(base_addr) == _addr_to_line_id.end()) { + _addr_to_line_id[base_addr] = ma.line_id; + } } else if (ignore_partial_warps) { // Ignore memory_accesses which have less than MemPatternsForNV::CTA_LENGTH - return std::vector(); + return std::make_pair(0, std::vector()); } } - return te_list; + return std::make_pair(base_addr, te_list); } void MemPatternsForNV::handle_cta_memory_access(const mem_access_t * ma) @@ -637,6 +663,7 @@ void MemPatternsForNV::handle_cta_memory_access(const mem_access_t * ma) if (_write_trace_file && _ofs_tmp.is_open()) { // Write entry to trace_output file _ofs_tmp.write(reinterpret_cast(ma), sizeof *ma); + _traces_written++; } #if 0 std::stringstream ss; @@ -655,14 +682,19 @@ void MemPatternsForNV::handle_cta_memory_access(const mem_access_t * ma) #endif // Convert to vector of trace_entry_t if full warp. ignore partial warps. 
- std::vector te_list = convert_to_trace_entry(*ma, true); + std::pair> te_result = convert_to_trace_entry(*ma, true); + + addr_t base_addr = te_result.first; + std::vector & te_list = te_result.second; + uint64_t min_size = !te_list.empty() ? (te_list[0].size) + 1 : 0; if (min_size > 0 && valid_gs_stride(te_list, min_size)) { for (auto it = te_list.begin(); it != te_list.end(); it++) { - handle_trace_entry(InstrAddrAdapterForNV(*it)); + handle_trace_entry(InstrAddrAdapterForNV(*it, base_addr)); } + _traces_handled++; } } @@ -738,22 +770,23 @@ void MemPatternsForNV:: write_trace_out_file() try { - std::cout << "Writing trace file" << std::endl; + std::cout << "Writing trace file: traces_written: " << _traces_written + << " traced_handled: " << _traces_handled << std::endl; _ofs_tmp.flush(); // Write header trace_header_t header; header.num_maps = NUM_MAPS; - header.num_map_entires = id_to_opcode_map.size() + - id_to_opcode_short_map.size() + - id_to_line_map.size(); + header.num_map_entires = _id_to_opcode_map.size() + + _id_to_opcode_short_map.size() + + _id_to_line_map.size(); _ofs.write(reinterpret_cast(&header), sizeof header); // Write Maps trace_map_entry_t m_entry; strncpy(m_entry.map_name, ID_TO_OPCODE, MAP_NAME_SIZE-1); - for (auto itr = id_to_opcode_map.begin(); itr != id_to_opcode_map.end(); itr++) + for (auto itr = _id_to_opcode_map.begin(); itr != _id_to_opcode_map.end(); itr++) { m_entry.id = itr->first; strncpy(m_entry.val, itr->second.c_str(), MAP_VALUE_LONG_SIZE-1); @@ -761,8 +794,8 @@ void MemPatternsForNV:: write_trace_out_file() } strncpy(m_entry.map_name, ID_TO_OPCODE_SHORT, MAP_NAME_SIZE-1); - //uint64_t opcode_short_map_len = id_to_opcode_short_map.size(); - for (auto itr = id_to_opcode_short_map.begin(); itr != id_to_opcode_short_map.end(); itr++) + //uint64_t opcode_short_map_len = _id_to_opcode_short_map.size(); + for (auto itr = _id_to_opcode_short_map.begin(); itr != _id_to_opcode_short_map.end(); itr++) { m_entry.id = itr->first; 
strncpy(m_entry.val, itr->second.c_str(), MAP_VALUE_LONG_SIZE-1); @@ -770,8 +803,8 @@ void MemPatternsForNV:: write_trace_out_file() } strncpy(m_entry.map_name, ID_TO_LINE, MAP_NAME_SIZE-1); - //uint64_t line_map_len = id_to_line_map.size(); - for (auto itr = id_to_line_map.begin(); itr != id_to_line_map.end(); itr++) + //uint64_t line_map_len = _id_to_line_map.size(); + for (auto itr = _id_to_line_map.begin(); itr != _id_to_line_map.end(); itr++) { m_entry.id = itr->first; strncpy(m_entry.val, itr->second.c_str(), MAP_VALUE_LONG_SIZE-1); @@ -794,17 +827,17 @@ void MemPatternsForNV:: write_trace_out_file() std::remove(_tmp_trace_out_file_name.c_str()); std::cout << "-- OPCODE_ID to OPCODE MAPPING -- " << std::endl; - for (auto itr = id_to_opcode_map.begin(); itr != id_to_opcode_map.end(); itr++) { + for (auto itr = _id_to_opcode_map.begin(); itr != _id_to_opcode_map.end(); itr++) { std::cout << itr->first << " -> " << itr->second << std::endl; } std::cout << "-- OPCODE_SHORT_ID to OPCODE_SHORT MAPPING -- " << std::endl; - for (auto itr = id_to_opcode_short_map.begin(); itr != id_to_opcode_short_map.end(); itr++) { + for (auto itr = _id_to_opcode_short_map.begin(); itr != _id_to_opcode_short_map.end(); itr++) { std::cout << itr->first << " -> " << itr->second << std::endl; } std::cout << "-- LINE_ID to LINE MAPPING -- " << std::endl; - for (auto itr = id_to_line_map.begin(); itr != id_to_line_map.end(); itr++) { + for (auto itr = _id_to_line_map.begin(); itr != _id_to_line_map.end(); itr++) { std::cout << itr->first << " -> " << itr->second << std::endl; } } diff --git a/gspin_patterns.cpp b/gspin_patterns.cpp index b7fe439..6782bfa 100644 --- a/gspin_patterns.cpp +++ b/gspin_patterns.cpp @@ -89,9 +89,9 @@ class InstrAddrAdapterForPin : public InstrAddrAdapter virtual ~InstrAddrAdapterForPin() { } - virtual bool is_valid() const override { return !(0 == _te.type && 0 == _te.size); } - virtual bool is_mem_instr() const override { return ((_te.type == 0x0) || 
(_te.type == 0x1)); } - virtual bool is_other_instr() const override { return ((_te.type >= 0xa) && (_te.type <= 0x10)) || (_te.type == 0x1e); } + virtual bool is_valid() const override { return !(0 == _te.type && 0 == _te.size); } + virtual bool is_mem_instr() const override { return ((_te.type == 0x0) || (_te.type == 0x1)); } + virtual bool is_other_instr() const override { return ((_te.type >= 0xa) && (_te.type <= 0x10)) || (_te.type == 0x1e); } virtual mem_access_type get_mem_instr_type() const override { if (!is_mem_instr()) throw GSDataError("Not a Memory Instruction - unable to determine Instruction"); @@ -100,9 +100,11 @@ class InstrAddrAdapterForPin : public InstrAddrAdapter else return SCATTER; } - virtual size_t get_size() const override { return _te.size; } - virtual addr_t get_address() const override { return _te.addr; } - virtual unsigned short get_type() const override { return _te.type; } // must be 0 for GATHER, 1 for SCATTER !! + virtual size_t get_size() const override { return _te.size; } + virtual addr_t get_address() const override { return _te.addr; } + virtual unsigned short get_type() const override { return _te.type; } // must be 0 for GATHER, 1 for SCATTER !! 
+ virtual addr_t get_iaddr() const override { return _te.addr; } + virtual int64_t min_size() const { return VBYTES; } virtual void output(std::ostream & os) const override { os << "InstrAddrAdapterForPin: trace entry: type: [" << _te.type << "] size: [" << _te.size << "]"; @@ -240,7 +242,7 @@ std::string MemPatternsForPin::get_file_prefix() double MemPatternsForPin::update_source_lines_from_binary(mem_access_type mType) { - double scatter_cnt = 0.0; + double target_cnt = 0.0; InstrInfo & target_iinfo = get_iinfo(mType); Metrics & target_metrics = get_metrics(mType); @@ -255,11 +257,11 @@ double MemPatternsForPin::update_source_lines_from_binary(mem_access_type mType) if (startswith(target_metrics.get_srcline()[k], "?")) target_iinfo.get_icnt()[k] = 0; - scatter_cnt += target_iinfo.get_icnt()[k]; + target_cnt += target_iinfo.get_icnt()[k]; } printf("done.\n"); - return scatter_cnt; + return target_cnt; } // First Pass @@ -267,13 +269,13 @@ void MemPatternsForPin::process_traces() { int iret = 0; trace_entry_t *drline; - InstrWindow iw; gzFile fp_drtrace = open_trace_file(get_trace_file_name()); printf("First pass to find top gather / scatter iaddresses\n"); fflush(stdout); + uint64_t lines_read = 0; trace_entry_t *p_drtrace = NULL; trace_entry_t drtrace[NBUFS]; // was static (1024 bytes) @@ -284,8 +286,11 @@ void MemPatternsForPin::process_traces() handle_trace_entry(InstrAddrAdapterForPin(drline)); p_drtrace++; + lines_read++; } + std::cout << "Lines Read: " << lines_read << std::endl; + close_trace_file(fp_drtrace); //metrics From 6966005e834588fde3e2ca45b0bda950b7236ec1 Mon Sep 17 00:00:00 2001 From: christopher Date: Wed, 17 Apr 2024 01:04:34 -0400 Subject: [PATCH 43/76] Formatting --- gsnv_patterns.h | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/gsnv_patterns.h b/gsnv_patterns.h index ea6cf90..b677fe9 100644 --- a/gsnv_patterns.h +++ b/gsnv_patterns.h @@ -296,7 +296,6 @@ class MemPatternsForNV : public MemPatterns 
std::ofstream _ofs; std::vector _traces; - // rename these _ std::map _id_to_opcode_map; std::map _id_to_opcode_short_map; std::map _id_to_line_map; @@ -397,7 +396,7 @@ std::string MemPatternsForNV::get_file_prefix() return prefix; } -// store opcode mappings +// Store opcode mappings bool MemPatternsForNV::add_or_update_opcode(int opcode_id, const std::string & opcode) { auto it = _id_to_opcode_map.find(opcode_id); if (it == _id_to_opcode_map.end()) { @@ -408,7 +407,7 @@ bool MemPatternsForNV::add_or_update_opcode(int opcode_id, const std::string & o return false; } -// retreive opcode mapping by opcode_id +// Retrieve opcode mapping by opcode_id const std::string & MemPatternsForNV::get_opcode(int opcode_id) { auto result = _id_to_opcode_map.find(opcode_id); if (result != _id_to_opcode_map.end()) { @@ -419,7 +418,7 @@ const std::string & MemPatternsForNV::get_opcode(int opcode_id) { throw GSDataError(ss.str()); } -// store opcode_short mappings +// Store opcode_short mappings bool MemPatternsForNV::add_or_update_opcode_short(int opcode_short_id, const std::string & opcode_short) { auto it = _id_to_opcode_short_map.find(opcode_short_id); if (it == _id_to_opcode_short_map.end()) { @@ -430,7 +429,7 @@ bool MemPatternsForNV::add_or_update_opcode_short(int opcode_short_id, const std return false; } -// retreive opcode_short mapping by opcode_short_id +// Retrieve opcode_short mapping by opcode_short_id const std::string & MemPatternsForNV::get_opcode_short(int opcode_short_id) { auto result = _id_to_opcode_short_map.find(opcode_short_id); if (result != _id_to_opcode_short_map.end()) { @@ -543,8 +542,6 @@ void MemPatternsForNV::process_traces() } - -// TRY void MemPatternsForNV::update_source_lines() { // Find source lines for gathers - Must have symbol @@ -560,7 +557,6 @@ void MemPatternsForNV::update_source_lines() get_scatter_metrics().cnt = update_source_lines_from_binary(SCATTER); } -// TRY double MemPatternsForNV::update_source_lines_from_binary(mem_access_type 
mType) { double target_cnt = 0.0; @@ -568,7 +564,6 @@ double MemPatternsForNV::update_source_lines_from_binary(mem_access_type mType) InstrInfo & target_iinfo = get_iinfo(mType); Metrics & target_metrics = get_metrics(mType); - //Check it is not a library for (int k = 0; k < NGS; k++) { if (0 == target_iinfo.get_iaddrs()[k]) { @@ -723,7 +718,6 @@ bool MemPatternsForNV::valid_gs_stride(const std::vector & te_lis return min_stride_found >= min_stride; } - void MemPatternsForNV::set_trace_file(const std::string & trace_file_name) { if (trace_file_name == _trace_out_file_name) { From 5860367659394d94d57b903f67ba13945dcc4331 Mon Sep 17 00:00:00 2001 From: christopher Date: Wed, 17 Apr 2024 16:22:52 -0400 Subject: [PATCH 44/76] Improve accounting --- gs_patterns_core.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/gs_patterns_core.cpp b/gs_patterns_core.cpp index 0bf02fb..7f13a4a 100644 --- a/gs_patterns_core.cpp +++ b/gs_patterns_core.cpp @@ -242,7 +242,11 @@ void handle_trace_entry(MemPatterns & mp, const InstrAddrAdapter & ia) /***********************/ } else if (ia.is_mem_instr()) { - iw.iaddr = ia.get_iaddr() != ia.get_address() ? ia.get_iaddr() : iw.iaddr; + if ( ia.get_iaddr() != ia.get_address()) { + iw.iaddr = ia.get_iaddr(); + trace_info.opcodes++; + trace_info.did_opcode = true; + } w_rw_idx = ia.get_type(); From 46f9aae7a04d986865f871badc16d87b2074d131 Mon Sep 17 00:00:00 2001 From: christopher Date: Wed, 17 Apr 2024 22:27:42 -0400 Subject: [PATCH 45/76] Small improvements to redue vector copies and inline some methods. 
--- gsnv_patterns.h | 52 ++++++++++++++++++++++++++----------------------- 1 file changed, 28 insertions(+), 24 deletions(-) diff --git a/gsnv_patterns.h b/gsnv_patterns.h index b677fe9..e83012c 100644 --- a/gsnv_patterns.h +++ b/gsnv_patterns.h @@ -139,28 +139,27 @@ int tline_read(gzFile fp, mem_access_t * val, mem_access_t **p_val, int *edx) { class InstrAddrAdapterForNV : public InstrAddrAdapter { public: - InstrAddrAdapterForNV(const trace_entry_t * te, addr_t base_addr) : _te(*te), _base_addr(base_addr) { } - InstrAddrAdapterForNV(const trace_entry_t te, addr_t base_addr) : _te(te), _base_addr(base_addr) { } + InstrAddrAdapterForNV(const trace_entry_t & te, const addr_t & base_addr) : _te(te), _base_addr(base_addr) { } virtual ~InstrAddrAdapterForNV() { } - virtual bool is_valid() const override { return true; } - virtual bool is_mem_instr() const override { return true; } - virtual bool is_other_instr() const override { return false; } - virtual mem_access_type get_mem_instr_type() const override { return (_te.type == 0) ? GATHER : SCATTER; } - virtual addr_t get_iaddr () const override { return _base_addr; } - virtual int64_t min_size() const { return 8; } + virtual inline bool is_valid() const override { return true; } + virtual inline bool is_mem_instr() const override { return true; } + virtual inline bool is_other_instr() const override { return false; } + virtual inline mem_access_type get_mem_instr_type() const override { return (_te.type == 0) ? GATHER : SCATTER; } + virtual inline addr_t get_iaddr () const override { return _base_addr; } + virtual inline int64_t min_size() const override { return 8; } - virtual size_t get_size() const override { return _te.size; } // in bytes - virtual addr_t get_address() const override { return _te.addr; } - virtual unsigned short get_type() const override { return _te.type; } // must be 0 for GATHER, 1 for SCATTER !! 
+ virtual inline size_t get_size() const override { return _te.size; } // in bytes + virtual inline addr_t get_address() const override { return _te.addr; } + virtual inline unsigned short get_type() const override { return _te.type; } // must be 0 for GATHER, 1 for SCATTER !! virtual void output(std::ostream & os) const override { os << "InstrAddrAdapterForNV: trace entry: type: [" << _te.type << "] size: [" << _te.size << "]"; } private: - trace_entry_t _te; - addr_t _base_addr; + const trace_entry_t _te; + const addr_t _base_addr; //mem_access_t _ma; }; @@ -269,7 +268,10 @@ class MemPatternsForNV : public MemPatterns bool should_instrument(const std::string & kernel_name); - std::pair> convert_to_trace_entry(const mem_access_t & ma, bool ignore_partial_warps); + bool convert_to_trace_entry(const mem_access_t & ma, + bool ignore_partial_warps, + std::vector & te_list, + addr_t & base_addr); private: @@ -344,8 +346,6 @@ void MemPatternsForNV::handle_trace_entry(const InstrAddrAdapter & ia) const InstrAddrAdapterForNV & ianv = dynamic_cast (ia); _traces.push_back(ianv); - - // TODO: Determine how to get source lines } void MemPatternsForNV::generate_patterns() @@ -609,7 +609,10 @@ void MemPatternsForNV::process_second_pass() } } -std::pair> MemPatternsForNV::convert_to_trace_entry(const mem_access_t & ma, bool ignore_partial_warps) +bool MemPatternsForNV::convert_to_trace_entry(const mem_access_t & ma, + bool ignore_partial_warps, + std::vector & te_list, + addr_t & base_addr) { uint16_t mem_size = ma.size; uint16_t mem_type_code; @@ -622,8 +625,7 @@ std::pair> MemPatternsForNV::convert_to_trace throw GSDataError ("Invalid mem_type must be LD(0) or ST(1)"); // TODO: This is a SLOW way of doing this - std::vector te_list; - addr_t base_addr = ma.addrs[0]; + base_addr = ma.addrs[0]; te_list.reserve(MemPatternsForNV::CTA_LENGTH); for (int i = 0; i < MemPatternsForNV::CTA_LENGTH; i++) { @@ -639,10 +641,10 @@ std::pair> MemPatternsForNV::convert_to_trace else if 
(ignore_partial_warps) { // Ignore memory_accesses which have less than MemPatternsForNV::CTA_LENGTH - return std::make_pair(0, std::vector()); + return false; } } - return std::make_pair(base_addr, te_list); + return true; } void MemPatternsForNV::handle_cta_memory_access(const mem_access_t * ma) @@ -677,10 +679,12 @@ void MemPatternsForNV::handle_cta_memory_access(const mem_access_t * ma) #endif // Convert to vector of trace_entry_t if full warp. ignore partial warps. - std::pair> te_result = convert_to_trace_entry(*ma, true); + std::vector te_list; + te_list.reserve(MemPatternsForNV::CTA_LENGTH); + addr_t base_addr; - addr_t base_addr = te_result.first; - std::vector & te_list = te_result.second; + bool status = convert_to_trace_entry(*ma, true, te_list, base_addr); + if (!status) return; uint64_t min_size = !te_list.empty() ? (te_list[0].size) + 1 : 0; if (min_size > 0 && valid_gs_stride(te_list, min_size)) From 56e1c310550689a7b212c1276db1da47ab625720 Mon Sep 17 00:00:00 2001 From: christopher Date: Fri, 19 Apr 2024 19:30:55 -0400 Subject: [PATCH 46/76] Added ability to use tmpfile for intermediate traces used in second pass. Added progress bar, improved logging around totals read, totals processes(handled), target_opcode list for filtering out non_interesting memory operations. Some interface cleanups. 
--- gsnv_patterns.h | 178 ++++++++++++++++++++++++++++++++++-------------- gsnv_test.cpp | 6 +- 2 files changed, 127 insertions(+), 57 deletions(-) diff --git a/gsnv_patterns.h b/gsnv_patterns.h index e83012c..ea033fb 100644 --- a/gsnv_patterns.h +++ b/gsnv_patterns.h @@ -9,16 +9,21 @@ #include #include #include +#include #include #include #include #include +#include #include "gs_patterns.h" #include "gs_patterns_core.h" #include "utils.h" +// Enable to use a vector for storing trace data for use by second pass (if not defined data is stored to a temp file +//#define USE_VECTOR_FOR_SECOND_PASS 1 + #define HEX(x) \ "0x" << std::setfill('0') << std::setw(16) << std::hex << (uint64_t)x \ << std::dec @@ -32,6 +37,7 @@ struct _trace_entry_t { addr_t addr; unsigned char length[sizeof(addr_t)]; }; + addr_t base_addr; } __attribute__((packed)); typedef struct _trace_entry_t trace_entry_t; @@ -50,8 +56,9 @@ struct _trace_map_entry_t typedef struct _trace_map_entry_t trace_map_entry_t; struct _trace_header_t { - uint64_t num_map_entires; uint64_t num_maps; + uint64_t num_map_entires; + uint64_t total_traces; }; typedef struct _trace_header_t trace_header_t; @@ -139,28 +146,28 @@ int tline_read(gzFile fp, mem_access_t * val, mem_access_t **p_val, int *edx) { class InstrAddrAdapterForNV : public InstrAddrAdapter { public: - InstrAddrAdapterForNV(const trace_entry_t & te, const addr_t & base_addr) : _te(te), _base_addr(base_addr) { } + InstrAddrAdapterForNV(const trace_entry_t & te) : _te(te) { } virtual ~InstrAddrAdapterForNV() { } - virtual inline bool is_valid() const override { return true; } - virtual inline bool is_mem_instr() const override { return true; } - virtual inline bool is_other_instr() const override { return false; } + virtual inline bool is_valid() const override { return true; } + virtual inline bool is_mem_instr() const override { return true; } + virtual inline bool is_other_instr() const override { return false; } virtual inline mem_access_type 
get_mem_instr_type() const override { return (_te.type == 0) ? GATHER : SCATTER; } - virtual inline addr_t get_iaddr () const override { return _base_addr; } - virtual inline int64_t min_size() const override { return 8; } + virtual inline addr_t get_iaddr () const override { return _te.base_addr; } + virtual inline int64_t min_size() const override { return 8; } - virtual inline size_t get_size() const override { return _te.size; } // in bytes - virtual inline addr_t get_address() const override { return _te.addr; } - virtual inline unsigned short get_type() const override { return _te.type; } // must be 0 for GATHER, 1 for SCATTER !! + virtual inline size_t get_size() const override { return _te.size; } // in bytes + virtual inline addr_t get_address() const override { return _te.addr; } + virtual inline unsigned short get_type() const override { return _te.type; } // must be 0 for GATHER, 1 for SCATTER !! virtual void output(std::ostream & os) const override { os << "InstrAddrAdapterForNV: trace entry: type: [" << _te.type << "] size: [" << _te.size << "]"; } + const trace_entry_t & get_trace_entry() const { return _te; } + private: const trace_entry_t _te; - const addr_t _base_addr; - //mem_access_t _ma; }; class MemPatternsForNV : public MemPatterns @@ -181,7 +188,8 @@ class MemPatternsForNV : public MemPatterns MemPatternsForNV(): _metrics(GATHER, SCATTER), _iinfo(GATHER, SCATTER), - _ofs_tmp() { } + _target_opcodes { "LD", "ST", "LDS", "STS", "LDG", "STG" } + { } virtual ~MemPatternsForNV() override ; @@ -201,9 +209,6 @@ class MemPatternsForNV : public MemPatterns void set_trace_file(const std::string & trace_file_name); inline const std::string & get_trace_file_name() { return _trace_file_name; } - inline void set_binary_file(const std::string & binary_file_name) { _binary_file_name = binary_file_name; } - inline const std::string & get_binary_file_name() { return _binary_file_name; } - inline void set_file_prefix(const std::string & prefix) { _file_prefix 
= prefix; } std::string get_file_prefix(); @@ -268,10 +273,7 @@ class MemPatternsForNV : public MemPatterns bool should_instrument(const std::string & kernel_name); - bool convert_to_trace_entry(const mem_access_t & ma, - bool ignore_partial_warps, - std::vector & te_list, - addr_t & base_addr); + bool convert_to_trace_entry(const mem_access_t & ma, bool ignore_partial_warps, std::vector & te_list); private: @@ -280,11 +282,11 @@ class MemPatternsForNV : public MemPatterns TraceInfo _trace_info; InstrWindow _iw; - std::string _trace_file_name; - std::string _binary_file_name; - std::string _file_prefix; - std::string _trace_out_file_name; - std::string _tmp_trace_out_file_name; + std::string _trace_file_name; // Input compressed nvbit trace file + std::string _file_prefix; // Used by gs_patterns_core to write out pattern files + std::string _trace_out_file_name; // Ouput file containing nvbit traces encounterd if requested + std::string _tmp_trace_out_file_name; // Temp file used to store traces before re-writing to _trace_out_filename + std::string _config_file_name; std::set _target_kernels; bool _limit_trace_count = false; @@ -294,14 +296,26 @@ class MemPatternsForNV : public MemPatterns bool _write_trace_file = false; bool _first_access = true; - std::ofstream _ofs_tmp; + + /* The output stream used to temporarily hold raw trace warp data (mem_access_t) before being writen to _trace_out_file_name */ + std::fstream _ofs_tmp; + /* The output stream cooresponding to _trace_out_file_name */ std::ofstream _ofs; + +#ifdef USE_VECTOR_FOR_SECOND_PASS + /* A vector used to store intermediate trace records (trace_entry_t) exclusively for use by second pass + (instead of _tmp_dump_file if USE_VECTOR_FOR_SECOND_PASS is defined) */ std::vector _traces; +#else + /* A temp file used to store intermediate trace records (trace_entry_t) exclusively for use by second pass */ + std::FILE * _tmp_dump_file; +#endif std::map _id_to_opcode_map; std::map _id_to_opcode_short_map; 
std::map _id_to_line_map; std::unordered_map _addr_to_line_id; + const std::set _target_opcodes; }; MemPatternsForNV::~MemPatternsForNV() @@ -344,8 +358,15 @@ void MemPatternsForNV::handle_trace_entry(const InstrAddrAdapter & ia) // Call libgs_patterns ::handle_trace_entry(*this, ia); - const InstrAddrAdapterForNV & ianv = dynamic_cast (ia); + const InstrAddrAdapterForNV &ianv = dynamic_cast (ia); +#ifdef USE_VECTOR_FOR_SECOND_PASS _traces.push_back(ianv); +#else + size_t rc; + if (!(rc = std::fwrite(reinterpret_cast(&ianv.get_trace_entry()), sizeof(trace_entry_t), 1, _tmp_dump_file) != sizeof(trace_entry_t))) { + throw GSFileError("Write of trace to temp file failed"); + } +#endif } void MemPatternsForNV::generate_patterns() @@ -511,13 +532,19 @@ void MemPatternsForNV::process_traces() mem_access_t trace_buff[NBUFS]; // was static (1024 bytes) while (tline_read(fp_trace, trace_buff, &p_trace, &iret)) { - //decode drtrace + // Decode trace t_line = p_trace; if (-1 == t_line->cta_id_x) { continue; } try { + // Progress bar + if (lines_read % ((uint64_t) std::max((p_header->total_traces * .01), 1.0)) == 0) { + std::cout << "+"; + std::flush(std::cout); + } + handle_cta_memory_access(t_line); p_trace++; @@ -530,7 +557,7 @@ void MemPatternsForNV::process_traces() } } - std::cout << "Lines Read: " << lines_read << std::endl; + std::cout << "\nLines Read: " << lines_read << " of Total: " << p_header->total_traces << std::endl; close_trace_file(fp_trace); @@ -600,19 +627,47 @@ void MemPatternsForNV::process_second_pass() printf("\nSecond pass to fill gather / scatter subtraces\n"); fflush(stdout); +#if USE_VECTOR_FOR_SECOND_PASS for (auto itr = _traces.begin(); itr != _traces.end(); ++itr) { InstrAddrAdapter & ia = *itr; breakout = ::handle_2nd_pass_trace_entry(ia, get_gather_metrics(), get_scatter_metrics(), iaddr, maddr, mcnt, gather_base, scatter_base); + if (breakout) { + break; + } + } +#else + std::fflush(_tmp_dump_file); + std::rewind(_tmp_dump_file); // Back to 
the future, ... sort of + try + { + trace_entry_t t; + while (std::fread(reinterpret_cast (&t), sizeof(trace_entry_t), 1, _tmp_dump_file) && !breakout) { + InstrAddrAdapterForNV ia(const_cast(t)); + breakout = ::handle_2nd_pass_trace_entry(ia, get_gather_metrics(), get_scatter_metrics(), + iaddr, maddr, mcnt, gather_base, scatter_base); + } + if (!breakout && !std::feof(_tmp_dump_file)) { + if (std::ferror(_tmp_dump_file)) { + throw GSFileError("Unexpected error occurred while reading temp file"); + } + } + std::fclose(_tmp_dump_file); + } + catch (const GSError & ex) + { + std::cerr << "ERROR: " << ex.what() << std::endl; + std::fclose(_tmp_dump_file); + throw; } +#endif } bool MemPatternsForNV::convert_to_trace_entry(const mem_access_t & ma, bool ignore_partial_warps, - std::vector & te_list, - addr_t & base_addr) + std::vector & te_list) { uint16_t mem_size = ma.size; uint16_t mem_type_code; @@ -624,14 +679,21 @@ bool MemPatternsForNV::convert_to_trace_entry(const mem_access_t & ma, else throw GSDataError ("Invalid mem_type must be LD(0) or ST(1)"); + if (_id_to_opcode_short_map.find(ma.opcode_short_id) == _id_to_opcode_short_map.end()) + return false; + std::string opcode_short = _id_to_opcode_short_map[ma.opcode_short_id]; + + if (_target_opcodes.find(opcode_short) == _target_opcodes.end()) + return false; + // TODO: This is a SLOW way of doing this - base_addr = ma.addrs[0]; + const addr_t & base_addr = ma.addrs[0]; te_list.reserve(MemPatternsForNV::CTA_LENGTH); for (int i = 0; i < MemPatternsForNV::CTA_LENGTH; i++) { if (ma.addrs[i] != 0) { - trace_entry_t te { mem_type_code, mem_size, ma.addrs[i] }; + trace_entry_t te { mem_type_code, mem_size, ma.addrs[i], base_addr }; te_list.push_back(te); if (_addr_to_line_id.find(base_addr) == _addr_to_line_id.end()) { @@ -681,9 +743,8 @@ void MemPatternsForNV::handle_cta_memory_access(const mem_access_t * ma) // Convert to vector of trace_entry_t if full warp. ignore partial warps. 
std::vector te_list; te_list.reserve(MemPatternsForNV::CTA_LENGTH); - addr_t base_addr; - bool status = convert_to_trace_entry(*ma, true, te_list, base_addr); + bool status = convert_to_trace_entry(*ma, true, te_list); if (!status) return; uint64_t min_size = !te_list.empty() ? (te_list[0].size) + 1 : 0; @@ -691,7 +752,7 @@ void MemPatternsForNV::handle_cta_memory_access(const mem_access_t * ma) { for (auto it = te_list.begin(); it != te_list.end(); it++) { - handle_trace_entry(InstrAddrAdapterForNV(*it, base_addr)); + handle_trace_entry(InstrAddrAdapterForNV(*it)); } _traces_handled++; } @@ -713,6 +774,9 @@ bool MemPatternsForNV::valid_gs_stride(const std::vector & te_lis } uint64_t diff = std::labs (last_addr - (uint64_t)te.addr); + if (diff < min_stride) + return false; + if (diff < min_stride_found) min_stride_found = diff; @@ -735,24 +799,37 @@ void MemPatternsForNV::set_trace_out_file(const std::string & trace_out_file_nam { try { + if (trace_out_file_name.empty()) { + throw GSError ("Cannot set trace output file to empty filename [" + trace_out_file_name + "]."); + } + if (trace_out_file_name == _trace_file_name) { throw GSError ("Cannot set trace output file to same name as trace input file [" + trace_out_file_name + "]."); } - _trace_out_file_name = trace_out_file_name; + _trace_out_file_name = trace_out_file_name; _tmp_trace_out_file_name = _trace_out_file_name + ".tmp"; // Open a temp file for writing data - _ofs_tmp.open(_tmp_trace_out_file_name, std::ios::binary | std::ios::trunc | std::ios::in); + _ofs_tmp.open(_tmp_trace_out_file_name, std::ios::binary | std::ios::trunc | std::ios::in | std::ios::out); if (!_ofs_tmp.is_open()) { throw GSFileError("Unable to open " + _tmp_trace_out_file_name + " for writing"); } + std::remove(_tmp_trace_out_file_name.c_str()); // Force auto cleanup // Open a ouput file for writing data header and appending data _ofs.open(_trace_out_file_name, std::ios::binary | std::ios::trunc); if (!_ofs.is_open()) { throw 
GSFileError("Unable to open " + _trace_out_file_name + " for writing"); } + +#ifndef USE_VECTOR_FOR_SECOND_PASS + // Open an output file for dumping temp data used exclusively by second_pass + _tmp_dump_file = std::tmpfile(); + if (!_tmp_dump_file) { + throw GSFileError("Unable to open " + _trace_out_file_name + " for reading & writing"); + } +#endif _write_trace_file = true; } catch (const std::exception & ex) @@ -762,23 +839,25 @@ void MemPatternsForNV::set_trace_out_file(const std::string & trace_out_file_nam } } -void MemPatternsForNV:: write_trace_out_file() +void MemPatternsForNV::write_trace_out_file() { if (!_write_trace_file) return; try { - std::cout << "Writing trace file: traces_written: " << _traces_written + std::cout << "Writing trace file - traces_written: " << _traces_written << " traced_handled: " << _traces_handled << std::endl; _ofs_tmp.flush(); // Write header trace_header_t header; - header.num_maps = NUM_MAPS; + header.num_maps = NUM_MAPS; header.num_map_entires = _id_to_opcode_map.size() + _id_to_opcode_short_map.size() + _id_to_line_map.size(); + header.total_traces = _traces_written; + _ofs.write(reinterpret_cast(&header), sizeof header); // Write Maps @@ -808,19 +887,14 @@ void MemPatternsForNV:: write_trace_out_file() strncpy(m_entry.val, itr->second.c_str(), MAP_VALUE_LONG_SIZE-1); _ofs.write(reinterpret_cast(&m_entry), sizeof m_entry); } + _ofs.flush(); // Write file contents - _ofs_tmp.close(); - std::ifstream ifs(_tmp_trace_out_file_name); - if (!ifs.is_open()) { - throw GSFileError("Unable to open " + _tmp_trace_out_file_name + " for reading"); - } - - _ofs.flush(); - _ofs << ifs.rdbuf(); + _ofs_tmp.seekp(0); + _ofs << _ofs_tmp.rdbuf(); _ofs.flush(); _ofs.close(); - ifs.close(); + _ofs_tmp.close(); std::remove(_tmp_trace_out_file_name.c_str()); @@ -841,6 +915,7 @@ void MemPatternsForNV:: write_trace_out_file() } catch (const std::exception & ex) { + std::remove(_tmp_trace_out_file_name.c_str()); std::cerr << "ERROR: failed to write 
trace file: " << _trace_file_name << std::endl; throw; } @@ -886,9 +961,6 @@ void MemPatternsForNV::set_config_file(const std::string & config_file) else if (NVGS_TRACE_OUT_FILE == name) { set_trace_out_file(value); } - else if (NVGS_PROGRAM_BINARY == name) { - set_binary_file(value); - } else if (NVGS_FILE_PREFIX == name) { set_file_prefix(value); } diff --git a/gsnv_test.cpp b/gsnv_test.cpp index b6d0931..7c6ffbf 100644 --- a/gsnv_test.cpp +++ b/gsnv_test.cpp @@ -8,16 +8,14 @@ int main(int argc, char **argv) { try { - if (argc != 3) { - throw GSError("Invalid arguments, should be: trace.gz binary_file_name"); + if (argc != 2) { + throw GSError("Invalid arguments, should be: trace.gz"); } MemPatternsForNV mp; // nvbit trace file with memory access traces mp.set_trace_file(argv[1]); - // Executable file built with debug symbols - mp.set_binary_file(argv[2]); const char * config_file = std::getenv("NVGS_CONFIG_FILE"); if (config_file) { From 4f4ee11e336339277465d40eb5b955bc64206c8e Mon Sep 17 00:00:00 2001 From: christopher Date: Sat, 20 Apr 2024 00:26:40 -0400 Subject: [PATCH 47/76] Bug fixes. Error checking fixes. Bulk read of trace temp file. Progress log. 
--- gsnv_patterns.h | 37 ++++++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/gsnv_patterns.h b/gsnv_patterns.h index ea033fb..17c47e2 100644 --- a/gsnv_patterns.h +++ b/gsnv_patterns.h @@ -38,6 +38,7 @@ struct _trace_entry_t { unsigned char length[sizeof(addr_t)]; }; addr_t base_addr; + char padding[4]; } __attribute__((packed)); typedef struct _trace_entry_t trace_entry_t; @@ -45,6 +46,8 @@ typedef struct _trace_entry_t trace_entry_t; #define MAP_VALUE_SIZE 22 #define MAP_VALUE_LONG_SIZE 94 #define NUM_MAPS 3 +// Setting this to fit within a 4k page e.g 170 * 24 bytes <= 4k +#define TRACE_BUFFER_LENGTH 170 struct _trace_map_entry_t { @@ -362,8 +365,8 @@ void MemPatternsForNV::handle_trace_entry(const InstrAddrAdapter & ia) #ifdef USE_VECTOR_FOR_SECOND_PASS _traces.push_back(ianv); #else - size_t rc; - if (!(rc = std::fwrite(reinterpret_cast(&ianv.get_trace_entry()), sizeof(trace_entry_t), 1, _tmp_dump_file) != sizeof(trace_entry_t))) { + if (std::fwrite(reinterpret_cast(&ianv.get_trace_entry()), sizeof(trace_entry_t), 1, _tmp_dump_file) != 1) + { throw GSFileError("Write of trace to temp file failed"); } #endif @@ -528,6 +531,7 @@ void MemPatternsForNV::process_traces() // Read Traces ** iret = 0; uint64_t lines_read = 0; + uint64_t pos = 0; mem_access_t * p_trace = NULL; mem_access_t trace_buff[NBUFS]; // was static (1024 bytes) while (tline_read(fp_trace, trace_buff, &p_trace, &iret)) @@ -540,9 +544,15 @@ void MemPatternsForNV::process_traces() try { // Progress bar + if (lines_read == 0) { + for (int i = 0; i < 100; i++) { std::cout << "-"; } + std::cout << std::endl; + } if (lines_read % ((uint64_t) std::max((p_header->total_traces * .01), 1.0)) == 0) { - std::cout << "+"; + if ((pos % 20) == 0) { std::cout << "|"; } + else { std::cout << "+"; } std::flush(std::cout); + pos++; } handle_cta_memory_access(t_line); @@ -643,12 +653,21 @@ void MemPatternsForNV::process_second_pass() std::rewind(_tmp_dump_file); // 
Back to the future, ... sort of try { - trace_entry_t t; - while (std::fread(reinterpret_cast (&t), sizeof(trace_entry_t), 1, _tmp_dump_file) && !breakout) { - InstrAddrAdapterForNV ia(const_cast(t)); - breakout = ::handle_2nd_pass_trace_entry(ia, get_gather_metrics(), get_scatter_metrics(), - iaddr, maddr, mcnt, gather_base, scatter_base); + trace_entry_t ta[TRACE_BUFFER_LENGTH]; + size_t count_read = 0; + size_t read; + while ( (read = std::fread(&ta, sizeof (ta[0]), TRACE_BUFFER_LENGTH, _tmp_dump_file)) && !breakout ) + { + for (int i = 0; i < read; i++) + { + InstrAddrAdapterForNV ia(const_cast(ta[i])); + breakout = ::handle_2nd_pass_trace_entry(ia, get_gather_metrics(), get_scatter_metrics(), + iaddr, maddr, mcnt, gather_base, scatter_base); + count_read++; + } } + std::cout << "Reread: " << count_read << " for second_pass " << std::endl; + if (!breakout && !std::feof(_tmp_dump_file)) { if (std::ferror(_tmp_dump_file)) { throw GSFileError("Unexpected error occurred while reading temp file"); @@ -827,7 +846,7 @@ void MemPatternsForNV::set_trace_out_file(const std::string & trace_out_file_nam // Open an output file for dumping temp data used exclusively by second_pass _tmp_dump_file = std::tmpfile(); if (!_tmp_dump_file) { - throw GSFileError("Unable to open " + _trace_out_file_name + " for reading & writing"); + throw GSFileError("Unable to create a temp file for second pass"); } #endif _write_trace_file = true; From 696fcb872bc05eeda8f285f3227b9254bfb01e7d Mon Sep 17 00:00:00 2001 From: christopher Date: Sat, 20 Apr 2024 10:18:04 -0400 Subject: [PATCH 48/76] Expand PSize and fix breakout. 
--- gs_patterns.h | 3 +-- gsnv_patterns.h | 4 +++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/gs_patterns.h b/gs_patterns.h index 5ff8ef8..2ce7a4e 100644 --- a/gs_patterns.h +++ b/gs_patterns.h @@ -25,8 +25,7 @@ #define NSTRIDES 15 //Threshold for number of unique distances #define OUTTHRESH (0.5) //Threshold for percentage of distances at boundaries of histogram #define NTOP (10) -#define PSIZE (1<<23) -//#define PSIZE (1<<18) +#define PSIZE (1<<27) // Was 2^23 (8mb) //DONT CHANGE #define VBYTES (VBITS/8) diff --git a/gsnv_patterns.h b/gsnv_patterns.h index 17c47e2..9e237a0 100644 --- a/gsnv_patterns.h +++ b/gsnv_patterns.h @@ -656,7 +656,7 @@ void MemPatternsForNV::process_second_pass() trace_entry_t ta[TRACE_BUFFER_LENGTH]; size_t count_read = 0; size_t read; - while ( (read = std::fread(&ta, sizeof (ta[0]), TRACE_BUFFER_LENGTH, _tmp_dump_file)) && !breakout ) + while ( !breakout && (read = std::fread(&ta, sizeof (ta[0]), TRACE_BUFFER_LENGTH, _tmp_dump_file)) ) { for (int i = 0; i < read; i++) { @@ -664,6 +664,8 @@ void MemPatternsForNV::process_second_pass() breakout = ::handle_2nd_pass_trace_entry(ia, get_gather_metrics(), get_scatter_metrics(), iaddr, maddr, mcnt, gather_base, scatter_base); count_read++; + + if (breakout) break; } } std::cout << "Reread: " << count_read << " for second_pass " << std::endl; From 1494f1d88276dc12ab375e3aec6a8501c8abf689 Mon Sep 17 00:00:00 2001 From: christopher Date: Sat, 20 Apr 2024 13:51:52 -0400 Subject: [PATCH 49/76] Write trace file before 2nd pass rather than in destructor. 
--- gsnv_patterns.h | 20 +++++++------------- gsnv_test.cpp | 2 ++ nvbit_tracing/nvgs_trace/nvgs_trace.cu | 3 +++ 3 files changed, 12 insertions(+), 13 deletions(-) diff --git a/gsnv_patterns.h b/gsnv_patterns.h index 9e237a0..e736266 100644 --- a/gsnv_patterns.h +++ b/gsnv_patterns.h @@ -194,7 +194,7 @@ class MemPatternsForNV : public MemPatterns _target_opcodes { "LD", "ST", "LDS", "STS", "LDG", "STG" } { } - virtual ~MemPatternsForNV() override ; + virtual ~MemPatternsForNV() override { } void handle_trace_entry(const InstrAddrAdapter & ia) override; void generate_patterns() override; @@ -321,15 +321,6 @@ class MemPatternsForNV : public MemPatterns const std::set _target_opcodes; }; -MemPatternsForNV::~MemPatternsForNV() -{ - if (_write_trace_file && !_first_access) - { - write_trace_out_file(); - /// TODO: COMPRESS trace_file on exit - } -} - Metrics & MemPatternsForNV::get_metrics(mem_access_type m) { switch (m) @@ -862,12 +853,13 @@ void MemPatternsForNV::set_trace_out_file(const std::string & trace_out_file_nam void MemPatternsForNV::write_trace_out_file() { - if (!_write_trace_file) return; + if (!_write_trace_file || _first_access) return; + /// TODO: COMPRESS trace_file try { - std::cout << "Writing trace file - traces_written: " << _traces_written - << " traced_handled: " << _traces_handled << std::endl; + std::cout << "\nSaving trace file - traces_written: " << _traces_written + << " traced_handled: " << _traces_handled << "\n" << std::endl; _ofs_tmp.flush(); @@ -919,6 +911,8 @@ void MemPatternsForNV::write_trace_out_file() std::remove(_tmp_trace_out_file_name.c_str()); + std::cout << "Mappings found" << std::endl; + std::cout << "-- OPCODE_ID to OPCODE MAPPING -- " << std::endl; for (auto itr = _id_to_opcode_map.begin(); itr != _id_to_opcode_map.end(); itr++) { std::cout << itr->first << " -> " << itr->second << std::endl; diff --git a/gsnv_test.cpp b/gsnv_test.cpp index 7c6ffbf..2dfa8b9 100644 --- a/gsnv_test.cpp +++ b/gsnv_test.cpp @@ -29,6 +29,8 @@ 
int main(int argc, char **argv) mp.process_traces(); + mp.write_trace_out_file(); + // ----------------- Generate Patterns ----------------- mp.generate_patterns(); diff --git a/nvbit_tracing/nvgs_trace/nvgs_trace.cu b/nvbit_tracing/nvgs_trace/nvgs_trace.cu index 998557c..4c85fca 100644 --- a/nvbit_tracing/nvgs_trace/nvgs_trace.cu +++ b/nvbit_tracing/nvgs_trace/nvgs_trace.cu @@ -440,6 +440,9 @@ void nvbit_at_ctx_term(CUcontext ctx) { try { + // Persist trace file + mp->write_trace_out_file(); + // Generate GS Pattern output fle mp->generate_patterns(); } From 3eaa16fff6f62b1a3aa3d143f42e319cbc24791a Mon Sep 17 00:00:00 2001 From: christopher Date: Sat, 20 Apr 2024 18:55:21 -0400 Subject: [PATCH 50/76] Introduce instruction type, Improved numbers. --- gs_patterns.h | 14 ++++++++------ gs_patterns_core.cpp | 18 ++++++++++-------- gsnv_patterns.h | 7 ++++--- gsnv_test.cpp | 11 ++++++----- gspin_patterns.cpp | 7 ++++--- 5 files changed, 32 insertions(+), 25 deletions(-) diff --git a/gs_patterns.h b/gs_patterns.h index 2ce7a4e..f1154b5 100644 --- a/gs_patterns.h +++ b/gs_patterns.h @@ -34,6 +34,7 @@ typedef uintptr_t addr_t; typedef enum { GATHER=0, SCATTER } mem_access_type; +typedef enum { VECTOR=0, CTA } mem_instr_type; class GSError : public std::exception { @@ -73,10 +74,11 @@ class InstrAddrAdapter InstrAddrAdapter() { } virtual ~InstrAddrAdapter() { } - virtual bool is_valid() const = 0; - virtual bool is_mem_instr() const = 0; - virtual bool is_other_instr() const = 0; - virtual mem_access_type get_mem_instr_type() const = 0; + virtual bool is_valid() const = 0; + virtual bool is_mem_instr() const = 0; + virtual bool is_other_instr() const = 0; + virtual mem_access_type get_mem_access_type() const = 0; + virtual mem_instr_type get_mem_instr_type() const = 0; virtual size_t get_size() const = 0; virtual addr_t get_address() const = 0; @@ -86,10 +88,10 @@ class InstrAddrAdapter // multiple? 
virtual bool is_gather() const - { return (is_valid() && is_mem_instr() && GATHER == get_mem_instr_type()) ? true : false; } + { return (is_valid() && is_mem_instr() && GATHER == get_mem_access_type()) ? true : false; } virtual bool is_scatter() const - { return (is_valid() && is_mem_instr() && SCATTER == get_mem_instr_type()) ? true : false; } + { return (is_valid() && is_mem_instr() && SCATTER == get_mem_access_type()) ? true : false; } virtual void output(std::ostream & os) const = 0; }; diff --git a/gs_patterns_core.cpp b/gs_patterns_core.cpp index 7f13a4a..b53ba62 100644 --- a/gs_patterns_core.cpp +++ b/gs_patterns_core.cpp @@ -231,7 +231,7 @@ void handle_trace_entry(MemPatterns & mp, const InstrAddrAdapter & ia) /*****************************/ if (ia.is_other_instr()) { - iw.iaddr = ia.get_iaddr(); + iw.iaddr = ia.get_address(); //nops trace_info.opcodes++; @@ -242,7 +242,7 @@ void handle_trace_entry(MemPatterns & mp, const InstrAddrAdapter & ia) /***********************/ } else if (ia.is_mem_instr()) { - if ( ia.get_iaddr() != ia.get_address()) { + if (CTA == ia.get_mem_instr_type() && ia.get_iaddr() != ia.get_address()) { iw.iaddr = ia.get_iaddr(); trace_info.opcodes++; trace_info.did_opcode = true; @@ -364,7 +364,7 @@ void handle_trace_entry(MemPatterns & mp, const InstrAddrAdapter & ia) //Set window values iw.w_iaddrs[w_rw_idx][w_idx] = iw.iaddr; - iw.w_maddr[w_rw_idx][w_idx][iw.w_cnt[w_rw_idx][w_idx]] = ia.get_address() / ia.get_size(); + iw.w_maddr[w_rw_idx][w_idx][iw.w_cnt[w_rw_idx][w_idx]] = ia.get_iaddr(); iw.w_bytes[w_rw_idx][w_idx] += ia.get_size(); //num access per iaddr in loop @@ -483,9 +483,11 @@ bool handle_2nd_pass_trace_entry(const InstrAddrAdapter & ia, } else if (ia.is_mem_instr()) { - maddr = ia.get_address() / ia.get_size(); + maddr = ia.get_iaddr(); - iaddr = ia.get_iaddr() != ia.get_address() ? 
ia.get_iaddr() : iaddr; + if (CTA == ia.get_mem_instr_type() && ia.get_address() != ia.get_iaddr()) { + iaddr = ia.get_iaddr(); + } if ((++mcnt % PERSAMPLE) == 0) { #if SAMPLE @@ -496,7 +498,7 @@ bool handle_2nd_pass_trace_entry(const InstrAddrAdapter & ia, } // gather ? - if (ia.get_mem_instr_type() == GATHER) { + if (GATHER == ia.get_mem_access_type()) { for (i = 0; i < gather_metrics.ntop; i++) { @@ -519,7 +521,7 @@ bool handle_2nd_pass_trace_entry(const InstrAddrAdapter & ia, } } // scatter ? - else if (ia.get_mem_instr_type() == SCATTER) { + else if (SCATTER == ia.get_mem_access_type()) { for (i = 0; i < scatter_metrics.ntop; i++) { @@ -541,7 +543,7 @@ bool handle_2nd_pass_trace_entry(const InstrAddrAdapter & ia, } } else { // belt and suspenders, yep = but helps to validate correct logic in children of InstrAddresInfo - throw GSDataError("Unknown Memory Instruction Type: " + ia.get_mem_instr_type()); + throw GSDataError("Unknown Memory Access Type: " + ia.get_mem_access_type()); } } // MEM diff --git a/gsnv_patterns.h b/gsnv_patterns.h index e736266..a7292cd 100644 --- a/gsnv_patterns.h +++ b/gsnv_patterns.h @@ -156,13 +156,14 @@ class InstrAddrAdapterForNV : public InstrAddrAdapter virtual inline bool is_valid() const override { return true; } virtual inline bool is_mem_instr() const override { return true; } virtual inline bool is_other_instr() const override { return false; } - virtual inline mem_access_type get_mem_instr_type() const override { return (_te.type == 0) ? GATHER : SCATTER; } - virtual inline addr_t get_iaddr () const override { return _te.base_addr; } - virtual inline int64_t min_size() const override { return 8; } + virtual inline mem_access_type get_mem_access_type() const override { return (_te.type == 0) ? 
GATHER : SCATTER; } + virtual inline mem_instr_type get_mem_instr_type() const override { return CTA; } virtual inline size_t get_size() const override { return _te.size; } // in bytes virtual inline addr_t get_address() const override { return _te.addr; } virtual inline unsigned short get_type() const override { return _te.type; } // must be 0 for GATHER, 1 for SCATTER !! + virtual inline addr_t get_iaddr () const override { return _te.base_addr; } + virtual inline int64_t min_size() const override { return 256; } virtual void output(std::ostream & os) const override { os << "InstrAddrAdapterForNV: trace entry: type: [" << _te.type << "] size: [" << _te.size << "]"; } diff --git a/gsnv_test.cpp b/gsnv_test.cpp index 2dfa8b9..f55d834 100644 --- a/gsnv_test.cpp +++ b/gsnv_test.cpp @@ -4,12 +4,16 @@ #include "gs_patterns.h" #include "gsnv_patterns.h" +#define NVGS_CONFIG_FILE "NVGS_CONFIG_FILE" + int main(int argc, char **argv) { try { if (argc != 2) { - throw GSError("Invalid arguments, should be: trace.gz"); + size_t pos = std::string(argv[0]).find_last_of("/"); + std::string prog_name = std::string(argv[0]).substr(pos+1); + throw GSError("Invalid program arguments, should be: " + prog_name + " "); } MemPatternsForNV mp; @@ -17,7 +21,7 @@ int main(int argc, char **argv) // nvbit trace file with memory access traces mp.set_trace_file(argv[1]); - const char * config_file = std::getenv("NVGS_CONFIG_FILE"); + const char * config_file = std::getenv(NVGS_CONFIG_FILE); if (config_file) { mp.set_config_file(config_file); } @@ -63,6 +67,3 @@ int main(int argc, char **argv) return 0; } - - - diff --git a/gspin_patterns.cpp b/gspin_patterns.cpp index 6782bfa..a577a2e 100644 --- a/gspin_patterns.cpp +++ b/gspin_patterns.cpp @@ -93,17 +93,18 @@ class InstrAddrAdapterForPin : public InstrAddrAdapter virtual bool is_mem_instr() const override { return ((_te.type == 0x0) || (_te.type == 0x1)); } virtual bool is_other_instr() const override { return ((_te.type >= 0xa) && (_te.type 
<= 0x10)) || (_te.type == 0x1e); } - virtual mem_access_type get_mem_instr_type() const override { - if (!is_mem_instr()) throw GSDataError("Not a Memory Instruction - unable to determine Instruction"); + virtual mem_access_type get_mem_access_type() const override { + if (!is_mem_instr()) throw GSDataError("Not a Memory Instruction - unable to determine Access Type"); // Must be 0x0 or 0x1 if (_te.type == 0x0) return GATHER; else return SCATTER; } + virtual inline mem_instr_type get_mem_instr_type() const override { return VECTOR; } virtual size_t get_size() const override { return _te.size; } virtual addr_t get_address() const override { return _te.addr; } virtual unsigned short get_type() const override { return _te.type; } // must be 0 for GATHER, 1 for SCATTER !! - virtual addr_t get_iaddr() const override { return _te.addr; } + virtual addr_t get_iaddr() const override { return _te.addr / _te.size; } virtual int64_t min_size() const { return VBYTES; } virtual void output(std::ostream & os) const override { From a96b3583bbc047494e2f9dad0f4a88a41d16e321 Mon Sep 17 00:00:00 2001 From: christopher Date: Sun, 21 Apr 2024 17:02:45 -0400 Subject: [PATCH 51/76] Renamed some address functions to better model. Fixes to Numbers, output patterns looking reasonable. --- gs_patterns.h | 4 ++-- gs_patterns_core.cpp | 12 ++++++------ gsnv_patterns.h | 5 +++-- gspin_patterns.cpp | 3 ++- 4 files changed, 13 insertions(+), 11 deletions(-) diff --git a/gs_patterns.h b/gs_patterns.h index f1154b5..6ce3299 100644 --- a/gs_patterns.h +++ b/gs_patterns.h @@ -82,10 +82,10 @@ class InstrAddrAdapter virtual size_t get_size() const = 0; virtual addr_t get_address() const = 0; - virtual unsigned short get_type() const = 0; // must be 0 for GATHER, 1 for SCATTER !! virtual addr_t get_iaddr() const = 0; + virtual addr_t get_maddr() const = 0; + virtual unsigned short get_type() const = 0; // must be 0 for GATHER, 1 for SCATTER !! virtual int64_t min_size() const = 0; - // multiple? 
virtual bool is_gather() const { return (is_valid() && is_mem_instr() && GATHER == get_mem_access_type()) ? true : false; } diff --git a/gs_patterns_core.cpp b/gs_patterns_core.cpp index b53ba62..96123f2 100644 --- a/gs_patterns_core.cpp +++ b/gs_patterns_core.cpp @@ -231,7 +231,7 @@ void handle_trace_entry(MemPatterns & mp, const InstrAddrAdapter & ia) /*****************************/ if (ia.is_other_instr()) { - iw.iaddr = ia.get_address(); + iw.iaddr = ia.get_iaddr(); // was get_address in orig code -> get_iaddr() //nops trace_info.opcodes++; @@ -242,7 +242,7 @@ void handle_trace_entry(MemPatterns & mp, const InstrAddrAdapter & ia) /***********************/ } else if (ia.is_mem_instr()) { - if (CTA == ia.get_mem_instr_type() && ia.get_iaddr() != ia.get_address()) { + if (CTA == ia.get_mem_instr_type() && ia.get_iaddr() == ia.get_address()) { iw.iaddr = ia.get_iaddr(); trace_info.opcodes++; trace_info.did_opcode = true; @@ -364,7 +364,7 @@ void handle_trace_entry(MemPatterns & mp, const InstrAddrAdapter & ia) //Set window values iw.w_iaddrs[w_rw_idx][w_idx] = iw.iaddr; - iw.w_maddr[w_rw_idx][w_idx][iw.w_cnt[w_rw_idx][w_idx]] = ia.get_iaddr(); + iw.w_maddr[w_rw_idx][w_idx][iw.w_cnt[w_rw_idx][w_idx]] = ia.get_maddr(); iw.w_bytes[w_rw_idx][w_idx] += ia.get_size(); //num access per iaddr in loop @@ -475,7 +475,7 @@ bool handle_2nd_pass_trace_entry(const InstrAddrAdapter & ia, } if (ia.is_other_instr()) { - iaddr = ia.get_address(); + iaddr = ia.get_iaddr(); // was get_address in orig code -> get_iaddr() /***********************/ /** MEM 0x00 and 0x01 **/ @@ -483,9 +483,9 @@ bool handle_2nd_pass_trace_entry(const InstrAddrAdapter & ia, } else if (ia.is_mem_instr()) { - maddr = ia.get_iaddr(); + maddr = ia.get_maddr(); - if (CTA == ia.get_mem_instr_type() && ia.get_address() != ia.get_iaddr()) { + if (CTA == ia.get_mem_instr_type() && ia.get_address() == ia.get_iaddr()) { iaddr = ia.get_iaddr(); } diff --git a/gsnv_patterns.h b/gsnv_patterns.h index a7292cd..96287e9 
100644 --- a/gsnv_patterns.h +++ b/gsnv_patterns.h @@ -161,9 +161,10 @@ class InstrAddrAdapterForNV : public InstrAddrAdapter virtual inline size_t get_size() const override { return _te.size; } // in bytes virtual inline addr_t get_address() const override { return _te.addr; } - virtual inline unsigned short get_type() const override { return _te.type; } // must be 0 for GATHER, 1 for SCATTER !! virtual inline addr_t get_iaddr () const override { return _te.base_addr; } - virtual inline int64_t min_size() const override { return 256; } + virtual inline addr_t get_maddr () const override { return _te.addr; } // was _base_addr + virtual inline unsigned short get_type() const override { return _te.type; } // must be 0 for GATHER, 1 for SCATTER !! + virtual inline int64_t min_size() const override { return 256; } // 32 * 8 bytes virtual void output(std::ostream & os) const override { os << "InstrAddrAdapterForNV: trace entry: type: [" << _te.type << "] size: [" << _te.size << "]"; } diff --git a/gspin_patterns.cpp b/gspin_patterns.cpp index a577a2e..dc5d3ee 100644 --- a/gspin_patterns.cpp +++ b/gspin_patterns.cpp @@ -103,8 +103,9 @@ class InstrAddrAdapterForPin : public InstrAddrAdapter virtual size_t get_size() const override { return _te.size; } virtual addr_t get_address() const override { return _te.addr; } + virtual addr_t get_iaddr() const override { return _te.addr; } + virtual addr_t get_maddr() const override { return _te.addr / _te.size; } virtual unsigned short get_type() const override { return _te.type; } // must be 0 for GATHER, 1 for SCATTER !! 
- virtual addr_t get_iaddr() const override { return _te.addr / _te.size; } virtual int64_t min_size() const { return VBYTES; } virtual void output(std::ostream & os) const override { From 7a57d3618fcae0dc95536467f0d6d456fdfe2041 Mon Sep 17 00:00:00 2001 From: christopher Date: Mon, 22 Apr 2024 23:14:08 -0400 Subject: [PATCH 52/76] When not using vector for second pass, and not writing out trace file still need to setup temp dump file for second pass use. --- gsnv_patterns.h | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/gsnv_patterns.h b/gsnv_patterns.h index 96287e9..fed881b 100644 --- a/gsnv_patterns.h +++ b/gsnv_patterns.h @@ -13,6 +13,7 @@ #include #include +#include #include #include #include @@ -299,8 +300,8 @@ class MemPatternsForNV : public MemPatterns uint64_t _traces_written = 0; uint64_t _traces_handled = 0; - bool _write_trace_file = false; - bool _first_access = true; + bool _write_trace_file = false; + bool _first_trace_seen = false; /* The output stream used to temporarily hold raw trace warp data (mem_access_t) before being writen to _trace_out_file_name */ std::fstream _ofs_tmp; @@ -630,7 +631,7 @@ void MemPatternsForNV::process_second_pass() printf("\nSecond pass to fill gather / scatter subtraces\n"); fflush(stdout); -#if USE_VECTOR_FOR_SECOND_PASS +#ifdef USE_VECTOR_FOR_SECOND_PASS for (auto itr = _traces.begin(); itr != _traces.end(); ++itr) { InstrAddrAdapter & ia = *itr; @@ -727,10 +728,18 @@ void MemPatternsForNV::handle_cta_memory_access(const mem_access_t * ma) { if (exceed_max_count()) { return; } - if (_first_access) { - _first_access = false; + if (!_first_trace_seen) { + _first_trace_seen = true; printf("First pass to find top gather / scatter iaddresses\n"); fflush(stdout); + +#ifndef USE_VECTOR_FOR_SECOND_PASS + // Open an output file for dumping temp data used exclusively by second_pass + _tmp_dump_file = tmpfile(); + if (!_tmp_dump_file) { + throw GSFileError("Unable to create a 
temp file for second pass"); + } +#endif } if (_write_trace_file && _ofs_tmp.is_open()) { @@ -837,13 +846,6 @@ void MemPatternsForNV::set_trace_out_file(const std::string & trace_out_file_nam throw GSFileError("Unable to open " + _trace_out_file_name + " for writing"); } -#ifndef USE_VECTOR_FOR_SECOND_PASS - // Open an output file for dumping temp data used exclusively by second_pass - _tmp_dump_file = std::tmpfile(); - if (!_tmp_dump_file) { - throw GSFileError("Unable to create a temp file for second pass"); - } -#endif _write_trace_file = true; } catch (const std::exception & ex) @@ -855,7 +857,7 @@ void MemPatternsForNV::set_trace_out_file(const std::string & trace_out_file_nam void MemPatternsForNV::write_trace_out_file() { - if (!_write_trace_file || _first_access) return; + if (!_write_trace_file || !_first_trace_seen) return; /// TODO: COMPRESS trace_file try From 99f594d0537b199a9b8237701c76f5c42ede0279 Mon Sep 17 00:00:00 2001 From: christopher Date: Wed, 24 Apr 2024 01:03:19 -0400 Subject: [PATCH 53/76] Combined runners into one for simplification, moved MemPatternsForPin and MemPatternsForNV into separate namespaces to avoid namespace conflicts. Renamed a few files to be more descriptive. Deleted old gsnv_test as its no longer needed. 
--- CMakeLists.txt | 14 +- gs_patterns.h | 372 +++--- gs_patterns_core.cpp | 854 ++++++------ gs_patterns_core.h | 38 +- gs_patterns_main.cpp | 119 ++ gsnv_patterns.cpp | 801 ++++++++++++ gsnv_patterns.h | 1140 +++-------------- gsnv_test.cpp | 69 - gspin_patterns.cpp | 198 +-- gspin_patterns.h | 134 ++ .../{nvgs_trace => gsnv_trace}/common.h | 2 + .../gsnv_trace.cu} | 3 + .../inject_funcs.cu | 0 utils.cpp | 17 + utils.h | 8 +- 15 files changed, 1953 insertions(+), 1816 deletions(-) create mode 100644 gs_patterns_main.cpp create mode 100644 gsnv_patterns.cpp delete mode 100644 gsnv_test.cpp create mode 100644 gspin_patterns.h rename nvbit_tracing/{nvgs_trace => gsnv_trace}/common.h (99%) rename nvbit_tracing/{nvgs_trace/nvgs_trace.cu => gsnv_trace/gsnv_trace.cu} (99%) rename nvbit_tracing/{nvgs_trace => gsnv_trace}/inject_funcs.cu (100%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9c3e908..265c128 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -14,18 +14,16 @@ add_library(gs_patterns_core SHARED gs_patterns.h gs_patterns_core.h gs_patterns_core.cpp - gsnv_patterns.h - gsnv_test.cpp -# nv_opcodes.h ) add_executable( gs_patterns - gspin_patterns.cpp) - -add_executable(gsnv_test gsnv_test.cpp) + gs_patterns_main.cpp + gsnv_patterns.cpp + gspin_patterns.cpp + gspin_patterns.h + gsnv_patterns.h +) target_link_libraries(gs_patterns gs_patterns_core) -target_link_libraries(gsnv_test gs_patterns_core) - set(CMAKE_CXX_STANDARD_LIBRARIES "-lm -lz ${CMAKE_CXX_STANDARD_LIBRARIES}") diff --git a/gs_patterns.h b/gs_patterns.h index 6ce3299..964c822 100644 --- a/gs_patterns.h +++ b/gs_patterns.h @@ -32,223 +32,229 @@ #define MAX_LINE_LENGTH 1024 -typedef uintptr_t addr_t; -typedef enum { GATHER=0, SCATTER } mem_access_type; -typedef enum { VECTOR=0, CTA } mem_instr_type; - -class GSError : public std::exception +namespace gs_patterns { -public: - GSError (const std::string & reason) : _reason(reason) { } - ~GSError() {} + //using namespace std; - const char * what() 
const noexcept override { return _reason.c_str(); } -private: - std::string _reason; -}; + typedef uintptr_t addr_t; + typedef enum { GATHER=0, SCATTER } mem_access_type; + typedef enum { VECTOR=0, CTA } mem_instr_type; -class GSFileError : public GSError -{ -public: - GSFileError (const std::string & reason) : GSError(reason) { } - ~GSFileError() {} -}; + class GSError : public std::exception + { + public: + GSError (const std::string & reason) : _reason(reason) { } + ~GSError() {} -class GSDataError : public GSError -{ -public: - GSDataError (const std::string & reason) : GSError(reason) { } - ~GSDataError() {} -}; + const char * what() const noexcept override { return _reason.c_str(); } + private: + std::string _reason; + }; -class GSAllocError : public GSError -{ -public: - GSAllocError (const std::string & reason) : GSError(reason) { } - ~GSAllocError() {} -}; + class GSFileError : public GSError + { + public: + GSFileError (const std::string & reason) : GSError(reason) { } + ~GSFileError() {} + }; -class InstrAddrAdapter -{ -public: - InstrAddrAdapter() { } - virtual ~InstrAddrAdapter() { } + class GSDataError : public GSError + { + public: + GSDataError (const std::string & reason) : GSError(reason) { } + ~GSDataError() {} + }; - virtual bool is_valid() const = 0; - virtual bool is_mem_instr() const = 0; - virtual bool is_other_instr() const = 0; - virtual mem_access_type get_mem_access_type() const = 0; - virtual mem_instr_type get_mem_instr_type() const = 0; + class GSAllocError : public GSError + { + public: + GSAllocError (const std::string & reason) : GSError(reason) { } + ~GSAllocError() {} + }; - virtual size_t get_size() const = 0; - virtual addr_t get_address() const = 0; - virtual addr_t get_iaddr() const = 0; - virtual addr_t get_maddr() const = 0; - virtual unsigned short get_type() const = 0; // must be 0 for GATHER, 1 for SCATTER !! 
- virtual int64_t min_size() const = 0; + class InstrAddrAdapter + { + public: + InstrAddrAdapter() { } + virtual ~InstrAddrAdapter() { } - virtual bool is_gather() const - { return (is_valid() && is_mem_instr() && GATHER == get_mem_access_type()) ? true : false; } + virtual bool is_valid() const = 0; + virtual bool is_mem_instr() const = 0; + virtual bool is_other_instr() const = 0; + virtual mem_access_type get_mem_access_type() const = 0; + virtual mem_instr_type get_mem_instr_type() const = 0; - virtual bool is_scatter() const - { return (is_valid() && is_mem_instr() && SCATTER == get_mem_access_type()) ? true : false; } + virtual size_t get_size() const = 0; + virtual addr_t get_address() const = 0; + virtual addr_t get_iaddr() const = 0; + virtual addr_t get_maddr() const = 0; + virtual unsigned short get_type() const = 0; // must be 0 for GATHER, 1 for SCATTER !! + virtual int64_t min_size() const = 0; - virtual void output(std::ostream & os) const = 0; -}; + virtual bool is_gather() const + { return (is_valid() && is_mem_instr() && GATHER == get_mem_access_type()) ? true : false; } -std::ostream & operator<<(std::ostream & os, const InstrAddrAdapter & ia); + virtual bool is_scatter() const + { return (is_valid() && is_mem_instr() && SCATTER == get_mem_access_type()) ? 
true : false; } + virtual void output(std::ostream & os) const = 0; + }; -class Metrics -{ -public: - Metrics(mem_access_type mType) : _mType(mType) + std::ostream & operator<<(std::ostream & os, const InstrAddrAdapter & ia); + + + class Metrics { - /// TODO: Convert to new/free - for (int j = 0; j < NTOP; j++) { - patterns[j] = (int64_t *) calloc(PSIZE, sizeof(int64_t)); - if (patterns[j] == NULL) { - throw GSAllocError("Could not allocate patterns for " + type_as_string() + "!"); + public: + Metrics(mem_access_type mType) : _mType(mType) + { + /// TODO: Convert to new/delete + for (int j = 0; j < NTOP; j++) { + patterns[j] = (int64_t *) calloc(PSIZE, sizeof(int64_t)); + if (patterns[j] == NULL) { + throw GSAllocError("Could not allocate patterns for " + type_as_string() + "!"); + } } } - } - ~Metrics() - { - /// TODO: Convert to new/free - for (int i = 0; i < NTOP; i++) { - free(patterns[i]); + ~Metrics() + { + /// TODO: Convert to new/delete + for (int i = 0; i < NTOP; i++) { + free(patterns[i]); + } + + delete [] srcline; } - delete [] srcline; - } + Metrics(const Metrics &) = delete; + Metrics & operator=(const Metrics & right) = delete; + + std::string type_as_string() { return !_mType ? "GATHER" : "SCATTER"; } + std::string getName() { return !_mType ? "Gather" : "Scatter"; } + std::string getShortName() { return !_mType ? "G" : "S"; } - Metrics(const Metrics &) = delete; - Metrics & operator=(const Metrics & right) = delete; + auto get_srcline() { return srcline[_mType]; } - std::string type_as_string() { return !_mType ? "GATHER" : "SCATTER"; } - std::string getName() { return !_mType ? "Gather" : "Scatter"; } - std::string getShortName() { return !_mType ? 
"G" : "S"; } + int ntop = 0; + double cnt = 0.0; + int offset[NTOP] = {0}; - auto get_srcline() { return srcline[_mType]; } + addr_t tot[NTOP] = {0}; + addr_t top[NTOP] = {0}; + addr_t top_idx[NTOP] = {0}; - int ntop = 0; - double cnt = 0.0; - int offset[NTOP] = {0}; + int64_t* patterns[NTOP] = {0}; - addr_t tot[NTOP] = {0}; - addr_t top[NTOP] = {0}; - addr_t top_idx[NTOP] = {0}; + private: + char (*srcline)[NGS][MAX_LINE_LENGTH] = new char[2][NGS][MAX_LINE_LENGTH]; - int64_t* patterns[NTOP] = {0}; + mem_access_type _mType; + }; -private: - char (*srcline)[NGS][MAX_LINE_LENGTH] = new char[2][NGS][MAX_LINE_LENGTH]; - mem_access_type _mType; -}; + class InstrInfo + { + public: + InstrInfo(mem_access_type mType) : _mType(mType) { } + ~InstrInfo() { + delete [] iaddrs; + delete [] icnt; + delete [] occ; + } + InstrInfo(const InstrInfo &) = delete; + InstrInfo & operator=(const InstrInfo & right) = delete; -class InstrInfo -{ -public: - InstrInfo(mem_access_type mType) : _mType(mType) { } - ~InstrInfo() { - delete [] iaddrs; - delete [] icnt; - delete [] occ; - } - - InstrInfo(const InstrInfo &) = delete; - InstrInfo & operator=(const InstrInfo & right) = delete; - - addr_t* get_iaddrs() { return iaddrs[_mType]; } - int64_t* get_icnt() { return icnt[_mType]; } - int64_t* get_occ() { return occ[_mType]; } - -private: - addr_t (*iaddrs)[NGS] = new addr_t[2][NGS]; - int64_t (*icnt)[NGS] = new int64_t[2][NGS]; - int64_t (*occ)[NGS] = new int64_t[2][NGS]; - - mem_access_type _mType; -}; - -class TraceInfo // Stats -{ -public: - /// TODO: need a reset method to zero out counters - - uint64_t opcodes = 0; - uint64_t opcodes_mem = 0; - uint64_t addrs = 0; - uint64_t other = 0; - int64_t ngs = 0; - int64_t trace_lines = 0; - - bool did_opcode = false; // revist this --------------- - double other_cnt = 0.0; - double gather_score = 0.0; - double gather_occ_avg = 0.0; - double scatter_occ_avg = 0.0; - - uint64_t mcnt = 0; -}; - -class InstrWindow -{ -public: - InstrWindow() { - 
//init window arrays - for (int w = 0; w < 2; w++) { - for (int i = 0; i < IWINDOW; i++) { - w_iaddrs[w][i] = -1; - w_bytes[w][i] = 0; - w_cnt[w][i] = 0; - for (int j = 0; j < VBYTES; j++) - w_maddr[w][i][j] = -1; + addr_t* get_iaddrs() { return iaddrs[_mType]; } + int64_t* get_icnt() { return icnt[_mType]; } + int64_t* get_occ() { return occ[_mType]; } + + private: + addr_t (*iaddrs)[NGS] = new addr_t[2][NGS]; + int64_t (*icnt)[NGS] = new int64_t[2][NGS]; + int64_t (*occ)[NGS] = new int64_t[2][NGS]; + + mem_access_type _mType; + }; + + class TraceInfo // Stats + { + public: + /// TODO: need a reset method to zero out counters + + uint64_t opcodes = 0; + uint64_t opcodes_mem = 0; + uint64_t addrs = 0; + uint64_t other = 0; + int64_t ngs = 0; + int64_t trace_lines = 0; + + bool did_opcode = false; // revist this --------------- + double other_cnt = 0.0; + double gather_score = 0.0; + double gather_occ_avg = 0.0; + double scatter_occ_avg = 0.0; + + uint64_t mcnt = 0; + }; + + class InstrWindow + { + public: + InstrWindow() { + //init window arrays + for (int w = 0; w < 2; w++) { + for (int i = 0; i < IWINDOW; i++) { + w_iaddrs[w][i] = -1; + w_bytes[w][i] = 0; + w_cnt[w][i] = 0; + for (int j = 0; j < VBYTES; j++) + w_maddr[w][i][j] = -1; + } } } - } - ~InstrWindow() { } + ~InstrWindow() { } - InstrWindow(const InstrWindow &) = delete; - InstrWindow & operator=(const InstrWindow & right) = delete; + InstrWindow(const InstrWindow &) = delete; + InstrWindow & operator=(const InstrWindow & right) = delete; - // moved from static storage to instance variables (watch out for stack overflow) - // Revisit and move to heap if an issue - estimate of 2k*3 + 128k - // First dimension is 0=GATHER/1=SCATTER - int64_t w_iaddrs[2][IWINDOW]; - int64_t w_bytes[2][IWINDOW]; - int64_t w_maddr[2][IWINDOW][VBYTES]; - int64_t w_cnt[2][IWINDOW]; + // moved from static storage to instance variables (watch out for stack overflow) + // Revisit and move to heap if an issue - estimate of 2k*3 + 
128k + // First dimension is 0=GATHER/1=SCATTER + int64_t w_iaddrs[2][IWINDOW]; + int64_t w_bytes[2][IWINDOW]; + int64_t w_maddr[2][IWINDOW][VBYTES]; + int64_t w_cnt[2][IWINDOW]; - // State which must be carried with each call to handle a trace - addr_t iaddr; - int64_t maddr_prev; - int64_t maddr; -}; + // State which must be carried with each call to handle a trace + addr_t iaddr; + int64_t maddr_prev; + int64_t maddr; + }; -class MemPatterns -{ -public: - MemPatterns() { } - virtual ~MemPatterns() { }; - - MemPatterns(const MemPatterns &) = delete; - MemPatterns & operator=(const MemPatterns &) = delete; - - virtual void handle_trace_entry(const InstrAddrAdapter & ia) = 0; - virtual void generate_patterns() = 0; - - virtual Metrics & get_metrics(mem_access_type) = 0; - virtual InstrInfo & get_iinfo(mem_access_type) = 0; - - virtual Metrics & get_gather_metrics() = 0; - virtual Metrics & get_scatter_metrics() = 0; - virtual InstrInfo & get_gather_iinfo() = 0; - virtual InstrInfo & get_scatter_iinfo() = 0; - virtual TraceInfo & get_trace_info() = 0; - virtual InstrWindow & get_instr_window() = 0; -}; + class MemPatterns + { + public: + MemPatterns() { } + virtual ~MemPatterns() { }; + + MemPatterns(const MemPatterns &) = delete; + MemPatterns & operator=(const MemPatterns &) = delete; + + virtual void handle_trace_entry(const InstrAddrAdapter & ia) = 0; + virtual void generate_patterns() = 0; + + virtual Metrics & get_metrics(mem_access_type) = 0; + virtual InstrInfo & get_iinfo(mem_access_type) = 0; + + virtual Metrics & get_gather_metrics() = 0; + virtual Metrics & get_scatter_metrics() = 0; + virtual InstrInfo & get_gather_iinfo() = 0; + virtual InstrInfo & get_scatter_iinfo() = 0; + virtual TraceInfo & get_trace_info() = 0; + virtual InstrWindow & get_instr_window() = 0; + }; + +} // namespace gs_patterns diff --git a/gs_patterns_core.cpp b/gs_patterns_core.cpp index 96123f2..df3c7da 100644 --- a/gs_patterns_core.cpp +++ b/gs_patterns_core.cpp @@ -8,550 +8,562 
@@ #include "utils.h" #include "gs_patterns.h" -void translate_iaddr(const std::string & binary, char *source_line, addr_t iaddr) { +namespace gs_patterns +{ +namespace gs_patterns_core +{ + using namespace gs_patterns; - int i = 0; - int ntranslated = 0; - char path[MAX_LINE_LENGTH]; - char cmd[MAX_LINE_LENGTH]; - FILE *fp; + void translate_iaddr(const std::string & binary, char *source_line, addr_t iaddr) { - sprintf(cmd, "addr2line -e %s 0x%lx", binary.c_str(), iaddr); + int i = 0; + int ntranslated = 0; + char path[MAX_LINE_LENGTH]; + char cmd[MAX_LINE_LENGTH]; + FILE *fp; - /* Open the command for reading. */ - fp = popen(cmd, "r"); - if (NULL == fp) { - throw GSError("Failed to run command"); - } + sprintf(cmd, "addr2line -e %s 0x%lx", binary.c_str(), iaddr); - /* Read the output a line at a time - output it. */ - while (fgets(path, sizeof(path), fp) != NULL) { - strcpy(source_line, path); - source_line[strcspn(source_line, "\n")] = 0; - } + /* Open the command for reading. */ + fp = popen(cmd, "r"); + if (NULL == fp) { + throw GSError("Failed to run command"); + } - /* close */ - pclose(fp); + /* Read the output a line at a time - output it. 
*/ + while (fgets(path, sizeof(path), fp) != NULL) { + strcpy(source_line, path); + source_line[strcspn(source_line, "\n")] = 0; + } - return; -} + /* close */ + pclose(fp); + return; + } -static void create_metrics_file(FILE *fp, FILE *fp2, const std::string & file_prefix, Metrics & target_metrics, bool & first_spatter) -{ - int i = 0; - int j = 0; - //Create stride histogram and create spatter - int sidx; - int unique_strides; - int64_t idx, pidx; - int64_t n_stride[1027]; - double outbounds; + static void create_metrics_file(FILE *fp, FILE *fp2, const std::string & file_prefix, Metrics & target_metrics, bool & first_spatter) + { + int i = 0; + int j = 0; - if (file_prefix.empty()) throw GSFileError ("Empty file prefix provided."); + //Create stride histogram and create spatter + int sidx; + int unique_strides; + int64_t idx, pidx; + int64_t n_stride[1027]; + double outbounds; - if (first_spatter) printf("\n"); + if (file_prefix.empty()) throw GSFileError ("Empty file prefix provided."); - printf("\n"); - for (i = 0; i < target_metrics.ntop; i++) { - printf("***************************************************************************************\n"); + if (first_spatter) printf("\n"); - unique_strides = 0; - for (j = 0; j < 1027; j++) - n_stride[j] = 0; + printf("\n"); + for (i = 0; i < target_metrics.ntop; i++) { + printf("***************************************************************************************\n"); - for (j = 1; j < target_metrics.offset[i]; j++) { - sidx = target_metrics.patterns[i][j] - target_metrics.patterns[i][j - 1] + 513; - sidx = (sidx < 1) ? 0 : sidx; - sidx = (sidx > 1025) ? 1026 : sidx; - n_stride[sidx]++; - } + unique_strides = 0; + for (j = 0; j < 1027; j++) + n_stride[j] = 0; - for (j = 0; j < 1027; j++) { - if (n_stride[j] > 0) { - unique_strides++; + for (j = 1; j < target_metrics.offset[i]; j++) { + sidx = target_metrics.patterns[i][j] - target_metrics.patterns[i][j - 1] + 513; + sidx = (sidx < 1) ? 
0 : sidx; + sidx = (sidx > 1025) ? 1026 : sidx; + n_stride[sidx]++; } - } - outbounds = (double) (n_stride[0] + n_stride[1026]) / (double) target_metrics.offset[i]; + for (j = 0; j < 1027; j++) { + if (n_stride[j] > 0) { + unique_strides++; + } + } - //if ( ( (unique_strides > NSTRIDES) || (outbounds > OUTTHRESH) ) && (gather_offset[i] > USTRIDES ) ){ - if (1) { + outbounds = (double) (n_stride[0] + n_stride[1026]) / (double) target_metrics.offset[i]; - //create a binary file - FILE *fp_bin; - std::string bin_name = file_prefix + ".sbin"; - printf("%s\n", bin_name.c_str()); - fp_bin = fopen(bin_name.c_str(), "w"); - if (NULL == fp_bin) { - throw GSFileError("Could not open " + std::string(bin_name) + "!"); - } + //if ( ( (unique_strides > NSTRIDES) || (outbounds > OUTTHRESH) ) && (gather_offset[i] > USTRIDES ) ){ + if (1) { - printf("%sIADDR -- %p\n", target_metrics.getShortName().c_str(), (void*) target_metrics.top[i]); - printf("SRCLINE -- %s\n", target_metrics.get_srcline()[target_metrics.top_idx[i]]); - printf("%s %c -- %6.3f%c (512-bit chunks)\n", target_metrics.type_as_string().c_str(), - '%', 100.0 * (double) target_metrics.tot[i] / target_metrics.cnt, '%'); - printf("NDISTS -- %ld\n", (long int)target_metrics.offset[i]); - - int64_t nlcnt = 0; - for (j = 0; j < target_metrics.offset[i]; j++) { - - if (j < 39) { - printf("%10ld ", target_metrics.patterns[i][j]); - fflush(stdout); - if (0 == (++nlcnt % 13)) - printf("\n"); - - } else if (j >= (target_metrics.offset[i] - 39)) { - printf("%10ld ", target_metrics.patterns[i][j]); - fflush(stdout); - if (0 == (++nlcnt % 13)) - printf("\n"); - - } else if (39 == j) - printf("...\n"); - } - printf("\n"); - printf("DIST HISTOGRAM --\n"); + //create a binary file + FILE *fp_bin; + std::string bin_name = file_prefix + ".sbin"; + printf("%s\n", bin_name.c_str()); + fp_bin = fopen(bin_name.c_str(), "w"); + if (NULL == fp_bin) { + throw GSFileError("Could not open " + std::string(bin_name) + "!"); + } - for (j = 0; j < 
1027; j++) { - if (n_stride[j] > 0) { - if (0 == j) - printf("%6s: %ld\n", "< -512", n_stride[j]); - else if (1026 == j) - printf("%6s: %ld\n", "> 512", n_stride[j]); - else - printf("%6d: %ld\n", j - 513, n_stride[j]); + printf("%sIADDR -- %p\n", target_metrics.getShortName().c_str(), (void*) target_metrics.top[i]); + printf("SRCLINE -- %s\n", target_metrics.get_srcline()[target_metrics.top_idx[i]]); + printf("%s %c -- %6.3f%c (512-bit chunks)\n", target_metrics.type_as_string().c_str(), + '%', 100.0 * (double) target_metrics.tot[i] / target_metrics.cnt, '%'); + printf("NDISTS -- %ld\n", (long int)target_metrics.offset[i]); + + int64_t nlcnt = 0; + for (j = 0; j < target_metrics.offset[i]; j++) { + + if (j < 39) { + printf("%10ld ", target_metrics.patterns[i][j]); + fflush(stdout); + if (0 == (++nlcnt % 13)) + printf("\n"); + + } else if (j >= (target_metrics.offset[i] - 39)) { + printf("%10ld ", target_metrics.patterns[i][j]); + fflush(stdout); + if (0 == (++nlcnt % 13)) + printf("\n"); + + } else if (39 == j) + printf("...\n"); + } + printf("\n"); + printf("DIST HISTOGRAM --\n"); + + for (j = 0; j < 1027; j++) { + if (n_stride[j] > 0) { + if (0 == j) + printf("%6s: %ld\n", "< -512", n_stride[j]); + else if (1026 == j) + printf("%6s: %ld\n", "> 512", n_stride[j]); + else + printf("%6d: %ld\n", j - 513, n_stride[j]); + } } - } - if (first_spatter) { - first_spatter = false; - fprintf(fp, " {\"kernel\":\"%s\", \"pattern\":[", target_metrics.getName().c_str()); - } else { - fprintf(fp, ",\n {\"kernel\":\"%s\", \"pattern\":[", target_metrics.getName().c_str()); - } + if (first_spatter) { + first_spatter = false; + fprintf(fp, " {\"kernel\":\"%s\", \"pattern\":[", target_metrics.getName().c_str()); + } else { + fprintf(fp, ",\n {\"kernel\":\"%s\", \"pattern\":[", target_metrics.getName().c_str()); + } - fwrite(target_metrics.patterns[i], sizeof(uint64_t), target_metrics.offset[i], fp_bin); - fclose(fp_bin); + fwrite(target_metrics.patterns[i], sizeof(uint64_t), 
target_metrics.offset[i], fp_bin); + fclose(fp_bin); - for (j = 0; j < target_metrics.offset[i] - 1; j++) - fprintf(fp, "%ld,", target_metrics.patterns[i][j]); - fprintf(fp, "%ld", target_metrics.patterns[i][target_metrics.offset[i] - 1]); - fprintf(fp, "], \"count\":1}"); + for (j = 0; j < target_metrics.offset[i] - 1; j++) + fprintf(fp, "%ld,", target_metrics.patterns[i][j]); + fprintf(fp, "%ld", target_metrics.patterns[i][target_metrics.offset[i] - 1]); + fprintf(fp, "], \"count\":1}"); - fprintf(fp2, "%s,%s,%ld,%6.3f\n", - target_metrics.get_srcline()[target_metrics.top_idx[i]], target_metrics.getShortName().c_str(), - (long int)target_metrics.offset[i], - 100.0 * (double) target_metrics.tot[i] / target_metrics.cnt); + fprintf(fp2, "%s,%s,%ld,%6.3f\n", + target_metrics.get_srcline()[target_metrics.top_idx[i]], target_metrics.getShortName().c_str(), + (long int)target_metrics.offset[i], + 100.0 * (double) target_metrics.tot[i] / target_metrics.cnt); + } + printf("***************************************************************************************\n\n"); } - printf("***************************************************************************************\n\n"); } -} -void create_spatter_file(MemPatterns & mp, const std::string & file_prefix) -{ - // Create spatter file - FILE *fp, *fp2; + void create_spatter_file(MemPatterns & mp, const std::string & file_prefix) + { + // Create spatter file + FILE *fp, *fp2; - if (file_prefix.empty()) throw GSFileError ("Empty file prefix provided."); + if (file_prefix.empty()) throw GSFileError ("Empty file prefix provided."); - std::string json_name = file_prefix + ".json"; - fp = fopen(json_name.c_str(), "w"); - if (NULL == fp) { - throw GSFileError("Could not open " + json_name + "!"); - } + std::string json_name = file_prefix + ".json"; + fp = fopen(json_name.c_str(), "w"); + if (NULL == fp) { + throw GSFileError("Could not open " + json_name + "!"); + } - std::string gs_info = file_prefix + ".txt"; - fp2 = 
fopen(gs_info.c_str(), "w"); - if (NULL == fp2) { - throw GSFileError("Could not open " + gs_info + "!"); - } + std::string gs_info = file_prefix + ".txt"; + fp2 = fopen(gs_info.c_str(), "w"); + if (NULL == fp2) { + throw GSFileError("Could not open " + gs_info + "!"); + } - //Header - fprintf(fp, "[ "); - fprintf(fp2, "#sourceline, g/s, indices, percentage of g/s in trace\n"); + //Header + fprintf(fp, "[ "); + fprintf(fp2, "#sourceline, g/s, indices, percentage of g/s in trace\n"); - bool first_spatter = true; - create_metrics_file(fp, fp2, file_prefix, mp.get_gather_metrics(), first_spatter); + bool first_spatter = true; + create_metrics_file(fp, fp2, file_prefix, mp.get_gather_metrics(), first_spatter); - create_metrics_file(fp, fp2, file_prefix, mp.get_scatter_metrics(), first_spatter); + create_metrics_file(fp, fp2, file_prefix, mp.get_scatter_metrics(), first_spatter); - //Footer - fprintf(fp, " ]"); - fclose(fp); - fclose(fp2); -} + //Footer + fprintf(fp, " ]"); + fclose(fp); + fclose(fp2); + } -void normalize_stats(Metrics & target_metrics) -{ - //Normalize - int64_t smallest; - for (int i = 0; i < target_metrics.ntop; i++) { - - //Find smallest - smallest = 0; - for (int j = 0; j < target_metrics.offset[i]; j++) { - if (target_metrics.patterns[i][j] < smallest) - smallest = target_metrics.patterns[i][j]; - } + void normalize_stats(Metrics & target_metrics) + { + //Normalize + int64_t smallest; + for (int i = 0; i < target_metrics.ntop; i++) { + + //Find smallest + smallest = 0; + for (int j = 0; j < target_metrics.offset[i]; j++) { + if (target_metrics.patterns[i][j] < smallest) + smallest = target_metrics.patterns[i][j]; + } - smallest *= -1; + smallest *= -1; - //Normalize - for (int j = 0; j < target_metrics.offset[i]; j++) { - target_metrics.patterns[i][j] += smallest; + //Normalize + for (int j = 0; j < target_metrics.offset[i]; j++) { + target_metrics.patterns[i][j] += smallest; + } } } -} - -void handle_trace_entry(MemPatterns & mp, const 
InstrAddrAdapter & ia) -{ - int i, j, k, w = 0; - int w_rw_idx; // Index into instruction window first dimension (RW: 0=Gather(R) or 1=Scatter(W)) - int w_idx; - int gs; - - auto & trace_info = mp.get_trace_info(); - auto & gather_iinfo = mp.get_gather_iinfo(); - auto & scatter_iinfo = mp.get_scatter_iinfo(); - auto & gather_metrics = mp.get_gather_metrics(); - auto & scatter_metrics = mp.get_scatter_metrics(); - auto & iw = mp.get_instr_window(); - - if (!ia.is_valid()) { - std::ostringstream os; - os << "Invalid " << ia; - throw GSDataError(os.str()); - } - /*****************************/ - /** INSTR 0xa-0x10 and 0x1e **/ - /*****************************/ - if (ia.is_other_instr()) { - - iw.iaddr = ia.get_iaddr(); // was get_address in orig code -> get_iaddr() + void handle_trace_entry(MemPatterns & mp, const InstrAddrAdapter & ia) + { + int i, j, k, w = 0; + int w_rw_idx; // Index into instruction window first dimension (RW: 0=Gather(R) or 1=Scatter(W)) + int w_idx; + int gs; + + auto & trace_info = mp.get_trace_info(); + auto & gather_iinfo = mp.get_gather_iinfo(); + auto & scatter_iinfo = mp.get_scatter_iinfo(); + auto & gather_metrics = mp.get_gather_metrics(); + auto & scatter_metrics = mp.get_scatter_metrics(); + auto & iw = mp.get_instr_window(); + + if (!ia.is_valid()) { + std::ostringstream os; + os << "Invalid " << ia; + throw GSDataError(os.str()); + } - //nops - trace_info.opcodes++; - trace_info.did_opcode = true; + /*****************************/ + /** INSTR 0xa-0x10 and 0x1e **/ + /*****************************/ + if (ia.is_other_instr()) { - /***********************/ - /** MEM 0x00 and 0x01 **/ - /***********************/ - } else if (ia.is_mem_instr()) { + iw.iaddr = ia.get_iaddr(); // was get_address in orig code -> get_iaddr() - if (CTA == ia.get_mem_instr_type() && ia.get_iaddr() == ia.get_address()) { - iw.iaddr = ia.get_iaddr(); + //nops trace_info.opcodes++; trace_info.did_opcode = true; - } - - w_rw_idx = ia.get_type(); - //printf("M 
DRTRACE -- iaddr: %016lx addr: %016lx cl_start: %d bytes: %d\n", - // iw.iaddr, ia.get_address(), ia.get_address() % 64, ia.get_size()); + /***********************/ + /** MEM 0x00 and 0x01 **/ + /***********************/ + } else if (ia.is_mem_instr()) { - if ((++trace_info.mcnt % PERSAMPLE) == 0) { -#if SAMPLE - break; -#endif - printf("."); - fflush(stdout); - } + if (CTA == ia.get_mem_instr_type() && ia.get_iaddr() == ia.get_address()) { + iw.iaddr = ia.get_iaddr(); + trace_info.opcodes++; + trace_info.did_opcode = true; + } - //is iaddr in window - w_idx = -1; - for (i = 0; i < IWINDOW; i++) { + w_rw_idx = ia.get_type(); - //new iaddr - if (iw.w_iaddrs[w_rw_idx][i] == -1) { - w_idx = i; - break; + //printf("M DRTRACE -- iaddr: %016lx addr: %016lx cl_start: %d bytes: %d\n", + // iw.iaddr, ia.get_address(), ia.get_address() % 64, ia.get_size()); - //iaddr exists - } else if (iw.w_iaddrs[w_rw_idx][i] == iw.iaddr) { - w_idx = i; + if ((++trace_info.mcnt % PERSAMPLE) == 0) { + #if SAMPLE break; + #endif + printf("."); + fflush(stdout); } - } - //new window - if ((w_idx == -1) || (iw.w_bytes[w_rw_idx][w_idx] >= ia.min_size()) || // was >= VBYTES - (iw.w_cnt[w_rw_idx][w_idx] >= ia.min_size())) { // was >= VBYTES + //is iaddr in window + w_idx = -1; + for (i = 0; i < IWINDOW; i++) { - /***************************/ - //do analysis - /***************************/ - //i = each window - for (w = 0; w < 2; w++) { // 2 + //new iaddr + if (iw.w_iaddrs[w_rw_idx][i] == -1) { + w_idx = i; + break; - for (i = 0; i < IWINDOW; i++) { // 1024 + //iaddr exists + } else if (iw.w_iaddrs[w_rw_idx][i] == iw.iaddr) { + w_idx = i; + break; + } + } - if (iw.w_iaddrs[w][i] == -1) - break; + //new window + if ((w_idx == -1) || (iw.w_bytes[w_rw_idx][w_idx] >= ia.min_size()) || // was >= VBYTES + (iw.w_cnt[w_rw_idx][w_idx] >= ia.min_size())) { // was >= VBYTES - int byte = iw.w_bytes[w][i] / iw.w_cnt[w][i]; + /***************************/ + //do analysis + /***************************/ + //i = 
each window + for (w = 0; w < 2; w++) { // 2 - //First pass - //Determine - //gather/scatter? - gs = -1; - for (j = 0; j < iw.w_cnt[w][i]; j++) { + for (i = 0; i < IWINDOW; i++) { // 1024 - //address and cl - iw.maddr = iw.w_maddr[w][i][j]; - assert(iw.maddr > -1); + if (iw.w_iaddrs[w][i] == -1) + break; - //previous addr - if (j == 0) - iw.maddr_prev = iw.maddr - 1; + int byte = iw.w_bytes[w][i] / iw.w_cnt[w][i]; - //gather / scatter - if (iw.maddr != iw.maddr_prev) { - if ((gs == -1) && (abs(iw.maddr - iw.maddr_prev) > 1)) // ? > 1 stride (non-contiguous) <-------------------- - gs = w; - } - iw.maddr_prev = iw.maddr; - } + //First pass + //Determine + //gather/scatter? + gs = -1; + for (j = 0; j < iw.w_cnt[w][i]; j++) { - // Update other_cnt - if (gs == -1) trace_info.other_cnt += iw.w_cnt[w][i]; + //address and cl + iw.maddr = iw.w_maddr[w][i][j]; + assert(iw.maddr > -1); - // GATHER or SCATTER handling - if (gs == 0 || gs == 1) { - InstrInfo & target_iinfo = (gs == 0) ? gather_iinfo : scatter_iinfo; + //previous addr + if (j == 0) + iw.maddr_prev = iw.maddr - 1; - if (gs == 0) { - trace_info.gather_occ_avg += iw.w_cnt[w][i]; - gather_metrics.cnt += 1.0; - } - else { - trace_info.scatter_occ_avg += iw.w_cnt[w][i]; - scatter_metrics.cnt += 1.0; + //gather / scatter + if (iw.maddr != iw.maddr_prev) { + if ((gs == -1) && (abs(iw.maddr - iw.maddr_prev) > 1)) // ? > 1 stride (non-contiguous) <-------------------- + gs = w; + } + iw.maddr_prev = iw.maddr; } - for (k = 0; k < NGS; k++) { - if (target_iinfo.get_iaddrs()[k] == 0) { - target_iinfo.get_iaddrs()[k] = iw.w_iaddrs[w][i]; - (target_iinfo.get_icnt()[k])++; - target_iinfo.get_occ()[k] += iw.w_cnt[w][i]; - break; + // Update other_cnt + if (gs == -1) trace_info.other_cnt += iw.w_cnt[w][i]; + + // GATHER or SCATTER handling + if (gs == 0 || gs == 1) { + InstrInfo & target_iinfo = (gs == 0) ? 
gather_iinfo : scatter_iinfo; + + if (gs == 0) { + trace_info.gather_occ_avg += iw.w_cnt[w][i]; + gather_metrics.cnt += 1.0; + } + else { + trace_info.scatter_occ_avg += iw.w_cnt[w][i]; + scatter_metrics.cnt += 1.0; } - if (target_iinfo.get_iaddrs()[k] == iw.w_iaddrs[w][i]) { - (target_iinfo.get_icnt()[k])++; - target_iinfo.get_occ()[k] += iw.w_cnt[w][i]; - break; + for (k = 0; k < NGS; k++) { + if (target_iinfo.get_iaddrs()[k] == 0) { + target_iinfo.get_iaddrs()[k] = iw.w_iaddrs[w][i]; + (target_iinfo.get_icnt()[k])++; + target_iinfo.get_occ()[k] += iw.w_cnt[w][i]; + break; + } + + if (target_iinfo.get_iaddrs()[k] == iw.w_iaddrs[w][i]) { + (target_iinfo.get_icnt()[k])++; + target_iinfo.get_occ()[k] += iw.w_cnt[w][i]; + break; + } } } - } - } //WINDOW i + } //WINDOW i - w_idx = 0; + w_idx = 0; - //reset windows - for (i = 0; i < IWINDOW; i++) { - iw.w_iaddrs[w][i] = -1; - iw.w_bytes[w][i] = 0; - iw.w_cnt[w][i] = 0; - for (j = 0; j < VBYTES; j++) - iw.w_maddr[w][i][j] = -1; - } - } // rw w - } //analysis + //reset windows + for (i = 0; i < IWINDOW; i++) { + iw.w_iaddrs[w][i] = -1; + iw.w_bytes[w][i] = 0; + iw.w_cnt[w][i] = 0; + for (j = 0; j < VBYTES; j++) + iw.w_maddr[w][i][j] = -1; + } + } // rw w + } //analysis - //Set window values - iw.w_iaddrs[w_rw_idx][w_idx] = iw.iaddr; - iw.w_maddr[w_rw_idx][w_idx][iw.w_cnt[w_rw_idx][w_idx]] = ia.get_maddr(); - iw.w_bytes[w_rw_idx][w_idx] += ia.get_size(); + //Set window values + iw.w_iaddrs[w_rw_idx][w_idx] = iw.iaddr; + iw.w_maddr[w_rw_idx][w_idx][iw.w_cnt[w_rw_idx][w_idx]] = ia.get_maddr(); + iw.w_bytes[w_rw_idx][w_idx] += ia.get_size(); - //num access per iaddr in loop - iw.w_cnt[w_rw_idx][w_idx]++; + //num access per iaddr in loop + iw.w_cnt[w_rw_idx][w_idx]++; - if (trace_info.did_opcode) { + if (trace_info.did_opcode) { - trace_info.opcodes_mem++; - trace_info.addrs++; - trace_info.did_opcode = false; + trace_info.opcodes_mem++; + trace_info.addrs++; + trace_info.did_opcode = false; + } else { + trace_info.addrs++; + 
} + + /***********************/ + /** SOMETHING ELSE **/ + /***********************/ } else { - trace_info.addrs++; + trace_info.other++; } - /***********************/ - /** SOMETHING ELSE **/ - /***********************/ - } else { - trace_info.other++; + trace_info.trace_lines++; } - trace_info.trace_lines++; -} + void display_stats(MemPatterns & mp) + { + printf("\n RESULTS \n"); + + printf("DRTRACE STATS\n"); + printf("DRTRACE LINES: %16lu\n", mp.get_trace_info().trace_lines); + printf("OPCODES: %16lu\n", mp.get_trace_info().opcodes); + printf("MEMOPCODES: %16lu\n", mp.get_trace_info().opcodes_mem); + printf("LOAD/STORES: %16lu\n", mp.get_trace_info().addrs); + printf("OTHER: %16lu\n", mp.get_trace_info().other); + + printf("\n"); + + printf("GATHER/SCATTER STATS: \n"); + printf("LOADS per GATHER: %16.3f\n", mp.get_trace_info().gather_occ_avg); + printf("STORES per SCATTER: %16.3f\n", mp.get_trace_info().scatter_occ_avg); + printf("GATHER COUNT: %16.3f (log2)\n", log(mp.get_gather_metrics().cnt) / log(2.0)); + printf("SCATTER COUNT: %16.3f (log2)\n", log(mp.get_scatter_metrics().cnt) / log(2.0)); + printf("OTHER COUNT: %16.3f (log2)\n", log(mp.get_trace_info().other_cnt) / log(2.0)); + } -void display_stats(MemPatterns & mp) -{ - printf("\n RESULTS \n"); - - printf("DRTRACE STATS\n"); - printf("DRTRACE LINES: %16lu\n", mp.get_trace_info().trace_lines); - printf("OPCODES: %16lu\n", mp.get_trace_info().opcodes); - printf("MEMOPCODES: %16lu\n", mp.get_trace_info().opcodes_mem); - printf("LOAD/STORES: %16lu\n", mp.get_trace_info().addrs); - printf("OTHER: %16lu\n", mp.get_trace_info().other); - - printf("\n"); - - printf("GATHER/SCATTER STATS: \n"); - printf("LOADS per GATHER: %16.3f\n", mp.get_trace_info().gather_occ_avg); - printf("STORES per SCATTER: %16.3f\n", mp.get_trace_info().scatter_occ_avg); - printf("GATHER COUNT: %16.3f (log2)\n", log(mp.get_gather_metrics().cnt) / log(2.0)); - printf("SCATTER COUNT: %16.3f (log2)\n", log(mp.get_scatter_metrics().cnt) / 
log(2.0)); - printf("OTHER COUNT: %16.3f (log2)\n", log(mp.get_trace_info().other_cnt) / log(2.0)); -} + int get_top_target(InstrInfo & target_iinfo, Metrics & target_metrics) + { + int target_ntop = 0; + int bestcnt; -int get_top_target(InstrInfo & target_iinfo, Metrics & target_metrics) -{ - int target_ntop = 0; - int bestcnt; + for (int j = 0; j < NTOP; j++) { - for (int j = 0; j < NTOP; j++) { + int bestcnt = 0; + addr_t best_iaddr = 0; + int bestidx = -1; - int bestcnt = 0; - addr_t best_iaddr = 0; - int bestidx = -1; + for (int k = 0; k < NGS; k++) { - for (int k = 0; k < NGS; k++) { + if (target_iinfo.get_icnt()[k] == 0) + continue; - if (target_iinfo.get_icnt()[k] == 0) - continue; + if (target_iinfo.get_iaddrs()[k] == 0) { + break; + } - if (target_iinfo.get_iaddrs()[k] == 0) { - break; + if (target_iinfo.get_icnt()[k] > bestcnt) { + bestcnt = target_iinfo.get_icnt()[k]; + best_iaddr = target_iinfo.get_iaddrs()[k]; + bestidx = k; + } } - if (target_iinfo.get_icnt()[k] > bestcnt) { - bestcnt = target_iinfo.get_icnt()[k]; - best_iaddr = target_iinfo.get_iaddrs()[k]; - bestidx = k; + if (best_iaddr == 0) { + break; + } else { + target_ntop++; + target_metrics.top[j] = best_iaddr; + target_metrics.top_idx[j] = bestidx; + target_metrics.tot[j] = target_iinfo.get_icnt()[bestidx]; + target_iinfo.get_icnt()[bestidx] = 0; + + //printf("%sIADDR -- %016lx: %16lu -- %s\n", target_metrics.getShortName().c_str(), target_metrics.top[j], target_metrics.tot[j], target_metrics.get_srcline()[bestidx]); } } - if (best_iaddr == 0) { - break; - } else { - target_ntop++; - target_metrics.top[j] = best_iaddr; - target_metrics.top_idx[j] = bestidx; - target_metrics.tot[j] = target_iinfo.get_icnt()[bestidx]; - target_iinfo.get_icnt()[bestidx] = 0; - - //printf("%sIADDR -- %016lx: %16lu -- %s\n", target_metrics.getShortName().c_str(), target_metrics.top[j], target_metrics.tot[j], target_metrics.get_srcline()[bestidx]); - } + return target_ntop; } - return target_ntop; -} + bool 
handle_2nd_pass_trace_entry(const InstrAddrAdapter & ia, + Metrics & gather_metrics, Metrics & scatter_metrics, + addr_t & iaddr, int64_t & maddr, uint64_t & mcnt, + addr_t * gather_base, addr_t * scatter_base) + { + int iret = 0; + int i = 0; + + bool breakout = false; + + /*****************************/ + /** INSTR 0xa-0x10 and 0x1e **/ + /*****************************/ + if (!ia.is_valid()) { + std::ostringstream os; + os << "Invalid " << ia; + throw GSDataError(os.str()); + } -bool handle_2nd_pass_trace_entry(const InstrAddrAdapter & ia, - Metrics & gather_metrics, Metrics & scatter_metrics, - addr_t & iaddr, int64_t & maddr, uint64_t & mcnt, - addr_t * gather_base, addr_t * scatter_base) -{ - int iret = 0; - int i = 0; - - bool breakout = false; - - /*****************************/ - /** INSTR 0xa-0x10 and 0x1e **/ - /*****************************/ - if (!ia.is_valid()) { - std::ostringstream os; - os << "Invalid " << ia; - throw GSDataError(os.str()); - } + if (ia.is_other_instr()) { + iaddr = ia.get_iaddr(); // was get_address in orig code -> get_iaddr() - if (ia.is_other_instr()) { - iaddr = ia.get_iaddr(); // was get_address in orig code -> get_iaddr() + /***********************/ + /** MEM 0x00 and 0x01 **/ + /***********************/ + } + else if (ia.is_mem_instr()) { - /***********************/ - /** MEM 0x00 and 0x01 **/ - /***********************/ - } - else if (ia.is_mem_instr()) { + maddr = ia.get_maddr(); - maddr = ia.get_maddr(); + if (CTA == ia.get_mem_instr_type() && ia.get_address() == ia.get_iaddr()) { + iaddr = ia.get_iaddr(); + } - if (CTA == ia.get_mem_instr_type() && ia.get_address() == ia.get_iaddr()) { - iaddr = ia.get_iaddr(); - } + if ((++mcnt % PERSAMPLE) == 0) { + #if SAMPLE + break; + #endif + printf("."); + fflush(stdout); + } - if ((++mcnt % PERSAMPLE) == 0) { -#if SAMPLE - break; -#endif - printf("."); - fflush(stdout); - } + // gather ? + if (GATHER == ia.get_mem_access_type()) { - // gather ? 
- if (GATHER == ia.get_mem_access_type()) { + for (i = 0; i < gather_metrics.ntop; i++) { - for (i = 0; i < gather_metrics.ntop; i++) { + //found it + if (iaddr == gather_metrics.top[i]) { - //found it - if (iaddr == gather_metrics.top[i]) { + if (gather_base[i] == 0) + gather_base[i] = maddr; - if (gather_base[i] == 0) - gather_base[i] = maddr; + //Add index + if (gather_metrics.offset[i] >= PSIZE) { + printf("WARNING: Need to increase PSIZE. Truncating trace...\n"); + breakout = true; + } + //printf("g -- %d % d\n", i, gather_offset[i]); fflush(stdout); + gather_metrics.patterns[i][gather_metrics.offset[i]++] = (int64_t) (maddr - gather_base[i]); - //Add index - if (gather_metrics.offset[i] >= PSIZE) { - printf("WARNING: Need to increase PSIZE. Truncating trace...\n"); - breakout = true; + break; } - //printf("g -- %d % d\n", i, gather_offset[i]); fflush(stdout); - gather_metrics.patterns[i][gather_metrics.offset[i]++] = (int64_t) (maddr - gather_base[i]); - - break; } } - } - // scatter ? - else if (SCATTER == ia.get_mem_access_type()) { + // scatter ? + else if (SCATTER == ia.get_mem_access_type()) { - for (i = 0; i < scatter_metrics.ntop; i++) { + for (i = 0; i < scatter_metrics.ntop; i++) { - //found it - if (iaddr == scatter_metrics.top[i]) { + //found it + if (iaddr == scatter_metrics.top[i]) { - //set base - if (scatter_base[i] == 0) - scatter_base[i] = maddr; + //set base + if (scatter_base[i] == 0) + scatter_base[i] = maddr; - //Add index - if (scatter_metrics.offset[i] >= PSIZE) { - printf("WARNING: Need to increase PSIZE. Truncating trace...\n"); - breakout = true; + //Add index + if (scatter_metrics.offset[i] >= PSIZE) { + printf("WARNING: Need to increase PSIZE. 
Truncating trace...\n"); + breakout = true; + } + scatter_metrics.patterns[i][scatter_metrics.offset[i]++] = (int64_t) (maddr - scatter_base[i]); + break; } - scatter_metrics.patterns[i][scatter_metrics.offset[i]++] = (int64_t) (maddr - scatter_base[i]); - break; } } - } - else { // belt and suspenders, yep = but helps to validate correct logic in children of InstrAddresInfo - throw GSDataError("Unknown Memory Access Type: " + ia.get_mem_access_type()); - } - } // MEM + else { // belt and suspenders, yep = but helps to validate correct logic in children of InstrAddresInfo + throw GSDataError("Unknown Memory Access Type: " + ia.get_mem_access_type()); + } + } // MEM - return breakout; -} + return breakout; + } + +} // namespace gs_patterns_core -std::ostream & operator<<(std::ostream & os, const InstrAddrAdapter & ia) +std::ostream & operator<<(std::ostream & os, const gs_patterns::InstrAddrAdapter & ia) { ia.output(os); return os; } + +} //namespace gs_patterns + + diff --git a/gs_patterns_core.h b/gs_patterns_core.h index b833e23..182f744 100644 --- a/gs_patterns_core.h +++ b/gs_patterns_core.h @@ -6,25 +6,33 @@ #include "gs_patterns.h" -void translate_iaddr(const std::string & binary, char * source_line, addr_t iaddr); +namespace gs_patterns +{ +namespace gs_patterns_core +{ + void translate_iaddr(const std::string &binary, char *source_line, addr_t iaddr); -void handle_trace_entry(MemPatterns & mp, const InstrAddrAdapter & ia); + void handle_trace_entry(MemPatterns &mp, const InstrAddrAdapter &ia); -void display_stats(MemPatterns & mp); + void display_stats(MemPatterns &mp); -int get_top_target(InstrInfo & target_iinfo, Metrics & target_metrics); + int get_top_target(InstrInfo &target_iinfo, Metrics &target_metrics); -void normalize_stats(Metrics & target_metrics); + void normalize_stats(Metrics &target_metrics); -bool handle_2nd_pass_trace_entry(const InstrAddrAdapter & ia, - Metrics & gather_metrics, Metrics & scatter_metrics, - addr_t & iaddr, int64_t & 
maddr, uint64_t & mcnt, - addr_t * gather_base, addr_t * scatter_base); + bool handle_2nd_pass_trace_entry(const InstrAddrAdapter &ia, + Metrics &gather_metrics, Metrics &scatter_metrics, + addr_t &iaddr, int64_t &maddr, uint64_t &mcnt, + addr_t *gather_base, addr_t *scatter_base); -void create_metrics_file(FILE * fp, - FILE * fp2, - const std::string & file_prefix, - Metrics & target_metrics, - bool & first_spatter); + void create_metrics_file(FILE *fp, + FILE *fp2, + const std::string &file_prefix, + Metrics &target_metrics, + bool &first_spatter); -void create_spatter_file(MemPatterns & mp, const std::string & file_prefix); + void create_spatter_file(MemPatterns &mp, const std::string &file_prefix); + +} // gs_patterns_core + +} // gs_patterns diff --git a/gs_patterns_main.cpp b/gs_patterns_main.cpp new file mode 100644 index 0000000..f0fe5f6 --- /dev/null +++ b/gs_patterns_main.cpp @@ -0,0 +1,119 @@ +#include +#include +#include +#include +#include + +#include "gs_patterns.h" +#include "gs_patterns_core.h" +#include "gspin_patterns.h" +#include "gsnv_patterns.h" +#include "utils.h" + +#define NVGS_CONFIG_FILE "NVGS_CONFIG_FILE" + +using namespace gs_patterns; +using namespace gs_patterns::gs_patterns_core; +using namespace gs_patterns::gsnv_patterns; +using namespace gs_patterns::gspin_patterns; + +void usage (const std::string & prog_name) +{ + std::cerr << "Usage: " << prog_name << " | " + prog_name + " [-nv]" << std::endl; +} + +int main(int argc, char **argv) +{ + try + { + bool use_gs_nv = false; + for (int i = 0; i < argc; i++) { + if (std::string(argv[i]) == "-nv") { + use_gs_nv = true; + } + } + + size_t pos = std::string(argv[0]).find_last_of("/"); + std::string prog_name = std::string(argv[0]).substr(pos+1); + + std::unique_ptr mp (use_gs_nv ? 
(MemPatterns *) new MemPatternsForNV : (MemPatterns *) new MemPatternsForPin); + + if (use_gs_nv) + { + if (argc != 3) { + usage(prog_name); + throw GSError("Invalid program arguments"); + } + + MemPatternsForNV mp; + + // nvbit trace file with memory access traces + mp.set_trace_file(argv[1]); + + const char * config_file = std::getenv(NVGS_CONFIG_FILE); + if (config_file) { + mp.set_config_file(config_file); + } + + // File to save nvbit memory accessses to + //mp.set_trace_out_file(mp.get_file_prefix() + ".nvbit.bin"); + + // ----------------- Process Traces ----------------- + + mp.process_traces(); + + mp.write_trace_out_file(); + + // ----------------- Generate Patterns ----------------- + + mp.generate_patterns(); + } + else + { + if (argc != 3) { + usage(prog_name); + throw GSError("Invalid program arguments"); + } + + MemPatternsForPin mp; + + mp.set_trace_file(argv[1]); + mp.set_binary_file(argv[2]); + + // ----------------- Process Traces ----------------- + + mp.process_traces(); + + // ----------------- Generate Patterns ----------------- + + mp.generate_patterns(); + } + } + catch (const GSFileError & ex) + { + std::cerr << "ERROR: " << ex.what() << std::endl; + exit(-1); + } + catch (const GSAllocError & ex) + { + std::cerr << "ERROR: " << ex.what() << std::endl; + exit(-1); + } + catch (const GSDataError & ex) + { + std::cerr << "ERROR: " << ex.what() << std::endl; + exit(1); + } + catch (const GSError & ex) + { + std::cerr << "ERROR: " << ex.what() << std::endl; + exit(1); + } + catch (const std::exception & ex) + { + std::cerr << "ERROR: " << ex.what() << std::endl; + exit(-1); + } + + return 0; +} diff --git a/gsnv_patterns.cpp b/gsnv_patterns.cpp new file mode 100644 index 0000000..c4cca4c --- /dev/null +++ b/gsnv_patterns.cpp @@ -0,0 +1,801 @@ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "gs_patterns.h" +#include 
"gs_patterns_core.h" +#include "gsnv_patterns.h" +#include "utils.h" +#include "nvbit_tracing/gsnv_trace/common.h" + +// Enable to use a vector for storing trace data for use by second pass (if not defined data is stored to a temp file +//#define USE_VECTOR_FOR_SECOND_PASS 1 + +#define HEX(x) \ + "0x" << std::setfill('0') << std::setw(16) << std::hex << (uint64_t)x \ + << std::dec + +namespace gs_patterns +{ +namespace gsnv_patterns +{ + +using namespace gs_patterns::gs_patterns_core; + +int tline_read_header(gzFile fp, trace_header_t * val, trace_header_t **p_val, int *edx) { + + int idx; + + idx = (*edx) / sizeof(trace_header_t); + //first read + if (NULL == *p_val) { + *edx = gzread(fp, val, sizeof(trace_header_t)); + *p_val = val; + } + else if (*p_val == &val[idx]) { + *edx = gzread(fp, val, sizeof(trace_header_t)); + *p_val = val; + } + + if (0 == *edx) + return 0; + + return 1; +} + +int tline_read_maps(gzFile fp, trace_map_entry_t * val, trace_map_entry_t **p_val, int *edx) { + + int idx; + + idx = (*edx) / sizeof(trace_map_entry_t); + //first read + if (NULL == *p_val) { + *edx = gzread(fp, val, sizeof(trace_map_entry_t)); + *p_val = val; + } + else if (*p_val == &val[idx]) { + *edx = gzread(fp, val, sizeof(trace_map_entry_t)); + *p_val = val; + } + + if (0 == *edx) + return 0; + + return 1; +} + +int tline_read(gzFile fp, mem_access_t * val, mem_access_t **p_val, int *edx) { + + int idx; + + idx = (*edx) / sizeof(mem_access_t); + //first read + if (NULL == *p_val) { + *edx = gzread(fp, val, sizeof(mem_access_t) * NBUFS); + *p_val = val; + + } else if (*p_val == &val[idx]) { + *edx = gzread(fp, val, sizeof(mem_access_t) * NBUFS); + *p_val = val; + } + + if (0 == *edx) + return 0; + + return 1; +} + +Metrics & MemPatternsForNV::get_metrics(mem_access_type m) +{ + switch (m) + { + case GATHER : return _metrics.first; + break; + case SCATTER : return _metrics.second; + break; + default: + throw GSError("Unable to get Metrics - Invalid Metrics Type: " + m); + 
} +} + +InstrInfo & MemPatternsForNV::get_iinfo(mem_access_type m) +{ + switch (m) + { + case GATHER : return _iinfo.first; + break; + case SCATTER : return _iinfo.second; + break; + default: + throw GSError("Unable to get InstrInfo - Invalid Metrics Type: " + m); + } +} + +void MemPatternsForNV::handle_trace_entry(const InstrAddrAdapter & ia) +{ + // Call libgs_patterns + gs_patterns_core::handle_trace_entry(*this, ia); + + const InstrAddrAdapterForNV &ianv = dynamic_cast (ia); +#ifdef USE_VECTOR_FOR_SECOND_PASS + _traces.push_back(ianv); +#else + if (std::fwrite(reinterpret_cast(&ianv.get_trace_entry()), sizeof(trace_entry_t), 1, _tmp_dump_file) != 1) + { + throw GSFileError("Write of trace to temp file failed"); + } +#endif +} + +void MemPatternsForNV::generate_patterns() +{ + // ----------------- Update Source Lines ----------------- + + update_source_lines(); + + // ----------------- Update Metrics ----------------- + + update_metrics(); + + // ----------------- Create Spatter File ----------------- + + create_spatter_file(*this, get_file_prefix()); + +} + +void MemPatternsForNV::update_metrics() +{ + // Get top gathers + get_gather_metrics().ntop = get_top_target(get_gather_iinfo(), get_gather_metrics()); + + // Get top scatters + get_scatter_metrics().ntop = get_top_target(get_scatter_iinfo(), get_scatter_metrics()); + + // ----------------- Second Pass ----------------- + + process_second_pass(); + + // ----------------- Normalize ----------------- + + normalize_stats(get_gather_metrics()); + normalize_stats(get_scatter_metrics()); +} + +std::string MemPatternsForNV::get_file_prefix() +{ + if (!_file_prefix.empty()) return _file_prefix; + + // If no file_prefix was set try extracting one from trace_file + std::string prefix = _trace_file_name; + size_t pos = std::string::npos; + while (std::string::npos != (pos = prefix.find(".gz"))) + { + prefix.replace(pos, 3, ""); + } + return prefix; +} + +// Store opcode mappings +bool 
MemPatternsForNV::add_or_update_opcode(int opcode_id, const std::string & opcode) { + auto it = _id_to_opcode_map.find(opcode_id); + if (it == _id_to_opcode_map.end()) { + _id_to_opcode_map[opcode_id] = opcode; + //std::cout << "OPCODE: " << opcode_id << " -> " << opcode << std::endl; + return true; + } + return false; +} + +// Retrieve opcode mapping by opcode_id +const std::string & MemPatternsForNV::get_opcode(int opcode_id) { + auto result = _id_to_opcode_map.find(opcode_id); + if (result != _id_to_opcode_map.end()) { + return result->second; + } + std::stringstream ss; + ss << "Unknown opcode_id: " << opcode_id; + throw GSDataError(ss.str()); +} + +// Store opcode_short mappings +bool MemPatternsForNV::add_or_update_opcode_short(int opcode_short_id, const std::string & opcode_short) { + auto it = _id_to_opcode_short_map.find(opcode_short_id); + if (it == _id_to_opcode_short_map.end()) { + _id_to_opcode_short_map[opcode_short_id] = opcode_short; + //std::cout << "OPCODE: " << opcode_id << " -> " << opcode << std::endl; + return true; + } + return false; +} + +// Retrieve opcode_short mapping by opcode_short_id +const std::string & MemPatternsForNV::get_opcode_short(int opcode_short_id) { + auto result = _id_to_opcode_short_map.find(opcode_short_id); + if (result != _id_to_opcode_short_map.end()) { + return result->second; + } + std::stringstream ss; + ss << "Unknown opcode_short_id: " << opcode_short_id; + throw GSDataError(ss.str()); +} + +// Store line mappings +bool MemPatternsForNV::add_or_update_line(int line_id, const std::string & line) { + auto it = _id_to_line_map.find(line_id); + if (it == _id_to_line_map.end()) { + _id_to_line_map[line_id] = line; + //std::cout << "LINE: " << line_id << " -> " << line << std::endl; + return true; + } + return false; +} + +// Retrieve line number mapping by line_id +const std::string & MemPatternsForNV::get_line(int line_id) { + auto result = _id_to_line_map.find(line_id); + if (result != _id_to_line_map.end()) { + 
return result->second; + } + std::stringstream ss; + ss << "Unknown line_id: " << line_id; + throw GSDataError(ss.str()); +} + +/* + * Read traces from a nvbit trace file. Includes header which describes opcode mappings used in trace data. + * Used by test runner (gsnv_test) to simulate nvbit execution. + */ +void MemPatternsForNV::process_traces() +{ + int iret = 0; + mem_access_t * t_line; + InstrWindow iw; + + gzFile fp_trace; + try + { + fp_trace = open_trace_file(get_trace_file_name()); + } + catch (const std::runtime_error & ex) + { + throw GSFileError(ex.what()); + } + + // Read header ** + trace_header_t * p_header = NULL; + trace_header_t header[1]; + tline_read_header(fp_trace, header, &p_header, &iret); + + uint32_t count = 0; + trace_map_entry_t * p_map_entry = NULL; + trace_map_entry_t map_entry[1]; + while (count < p_header->num_map_entires && tline_read_maps(fp_trace, map_entry, &p_map_entry, &iret) ) + { + std::cout << "MAP: " << p_map_entry -> map_name << " entry [" << p_map_entry->id << "] -> [" << p_map_entry->val << "]" << std::endl; + + if (std::string(p_map_entry->map_name) == ID_TO_OPCODE) { + _id_to_opcode_map[p_map_entry->id] = p_map_entry->val; + } + else if (std::string(p_map_entry->map_name) == ID_TO_OPCODE_SHORT) { + _id_to_opcode_short_map[p_map_entry->id] = p_map_entry->val; + } + else if (std::string(p_map_entry->map_name) == ID_TO_LINE) { + _id_to_line_map[p_map_entry->id] = p_map_entry->val; + } + else { + std::cerr << "Unsupported Map: " << p_map_entry->map_name << " found in trace, ignoring ..." 
+ << p_map_entry->id << " -> " << p_map_entry->val << std::endl; + } + + count++; + p_map_entry++; + } + + // Read Traces ** + iret = 0; + uint64_t lines_read = 0; + uint64_t pos = 0; + mem_access_t * p_trace = NULL; + mem_access_t trace_buff[NBUFS]; // was static (1024 bytes) + while (tline_read(fp_trace, trace_buff, &p_trace, &iret)) + { + // Decode trace + t_line = p_trace; + + if (-1 == t_line->cta_id_x) { continue; } + + try + { + // Progress bar + if (lines_read == 0) { + for (int i = 0; i < 100; i++) { std::cout << "-"; } + std::cout << std::endl; + } + if (lines_read % ((uint64_t) std::max((p_header->total_traces * .01), 1.0)) == 0) { + if ((pos % 20) == 0) { std::cout << "|"; } + else { std::cout << "+"; } + std::flush(std::cout); + pos++; + } + + handle_cta_memory_access(t_line); + + p_trace++; + lines_read++; + } + catch (const GSError & ex) { + std::cerr << "ERROR: " << ex.what() << std::endl; + close_trace_file(fp_trace); + throw; + } + } + + std::cout << "\nLines Read: " << lines_read << " of Total: " << p_header->total_traces << std::endl; + + close_trace_file(fp_trace); + + //metrics + get_trace_info().gather_occ_avg /= get_gather_metrics().cnt; + get_trace_info().scatter_occ_avg /= get_scatter_metrics().cnt; + + display_stats(*this); + +} + +void MemPatternsForNV::update_source_lines() +{ + // Find source lines for gathers - Must have symbol + printf("\nSymbol table lookup for gathers..."); + fflush(stdout); + + get_gather_metrics().cnt = update_source_lines_from_binary(GATHER); + + // Find source lines for scatters + printf("Symbol table lookup for scatters..."); + fflush(stdout); + + get_scatter_metrics().cnt = update_source_lines_from_binary(SCATTER); +} + +double MemPatternsForNV::update_source_lines_from_binary(mem_access_type mType) +{ + double target_cnt = 0.0; + + InstrInfo & target_iinfo = get_iinfo(mType); + Metrics & target_metrics = get_metrics(mType); + + for (int k = 0; k < NGS; k++) { + + if (0 == target_iinfo.get_iaddrs()[k]) { + 
break; + } + + std::string line; + line = addr_to_line(target_iinfo.get_iaddrs()[k]); + strncpy(target_metrics.get_srcline()[k], line.c_str(), MAX_LINE_LENGTH-1); + + if (std::string(target_metrics.get_srcline()[k]).empty()) + target_iinfo.get_icnt()[k] = 0; + + target_cnt += target_iinfo.get_icnt()[k]; + } + printf("done.\n"); + + return target_cnt; + +} + +void MemPatternsForNV::process_second_pass() +{ + uint64_t mcnt = 0; // used our own local mcnt while iterating over file in this method. + int iret = 0; + + // State carried thru + addr_t iaddr; + int64_t maddr; + addr_t gather_base[NTOP] = {0}; + addr_t scatter_base[NTOP] = {0}; + + bool breakout = false; + printf("\nSecond pass to fill gather / scatter subtraces\n"); + fflush(stdout); + +#ifdef USE_VECTOR_FOR_SECOND_PASS + for (auto itr = _traces.begin(); itr != _traces.end(); ++itr) + { + InstrAddrAdapter & ia = *itr; + + breakout = ::handle_2nd_pass_trace_entry(ia, get_gather_metrics(), get_scatter_metrics(), + iaddr, maddr, mcnt, gather_base, scatter_base); + if (breakout) { + break; + } + } +#else + std::fflush(_tmp_dump_file); + std::rewind(_tmp_dump_file); // Back to the future, ... 
sort of + try + { + trace_entry_t ta[TRACE_BUFFER_LENGTH]; + size_t count_read = 0; + size_t read; + while ( !breakout && (read = std::fread(&ta, sizeof (ta[0]), TRACE_BUFFER_LENGTH, _tmp_dump_file)) ) + { + for (int i = 0; i < read; i++) + { + InstrAddrAdapterForNV ia(const_cast(ta[i])); + breakout = handle_2nd_pass_trace_entry(ia, get_gather_metrics(), get_scatter_metrics(), + iaddr, maddr, mcnt, gather_base, scatter_base); + count_read++; + + if (breakout) break; + } + } + std::cout << "Reread: " << count_read << " for second_pass " << std::endl; + + if (!breakout && !std::feof(_tmp_dump_file)) { + if (std::ferror(_tmp_dump_file)) { + throw GSFileError("Unexpected error occurred while reading temp file"); + } + } + std::fclose(_tmp_dump_file); + } + catch (const GSError & ex) + { + std::cerr << "ERROR: " << ex.what() << std::endl; + std::fclose(_tmp_dump_file); + throw; + } +#endif +} + +bool MemPatternsForNV::convert_to_trace_entry(const mem_access_t & ma, + bool ignore_partial_warps, + std::vector & te_list) +{ + uint16_t mem_size = ma.size; + uint16_t mem_type_code; + + if (ma.is_load) + mem_type_code = GATHER; + else if (ma.is_store) + mem_type_code = SCATTER; + else + throw GSDataError ("Invalid mem_type must be LD(0) or ST(1)"); + + if (_id_to_opcode_short_map.find(ma.opcode_short_id) == _id_to_opcode_short_map.end()) + return false; + std::string opcode_short = _id_to_opcode_short_map[ma.opcode_short_id]; + + if (_target_opcodes.find(opcode_short) == _target_opcodes.end()) + return false; + + // TODO: This is a SLOW way of doing this + const addr_t & base_addr = ma.addrs[0]; + te_list.reserve(MemPatternsForNV::CTA_LENGTH); + for (int i = 0; i < MemPatternsForNV::CTA_LENGTH; i++) + { + if (ma.addrs[i] != 0) + { + trace_entry_t te { mem_type_code, mem_size, ma.addrs[i], base_addr }; + te_list.push_back(te); + + if (_addr_to_line_id.find(base_addr) == _addr_to_line_id.end()) { + _addr_to_line_id[base_addr] = ma.line_id; + } + } + else if 
(ignore_partial_warps) + { + // Ignore memory_accesses which have less than MemPatternsForNV::CTA_LENGTH + return false; + } + } + return true; +} + +void MemPatternsForNV::handle_cta_memory_access(const mem_access_t * ma) +{ + if (exceed_max_count()) { return; } + + if (!_first_trace_seen) { + _first_trace_seen = true; + printf("First pass to find top gather / scatter iaddresses\n"); + fflush(stdout); + +#ifndef USE_VECTOR_FOR_SECOND_PASS + // Open an output file for dumping temp data used exclusively by second_pass + _tmp_dump_file = tmpfile(); + if (!_tmp_dump_file) { + throw GSFileError("Unable to create a temp file for second pass"); + } +#endif + } + + if (_write_trace_file && _ofs_tmp.is_open()) { + // Write entry to trace_output file + _ofs_tmp.write(reinterpret_cast(ma), sizeof *ma); + _traces_written++; + } +#if 0 + std::stringstream ss; + //ss << "CTX " << HEX(ctx) << " - grid_launch_id " + ss << "GSNV_TRACE: CTX " << " - grid_launch_id " + << ma->grid_launch_id << " - CTA " << ma->cta_id_x << "," << ma->cta_id_y << "," << ma->cta_id_z + << " - warp " << ma->warp_id << " - " << get_opcode(ma->opcode_id) + << " - shortOpcode: " << ma->opcode_short_id + << " isLoad: " << ma->is_load << " isStore: " << ma->is_store + << " size: " << ma->size << " - "; + + for (int i = 0; i < MemPatternsForNV::CTA_LENGTH; i++) { + ss << HEX(ma->addrs[i]) << " "; + } + std::cout << ss.str() << std::endl; +#endif + + // Convert to vector of trace_entry_t if full warp. ignore partial warps. + std::vector te_list; + te_list.reserve(MemPatternsForNV::CTA_LENGTH); + + bool status = convert_to_trace_entry(*ma, true, te_list); + if (!status) return; + + uint64_t min_size = !te_list.empty() ? 
(te_list[0].size) + 1 : 0; + if (min_size > 0 && valid_gs_stride(te_list, min_size)) + { + for (auto it = te_list.begin(); it != te_list.end(); it++) + { + handle_trace_entry(InstrAddrAdapterForNV(*it)); + } + _traces_handled++; + } +} + +bool MemPatternsForNV::valid_gs_stride(const std::vector & te_list, const uint32_t min_stride) +{ + bool valid_stride = false; + uint32_t min_stride_found = INT32_MAX; + uint64_t last_addr = 0; + bool first = true; + for (auto it = te_list.begin(); it != te_list.end(); it++) + { + const trace_entry_t & te = *it; + if (first) { + first = false; + last_addr = te.addr; + continue; + } + + uint64_t diff = std::labs (last_addr - (uint64_t)te.addr); + if (diff < min_stride) + return false; + + if (diff < min_stride_found) + min_stride_found = diff; + + last_addr = te.addr; + } + + return min_stride_found >= min_stride; +} + +void MemPatternsForNV::set_trace_file(const std::string & trace_file_name) +{ + if (trace_file_name == _trace_out_file_name) { + throw GSError ("Cannot set trace input file to same name as trace output file [" + trace_file_name + "]."); + } + + _trace_file_name = trace_file_name; +} + +void MemPatternsForNV::set_trace_out_file(const std::string & trace_out_file_name) +{ + try + { + if (trace_out_file_name.empty()) { + throw GSError ("Cannot set trace output file to empty filename [" + trace_out_file_name + "]."); + } + + if (trace_out_file_name == _trace_file_name) { + throw GSError ("Cannot set trace output file to same name as trace input file [" + trace_out_file_name + "]."); + } + + _trace_out_file_name = trace_out_file_name; + _tmp_trace_out_file_name = _trace_out_file_name + ".tmp"; + + // Open a temp file for writing data + _ofs_tmp.open(_tmp_trace_out_file_name, std::ios::binary | std::ios::trunc | std::ios::in | std::ios::out); + if (!_ofs_tmp.is_open()) { + throw GSFileError("Unable to open " + _tmp_trace_out_file_name + " for writing"); + } + std::remove(_tmp_trace_out_file_name.c_str()); // Force auto 
cleanup + + // Open a ouput file for writing data header and appending data + _ofs.open(_trace_out_file_name, std::ios::binary | std::ios::trunc); + if (!_ofs.is_open()) { + throw GSFileError("Unable to open " + _trace_out_file_name + " for writing"); + } + + _write_trace_file = true; + } + catch (const std::exception & ex) + { + std::cerr << "ERROR: " << ex.what() << std::endl; + throw; + } +} + +void MemPatternsForNV::write_trace_out_file() +{ + if (!_write_trace_file || !_first_trace_seen) return; + + /// TODO: COMPRESS trace_file + try + { + std::cout << "\nSaving trace file - traces_written: " << _traces_written + << " traced_handled: " << _traces_handled << "\n" << std::endl; + + _ofs_tmp.flush(); + + // Write header + trace_header_t header; + header.num_maps = NUM_MAPS; + header.num_map_entires = _id_to_opcode_map.size() + + _id_to_opcode_short_map.size() + + _id_to_line_map.size(); + header.total_traces = _traces_written; + + _ofs.write(reinterpret_cast(&header), sizeof header); + + // Write Maps + trace_map_entry_t m_entry; + strncpy(m_entry.map_name, ID_TO_OPCODE, MAP_NAME_SIZE-1); + for (auto itr = _id_to_opcode_map.begin(); itr != _id_to_opcode_map.end(); itr++) + { + m_entry.id = itr->first; + strncpy(m_entry.val, itr->second.c_str(), MAP_VALUE_LONG_SIZE-1); + _ofs.write(reinterpret_cast(&m_entry), sizeof m_entry); + } + + strncpy(m_entry.map_name, ID_TO_OPCODE_SHORT, MAP_NAME_SIZE-1); + //uint64_t opcode_short_map_len = _id_to_opcode_short_map.size(); + for (auto itr = _id_to_opcode_short_map.begin(); itr != _id_to_opcode_short_map.end(); itr++) + { + m_entry.id = itr->first; + strncpy(m_entry.val, itr->second.c_str(), MAP_VALUE_LONG_SIZE-1); + _ofs.write(reinterpret_cast(&m_entry), sizeof m_entry); + } + + strncpy(m_entry.map_name, ID_TO_LINE, MAP_NAME_SIZE-1); + //uint64_t line_map_len = _id_to_line_map.size(); + for (auto itr = _id_to_line_map.begin(); itr != _id_to_line_map.end(); itr++) + { + m_entry.id = itr->first; + strncpy(m_entry.val, 
itr->second.c_str(), MAP_VALUE_LONG_SIZE-1); + _ofs.write(reinterpret_cast(&m_entry), sizeof m_entry); + } + _ofs.flush(); + + // Write file contents + _ofs_tmp.seekp(0); + _ofs << _ofs_tmp.rdbuf(); + _ofs.flush(); + _ofs.close(); + _ofs_tmp.close(); + + std::remove(_tmp_trace_out_file_name.c_str()); + + std::cout << "Mappings found" << std::endl; + + std::cout << "-- OPCODE_ID to OPCODE MAPPING -- " << std::endl; + for (auto itr = _id_to_opcode_map.begin(); itr != _id_to_opcode_map.end(); itr++) { + std::cout << itr->first << " -> " << itr->second << std::endl; + } + + std::cout << "-- OPCODE_SHORT_ID to OPCODE_SHORT MAPPING -- " << std::endl; + for (auto itr = _id_to_opcode_short_map.begin(); itr != _id_to_opcode_short_map.end(); itr++) { + std::cout << itr->first << " -> " << itr->second << std::endl; + } + + std::cout << "-- LINE_ID to LINE MAPPING -- " << std::endl; + for (auto itr = _id_to_line_map.begin(); itr != _id_to_line_map.end(); itr++) { + std::cout << itr->first << " -> " << itr->second << std::endl; + } + } + catch (const std::exception & ex) + { + std::remove(_tmp_trace_out_file_name.c_str()); + std::cerr << "ERROR: failed to write trace file: " << _trace_file_name << std::endl; + throw; + } +} + +void MemPatternsForNV::set_max_trace_count(const std::string & max_trace_count_str) +{ + try { + _max_trace_count = (int64_t) std::stoi(max_trace_count_str); + if (_max_trace_count < 0) { + throw GSError("Max Trace count must be greater than 0"); + } + _limit_trace_count = true; + std::cout << "Max Trace Count set to: " << _max_trace_count << std::endl; + } + catch (const std::exception & ex) { + std::cerr << "Failed to set Max Trace Count from value: " << max_trace_count_str + << " with error: " << ex.what() << std::endl; + } +} + +void MemPatternsForNV::set_config_file(const std::string & config_file) +{ + _config_file_name = config_file; + std::ifstream ifs; + ifs.open(_config_file_name); + if (!ifs.is_open()) + throw GSFileError("Unable to open config 
file: " + _config_file_name); + + while (!ifs.eof()) + { + std::string name; + std::string value; + ifs >> name >> value; + if (name.empty() || value.empty() || name[0] == '#') + continue; + + std::cout << "CONFIG: name: " << name << " value: " << value << std::endl; + + if (NVGS_TARGET_KERNEL == name) { + _target_kernels.insert(value); + } + else if (NVGS_TRACE_OUT_FILE == name) { + set_trace_out_file(value); + } + else if (NVGS_FILE_PREFIX == name) { + set_file_prefix(value); + } + else if (NVGS_MAX_TRACE_COUNT == name) { + set_max_trace_count(value); + } + else { + std::cerr << "Unknown setting <" << name << "> with value <" << value << "> " + << "specified in config file: " << _config_file_name << " ignoring ..." << std::endl; + } + } +} + +bool MemPatternsForNV::should_instrument(const std::string & kernel_name) +{ + if (exceed_max_count()) { return false; } + + // Instrument all if none specified + if (_target_kernels.size() == 0) { + std::cout << "Instrumenting all : " << kernel_name << std::endl; + return true; + } + + auto itr = _target_kernels.find (kernel_name); + if ( itr != _target_kernels.end()) // Hard code for now + { + std::cout << "Instrumenting: " << kernel_name << std::endl; + return true; + } + + return false; +} + +} // namespace gsnv_patterns + +} // namespace gs_patterns \ No newline at end of file diff --git a/gsnv_patterns.h b/gsnv_patterns.h index fed881b..a6fcfdd 100644 --- a/gsnv_patterns.h +++ b/gsnv_patterns.h @@ -1,5 +1,4 @@ - -#pragma once +#pragma once #include #include @@ -11,7 +10,6 @@ #include #include -#include #include #include #include @@ -29,986 +27,226 @@ "0x" << std::setfill('0') << std::setw(16) << std::hex << (uint64_t)x \ << std::dec -#include "nvbit_tracing/nvgs_trace/common.h" - -struct _trace_entry_t { - unsigned short type; // 2 bytes: trace_type_t - unsigned short size; - union { - addr_t addr; - unsigned char length[sizeof(addr_t)]; +#include "nvbit_tracing/gsnv_trace/common.h" + +namespace gs_patterns +{ 
+namespace gsnv_patterns +{ + struct _trace_entry_t { + unsigned short type; // 2 bytes: trace_type_t + unsigned short size; + union { + addr_t addr; + unsigned char length[sizeof(addr_t)]; + }; + addr_t base_addr; + char padding[4]; + } __attribute__((packed)); + typedef struct _trace_entry_t trace_entry_t; + + #define MAP_NAME_SIZE 24 + #define MAP_VALUE_SIZE 22 + #define MAP_VALUE_LONG_SIZE 94 + #define NUM_MAPS 3 + // Setting this to fit within a 4k page e.g 170 * 24 bytes <= 4k + #define TRACE_BUFFER_LENGTH 170 + + struct _trace_map_entry_t + { + // 32 bytes total + char map_name[MAP_NAME_SIZE]; + uint16_t id; + char val[MAP_VALUE_LONG_SIZE]; }; - addr_t base_addr; - char padding[4]; -} __attribute__((packed)); -typedef struct _trace_entry_t trace_entry_t; - -#define MAP_NAME_SIZE 24 -#define MAP_VALUE_SIZE 22 -#define MAP_VALUE_LONG_SIZE 94 -#define NUM_MAPS 3 -// Setting this to fit within a 4k page e.g 170 * 24 bytes <= 4k -#define TRACE_BUFFER_LENGTH 170 - -struct _trace_map_entry_t -{ - // 32 bytes total - char map_name[MAP_NAME_SIZE]; - uint16_t id; - char val[MAP_VALUE_LONG_SIZE]; -}; -typedef struct _trace_map_entry_t trace_map_entry_t; - -struct _trace_header_t { - uint64_t num_maps; - uint64_t num_map_entires; - uint64_t total_traces; -}; -typedef struct _trace_header_t trace_header_t; - - -gzFile open_trace_file(const std::string & trace_file_name) -{ - gzFile fp; - - fp = gzopen(trace_file_name.c_str(), "hrb"); - if (NULL == fp) { - throw GSFileError("Could not open " + trace_file_name + "!"); - } - return fp; -} - -void close_trace_file (gzFile & fp) -{ - gzclose(fp); -} - -int tline_read_header(gzFile fp, trace_header_t * val, trace_header_t **p_val, int *edx) { - - int idx; - - idx = (*edx) / sizeof(trace_header_t); - //first read - if (NULL == *p_val) { - *edx = gzread(fp, val, sizeof(trace_header_t)); - *p_val = val; - } - else if (*p_val == &val[idx]) { - *edx = gzread(fp, val, sizeof(trace_header_t)); - *p_val = val; - } - - if (0 == *edx) - 
return 0; - - return 1; -} - -int tline_read_maps(gzFile fp, trace_map_entry_t * val, trace_map_entry_t **p_val, int *edx) { - - int idx; - - idx = (*edx) / sizeof(trace_map_entry_t); - //first read - if (NULL == *p_val) { - *edx = gzread(fp, val, sizeof(trace_map_entry_t)); - *p_val = val; - } - else if (*p_val == &val[idx]) { - *edx = gzread(fp, val, sizeof(trace_map_entry_t)); - *p_val = val; - } - - if (0 == *edx) - return 0; - - return 1; -} - -int tline_read(gzFile fp, mem_access_t * val, mem_access_t **p_val, int *edx) { - - int idx; - - idx = (*edx) / sizeof(mem_access_t); - //first read - if (NULL == *p_val) { - *edx = gzread(fp, val, sizeof(mem_access_t) * NBUFS); - *p_val = val; - - } else if (*p_val == &val[idx]) { - *edx = gzread(fp, val, sizeof(mem_access_t) * NBUFS); - *p_val = val; - } - - if (0 == *edx) - return 0; - - return 1; -} - -// An adapter for trace_entry_t (temporaritly untl replaced with nvbit memory detail type) -class InstrAddrAdapterForNV : public InstrAddrAdapter -{ -public: - InstrAddrAdapterForNV(const trace_entry_t & te) : _te(te) { } - - virtual ~InstrAddrAdapterForNV() { } - - virtual inline bool is_valid() const override { return true; } - virtual inline bool is_mem_instr() const override { return true; } - virtual inline bool is_other_instr() const override { return false; } - virtual inline mem_access_type get_mem_access_type() const override { return (_te.type == 0) ? GATHER : SCATTER; } - virtual inline mem_instr_type get_mem_instr_type() const override { return CTA; } - - virtual inline size_t get_size() const override { return _te.size; } // in bytes - virtual inline addr_t get_address() const override { return _te.addr; } - virtual inline addr_t get_iaddr () const override { return _te.base_addr; } - virtual inline addr_t get_maddr () const override { return _te.addr; } // was _base_addr - virtual inline unsigned short get_type() const override { return _te.type; } // must be 0 for GATHER, 1 for SCATTER !! 
- virtual inline int64_t min_size() const override { return 256; } // 32 * 8 bytes - - virtual void output(std::ostream & os) const override { os << "InstrAddrAdapterForNV: trace entry: type: [" - << _te.type << "] size: [" << _te.size << "]"; } - - const trace_entry_t & get_trace_entry() const { return _te; } - -private: - const trace_entry_t _te; -}; - -class MemPatternsForNV : public MemPatterns -{ -public: - static const uint8_t CTA_LENGTH = 32; - - static constexpr const char * ID_TO_OPCODE = "ID_TO_OPCODE"; - static constexpr const char * ID_TO_OPCODE_SHORT = "ID_TO_OPCODE_SHORT"; - static constexpr const char * ID_TO_LINE = "ID_TO_LINE"; - - static constexpr const char * NVGS_TARGET_KERNEL = "NVGS_TARGET_KERNEL"; - static constexpr const char * NVGS_TRACE_OUT_FILE = "NVGS_TRACE_OUT_FILE"; - static constexpr const char * NVGS_PROGRAM_BINARY = "NVGS_PROGRAM_BINARY"; - static constexpr const char * NVGS_FILE_PREFIX = "NVGS_FILE_PREFIX"; - static constexpr const char * NVGS_MAX_TRACE_COUNT = "NVGS_MAX_TRACE_COUNT"; - + typedef struct _trace_map_entry_t trace_map_entry_t; - MemPatternsForNV(): _metrics(GATHER, SCATTER), - _iinfo(GATHER, SCATTER), - _target_opcodes { "LD", "ST", "LDS", "STS", "LDG", "STG" } - { } - - virtual ~MemPatternsForNV() override { } - - void handle_trace_entry(const InstrAddrAdapter & ia) override; - void generate_patterns() override; - - Metrics & get_metrics(mem_access_type) override; - InstrInfo & get_iinfo(mem_access_type) override; - - Metrics & get_gather_metrics() override { return _metrics.first; } - Metrics & get_scatter_metrics() override { return _metrics.second; } - InstrInfo & get_gather_iinfo () override { return _iinfo.first; } - InstrInfo & get_scatter_iinfo () override { return _iinfo.second; } - TraceInfo & get_trace_info() override { return _trace_info; } - InstrWindow & get_instr_window() override { return _iw; } - - void set_trace_file(const std::string & trace_file_name); - inline const std::string & 
get_trace_file_name() { return _trace_file_name; } - - inline void set_file_prefix(const std::string & prefix) { _file_prefix = prefix; } - std::string get_file_prefix(); - - void set_max_trace_count(const std::string & max_trace_count_str); - inline bool exceed_max_count() const { - if (_limit_trace_count && (_trace_info.trace_lines >= _max_trace_count)) { - return true; - } - return false; - } - - // Mainly Called by nvbit kernel - void set_config_file (const std::string & config_file); - - - void update_metrics(); + struct _trace_header_t { + uint64_t num_maps; + uint64_t num_map_entires; + uint64_t total_traces; + }; + typedef struct _trace_header_t trace_header_t; - void process_traces(); - void update_source_lines(); - double update_source_lines_from_binary(mem_access_type); - void process_second_pass(); - std::string addr_to_line(addr_t addr) + // An adapter for trace_entry_t (temporaritly untl replaced with nvbit memory detail type) + class InstrAddrAdapterForNV : public InstrAddrAdapter { - auto itr = _addr_to_line_id.find(addr); - if (itr != _addr_to_line_id.end()) { - auto it2 = _id_to_line_map.find(itr->second); - if (it2 != _id_to_line_map.end()) { - return it2->second; - } - } - return std::string(); - } - - void set_trace_out_file(const std::string & trace_file_name); - void write_trace_out_file(); - - // Handle an nvbit CTA memory update - void handle_cta_memory_access(const mem_access_t * ma); - // Validate cta stride is within minimum - bool valid_gs_stride(const std::vector & te_list, const uint32_t min_stride); - - // TODO: Migrate these to template functions ! 
- // ----------------------------------------------------------------- - - // Store opcode mappings - bool add_or_update_opcode(int opcode_id, const std::string & opcode); - // Retrieve opcode mapping by opcode_id - const std::string & get_opcode(int opcode_id); - - // Store opcode_short mappings - bool add_or_update_opcode_short(int opcode_short_id, const std::string & opcode_short); - // Retrieve opcode_short mapping by opcode_short_id - const std::string & get_opcode_short(int opcode_short_id); - - // Store line mappings - bool add_or_update_line(int line_id, const std::string & line); - // Retrieve line number mapping by line_id - const std::string & get_line(int line_id); - - // ----------------------------------------------------------------- - - bool should_instrument(const std::string & kernel_name); - - bool convert_to_trace_entry(const mem_access_t & ma, bool ignore_partial_warps, std::vector & te_list); - -private: - - std::pair _metrics; - std::pair _iinfo; - TraceInfo _trace_info; - InstrWindow _iw; - - std::string _trace_file_name; // Input compressed nvbit trace file - std::string _file_prefix; // Used by gs_patterns_core to write out pattern files - std::string _trace_out_file_name; // Ouput file containing nvbit traces encounterd if requested - std::string _tmp_trace_out_file_name; // Temp file used to store traces before re-writing to _trace_out_filename - - std::string _config_file_name; - std::set _target_kernels; - bool _limit_trace_count = false; - int64_t _max_trace_count = 0; - uint64_t _traces_written = 0; - uint64_t _traces_handled = 0; - - bool _write_trace_file = false; - bool _first_trace_seen = false; - - /* The output stream used to temporarily hold raw trace warp data (mem_access_t) before being writen to _trace_out_file_name */ - std::fstream _ofs_tmp; - /* The output stream cooresponding to _trace_out_file_name */ - std::ofstream _ofs; - -#ifdef USE_VECTOR_FOR_SECOND_PASS - /* A vector used to store intermediate trace records 
(trace_entry_t) exclusively for use by second pass - (instead of _tmp_dump_file if USE_VECTOR_FOR_SECOND_PASS is defined) */ - std::vector _traces; -#else - /* A temp file used to store intermediate trace records (trace_entry_t) exclusively for use by second pass */ - std::FILE * _tmp_dump_file; -#endif - - std::map _id_to_opcode_map; - std::map _id_to_opcode_short_map; - std::map _id_to_line_map; - std::unordered_map _addr_to_line_id; - const std::set _target_opcodes; -}; - -Metrics & MemPatternsForNV::get_metrics(mem_access_type m) -{ - switch (m) - { - case GATHER : return _metrics.first; - break; - case SCATTER : return _metrics.second; - break; - default: - throw GSError("Unable to get Metrics - Invalid Metrics Type: " + m); - } -} - -InstrInfo & MemPatternsForNV::get_iinfo(mem_access_type m) -{ - switch (m) - { - case GATHER : return _iinfo.first; - break; - case SCATTER : return _iinfo.second; - break; - default: - throw GSError("Unable to get InstrInfo - Invalid Metrics Type: " + m); - } -} - -void MemPatternsForNV::handle_trace_entry(const InstrAddrAdapter & ia) -{ - // Call libgs_patterns - ::handle_trace_entry(*this, ia); - - const InstrAddrAdapterForNV &ianv = dynamic_cast (ia); -#ifdef USE_VECTOR_FOR_SECOND_PASS - _traces.push_back(ianv); -#else - if (std::fwrite(reinterpret_cast(&ianv.get_trace_entry()), sizeof(trace_entry_t), 1, _tmp_dump_file) != 1) - { - throw GSFileError("Write of trace to temp file failed"); - } -#endif -} - -void MemPatternsForNV::generate_patterns() -{ - // ----------------- Update Source Lines ----------------- - - update_source_lines(); - - // ----------------- Update Metrics ----------------- + public: + InstrAddrAdapterForNV(const trace_entry_t & te) : _te(te) { } - update_metrics(); - - // ----------------- Create Spatter File ----------------- - - ::create_spatter_file(*this, get_file_prefix()); - -} - -void MemPatternsForNV::update_metrics() -{ - // Get top gathers - get_gather_metrics().ntop = 
get_top_target(get_gather_iinfo(), get_gather_metrics()); - - // Get top scatters - get_scatter_metrics().ntop = get_top_target(get_scatter_iinfo(), get_scatter_metrics()); - - // ----------------- Second Pass ----------------- - - process_second_pass(); - - // ----------------- Normalize ----------------- - - ::normalize_stats(get_gather_metrics()); - ::normalize_stats(get_scatter_metrics()); -} - -std::string MemPatternsForNV::get_file_prefix() -{ - if (!_file_prefix.empty()) return _file_prefix; + virtual ~InstrAddrAdapterForNV() { } - // If no file_prefix was set try extracting one from trace_file - std::string prefix = _trace_file_name; - size_t pos = std::string::npos; - while (std::string::npos != (pos = prefix.find(".gz"))) - { - prefix.replace(pos, 3, ""); - } - return prefix; -} - -// Store opcode mappings -bool MemPatternsForNV::add_or_update_opcode(int opcode_id, const std::string & opcode) { - auto it = _id_to_opcode_map.find(opcode_id); - if (it == _id_to_opcode_map.end()) { - _id_to_opcode_map[opcode_id] = opcode; - //std::cout << "OPCODE: " << opcode_id << " -> " << opcode << std::endl; - return true; - } - return false; -} - -// Retrieve opcode mapping by opcode_id -const std::string & MemPatternsForNV::get_opcode(int opcode_id) { - auto result = _id_to_opcode_map.find(opcode_id); - if (result != _id_to_opcode_map.end()) { - return result->second; - } - std::stringstream ss; - ss << "Unknown opcode_id: " << opcode_id; - throw GSDataError(ss.str()); -} - -// Store opcode_short mappings -bool MemPatternsForNV::add_or_update_opcode_short(int opcode_short_id, const std::string & opcode_short) { - auto it = _id_to_opcode_short_map.find(opcode_short_id); - if (it == _id_to_opcode_short_map.end()) { - _id_to_opcode_short_map[opcode_short_id] = opcode_short; - //std::cout << "OPCODE: " << opcode_id << " -> " << opcode << std::endl; - return true; - } - return false; -} - -// Retrieve opcode_short mapping by opcode_short_id -const std::string & 
MemPatternsForNV::get_opcode_short(int opcode_short_id) { - auto result = _id_to_opcode_short_map.find(opcode_short_id); - if (result != _id_to_opcode_short_map.end()) { - return result->second; - } - std::stringstream ss; - ss << "Unknown opcode_short_id: " << opcode_short_id; - throw GSDataError(ss.str()); -} - -// Store line mappings -bool MemPatternsForNV::add_or_update_line(int line_id, const std::string & line) { - auto it = _id_to_line_map.find(line_id); - if (it == _id_to_line_map.end()) { - _id_to_line_map[line_id] = line; - //std::cout << "LINE: " << line_id << " -> " << line << std::endl; - return true; - } - return false; -} - -// Retrieve line number mapping by line_id -const std::string & MemPatternsForNV::get_line(int line_id) { - auto result = _id_to_line_map.find(line_id); - if (result != _id_to_line_map.end()) { - return result->second; - } - std::stringstream ss; - ss << "Unknown line_id: " << line_id; - throw GSDataError(ss.str()); -} - -/* - * Read traces from a nvbit trace file. Includes header which describes opcode mappings used in trace data. - * Used by test runner (gsnv_test) to simulate nvbit execution. - */ -void MemPatternsForNV::process_traces() -{ - int iret = 0; - mem_access_t * t_line; - InstrWindow iw; + virtual inline bool is_valid() const override { return true; } + virtual inline bool is_mem_instr() const override { return true; } + virtual inline bool is_other_instr() const override { return false; } + virtual inline mem_access_type get_mem_access_type() const override { return (_te.type == 0) ? 
GATHER : SCATTER; } + virtual inline mem_instr_type get_mem_instr_type() const override { return CTA; } - gzFile fp_trace = open_trace_file(get_trace_file_name()); + virtual inline size_t get_size() const override { return _te.size; } // in bytes + virtual inline addr_t get_address() const override { return _te.addr; } + virtual inline addr_t get_iaddr () const override { return _te.base_addr; } + virtual inline addr_t get_maddr () const override { return _te.addr; } // was _base_addr + virtual inline unsigned short get_type() const override { return _te.type; } // must be 0 for GATHER, 1 for SCATTER !! + virtual inline int64_t min_size() const override { return 256; } // 32 * 8 bytes - // Read header ** - trace_header_t * p_header = NULL; - trace_header_t header[1]; - tline_read_header(fp_trace, header, &p_header, &iret); + virtual void output(std::ostream & os) const override { os << "InstrAddrAdapterForNV: trace entry: type: [" + << _te.type << "] size: [" << _te.size << "]"; } - uint32_t count = 0; - trace_map_entry_t * p_map_entry = NULL; - trace_map_entry_t map_entry[1]; - while (count < p_header->num_map_entires && tline_read_maps(fp_trace, map_entry, &p_map_entry, &iret) ) - { - std::cout << "MAP: " << p_map_entry -> map_name << " entry [" << p_map_entry->id << "] -> [" << p_map_entry->val << "]" << std::endl; + const trace_entry_t & get_trace_entry() const { return _te; } - if (std::string(p_map_entry->map_name) == ID_TO_OPCODE) { - _id_to_opcode_map[p_map_entry->id] = p_map_entry->val; - } - else if (std::string(p_map_entry->map_name) == ID_TO_OPCODE_SHORT) { - _id_to_opcode_short_map[p_map_entry->id] = p_map_entry->val; - } - else if (std::string(p_map_entry->map_name) == ID_TO_LINE) { - _id_to_line_map[p_map_entry->id] = p_map_entry->val; - } - else { - std::cerr << "Unsupported Map: " << p_map_entry->map_name << " found in trace, ignoring ..." 
- << p_map_entry->id << " -> " << p_map_entry->val << std::endl; - } + private: + const trace_entry_t _te; + }; - count++; - p_map_entry++; - } - - // Read Traces ** - iret = 0; - uint64_t lines_read = 0; - uint64_t pos = 0; - mem_access_t * p_trace = NULL; - mem_access_t trace_buff[NBUFS]; // was static (1024 bytes) - while (tline_read(fp_trace, trace_buff, &p_trace, &iret)) + class MemPatternsForNV : public MemPatterns { - // Decode trace - t_line = p_trace; - - if (-1 == t_line->cta_id_x) { continue; } - - try - { - // Progress bar - if (lines_read == 0) { - for (int i = 0; i < 100; i++) { std::cout << "-"; } - std::cout << std::endl; - } - if (lines_read % ((uint64_t) std::max((p_header->total_traces * .01), 1.0)) == 0) { - if ((pos % 20) == 0) { std::cout << "|"; } - else { std::cout << "+"; } - std::flush(std::cout); - pos++; - } - - handle_cta_memory_access(t_line); - - p_trace++; - lines_read++; - } - catch (const GSError & ex) { - std::cerr << "ERROR: " << ex.what() << std::endl; - close_trace_file(fp_trace); - throw; - } - } - - std::cout << "\nLines Read: " << lines_read << " of Total: " << p_header->total_traces << std::endl; + public: + static const uint8_t CTA_LENGTH = 32; - close_trace_file(fp_trace); + static constexpr const char * ID_TO_OPCODE = "ID_TO_OPCODE"; + static constexpr const char * ID_TO_OPCODE_SHORT = "ID_TO_OPCODE_SHORT"; + static constexpr const char * ID_TO_LINE = "ID_TO_LINE"; - //metrics - get_trace_info().gather_occ_avg /= get_gather_metrics().cnt; - get_trace_info().scatter_occ_avg /= get_scatter_metrics().cnt; + static constexpr const char * NVGS_TARGET_KERNEL = "NVGS_TARGET_KERNEL"; + static constexpr const char * NVGS_TRACE_OUT_FILE = "NVGS_TRACE_OUT_FILE"; + static constexpr const char * NVGS_PROGRAM_BINARY = "NVGS_PROGRAM_BINARY"; + static constexpr const char * NVGS_FILE_PREFIX = "NVGS_FILE_PREFIX"; + static constexpr const char * NVGS_MAX_TRACE_COUNT = "NVGS_MAX_TRACE_COUNT"; - display_stats(*this); -} + 
MemPatternsForNV(): _metrics(GATHER, SCATTER), + _iinfo(GATHER, SCATTER), + _target_opcodes { "LD", "ST", "LDS", "STS", "LDG", "STG" } + { } -void MemPatternsForNV::update_source_lines() -{ - // Find source lines for gathers - Must have symbol - printf("\nSymbol table lookup for gathers..."); - fflush(stdout); - - get_gather_metrics().cnt = update_source_lines_from_binary(GATHER); - - // Find source lines for scatters - printf("Symbol table lookup for scatters..."); - fflush(stdout); + virtual ~MemPatternsForNV() override { } - get_scatter_metrics().cnt = update_source_lines_from_binary(SCATTER); -} + void handle_trace_entry(const InstrAddrAdapter & ia) override; + void generate_patterns() override; -double MemPatternsForNV::update_source_lines_from_binary(mem_access_type mType) -{ - double target_cnt = 0.0; - - InstrInfo & target_iinfo = get_iinfo(mType); - Metrics & target_metrics = get_metrics(mType); + Metrics & get_metrics(mem_access_type) override; + InstrInfo & get_iinfo(mem_access_type) override; - for (int k = 0; k < NGS; k++) { + Metrics & get_gather_metrics() override { return _metrics.first; } + Metrics & get_scatter_metrics() override { return _metrics.second; } + InstrInfo & get_gather_iinfo () override { return _iinfo.first; } + InstrInfo & get_scatter_iinfo () override { return _iinfo.second; } + TraceInfo & get_trace_info() override { return _trace_info; } + InstrWindow & get_instr_window() override { return _iw; } - if (0 == target_iinfo.get_iaddrs()[k]) { - break; - } + void set_trace_file(const std::string & trace_file_name); + inline const std::string & get_trace_file_name() { return _trace_file_name; } - std::string line; - line = addr_to_line(target_iinfo.get_iaddrs()[k]); - strncpy(target_metrics.get_srcline()[k], line.c_str(), MAX_LINE_LENGTH-1); + inline void set_file_prefix(const std::string & prefix) { _file_prefix = prefix; } + std::string get_file_prefix(); - if (std::string(target_metrics.get_srcline()[k]).empty()) - 
target_iinfo.get_icnt()[k] = 0; - - target_cnt += target_iinfo.get_icnt()[k]; - } - printf("done.\n"); - - return target_cnt; - -} - -void MemPatternsForNV::process_second_pass() -{ - uint64_t mcnt = 0; // used our own local mcnt while iterating over file in this method. - int iret = 0; - - // State carried thru - addr_t iaddr; - int64_t maddr; - addr_t gather_base[NTOP] = {0}; - addr_t scatter_base[NTOP] = {0}; - - bool breakout = false; - printf("\nSecond pass to fill gather / scatter subtraces\n"); - fflush(stdout); - -#ifdef USE_VECTOR_FOR_SECOND_PASS - for (auto itr = _traces.begin(); itr != _traces.end(); ++itr) - { - InstrAddrAdapter & ia = *itr; - - breakout = ::handle_2nd_pass_trace_entry(ia, get_gather_metrics(), get_scatter_metrics(), - iaddr, maddr, mcnt, gather_base, scatter_base); - if (breakout) { - break; - } - } -#else - std::fflush(_tmp_dump_file); - std::rewind(_tmp_dump_file); // Back to the future, ... sort of - try - { - trace_entry_t ta[TRACE_BUFFER_LENGTH]; - size_t count_read = 0; - size_t read; - while ( !breakout && (read = std::fread(&ta, sizeof (ta[0]), TRACE_BUFFER_LENGTH, _tmp_dump_file)) ) - { - for (int i = 0; i < read; i++) - { - InstrAddrAdapterForNV ia(const_cast(ta[i])); - breakout = ::handle_2nd_pass_trace_entry(ia, get_gather_metrics(), get_scatter_metrics(), - iaddr, maddr, mcnt, gather_base, scatter_base); - count_read++; - - if (breakout) break; - } - } - std::cout << "Reread: " << count_read << " for second_pass " << std::endl; - - if (!breakout && !std::feof(_tmp_dump_file)) { - if (std::ferror(_tmp_dump_file)) { - throw GSFileError("Unexpected error occurred while reading temp file"); - } - } - std::fclose(_tmp_dump_file); - } - catch (const GSError & ex) - { - std::cerr << "ERROR: " << ex.what() << std::endl; - std::fclose(_tmp_dump_file); - throw; - } -#endif -} - -bool MemPatternsForNV::convert_to_trace_entry(const mem_access_t & ma, - bool ignore_partial_warps, - std::vector & te_list) -{ - uint16_t mem_size = 
ma.size; - uint16_t mem_type_code; - - if (ma.is_load) - mem_type_code = GATHER; - else if (ma.is_store) - mem_type_code = SCATTER; - else - throw GSDataError ("Invalid mem_type must be LD(0) or ST(1)"); - - if (_id_to_opcode_short_map.find(ma.opcode_short_id) == _id_to_opcode_short_map.end()) - return false; - std::string opcode_short = _id_to_opcode_short_map[ma.opcode_short_id]; - - if (_target_opcodes.find(opcode_short) == _target_opcodes.end()) - return false; - - // TODO: This is a SLOW way of doing this - const addr_t & base_addr = ma.addrs[0]; - te_list.reserve(MemPatternsForNV::CTA_LENGTH); - for (int i = 0; i < MemPatternsForNV::CTA_LENGTH; i++) - { - if (ma.addrs[i] != 0) - { - trace_entry_t te { mem_type_code, mem_size, ma.addrs[i], base_addr }; - te_list.push_back(te); - - if (_addr_to_line_id.find(base_addr) == _addr_to_line_id.end()) { - _addr_to_line_id[base_addr] = ma.line_id; + void set_max_trace_count(const std::string & max_trace_count_str); + inline bool exceed_max_count() const { + if (_limit_trace_count && (_trace_info.trace_lines >= _max_trace_count)) { + return true; } - } - else if (ignore_partial_warps) - { - // Ignore memory_accesses which have less than MemPatternsForNV::CTA_LENGTH return false; } - } - return true; -} - -void MemPatternsForNV::handle_cta_memory_access(const mem_access_t * ma) -{ - if (exceed_max_count()) { return; } - - if (!_first_trace_seen) { - _first_trace_seen = true; - printf("First pass to find top gather / scatter iaddresses\n"); - fflush(stdout); - -#ifndef USE_VECTOR_FOR_SECOND_PASS - // Open an output file for dumping temp data used exclusively by second_pass - _tmp_dump_file = tmpfile(); - if (!_tmp_dump_file) { - throw GSFileError("Unable to create a temp file for second pass"); - } -#endif - } - - if (_write_trace_file && _ofs_tmp.is_open()) { - // Write entry to trace_output file - _ofs_tmp.write(reinterpret_cast(ma), sizeof *ma); - _traces_written++; - } -#if 0 - std::stringstream ss; - //ss << "CTX " 
<< HEX(ctx) << " - grid_launch_id " - ss << "GSNV_TRACE: CTX " << " - grid_launch_id " - << ma->grid_launch_id << " - CTA " << ma->cta_id_x << "," << ma->cta_id_y << "," << ma->cta_id_z - << " - warp " << ma->warp_id << " - " << get_opcode(ma->opcode_id) - << " - shortOpcode: " << ma->opcode_short_id - << " isLoad: " << ma->is_load << " isStore: " << ma->is_store - << " size: " << ma->size << " - "; - - for (int i = 0; i < MemPatternsForNV::CTA_LENGTH; i++) { - ss << HEX(ma->addrs[i]) << " "; - } - std::cout << ss.str() << std::endl; -#endif - - // Convert to vector of trace_entry_t if full warp. ignore partial warps. - std::vector te_list; - te_list.reserve(MemPatternsForNV::CTA_LENGTH); - - bool status = convert_to_trace_entry(*ma, true, te_list); - if (!status) return; - - uint64_t min_size = !te_list.empty() ? (te_list[0].size) + 1 : 0; - if (min_size > 0 && valid_gs_stride(te_list, min_size)) - { - for (auto it = te_list.begin(); it != te_list.end(); it++) - { - handle_trace_entry(InstrAddrAdapterForNV(*it)); - } - _traces_handled++; - } -} - -bool MemPatternsForNV::valid_gs_stride(const std::vector & te_list, const uint32_t min_stride) -{ - bool valid_stride = false; - uint32_t min_stride_found = INT32_MAX; - uint64_t last_addr = 0; - bool first = true; - for (auto it = te_list.begin(); it != te_list.end(); it++) - { - const trace_entry_t & te = *it; - if (first) { - first = false; - last_addr = te.addr; - continue; - } - - uint64_t diff = std::labs (last_addr - (uint64_t)te.addr); - if (diff < min_stride) - return false; - - if (diff < min_stride_found) - min_stride_found = diff; - - last_addr = te.addr; - } - - return min_stride_found >= min_stride; -} - -void MemPatternsForNV::set_trace_file(const std::string & trace_file_name) -{ - if (trace_file_name == _trace_out_file_name) { - throw GSError ("Cannot set trace input file to same name as trace output file [" + trace_file_name + "]."); - } - _trace_file_name = trace_file_name; -} + // Mainly Called by 
nvbit kernel + void set_config_file (const std::string & config_file); -void MemPatternsForNV::set_trace_out_file(const std::string & trace_out_file_name) -{ - try - { - if (trace_out_file_name.empty()) { - throw GSError ("Cannot set trace output file to empty filename [" + trace_out_file_name + "]."); - } - if (trace_out_file_name == _trace_file_name) { - throw GSError ("Cannot set trace output file to same name as trace input file [" + trace_out_file_name + "]."); - } - - _trace_out_file_name = trace_out_file_name; - _tmp_trace_out_file_name = _trace_out_file_name + ".tmp"; + void update_metrics(); - // Open a temp file for writing data - _ofs_tmp.open(_tmp_trace_out_file_name, std::ios::binary | std::ios::trunc | std::ios::in | std::ios::out); - if (!_ofs_tmp.is_open()) { - throw GSFileError("Unable to open " + _tmp_trace_out_file_name + " for writing"); - } - std::remove(_tmp_trace_out_file_name.c_str()); // Force auto cleanup + void process_traces(); + void update_source_lines(); + double update_source_lines_from_binary(mem_access_type); + void process_second_pass(); - // Open a ouput file for writing data header and appending data - _ofs.open(_trace_out_file_name, std::ios::binary | std::ios::trunc); - if (!_ofs.is_open()) { - throw GSFileError("Unable to open " + _trace_out_file_name + " for writing"); - } - - _write_trace_file = true; - } - catch (const std::exception & ex) - { - std::cerr << "ERROR: " << ex.what() << std::endl; - throw; - } -} - -void MemPatternsForNV::write_trace_out_file() -{ - if (!_write_trace_file || !_first_trace_seen) return; - - /// TODO: COMPRESS trace_file - try - { - std::cout << "\nSaving trace file - traces_written: " << _traces_written - << " traced_handled: " << _traces_handled << "\n" << std::endl; - - _ofs_tmp.flush(); - - // Write header - trace_header_t header; - header.num_maps = NUM_MAPS; - header.num_map_entires = _id_to_opcode_map.size() + - _id_to_opcode_short_map.size() + - _id_to_line_map.size(); - 
header.total_traces = _traces_written; - - _ofs.write(reinterpret_cast(&header), sizeof header); - - // Write Maps - trace_map_entry_t m_entry; - strncpy(m_entry.map_name, ID_TO_OPCODE, MAP_NAME_SIZE-1); - for (auto itr = _id_to_opcode_map.begin(); itr != _id_to_opcode_map.end(); itr++) - { - m_entry.id = itr->first; - strncpy(m_entry.val, itr->second.c_str(), MAP_VALUE_LONG_SIZE-1); - _ofs.write(reinterpret_cast(&m_entry), sizeof m_entry); - } - - strncpy(m_entry.map_name, ID_TO_OPCODE_SHORT, MAP_NAME_SIZE-1); - //uint64_t opcode_short_map_len = _id_to_opcode_short_map.size(); - for (auto itr = _id_to_opcode_short_map.begin(); itr != _id_to_opcode_short_map.end(); itr++) - { - m_entry.id = itr->first; - strncpy(m_entry.val, itr->second.c_str(), MAP_VALUE_LONG_SIZE-1); - _ofs.write(reinterpret_cast(&m_entry), sizeof m_entry); - } - - strncpy(m_entry.map_name, ID_TO_LINE, MAP_NAME_SIZE-1); - //uint64_t line_map_len = _id_to_line_map.size(); - for (auto itr = _id_to_line_map.begin(); itr != _id_to_line_map.end(); itr++) + std::string addr_to_line(addr_t addr) { - m_entry.id = itr->first; - strncpy(m_entry.val, itr->second.c_str(), MAP_VALUE_LONG_SIZE-1); - _ofs.write(reinterpret_cast(&m_entry), sizeof m_entry); - } - _ofs.flush(); - - // Write file contents - _ofs_tmp.seekp(0); - _ofs << _ofs_tmp.rdbuf(); - _ofs.flush(); - _ofs.close(); - _ofs_tmp.close(); - - std::remove(_tmp_trace_out_file_name.c_str()); - - std::cout << "Mappings found" << std::endl; - - std::cout << "-- OPCODE_ID to OPCODE MAPPING -- " << std::endl; - for (auto itr = _id_to_opcode_map.begin(); itr != _id_to_opcode_map.end(); itr++) { - std::cout << itr->first << " -> " << itr->second << std::endl; - } - - std::cout << "-- OPCODE_SHORT_ID to OPCODE_SHORT MAPPING -- " << std::endl; - for (auto itr = _id_to_opcode_short_map.begin(); itr != _id_to_opcode_short_map.end(); itr++) { - std::cout << itr->first << " -> " << itr->second << std::endl; - } - - std::cout << "-- LINE_ID to LINE MAPPING -- " << 
std::endl; - for (auto itr = _id_to_line_map.begin(); itr != _id_to_line_map.end(); itr++) { - std::cout << itr->first << " -> " << itr->second << std::endl; - } - } - catch (const std::exception & ex) - { - std::remove(_tmp_trace_out_file_name.c_str()); - std::cerr << "ERROR: failed to write trace file: " << _trace_file_name << std::endl; - throw; - } -} - -void MemPatternsForNV::set_max_trace_count(const std::string & max_trace_count_str) -{ - try { - _max_trace_count = (int64_t) std::stoi(max_trace_count_str); - if (_max_trace_count < 0) { - throw GSError("Max Trace count must be greater than 0"); - } - _limit_trace_count = true; - std::cout << "Max Trace Count set to: " << _max_trace_count << std::endl; - } - catch (const std::exception & ex) { - std::cerr << "Failed to set Max Trace Count from value: " << max_trace_count_str - << " with error: " << ex.what() << std::endl; - } -} - -void MemPatternsForNV::set_config_file(const std::string & config_file) -{ - _config_file_name = config_file; - std::ifstream ifs; - ifs.open(_config_file_name); - if (!ifs.is_open()) - throw GSFileError("Unable to open config file: " + _config_file_name); - - while (!ifs.eof()) - { - std::string name; - std::string value; - ifs >> name >> value; - if (name.empty() || value.empty() || name[0] == '#') - continue; - - std::cout << "CONFIG: name: " << name << " value: " << value << std::endl; - - if (NVGS_TARGET_KERNEL == name) { - _target_kernels.insert(value); - } - else if (NVGS_TRACE_OUT_FILE == name) { - set_trace_out_file(value); - } - else if (NVGS_FILE_PREFIX == name) { - set_file_prefix(value); - } - else if (NVGS_MAX_TRACE_COUNT == name) { - set_max_trace_count(value); - } - else { - std::cerr << "Unknown setting <" << name << "> with value <" << value << "> " - << "specified in config file: " << _config_file_name << " ignoring ..." 
<< std::endl; - } - } -} - -bool MemPatternsForNV::should_instrument(const std::string & kernel_name) -{ - if (exceed_max_count()) { return false; } - - // Instrument all if none specified - if (_target_kernels.size() == 0) { - std::cout << "Instrumenting all : " << kernel_name << std::endl; - return true; - } + auto itr = _addr_to_line_id.find(addr); + if (itr != _addr_to_line_id.end()) { + auto it2 = _id_to_line_map.find(itr->second); + if (it2 != _id_to_line_map.end()) { + return it2->second; + } + } + return std::string(); + } + + void set_trace_out_file(const std::string & trace_file_name); + void write_trace_out_file(); + + // Handle an nvbit CTA memory update + void handle_cta_memory_access(const mem_access_t * ma); + // Validate cta stride is within minimum + bool valid_gs_stride(const std::vector & te_list, const uint32_t min_stride); + + // TODO: Migrate these to template functions ! + // ----------------------------------------------------------------- + + // Store opcode mappings + bool add_or_update_opcode(int opcode_id, const std::string & opcode); + // Retrieve opcode mapping by opcode_id + const std::string & get_opcode(int opcode_id); + + // Store opcode_short mappings + bool add_or_update_opcode_short(int opcode_short_id, const std::string & opcode_short); + // Retrieve opcode_short mapping by opcode_short_id + const std::string & get_opcode_short(int opcode_short_id); + + // Store line mappings + bool add_or_update_line(int line_id, const std::string & line); + // Retrieve line number mapping by line_id + const std::string & get_line(int line_id); + + // ----------------------------------------------------------------- + + bool should_instrument(const std::string & kernel_name); + + bool convert_to_trace_entry(const mem_access_t & ma, bool ignore_partial_warps, std::vector & te_list); + + private: + + std::pair _metrics; + std::pair _iinfo; + TraceInfo _trace_info; + InstrWindow _iw; + + std::string _trace_file_name; // Input compressed nvbit 
trace file + std::string _file_prefix; // Used by gs_patterns_core to write out pattern files + std::string _trace_out_file_name; // Ouput file containing nvbit traces encounterd if requested + std::string _tmp_trace_out_file_name; // Temp file used to store traces before re-writing to _trace_out_filename + + std::string _config_file_name; + std::set _target_kernels; + bool _limit_trace_count = false; + int64_t _max_trace_count = 0; + uint64_t _traces_written = 0; + uint64_t _traces_handled = 0; + + bool _write_trace_file = false; + bool _first_trace_seen = false; + + /* The output stream used to temporarily hold raw trace warp data (mem_access_t) before being writen to _trace_out_file_name */ + std::fstream _ofs_tmp; + /* The output stream cooresponding to _trace_out_file_name */ + std::ofstream _ofs; + + #ifdef USE_VECTOR_FOR_SECOND_PASS + /* A vector used to store intermediate trace records (trace_entry_t) exclusively for use by second pass + (instead of _tmp_dump_file if USE_VECTOR_FOR_SECOND_PASS is defined) */ + std::vector _traces; + #else + /* A temp file used to store intermediate trace records (trace_entry_t) exclusively for use by second pass */ + std::FILE * _tmp_dump_file; + #endif + + std::map _id_to_opcode_map; + std::map _id_to_opcode_short_map; + std::map _id_to_line_map; + std::unordered_map _addr_to_line_id; + const std::set _target_opcodes; + }; - auto itr = _target_kernels.find (kernel_name); - if ( itr != _target_kernels.end()) // Hard code for now - { - std::cout << "Instrumenting: " << kernel_name << std::endl; - return true; - } +} // namespace gsnv_patterns - return false; -} +} // namespace gs_patterns diff --git a/gsnv_test.cpp b/gsnv_test.cpp deleted file mode 100644 index f55d834..0000000 --- a/gsnv_test.cpp +++ /dev/null @@ -1,69 +0,0 @@ - -#include - -#include "gs_patterns.h" -#include "gsnv_patterns.h" - -#define NVGS_CONFIG_FILE "NVGS_CONFIG_FILE" - -int main(int argc, char **argv) -{ - try - { - if (argc != 2) { - size_t pos = 
std::string(argv[0]).find_last_of("/"); - std::string prog_name = std::string(argv[0]).substr(pos+1); - throw GSError("Invalid program arguments, should be: " + prog_name + " "); - } - - MemPatternsForNV mp; - - // nvbit trace file with memory access traces - mp.set_trace_file(argv[1]); - - const char * config_file = std::getenv(NVGS_CONFIG_FILE); - if (config_file) { - mp.set_config_file(config_file); - } - - // File to save nvbit memory accessses to - //mp.set_trace_out_file(mp.get_file_prefix() + ".nvbit.bin"); - - // ----------------- Process Traces ----------------- - - mp.process_traces(); - - mp.write_trace_out_file(); - - // ----------------- Generate Patterns ----------------- - - mp.generate_patterns(); - } - catch (const GSFileError & ex) - { - std::cerr << "ERROR: " << ex.what() << std::endl; - exit(-1); - } - catch (const GSAllocError & ex) - { - std::cerr << "ERROR: " << ex.what() << std::endl; - exit(-1); - } - catch (const GSDataError & ex) - { - std::cerr << "ERROR: " << ex.what() << std::endl; - exit(1); - } - catch (const GSError & ex) - { - std::cerr << "ERROR: " << ex.what() << std::endl; - exit(1); - } - catch (const std::exception & ex) - { - std::cerr << "ERROR: " << ex.what() << std::endl; - exit(-1); - } - - return 0; -} diff --git a/gspin_patterns.cpp b/gspin_patterns.cpp index dc5d3ee..e29922a 100644 --- a/gspin_patterns.cpp +++ b/gspin_patterns.cpp @@ -1,4 +1,3 @@ - #include #include #include @@ -11,6 +10,7 @@ #include "gs_patterns.h" #include "gs_patterns_core.h" +#include "gspin_patterns.h" #include "utils.h" //Terminal colors @@ -25,33 +25,12 @@ #define ADDREND (0xFFFFFFFFFFFFFFFFUL) #define ADDRUSYNC (0xFFFFFFFFFFFFFFFEUL) -//FROM DR SOURCE -//DR trace -struct _trace_entry_t { - unsigned short type; // 2 bytes: trace_type_t - unsigned short size; - union { - addr_t addr; - unsigned char length[sizeof(addr_t)]; - }; -} __attribute__((packed)); -typedef struct _trace_entry_t trace_entry_t; - -gzFile open_trace_file(const std::string & 
trace_file_name) +namespace gs_patterns { - gzFile fp; - - fp = gzopen(trace_file_name.c_str(), "hrb"); - if (NULL == fp) { - throw GSFileError("Could not open " + trace_file_name + "!"); - } - return fp; -} - -void close_trace_file (gzFile & fp) +namespace gspin_patterns { - gzclose(fp); -} + +using namespace gs_patterns::gs_patterns_core; int drline_read(gzFile fp, trace_entry_t *val, trace_entry_t **p_val, int *edx) { @@ -74,93 +53,6 @@ int drline_read(gzFile fp, trace_entry_t *val, trace_entry_t **p_val, int *edx) return 1; } -// An adapter for trace_entry_t -class InstrAddrAdapterForPin : public InstrAddrAdapter -{ -public: - InstrAddrAdapterForPin(const trace_entry_t * te) - { - /// TODO: do we need to copy this, will we outlive trace_entry_t which is passed in ? - _te.type = te->type; - _te.size = te->size; - _te.addr = te->addr; - } - InstrAddrAdapterForPin(const trace_entry_t te) : _te(te) { } - - virtual ~InstrAddrAdapterForPin() { } - - virtual bool is_valid() const override { return !(0 == _te.type && 0 == _te.size); } - virtual bool is_mem_instr() const override { return ((_te.type == 0x0) || (_te.type == 0x1)); } - virtual bool is_other_instr() const override { return ((_te.type >= 0xa) && (_te.type <= 0x10)) || (_te.type == 0x1e); } - - virtual mem_access_type get_mem_access_type() const override { - if (!is_mem_instr()) throw GSDataError("Not a Memory Instruction - unable to determine Access Type"); - // Must be 0x0 or 0x1 - if (_te.type == 0x0) return GATHER; - else return SCATTER; - } - virtual inline mem_instr_type get_mem_instr_type() const override { return VECTOR; } - - virtual size_t get_size() const override { return _te.size; } - virtual addr_t get_address() const override { return _te.addr; } - virtual addr_t get_iaddr() const override { return _te.addr; } - virtual addr_t get_maddr() const override { return _te.addr / _te.size; } - virtual unsigned short get_type() const override { return _te.type; } // must be 0 for GATHER, 1 for SCATTER 
!! - virtual int64_t min_size() const { return VBYTES; } - - virtual void output(std::ostream & os) const override { - os << "InstrAddrAdapterForPin: trace entry: type: [" << _te.type << "] size: [" << _te.size << "]"; - } - -private: - trace_entry_t _te; -}; - -class MemPatternsForPin : public MemPatterns -{ -public: - MemPatternsForPin() : _metrics(GATHER, SCATTER), - _iinfo(GATHER, SCATTER) { } - virtual ~MemPatternsForPin() override { } - - void handle_trace_entry(const InstrAddrAdapter & ia) override; - void generate_patterns() override; - - Metrics & get_metrics(mem_access_type) override; - InstrInfo & get_iinfo(mem_access_type) override; - - Metrics & get_gather_metrics() override { return _metrics.first; } - Metrics & get_scatter_metrics() override { return _metrics.second; } - InstrInfo & get_gather_iinfo () override { return _iinfo.first; } - InstrInfo & get_scatter_iinfo () override { return _iinfo.second; } - TraceInfo & get_trace_info() override { return _trace_info; } - InstrWindow & get_instr_window() override { return _iw; } - - void set_trace_file(const std::string & trace_file_name) { _trace_file_name = trace_file_name; } - const std::string & get_trace_file_name() { return _trace_file_name; } - - void set_binary_file(const std::string & binary_file_name) { _binary_file_name = binary_file_name; } - const std::string & get_binary_file_name() { return _binary_file_name; } - - void update_metrics(); - - std::string get_file_prefix (); - - void process_traces(); - void update_source_lines(); - double update_source_lines_from_binary(mem_access_type); - void process_second_pass(gzFile & fp_drtrace); - -private: - std::pair _metrics; - std::pair _iinfo; - TraceInfo _trace_info; - InstrWindow _iw; - - std::string _trace_file_name; - std::string _binary_file_name; -}; - Metrics & MemPatternsForPin::get_metrics(mem_access_type m) { switch (m) @@ -190,7 +82,7 @@ InstrInfo & MemPatternsForPin::get_iinfo(mem_access_type m) void 
MemPatternsForPin::handle_trace_entry(const InstrAddrAdapter & ia) { // Call libgs_patterns - ::handle_trace_entry(*this, ia); + gs_patterns_core::handle_trace_entry(*this, ia); } void MemPatternsForPin::generate_patterns() @@ -205,13 +97,21 @@ void MemPatternsForPin::generate_patterns() // ----------------- Create Spatter File ----------------- - ::create_spatter_file(*this, get_file_prefix()); + create_spatter_file(*this, get_file_prefix()); } void MemPatternsForPin::update_metrics() { - gzFile fp_drtrace = ::open_trace_file(get_trace_file_name()); + gzFile fp_drtrace; + try + { + fp_drtrace = ::open_trace_file(get_trace_file_name()); + } + catch (const std::runtime_error & ex) + { + throw GSFileError(ex.what()); + } // Get top gathers get_gather_metrics().ntop = get_top_target(get_gather_iinfo(), get_gather_metrics()); @@ -225,8 +125,8 @@ void MemPatternsForPin::update_metrics() // ----------------- Normalize ----------------- - ::normalize_stats(get_gather_metrics()); - ::normalize_stats(get_scatter_metrics()); + normalize_stats(get_gather_metrics()); + normalize_stats(get_scatter_metrics()); close_trace_file(fp_drtrace); } @@ -271,8 +171,16 @@ void MemPatternsForPin::process_traces() { int iret = 0; trace_entry_t *drline; + gzFile fp_drtrace; - gzFile fp_drtrace = open_trace_file(get_trace_file_name()); + try + { + fp_drtrace = open_trace_file(get_trace_file_name()); + } + catch (const std::runtime_error & ex) + { + throw GSFileError(ex.what()); + } printf("First pass to find top gather / scatter iaddresses\n"); fflush(stdout); @@ -326,7 +234,7 @@ void MemPatternsForPin::process_second_pass(gzFile & fp_drtrace) //decode drtrace drline = p_drtrace; - breakout = ::handle_2nd_pass_trace_entry(InstrAddrAdapterForPin(drline), get_gather_metrics(), get_scatter_metrics(), + breakout = handle_2nd_pass_trace_entry(InstrAddrAdapterForPin(drline), get_gather_metrics(), get_scatter_metrics(), iaddr, maddr, mcnt, gather_base, scatter_base); p_drtrace++; @@ -348,52 +256,6 
@@ void MemPatternsForPin::update_source_lines() get_scatter_metrics().cnt = update_source_lines_from_binary(SCATTER); } -int main(int argc, char **argv) -{ - try - { - if (argc != 3) { - throw GSError("Invalid arguments, should be: trace.gz binary_file_name"); - } - - MemPatternsForPin mp; - - mp.set_trace_file(argv[1]); - mp.set_binary_file(argv[2]); - - // ----------------- Process Traces ----------------- +} // namespace gspin_patterns - mp.process_traces(); - - // ----------------- Generate Patterns ----------------- - - mp.generate_patterns(); - } - catch (const GSFileError & ex) - { - std::cerr << "ERROR: " << ex.what() << std::endl; - exit(-1); - } - catch (const GSAllocError & ex) - { - std::cerr << "ERROR: " << ex.what() << std::endl; - exit(-1); - } - catch (const GSDataError & ex) - { - std::cerr << "ERROR: " << ex.what() << std::endl; - exit(1); - } - catch (const GSError & ex) - { - std::cerr << "ERROR: " << ex.what() << std::endl; - exit(1); - } - catch (const std::exception & ex) - { - std::cerr << "ERROR: " << ex.what() << std::endl; - exit(-1); - } - - return 0; -} +} // namespace gs_patterns \ No newline at end of file diff --git a/gspin_patterns.h b/gspin_patterns.h new file mode 100644 index 0000000..2e0afc2 --- /dev/null +++ b/gspin_patterns.h @@ -0,0 +1,134 @@ +#pragma once + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "gs_patterns.h" +#include "gs_patterns_core.h" +#include "utils.h" + +//Terminal colors +#define KNRM "\x1B[0m" +#define KRED "\x1B[31m" +#define KYEL "\x1B[33m" +#define KBLU "\x1B[34m" +#define KMAG "\x1B[35m" +#define KCYN "\x1B[36m" + +//address status +#define ADDREND (0xFFFFFFFFFFFFFFFFUL) +#define ADDRUSYNC (0xFFFFFFFFFFFFFFFEUL) + +namespace gs_patterns +{ +namespace gspin_patterns +{ + //FROM DR SOURCE + //DR trace + struct _trace_entry_t { + unsigned short type; // 2 bytes: trace_type_t + unsigned short size; + union { + addr_t addr; + unsigned char 
length[sizeof(addr_t)]; + }; + } __attribute__((packed)); + typedef struct _trace_entry_t trace_entry_t; + + // An adapter for trace_entry_t + class InstrAddrAdapterForPin : public InstrAddrAdapter + { + public: + InstrAddrAdapterForPin(const trace_entry_t * te) + { + /// TODO: do we need to copy this, will we outlive trace_entry_t which is passed in ? + _te.type = te->type; + _te.size = te->size; + _te.addr = te->addr; + } + InstrAddrAdapterForPin(const trace_entry_t te) : _te(te) { } + + virtual ~InstrAddrAdapterForPin() { } + + virtual bool is_valid() const override { return !(0 == _te.type && 0 == _te.size); } + virtual bool is_mem_instr() const override { return ((_te.type == 0x0) || (_te.type == 0x1)); } + virtual bool is_other_instr() const override { return ((_te.type >= 0xa) && (_te.type <= 0x10)) || (_te.type == 0x1e); } + + virtual mem_access_type get_mem_access_type() const override { + if (!is_mem_instr()) throw GSDataError("Not a Memory Instruction - unable to determine Access Type"); + // Must be 0x0 or 0x1 + if (_te.type == 0x0) return GATHER; + else return SCATTER; + } + virtual inline mem_instr_type get_mem_instr_type() const override { return VECTOR; } + + virtual size_t get_size() const override { return _te.size; } + virtual addr_t get_address() const override { return _te.addr; } + virtual addr_t get_iaddr() const override { return _te.addr; } + virtual addr_t get_maddr() const override { return _te.addr / _te.size; } + virtual unsigned short get_type() const override { return _te.type; } // must be 0 for GATHER, 1 for SCATTER !! 
+ virtual int64_t min_size() const { return VBYTES; } + + virtual void output(std::ostream & os) const override { + os << "InstrAddrAdapterForPin: trace entry: type: [" << _te.type << "] size: [" << _te.size << "]"; + } + + private: + trace_entry_t _te; + }; + + class MemPatternsForPin : public MemPatterns + { + public: + MemPatternsForPin() : _metrics(GATHER, SCATTER), + _iinfo(GATHER, SCATTER) { } + virtual ~MemPatternsForPin() override { } + + void handle_trace_entry(const InstrAddrAdapter & ia) override; + void generate_patterns() override; + + Metrics & get_metrics(mem_access_type) override; + InstrInfo & get_iinfo(mem_access_type) override; + + Metrics & get_gather_metrics() override { return _metrics.first; } + Metrics & get_scatter_metrics() override { return _metrics.second; } + InstrInfo & get_gather_iinfo () override { return _iinfo.first; } + InstrInfo & get_scatter_iinfo () override { return _iinfo.second; } + TraceInfo & get_trace_info() override { return _trace_info; } + InstrWindow & get_instr_window() override { return _iw; } + + void set_trace_file(const std::string & trace_file_name) { _trace_file_name = trace_file_name; } + const std::string & get_trace_file_name() { return _trace_file_name; } + + void set_binary_file(const std::string & binary_file_name) { _binary_file_name = binary_file_name; } + const std::string & get_binary_file_name() { return _binary_file_name; } + + void update_metrics(); + + std::string get_file_prefix (); + + void process_traces(); + void update_source_lines(); + double update_source_lines_from_binary(mem_access_type); + void process_second_pass(gzFile & fp_drtrace); + + private: + std::pair _metrics; + std::pair _iinfo; + TraceInfo _trace_info; + InstrWindow _iw; + + std::string _trace_file_name; + std::string _binary_file_name; + }; + +} // namespace gspin_patterns + +} // namespace gs_patterns \ No newline at end of file diff --git a/nvbit_tracing/nvgs_trace/common.h b/nvbit_tracing/gsnv_trace/common.h similarity 
index 99% rename from nvbit_tracing/nvgs_trace/common.h rename to nvbit_tracing/gsnv_trace/common.h index 2945f19..148a8cb 100644 --- a/nvbit_tracing/nvgs_trace/common.h +++ b/nvbit_tracing/gsnv_trace/common.h @@ -25,6 +25,8 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#pragma once + #include /* information collected in the instrumentation function and passed diff --git a/nvbit_tracing/nvgs_trace/nvgs_trace.cu b/nvbit_tracing/gsnv_trace/gsnv_trace.cu similarity index 99% rename from nvbit_tracing/nvgs_trace/nvgs_trace.cu rename to nvbit_tracing/gsnv_trace/gsnv_trace.cu index 4c85fca..a56d6fd 100644 --- a/nvbit_tracing/nvgs_trace/nvgs_trace.cu +++ b/nvbit_tracing/gsnv_trace/gsnv_trace.cu @@ -51,6 +51,9 @@ #include #include +using namespace gs_patterns; +using namespace gs_patterns::gs_patterns_core; +using namespace gs_patterns::gsnv_patterns; #define HEX(x) \ "0x" << std::setfill('0') << std::setw(16) << std::hex << (uint64_t)x \ diff --git a/nvbit_tracing/nvgs_trace/inject_funcs.cu b/nvbit_tracing/gsnv_trace/inject_funcs.cu similarity index 100% rename from nvbit_tracing/nvgs_trace/inject_funcs.cu rename to nvbit_tracing/gsnv_trace/inject_funcs.cu diff --git a/utils.cpp b/utils.cpp index 69ba3c3..2c849f7 100644 --- a/utils.cpp +++ b/utils.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include "utils.h" @@ -107,3 +108,19 @@ int cnt_str(char *line, char c) { return cnt; } + +gzFile open_trace_file(const std::string & trace_file_name) +{ + gzFile fp; + + fp = gzopen(trace_file_name.c_str(), "hrb"); + if (NULL == fp) { + throw std::runtime_error("Could not open " + trace_file_name + "!"); + } + return fp; +} + +void close_trace_file (gzFile & fp) +{ + gzclose(fp); +} diff --git a/utils.h b/utils.h index f7bffa8..1a197a0 100644 --- a/utils.h +++ b/utils.h @@ -6,6 +6,8 @@ #include #include +#include +#include static inline int popcount(uint64_t x); @@ -17,4 +19,8 @@ const char *str_replace(const char *orig, const char *rep, 
const char *with); char *get_str(char *line, char *bparse, char *aparse); -int cnt_str(char *line, char c); \ No newline at end of file +int cnt_str(char *line, char c); + +gzFile open_trace_file(const std::string & trace_file_name); + +void close_trace_file (gzFile & fp); From a0b999312cca41b554d66448e734ecb99c85163b Mon Sep 17 00:00:00 2001 From: christopher Date: Wed, 24 Apr 2024 08:46:24 -0400 Subject: [PATCH 54/76] Makefile moved. --- nvbit_tracing/{nvgs_trace => gsnv_trace}/Makefile | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename nvbit_tracing/{nvgs_trace => gsnv_trace}/Makefile (100%) diff --git a/nvbit_tracing/nvgs_trace/Makefile b/nvbit_tracing/gsnv_trace/Makefile similarity index 100% rename from nvbit_tracing/nvgs_trace/Makefile rename to nvbit_tracing/gsnv_trace/Makefile From 69db6dab434daf6d570d9aeb509c87046520b016 Mon Sep 17 00:00:00 2001 From: christopher Date: Wed, 24 Apr 2024 10:44:53 -0400 Subject: [PATCH 55/76] If requested, write trace files out before generating patterns. 
--- gs_patterns_main.cpp | 6 ------ gsnv_patterns.cpp | 8 +++++++- gsnv_patterns.h | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/gs_patterns_main.cpp b/gs_patterns_main.cpp index f0fe5f6..dbae5ae 100644 --- a/gs_patterns_main.cpp +++ b/gs_patterns_main.cpp @@ -47,7 +47,6 @@ int main(int argc, char **argv) MemPatternsForNV mp; - // nvbit trace file with memory access traces mp.set_trace_file(argv[1]); const char * config_file = std::getenv(NVGS_CONFIG_FILE); @@ -55,15 +54,10 @@ int main(int argc, char **argv) mp.set_config_file(config_file); } - // File to save nvbit memory accessses to - //mp.set_trace_out_file(mp.get_file_prefix() + ".nvbit.bin"); - // ----------------- Process Traces ----------------- mp.process_traces(); - mp.write_trace_out_file(); - // ----------------- Generate Patterns ----------------- mp.generate_patterns(); diff --git a/gsnv_patterns.cpp b/gsnv_patterns.cpp index c4cca4c..21386a9 100644 --- a/gsnv_patterns.cpp +++ b/gsnv_patterns.cpp @@ -143,6 +143,10 @@ void MemPatternsForNV::handle_trace_entry(const InstrAddrAdapter & ia) void MemPatternsForNV::generate_patterns() { + // ----------------- Write out Trace Files (if requested ) ----------------- + + write_trace_out_file(); + // ----------------- Update Source Lines ----------------- update_source_lines(); @@ -358,7 +362,9 @@ void MemPatternsForNV::process_traces() void MemPatternsForNV::update_source_lines() { - // Find source lines for gathers - Must have symbol + // Requires Kernel having been built with "--generate-line-info" so that trace file header contain mappings + + // Find source lines for gathers printf("\nSymbol table lookup for gathers..."); fflush(stdout); diff --git a/gsnv_patterns.h b/gsnv_patterns.h index a6fcfdd..9a75e6f 100644 --- a/gsnv_patterns.h +++ b/gsnv_patterns.h @@ -242,8 +242,8 @@ namespace gsnv_patterns std::map _id_to_opcode_map; std::map _id_to_opcode_short_map; - std::map _id_to_line_map; - std::unordered_map _addr_to_line_id; + 
std::map _id_to_line_map; // Contains source line_id to source line mappings + std::unordered_map _addr_to_line_id; // Contains address to line_id mappings const std::set _target_opcodes; }; From 7d848b2e1363db4273dafbdd5ade820bf9da5da8 Mon Sep 17 00:00:00 2001 From: christopher Date: Wed, 24 Apr 2024 11:25:30 -0400 Subject: [PATCH 56/76] cleanup --- gs_patterns_main.cpp | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/gs_patterns_main.cpp b/gs_patterns_main.cpp index dbae5ae..9b14d67 100644 --- a/gs_patterns_main.cpp +++ b/gs_patterns_main.cpp @@ -38,13 +38,13 @@ int main(int argc, char **argv) std::unique_ptr mp (use_gs_nv ? (MemPatterns *) new MemPatternsForNV : (MemPatterns *) new MemPatternsForPin); + if (argc != 3) { + usage(prog_name); + throw GSError("Invalid program arguments"); + } + if (use_gs_nv) { - if (argc != 3) { - usage(prog_name); - throw GSError("Invalid program arguments"); - } - MemPatternsForNV mp; mp.set_trace_file(argv[1]); @@ -64,11 +64,6 @@ int main(int argc, char **argv) } else { - if (argc != 3) { - usage(prog_name); - throw GSError("Invalid program arguments"); - } - MemPatternsForPin mp; mp.set_trace_file(argv[1]); From 9adbc8eedd7d8348bdcc61fb3b2353f9fdf92d82 Mon Sep 17 00:00:00 2001 From: christopher Date: Wed, 24 Apr 2024 17:04:02 -0400 Subject: [PATCH 57/76] Add an option for verbose logging. 
--- gs_patterns.h | 3 + gs_patterns_main.cpp | 11 +++- gsnv_patterns.cpp | 145 +++++++++++++++++++++++++------------------ gsnv_patterns.h | 20 +++--- gspin_patterns.h | 21 ++++--- 5 files changed, 121 insertions(+), 79 deletions(-) diff --git a/gs_patterns.h b/gs_patterns.h index 964c822..0522bd5 100644 --- a/gs_patterns.h +++ b/gs_patterns.h @@ -255,6 +255,9 @@ namespace gs_patterns virtual InstrInfo & get_scatter_iinfo() = 0; virtual TraceInfo & get_trace_info() = 0; virtual InstrWindow & get_instr_window() = 0; + + virtual void set_log_level(int8_t ll) = 0; + virtual int8_t get_log_level() = 0; }; } // namespace gs_patterns diff --git a/gs_patterns_main.cpp b/gs_patterns_main.cpp index 9b14d67..a23b745 100644 --- a/gs_patterns_main.cpp +++ b/gs_patterns_main.cpp @@ -19,7 +19,8 @@ using namespace gs_patterns::gspin_patterns; void usage (const std::string & prog_name) { - std::cerr << "Usage: " << prog_name << " | " + prog_name + " [-nv]" << std::endl; + std::cerr << "Usage: " << prog_name << " \n" + << " " << prog_name << " -nv [-v]" << std::endl; } int main(int argc, char **argv) @@ -27,10 +28,14 @@ int main(int argc, char **argv) try { bool use_gs_nv = false; + bool verbose = false; for (int i = 0; i < argc; i++) { if (std::string(argv[i]) == "-nv") { use_gs_nv = true; } + else if (std::string(argv[i]) == "-v") { + verbose = true; + } } size_t pos = std::string(argv[0]).find_last_of("/"); @@ -38,7 +43,7 @@ int main(int argc, char **argv) std::unique_ptr mp (use_gs_nv ? 
(MemPatterns *) new MemPatternsForNV : (MemPatterns *) new MemPatternsForPin); - if (argc != 3) { + if (argc < 3) { usage(prog_name); throw GSError("Invalid program arguments"); } @@ -53,6 +58,7 @@ int main(int argc, char **argv) if (config_file) { mp.set_config_file(config_file); } + if (verbose) mp.set_log_level(1); // ----------------- Process Traces ----------------- @@ -68,6 +74,7 @@ int main(int argc, char **argv) mp.set_trace_file(argv[1]); mp.set_binary_file(argv[2]); + if (verbose) mp.set_log_level(1); // ----------------- Process Traces ----------------- diff --git a/gsnv_patterns.cpp b/gsnv_patterns.cpp index 21386a9..4dd896c 100644 --- a/gsnv_patterns.cpp +++ b/gsnv_patterns.cpp @@ -7,7 +7,6 @@ #include #include #include -#include #include #include @@ -287,9 +286,12 @@ void MemPatternsForNV::process_traces() uint32_t count = 0; trace_map_entry_t * p_map_entry = NULL; trace_map_entry_t map_entry[1]; - while (count < p_header->num_map_entires && tline_read_maps(fp_trace, map_entry, &p_map_entry, &iret) ) - { - std::cout << "MAP: " << p_map_entry -> map_name << " entry [" << p_map_entry->id << "] -> [" << p_map_entry->val << "]" << std::endl; + while (count < p_header->num_map_entires && tline_read_maps(fp_trace, map_entry, &p_map_entry, &iret) ) { + + if (_log_level >= 1) { + std::cout << "MAP: " << p_map_entry->map_name << " entry [" << p_map_entry->id << "] -> [" + << p_map_entry->val << "]" << std::endl; + } if (std::string(p_map_entry->map_name) == ID_TO_OPCODE) { _id_to_opcode_map[p_map_entry->id] = p_map_entry->val; @@ -513,8 +515,7 @@ bool MemPatternsForNV::convert_to_trace_entry(const mem_access_t & ma, return true; } -void MemPatternsForNV::handle_cta_memory_access(const mem_access_t * ma) -{ +void MemPatternsForNV::handle_cta_memory_access(const mem_access_t * ma) { if (exceed_max_count()) { return; } if (!_first_trace_seen) { @@ -533,24 +534,25 @@ void MemPatternsForNV::handle_cta_memory_access(const mem_access_t * ma) if (_write_trace_file && 
_ofs_tmp.is_open()) { // Write entry to trace_output file - _ofs_tmp.write(reinterpret_cast(ma), sizeof *ma); + _ofs_tmp.write(reinterpret_cast(ma), sizeof *ma); _traces_written++; } -#if 0 - std::stringstream ss; - //ss << "CTX " << HEX(ctx) << " - grid_launch_id " - ss << "GSNV_TRACE: CTX " << " - grid_launch_id " - << ma->grid_launch_id << " - CTA " << ma->cta_id_x << "," << ma->cta_id_y << "," << ma->cta_id_z - << " - warp " << ma->warp_id << " - " << get_opcode(ma->opcode_id) - << " - shortOpcode: " << ma->opcode_short_id - << " isLoad: " << ma->is_load << " isStore: " << ma->is_store - << " size: " << ma->size << " - "; - - for (int i = 0; i < MemPatternsForNV::CTA_LENGTH; i++) { - ss << HEX(ma->addrs[i]) << " "; - } - std::cout << ss.str() << std::endl; -#endif + + if (_log_level >= 2) { + std::stringstream ss; + //ss << "CTX " << HEX(ctx) << " - grid_launch_id " + ss << "GSNV_TRACE: CTX " << " - grid_launch_id " + << ma->grid_launch_id << " - CTA " << ma->cta_id_x << "," << ma->cta_id_y << "," << ma->cta_id_z + << " - warp " << ma->warp_id << " - " << get_opcode(ma->opcode_id) + << " - shortOpcode: " << ma->opcode_short_id + << " isLoad: " << ma->is_load << " isStore: " << ma->is_store + << " size: " << ma->size << " - "; + + for (int i = 0; i < MemPatternsForNV::CTA_LENGTH; i++) { + ss << HEX(ma->addrs[i]) << " "; + } + std::cout << ss.str() << std::endl; + } // Convert to vector of trace_entry_t if full warp. ignore partial warps. 
std::vector te_list; @@ -704,21 +706,23 @@ void MemPatternsForNV::write_trace_out_file() std::remove(_tmp_trace_out_file_name.c_str()); - std::cout << "Mappings found" << std::endl; + if (_log_level >= 1) { + std::cout << "Mappings found" << std::endl; - std::cout << "-- OPCODE_ID to OPCODE MAPPING -- " << std::endl; - for (auto itr = _id_to_opcode_map.begin(); itr != _id_to_opcode_map.end(); itr++) { - std::cout << itr->first << " -> " << itr->second << std::endl; - } + std::cout << "-- OPCODE_ID to OPCODE MAPPING -- " << std::endl; + for (auto itr = _id_to_opcode_map.begin(); itr != _id_to_opcode_map.end(); itr++) { + std::cout << itr->first << " -> " << itr->second << std::endl; + } - std::cout << "-- OPCODE_SHORT_ID to OPCODE_SHORT MAPPING -- " << std::endl; - for (auto itr = _id_to_opcode_short_map.begin(); itr != _id_to_opcode_short_map.end(); itr++) { - std::cout << itr->first << " -> " << itr->second << std::endl; - } + std::cout << "-- OPCODE_SHORT_ID to OPCODE_SHORT MAPPING -- " << std::endl; + for (auto itr = _id_to_opcode_short_map.begin(); itr != _id_to_opcode_short_map.end(); itr++) { + std::cout << itr->first << " -> " << itr->second << std::endl; + } - std::cout << "-- LINE_ID to LINE MAPPING -- " << std::endl; - for (auto itr = _id_to_line_map.begin(); itr != _id_to_line_map.end(); itr++) { - std::cout << itr->first << " -> " << itr->second << std::endl; + std::cout << "-- LINE_ID to LINE MAPPING -- " << std::endl; + for (auto itr = _id_to_line_map.begin(); itr != _id_to_line_map.end(); itr++) { + std::cout << itr->first << " -> " << itr->second << std::endl; + } } } catch (const std::exception & ex) @@ -729,19 +733,16 @@ void MemPatternsForNV::write_trace_out_file() } } -void MemPatternsForNV::set_max_trace_count(const std::string & max_trace_count_str) +void MemPatternsForNV::set_max_trace_count(int64_t max_trace_count) { - try { - _max_trace_count = (int64_t) std::stoi(max_trace_count_str); - if (_max_trace_count < 0) { - throw GSError("Max 
Trace count must be greater than 0"); - } - _limit_trace_count = true; - std::cout << "Max Trace Count set to: " << _max_trace_count << std::endl; + if (max_trace_count < 0) { + throw GSError("Max Trace count must be greater than 0"); } - catch (const std::exception & ex) { - std::cerr << "Failed to set Max Trace Count from value: " << max_trace_count_str - << " with error: " << ex.what() << std::endl; + _max_trace_count = max_trace_count; + _limit_trace_count = true; + + if (_log_level >= 1) { + std::cout << "Max Trace Count set to: " << _max_trace_count << std::endl; } } @@ -753,6 +754,7 @@ void MemPatternsForNV::set_config_file(const std::string & config_file) if (!ifs.is_open()) throw GSFileError("Unable to open config file: " + _config_file_name); + std::stringstream ss; while (!ifs.eof()) { std::string name; @@ -761,25 +763,40 @@ void MemPatternsForNV::set_config_file(const std::string & config_file) if (name.empty() || value.empty() || name[0] == '#') continue; - std::cout << "CONFIG: name: " << name << " value: " << value << std::endl; + ss << "CONFIG: name: " << name << " value: " << value << std::endl; - if (NVGS_TARGET_KERNEL == name) { - _target_kernels.insert(value); - } - else if (NVGS_TRACE_OUT_FILE == name) { - set_trace_out_file(value); - } - else if (NVGS_FILE_PREFIX == name) { - set_file_prefix(value); - } - else if (NVGS_MAX_TRACE_COUNT == name) { - set_max_trace_count(value); + try { + if (GSNV_TARGET_KERNEL == name) { + _target_kernels.insert(value); + } + else if (GSNV_TRACE_OUT_FILE == name) { + set_trace_out_file(value); + } + else if (GSNV_FILE_PREFIX == name) { + set_file_prefix(value); + } + else if (GSNV_MAX_TRACE_COUNT == name) { + int64_t num_val = (int64_t) std::stoi(value); + set_max_trace_count(num_val); + } + else if (GSNV_LOG_LEVEL == name) { + int8_t level = atoi(value.c_str()); + set_log_level(level); + } + else { + std::cerr << "Unknown setting <" << name << "> with value <" << value << "> " + << "specified in config file: " 
<< _config_file_name << " ignoring ..." << std::endl; + } } - else { - std::cerr << "Unknown setting <" << name << "> with value <" << value << "> " - << "specified in config file: " << _config_file_name << " ignoring ..." << std::endl; + catch (const std::exception & ex) { + std::cerr << "Failed to set config setting <" << name << "> with value <" << value << "> " + << "due to error: " << ex.what() << " ignoring ..." << std::endl; } } + + if (_log_level >= 1) { + std::cout << ss.str(); + } } bool MemPatternsForNV::should_instrument(const std::string & kernel_name) @@ -788,14 +805,18 @@ bool MemPatternsForNV::should_instrument(const std::string & kernel_name) // Instrument all if none specified if (_target_kernels.size() == 0) { - std::cout << "Instrumenting all : " << kernel_name << std::endl; + if (_log_level >= 1) { + std::cout << "Instrumenting all : " << kernel_name << std::endl; + } return true; } auto itr = _target_kernels.find (kernel_name); if ( itr != _target_kernels.end()) // Hard code for now { - std::cout << "Instrumenting: " << kernel_name << std::endl; + if (_log_level >= 1) { + std::cout << "Instrumenting: " << kernel_name << std::endl; + } return true; } diff --git a/gsnv_patterns.h b/gsnv_patterns.h index 9a75e6f..17deeff 100644 --- a/gsnv_patterns.h +++ b/gsnv_patterns.h @@ -108,11 +108,12 @@ namespace gsnv_patterns static constexpr const char * ID_TO_OPCODE_SHORT = "ID_TO_OPCODE_SHORT"; static constexpr const char * ID_TO_LINE = "ID_TO_LINE"; - static constexpr const char * NVGS_TARGET_KERNEL = "NVGS_TARGET_KERNEL"; - static constexpr const char * NVGS_TRACE_OUT_FILE = "NVGS_TRACE_OUT_FILE"; - static constexpr const char * NVGS_PROGRAM_BINARY = "NVGS_PROGRAM_BINARY"; - static constexpr const char * NVGS_FILE_PREFIX = "NVGS_FILE_PREFIX"; - static constexpr const char * NVGS_MAX_TRACE_COUNT = "NVGS_MAX_TRACE_COUNT"; + static constexpr const char * GSNV_TARGET_KERNEL = "GSNV_TARGET_KERNEL"; + static constexpr const char * GSNV_TRACE_OUT_FILE = 
"GSNV_TRACE_OUT_FILE"; + static constexpr const char * GSNV_PROGRAM_BINARY = "GSNV_PROGRAM_BINARY"; + static constexpr const char * GSNV_FILE_PREFIX = "GSNV_FILE_PREFIX"; + static constexpr const char * GSNV_MAX_TRACE_COUNT = "GSNV_MAX_TRACE_COUNT"; + static constexpr const char * GSNV_LOG_LEVEL = "GSNV_LOG_LEVEL"; MemPatternsForNV(): _metrics(GATHER, SCATTER), @@ -135,13 +136,16 @@ namespace gsnv_patterns TraceInfo & get_trace_info() override { return _trace_info; } InstrWindow & get_instr_window() override { return _iw; } + void set_log_level(int8_t level) { _log_level = level; } + int8_t get_log_level() { return _log_level; } + void set_trace_file(const std::string & trace_file_name); inline const std::string & get_trace_file_name() { return _trace_file_name; } inline void set_file_prefix(const std::string & prefix) { _file_prefix = prefix; } std::string get_file_prefix(); - void set_max_trace_count(const std::string & max_trace_count_str); + void set_max_trace_count(int64_t max_trace_count); inline bool exceed_max_count() const { if (_limit_trace_count && (_trace_info.trace_lines >= _max_trace_count)) { return true; @@ -226,9 +230,11 @@ namespace gsnv_patterns bool _write_trace_file = false; bool _first_trace_seen = false; + int8_t _log_level = 0; + /* The output stream used to temporarily hold raw trace warp data (mem_access_t) before being writen to _trace_out_file_name */ std::fstream _ofs_tmp; - /* The output stream cooresponding to _trace_out_file_name */ + /* The output stream cooresponding to _trace_out_file_name. 
Used to store final nvbit trace data with header */ std::ofstream _ofs; #ifdef USE_VECTOR_FOR_SECOND_PASS diff --git a/gspin_patterns.h b/gspin_patterns.h index 2e0afc2..6a5cab4 100644 --- a/gspin_patterns.h +++ b/gspin_patterns.h @@ -97,18 +97,21 @@ namespace gspin_patterns Metrics & get_metrics(mem_access_type) override; InstrInfo & get_iinfo(mem_access_type) override; - Metrics & get_gather_metrics() override { return _metrics.first; } - Metrics & get_scatter_metrics() override { return _metrics.second; } - InstrInfo & get_gather_iinfo () override { return _iinfo.first; } - InstrInfo & get_scatter_iinfo () override { return _iinfo.second; } - TraceInfo & get_trace_info() override { return _trace_info; } - InstrWindow & get_instr_window() override { return _iw; } + Metrics & get_gather_metrics() override { return _metrics.first; } + Metrics & get_scatter_metrics() override { return _metrics.second; } + InstrInfo & get_gather_iinfo () override { return _iinfo.first; } + InstrInfo & get_scatter_iinfo () override { return _iinfo.second; } + TraceInfo & get_trace_info() override { return _trace_info; } + InstrWindow & get_instr_window() override { return _iw; } + + void set_log_level(int8_t level) override { _log_level = level; } + int8_t get_log_level() override { return _log_level; } void set_trace_file(const std::string & trace_file_name) { _trace_file_name = trace_file_name; } - const std::string & get_trace_file_name() { return _trace_file_name; } + const std::string & get_trace_file_name() { return _trace_file_name; } void set_binary_file(const std::string & binary_file_name) { _binary_file_name = binary_file_name; } - const std::string & get_binary_file_name() { return _binary_file_name; } + const std::string & get_binary_file_name() { return _binary_file_name; } void update_metrics(); @@ -125,6 +128,8 @@ namespace gspin_patterns TraceInfo _trace_info; InstrWindow _iw; + int8_t _log_level = 0; + std::string _trace_file_name; std::string _binary_file_name; }; 
From 07785ee6f5ab8a9514a972e01706acf1ee9fee08 Mon Sep 17 00:00:00 2001 From: christopher Date: Wed, 24 Apr 2024 17:52:02 -0400 Subject: [PATCH 58/76] Fix shared lib linking, compiler warning. Remove extra calls to write_trace_out_file() in nvbit trace mode. --- CMakeLists.txt | 8 ++++---- nvbit_tracing/gsnv_trace/gsnv_trace.cu | 3 --- utils.h | 2 -- 3 files changed, 4 insertions(+), 9 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 265c128..111b67a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -14,14 +14,14 @@ add_library(gs_patterns_core SHARED gs_patterns.h gs_patterns_core.h gs_patterns_core.cpp + gsnv_patterns.h + gsnv_patterns.cpp + gspin_patterns.h + gspin_patterns.cpp ) add_executable( gs_patterns gs_patterns_main.cpp - gsnv_patterns.cpp - gspin_patterns.cpp - gspin_patterns.h - gsnv_patterns.h ) target_link_libraries(gs_patterns gs_patterns_core) diff --git a/nvbit_tracing/gsnv_trace/gsnv_trace.cu b/nvbit_tracing/gsnv_trace/gsnv_trace.cu index a56d6fd..6208e79 100644 --- a/nvbit_tracing/gsnv_trace/gsnv_trace.cu +++ b/nvbit_tracing/gsnv_trace/gsnv_trace.cu @@ -443,9 +443,6 @@ void nvbit_at_ctx_term(CUcontext ctx) { try { - // Persist trace file - mp->write_trace_out_file(); - // Generate GS Pattern output fle mp->generate_patterns(); } diff --git a/utils.h b/utils.h index 1a197a0..43d1d78 100644 --- a/utils.h +++ b/utils.h @@ -9,8 +9,6 @@ #include #include -static inline int popcount(uint64_t x); - int startswith(const char *a, const char *b); int endswith(const char *a, const char *b); From bf58ba357427845cf08a2fce9f6d85dc707a1eca Mon Sep 17 00:00:00 2001 From: christopher Date: Thu, 25 Apr 2024 21:57:02 -0400 Subject: [PATCH 59/76] Small cleanups. 
--- CMakeLists.txt | 4 +--- gs_patterns.h | 4 +--- gspin_patterns.cpp | 2 +- utils.cpp | 9 +++++++++ utils.h | 9 +++++++++ 5 files changed, 21 insertions(+), 7 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 111b67a..9a29c61 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,10 +1,8 @@ cmake_minimum_required(VERSION 3.1...3.25) -set (CMAKE_VERBOSE_MAKEFILE "1") - project( gs_patterns VERSION 1.0 LANGUAGES CXX) -set(CMAKE_CXX_STANDARD 17) # was 20 +set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED On) #set(CMAKE_CXX_EXTENSIONS Off) diff --git a/gs_patterns.h b/gs_patterns.h index 0522bd5..15bd465 100644 --- a/gs_patterns.h +++ b/gs_patterns.h @@ -34,8 +34,6 @@ namespace gs_patterns { - //using namespace std; - typedef uintptr_t addr_t; typedef enum { GATHER=0, SCATTER } mem_access_type; typedef enum { VECTOR=0, CTA } mem_instr_type; @@ -190,7 +188,7 @@ namespace gs_patterns int64_t ngs = 0; int64_t trace_lines = 0; - bool did_opcode = false; // revist this --------------- + bool did_opcode = false; // revist this --------------- double other_cnt = 0.0; double gather_score = 0.0; double gather_occ_avg = 0.0; diff --git a/gspin_patterns.cpp b/gspin_patterns.cpp index e29922a..1bc32d0 100644 --- a/gspin_patterns.cpp +++ b/gspin_patterns.cpp @@ -106,7 +106,7 @@ void MemPatternsForPin::update_metrics() gzFile fp_drtrace; try { - fp_drtrace = ::open_trace_file(get_trace_file_name()); + fp_drtrace = open_trace_file(get_trace_file_name()); } catch (const std::runtime_error & ex) { diff --git a/utils.cpp b/utils.cpp index 2c849f7..a3a5f61 100644 --- a/utils.cpp +++ b/utils.cpp @@ -10,6 +10,11 @@ #include "utils.h" +namespace gs_patterns +{ +namespace gs_patterns_core +{ + static inline int popcount(uint64_t x) { int c; @@ -124,3 +129,7 @@ void close_trace_file (gzFile & fp) { gzclose(fp); } + +} // gs_patterns_core + +} // gs_patterns \ No newline at end of file diff --git a/utils.h b/utils.h index 43d1d78..640cc95 100644 --- a/utils.h +++ 
b/utils.h @@ -9,6 +9,11 @@ #include #include +namespace gs_patterns +{ +namespace gs_patterns_core +{ + int startswith(const char *a, const char *b); int endswith(const char *a, const char *b); @@ -22,3 +27,7 @@ int cnt_str(char *line, char c); gzFile open_trace_file(const std::string & trace_file_name); void close_trace_file (gzFile & fp); + +} // namespace gs_gs_patterns_core + +} // namespace gs_patterns \ No newline at end of file From 6387353c30a9ae3546891129693f31fdd96117a7 Mon Sep 17 00:00:00 2001 From: christopher Date: Fri, 26 Apr 2024 18:45:10 -0400 Subject: [PATCH 60/76] Small Fixes for compiler warnings on macos. --- gs_patterns_core.cpp | 8 ++++---- gsnv_patterns.cpp | 37 ++++++++++++++++++++++++------------- gsnv_patterns.h | 8 ++++---- gspin_patterns.cpp | 7 ++++--- gspin_patterns.h | 2 +- utils.cpp | 2 +- 6 files changed, 38 insertions(+), 26 deletions(-) diff --git a/gs_patterns_core.cpp b/gs_patterns_core.cpp index df3c7da..42bd714 100644 --- a/gs_patterns_core.cpp +++ b/gs_patterns_core.cpp @@ -14,8 +14,8 @@ namespace gs_patterns_core { using namespace gs_patterns; - void translate_iaddr(const std::string & binary, char *source_line, addr_t iaddr) { - + void translate_iaddr(const std::string & binary, char * source_line, addr_t iaddr) + { int i = 0; int ntranslated = 0; char path[MAX_LINE_LENGTH]; @@ -43,7 +43,7 @@ namespace gs_patterns_core } - static void create_metrics_file(FILE *fp, FILE *fp2, const std::string & file_prefix, Metrics & target_metrics, bool & first_spatter) + static void create_metrics_file(FILE * fp, FILE * fp2, const std::string & file_prefix, Metrics & target_metrics, bool & first_spatter) { int i = 0; int j = 0; @@ -549,7 +549,7 @@ namespace gs_patterns_core } } else { // belt and suspenders, yep = but helps to validate correct logic in children of InstrAddresInfo - throw GSDataError("Unknown Memory Access Type: " + ia.get_mem_access_type()); + throw GSDataError("Unknown Memory Access Type: " + 
std::to_string(ia.get_mem_access_type())); } } // MEM diff --git a/gsnv_patterns.cpp b/gsnv_patterns.cpp index 4dd896c..f864751 100644 --- a/gsnv_patterns.cpp +++ b/gsnv_patterns.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -35,7 +36,8 @@ namespace gsnv_patterns using namespace gs_patterns::gs_patterns_core; -int tline_read_header(gzFile fp, trace_header_t * val, trace_header_t **p_val, int *edx) { +int tline_read_header(gzFile fp, trace_header_t * val, trace_header_t **p_val, int *edx) +{ int idx; @@ -56,7 +58,8 @@ int tline_read_header(gzFile fp, trace_header_t * val, trace_header_t **p_val, i return 1; } -int tline_read_maps(gzFile fp, trace_map_entry_t * val, trace_map_entry_t **p_val, int *edx) { +int tline_read_maps(gzFile fp, trace_map_entry_t * val, trace_map_entry_t **p_val, int *edx) +{ int idx; @@ -77,7 +80,8 @@ int tline_read_maps(gzFile fp, trace_map_entry_t * val, trace_map_entry_t **p_va return 1; } -int tline_read(gzFile fp, mem_access_t * val, mem_access_t **p_val, int *edx) { +int tline_read(gzFile fp, mem_access_t * val, mem_access_t **p_val, int *edx) +{ int idx; @@ -107,7 +111,7 @@ Metrics & MemPatternsForNV::get_metrics(mem_access_type m) case SCATTER : return _metrics.second; break; default: - throw GSError("Unable to get Metrics - Invalid Metrics Type: " + m); + throw GSError("Unable to get Metrics - Invalid Metrics Type: " + std::to_string(m)); } } @@ -120,7 +124,7 @@ InstrInfo & MemPatternsForNV::get_iinfo(mem_access_type m) case SCATTER : return _iinfo.second; break; default: - throw GSError("Unable to get InstrInfo - Invalid Metrics Type: " + m); + throw GSError("Unable to get InstrInfo - Invalid Metrics Type: " + std::to_string(m)); } } @@ -193,7 +197,8 @@ std::string MemPatternsForNV::get_file_prefix() } // Store opcode mappings -bool MemPatternsForNV::add_or_update_opcode(int opcode_id, const std::string & opcode) { +bool MemPatternsForNV::add_or_update_opcode(int opcode_id, const std::string 
& opcode) +{ auto it = _id_to_opcode_map.find(opcode_id); if (it == _id_to_opcode_map.end()) { _id_to_opcode_map[opcode_id] = opcode; @@ -204,7 +209,8 @@ bool MemPatternsForNV::add_or_update_opcode(int opcode_id, const std::string & o } // Retrieve opcode mapping by opcode_id -const std::string & MemPatternsForNV::get_opcode(int opcode_id) { +const std::string & MemPatternsForNV::get_opcode(int opcode_id) +{ auto result = _id_to_opcode_map.find(opcode_id); if (result != _id_to_opcode_map.end()) { return result->second; @@ -215,7 +221,8 @@ const std::string & MemPatternsForNV::get_opcode(int opcode_id) { } // Store opcode_short mappings -bool MemPatternsForNV::add_or_update_opcode_short(int opcode_short_id, const std::string & opcode_short) { +bool MemPatternsForNV::add_or_update_opcode_short(int opcode_short_id, const std::string & opcode_short) +{ auto it = _id_to_opcode_short_map.find(opcode_short_id); if (it == _id_to_opcode_short_map.end()) { _id_to_opcode_short_map[opcode_short_id] = opcode_short; @@ -226,7 +233,8 @@ bool MemPatternsForNV::add_or_update_opcode_short(int opcode_short_id, const std } // Retrieve opcode_short mapping by opcode_short_id -const std::string & MemPatternsForNV::get_opcode_short(int opcode_short_id) { +const std::string & MemPatternsForNV::get_opcode_short(int opcode_short_id) +{ auto result = _id_to_opcode_short_map.find(opcode_short_id); if (result != _id_to_opcode_short_map.end()) { return result->second; @@ -237,7 +245,8 @@ const std::string & MemPatternsForNV::get_opcode_short(int opcode_short_id) { } // Store line mappings -bool MemPatternsForNV::add_or_update_line(int line_id, const std::string & line) { +bool MemPatternsForNV::add_or_update_line(int line_id, const std::string & line) +{ auto it = _id_to_line_map.find(line_id); if (it == _id_to_line_map.end()) { _id_to_line_map[line_id] = line; @@ -248,7 +257,8 @@ bool MemPatternsForNV::add_or_update_line(int line_id, const std::string & line) } // Retrieve line number mapping 
by line_id -const std::string & MemPatternsForNV::get_line(int line_id) { +const std::string & MemPatternsForNV::get_line(int line_id) +{ auto result = _id_to_line_map.find(line_id); if (result != _id_to_line_map.end()) { return result->second; @@ -515,7 +525,8 @@ bool MemPatternsForNV::convert_to_trace_entry(const mem_access_t & ma, return true; } -void MemPatternsForNV::handle_cta_memory_access(const mem_access_t * ma) { +void MemPatternsForNV::handle_cta_memory_access(const mem_access_t * ma) +{ if (exceed_max_count()) { return; } if (!_first_trace_seen) { @@ -587,7 +598,7 @@ bool MemPatternsForNV::valid_gs_stride(const std::vector & te_lis continue; } - uint64_t diff = std::labs (last_addr - (uint64_t)te.addr); + uint64_t diff = std::llabs ((int64_t)(last_addr - te.addr)); if (diff < min_stride) return false; diff --git a/gsnv_patterns.h b/gsnv_patterns.h index 17deeff..4c8aa1e 100644 --- a/gsnv_patterns.h +++ b/gsnv_patterns.h @@ -136,13 +136,13 @@ namespace gsnv_patterns TraceInfo & get_trace_info() override { return _trace_info; } InstrWindow & get_instr_window() override { return _iw; } - void set_log_level(int8_t level) { _log_level = level; } - int8_t get_log_level() { return _log_level; } + void set_log_level(int8_t level) override { _log_level = level; } + int8_t get_log_level() override { return _log_level; } void set_trace_file(const std::string & trace_file_name); - inline const std::string & get_trace_file_name() { return _trace_file_name; } + inline const std::string & get_trace_file_name() { return _trace_file_name; } - inline void set_file_prefix(const std::string & prefix) { _file_prefix = prefix; } + inline void set_file_prefix(const std::string & prefix) { _file_prefix = prefix; } std::string get_file_prefix(); void set_max_trace_count(int64_t max_trace_count); diff --git a/gspin_patterns.cpp b/gspin_patterns.cpp index 1bc32d0..a76a04e 100644 --- a/gspin_patterns.cpp +++ b/gspin_patterns.cpp @@ -32,7 +32,8 @@ namespace gspin_patterns using 
namespace gs_patterns::gs_patterns_core; -int drline_read(gzFile fp, trace_entry_t *val, trace_entry_t **p_val, int *edx) { +int drline_read(gzFile fp, trace_entry_t * val, trace_entry_t ** p_val, int * edx) +{ int idx; @@ -62,7 +63,7 @@ Metrics & MemPatternsForPin::get_metrics(mem_access_type m) case SCATTER : return _metrics.second; break; default: - throw GSError("Unable to get Metrics - Invalid Metrics Type: " + m); + throw GSError("Unable to get Metrics - Invalid Metrics Type: " + std::to_string(m)); } } @@ -75,7 +76,7 @@ InstrInfo & MemPatternsForPin::get_iinfo(mem_access_type m) case SCATTER : return _iinfo.second; break; default: - throw GSError("Unable to get InstrInfo - Invalid Metrics Type: " + m); + throw GSError("Unable to get InstrInfo - Invalid Metrics Type: " + std::to_string(m)); } } diff --git a/gspin_patterns.h b/gspin_patterns.h index 6a5cab4..a4c3caa 100644 --- a/gspin_patterns.h +++ b/gspin_patterns.h @@ -74,7 +74,7 @@ namespace gspin_patterns virtual addr_t get_iaddr() const override { return _te.addr; } virtual addr_t get_maddr() const override { return _te.addr / _te.size; } virtual unsigned short get_type() const override { return _te.type; } // must be 0 for GATHER, 1 for SCATTER !! 
- virtual int64_t min_size() const { return VBYTES; } + virtual int64_t min_size() const override { return VBYTES; } virtual void output(std::ostream & os) const override { os << "InstrAddrAdapterForPin: trace entry: type: [" << _te.type << "] size: [" << _te.size << "]"; diff --git a/utils.cpp b/utils.cpp index a3a5f61..9b6e828 100644 --- a/utils.cpp +++ b/utils.cpp @@ -68,7 +68,7 @@ const char *str_replace(const char *orig, const char *rep, const char *with) { // count the number of replacements needed ins = (char*)orig; - for (count = 0; tmp = strstr(ins, rep); ++count) { + for (count = 0; (tmp = strstr(ins, rep)); ++count) { ins = tmp + len_rep; } From 83135061d649fdd6958a2ba3879bf0f7bfbffe98 Mon Sep 17 00:00:00 2001 From: christopher Date: Tue, 30 Apr 2024 15:48:04 -0400 Subject: [PATCH 61/76] gsnv_trace Readme --- nvbit_tracing/README.md | 113 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 113 insertions(+) create mode 100644 nvbit_tracing/README.md diff --git a/nvbit_tracing/README.md b/nvbit_tracing/README.md new file mode 100644 index 0000000..609b713 --- /dev/null +++ b/nvbit_tracing/README.md @@ -0,0 +1,113 @@ +# Setup +Download NVBit from the folliwing locations: + +https://github.com/NVlabs/NVBit + +#### Tested with version 1.5.5 + +https://github.com/NVlabs/NVBit/releases/tag/1.5.5 + +``` +# or (for example for Linux x86_64) + +wget https://github.com/NVlabs/NVBit/releases/download/1.5.5/nvbit-Linux-x86_64-1.5.5.tar.bz2 +``` + + +``` +module load gcc #or make sure you have gcc. Tested with 8.5.0 and 11.4.0 + +tar zxvf + +export NVBIT_DIR= # full path + +cp -rv nvbit_tracing/gsnv_trace $NVBIT_DIR/tools/ + +cd $NVBIT_DIR + +#Compile tools and test apps. Make sure the gsnv_trace tool compiled. $NVBIT_DIR/tools/gsnv_trace/gsnv_trace.so +make -j +``` + + +*** NOTE *** make sure you gzip the nvbit trace output file before attempting to use with gs_patterns. 
+ +# gsnv_trace + +The gsnv_trace tool will instrument one or more CUDA kernels within a CUDA application and pass the resulting memory traces to the gs_patterns_core library. +Once the application has completed and all kernels are retired the gs_patterns_core library will begin processing the trace data and automatically generate the pattern outputs and pattern output files. +This includes the JSON file containing Gather/Scatter Patterns. + +### configuration +gsnv_trace tool can be configured by setting the GSNV_CONFIG_FILE environment variable to a config file. +The config file should have 1 configuration setting perline. Configuration settings take the form " " where there is a space between the config item and its value. + +Example: + +``` +echo "GSNV_LOG_LEVEL 1" > ./gsnv_config.txt +echo "GSNV_TRACE_OUT_FILE trace_file.nvbit.bin" >> ./gsnv_config.txt +echo "GSNV_TARGET_KERNEL SweepUCBxyzKernel" >> ./gsnv_config.txt +echo "GSNV_FILE_PREFIX trace_file" >> ./gsnv_config.txt + +export GSNV_CONFIG_FILE=./gsnv_config.txt +``` + +Additional settings which are supported by NVBit can also be set via additional environment variables. To see these please visit the NVBit documentation. +Setting covered here are specific to the gsnv_trace tool. + +### Instrumenting an application + +To starat instrumenting a CUDA application using gsnv_trace. The gsnv_trace.so libary previously built will need to be specified using LD_PRELOAD. + +Example: + +``` +LD_PRELOAD=$NVBIT_DIR/tools/gsnv_trace/gsnv_trace.so +gzip trace_file.nvbit.bin +``` + +This will load gsnv_trace.so and then execute the specified application. NVBit will instrument the application using gsnv_trace.so which will call into libgs_patterns_core to write out the resulting trace file and generate memory patterns within the trace. +The gzip command will compress the resulting trace file for use by gs_patterns in a subsequent run. + +### Generating Memory Patterns using an existing trace file. 
+ +In the previous section on Instrumenting an application, we used gsnv_trace.so to instrument an application; the resulting trace file was then compressed. +The instrumentation run also generated pattern files. +If we want to rerun the pattern generation we can do so using the generated (and compressed) trace file without re-instrumenting the application as this is much faster. +To do this we just need to run the gs_patterns binary with the trace file and the "-nv " option. The "-nv" option indicates that the trace file is an nvbit trace. + +Example: + +``` +export GS_PATTERNS_DIR=/path/to/gs_patterns/binary/ +$GS_PATTERNS_DIR/gs_patterns -nv +``` + +### Important Notes + +As of NVBit 1.5.5, when building gsnv_trace within the NVBit source tree it *may* be required to specify an older version of CUDA +in order to enable NVBit to correctly emit the runtime instructions. Without this the gsnv_trace library will still be built but will be unable to instrument CUDA kernels. + +For instance we were able to build a working gsnv_trace using CUDA api version 11.7 and lower and use that on higher versions of the CUDA environment such as CUDA 12.3. +However as of NVBit 1.5.5 it was not possible to get a working version of gsnv_trace when we built it using 12.3 directly. 
+ +Example: + +``` +export LD_LIBRARY_PATH=/path/to/cuda/11.7/lib:$LD_LIBRARY_PATH +export PATH=/path/to/cuda/11.7/bin:$PATH +cd $NVBIT_DIR +make +``` + +Then in another shell simply load the desired CUDA library version using module load or manually, e.g.: + +``` +export LD_LIBRARY_PATH=/path/to/new/cuda/12.3/lib:$LD_LIBRARY_PATH +export PATH=/path/to/new/cuda/12.3/bin:$PATH + +# point to where you built gsnv_trace.so (We can now instrument under CUDA 12.3) +LD_PRELOAD=$NVBIT_DIR/tools/gsnv_trace/gsnv_trace.so +gzip trace_file.nvbit.bin +``` From 3fd15599ea7d9792cff9c594a0b6216d2d13a885 Mon Sep 17 00:00:00 2001 From: christopher Date: Tue, 30 Apr 2024 19:48:35 -0400 Subject: [PATCH 62/76] Description of config items. --- nvbit_tracing/README.md | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/nvbit_tracing/README.md b/nvbit_tracing/README.md index 609b713..5da91d9 100644 --- a/nvbit_tracing/README.md +++ b/nvbit_tracing/README.md @@ -25,12 +25,12 @@ cp -rv nvbit_tracing/gsnv_trace $NVBIT_DIR/tools/ cd $NVBIT_DIR -#Compile tools and test apps. Make sure the gsnv_trace tool compiled. $NVBIT_DIR/tools/gsnv_trace/gsnv_trace.so +#Compile tools and test apps. Make sure the gsnv_trace tool compiled. If successful, this will produce $NVBIT_DIR/tools/gsnv_trace/gsnv_trace.so make -j ``` -*** NOTE *** make sure you gzip the nvbit trace output file before attempting to use with gs_patterns. +*** NOTE *** make sure you gzip the nvbit trace output file before attempting to use with gs_patterns. # gsnv_trace @@ -40,7 +40,19 @@ This includes the JSON file containing Gather/Scatter Patterns. ### configuration gsnv_trace tool can be configured by setting the GSNV_CONFIG_FILE environment variable to a config file. -The config file should have 1 configuration setting perline. Configuration settings take the form " " where there is a space between the config item and its value. +The config file should have 1 configuration setting per line. 
Configuration settings take the form " " where there is a space between the config item and its value. + +The following is a list of configuration items currently supported: + +| Config | Description | possible values | +|---------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------| +| GSNV_LOG_LEVEL | Sets the log level (only 0-2 are currently supported) | 0 to 255 | +| GSNV_TARGET_KERNEL | Specifies the names of Kernels which will be instrumented, separated by spaces; if none is provided all Kernels will be instrumented. | A String | +| GSNV_FILE_PREFIX | Can be used to specify the prefix of output files, e.g. if prefix is "trace_file" then output files will be named trace_file.json, etc. If none is provided one will be inferred from the output file if that is provided | A String | +| GSNV_TRACE_OUT_FILE | Specifies the name of the output file which will be written with trace data. | A String | +| GSNV_MAX_TRACE_COUNT| Specifies the maximum number of memory traces which are processed; once this number of traces is seen instrumentation is disabled (Can be useful to produce a small trace file for testing) | An Integer e.g. 1000000 | + + Example: @@ -56,6 +68,8 @@ export GSNV_CONFIG_FILE=./gsnv_config.txt Additional settings which are supported by NVBit can also be set via additional environment variables. To see these please visit the NVBit documentation. Setting covered here are specific to the gsnv_trace tool. +NOTE: It is highly recommended to specify a target kernel using GSNV_TARGET_KERNEL as this allows the tool to be used more efficiently and also results in smaller trace files. + ### Instrumenting an application To starat instrumenting a CUDA application using gsnv_trace. The gsnv_trace.so libary previously built will need to be specified using LD_PRELOAD. 
From cd7104da11a1cb735468cb9949fec8e71c944c8b Mon Sep 17 00:00:00 2001 From: christopher Date: Wed, 1 May 2024 22:43:58 -0400 Subject: [PATCH 63/76] Update readme for nvbit support. --- README.md | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 51c5297..0bb4b60 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,14 @@ # Description -Memory analysis tool for finding gather / scatter (gs) accesses from DynamoRio traces. gs_patterns discovers gather/scatters from analyzing access patterns in memory traces (doesn't just look for gs instructions). gs_patterns writes the "subtraces" to a binary trace and spatter yaml format. The source lines of the top aggressors are reported. Use the provided pin clients in the pin_tracing folder or use DynamoRio. Pin tends to be more reliable for larger applications. +Memory analysis tool for finding gather / scatter (gs) accesses from DynamoRio & NVBit traces. +gs_patterns discovers gather/scatters from analyzing access patterns in memory traces (doesn't just look for gs instructions). gs_patterns writes the "subtraces" to a binary trace and spatter yaml format. +The source lines of the top aggressors are reported. + +For CPU applications use the provided pin client in pin_tracing folder (or DynamoRio). Pin tends to be more reliable for larger applications. + +For CUDA kernels use the provided nvbit client in the nvbit_tracing folder. + +See the README in the respective folders for more detailed information on these tools. + # Build ``` @@ -10,13 +19,25 @@ make ``` # Use + +## For Pin/DynamoRio ``` -gs_pattern +gs_pattern ``` -trace file should be gzipped. Binary file should be compiled with symbols turned on (-g) + +## For NVBit (CUDA Kernels) + +``` +gs_pattern -nv +``` + +Trace file should be gzipped. For Pin or DynamoRio, Binary file should be compiled with symbols turned on (-g). 
+ +For NVBit tracing the kernel must be compiled with line numbers (--generate-line-info). Please see nvbit_tracing/README.md for detailed information on how to extract traces for CUDA kernels which are compatible with gs_patterns. # How gs_patterns works -g/s accesses are found by looking at repeated instruction addresses (loops) that are memory instructions (scalar and vector). The first pass finds the top g/s's. The second pass focuses on the top g/s accesses and records the normalized address distances to a binary file and spatter yaml. +g/s accesses are found by looking at repeated instruction addresses (loops) that are memory instructions (scalar and vector). +The first pass finds the top g/s's. The second pass focuses on the top g/s accesses and records the normalized address distances to a binary file and spatter yaml. # License BSD-3 License. See [the LICENSE file](https://github.com/lanl/gs_patterns/blob/main/LICENSE). From 82c35811ecc02104adb31c9b561e0fafaf506222 Mon Sep 17 00:00:00 2001 From: christopher Date: Thu, 2 May 2024 12:57:44 -0400 Subject: [PATCH 64/76] Increasing PSIZE - previous size caused PSIZE exceeded when running UMT with -d 3,3,3 -b 1 --- gs_patterns.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gs_patterns.h b/gs_patterns.h index 15bd465..fbad257 100644 --- a/gs_patterns.h +++ b/gs_patterns.h @@ -25,7 +25,7 @@ #define NSTRIDES 15 //Threshold for number of unique distances #define OUTTHRESH (0.5) //Threshold for percentage of distances at boundaries of histogram #define NTOP (10) -#define PSIZE (1<<27) // Was 2^23 (8mb) +#define PSIZE (1<<28) // Was 2^23 (8mb) //DONT CHANGE #define VBYTES (VBITS/8) From 5d8255da1d5470b02c4bdc94da7690485ccd453c Mon Sep 17 00:00:00 2001 From: christopher Date: Thu, 2 May 2024 13:02:27 -0400 Subject: [PATCH 65/76] Update log message. 
--- gsnv_patterns.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/gsnv_patterns.cpp b/gsnv_patterns.cpp index f864751..ac925e6 100644 --- a/gsnv_patterns.cpp +++ b/gsnv_patterns.cpp @@ -664,8 +664,8 @@ void MemPatternsForNV::write_trace_out_file() /// TODO: COMPRESS trace_file try { - std::cout << "\nSaving trace file - traces_written: " << _traces_written - << " traced_handled: " << _traces_handled << "\n" << std::endl; + std::cout << "\nSaving trace file - writing: " << _traces_written + << " traces_handled: " << _traces_handled << " ... \n" << std::endl; _ofs_tmp.flush(); @@ -717,6 +717,8 @@ void MemPatternsForNV::write_trace_out_file() std::remove(_tmp_trace_out_file_name.c_str()); + std::cout << "Saving trace file - complete" << std::endl; + if (_log_level >= 1) { std::cout << "Mappings found" << std::endl; From e259a0bfe9b5055df6c9e22c082d3dab890418f2 Mon Sep 17 00:00:00 2001 From: christopher Date: Wed, 15 May 2024 16:05:48 -0400 Subject: [PATCH 66/76] Formatting updates. 
--- gs_patterns_core.h | 30 +++++++++++++------------- gs_patterns_main.cpp | 2 +- nvbit_tracing/gsnv_trace/gsnv_trace.cu | 4 ++-- utils.cpp | 22 ++++++++----------- utils.h | 14 +++++------- 5 files changed, 32 insertions(+), 40 deletions(-) diff --git a/gs_patterns_core.h b/gs_patterns_core.h index 182f744..c918532 100644 --- a/gs_patterns_core.h +++ b/gs_patterns_core.h @@ -10,28 +10,28 @@ namespace gs_patterns { namespace gs_patterns_core { - void translate_iaddr(const std::string &binary, char *source_line, addr_t iaddr); + void translate_iaddr(const std::string & binary, char * source_line, addr_t iaddr); - void handle_trace_entry(MemPatterns &mp, const InstrAddrAdapter &ia); + void handle_trace_entry(MemPatterns & mp, const InstrAddrAdapter & ia); - void display_stats(MemPatterns &mp); + void display_stats(MemPatterns & mp); - int get_top_target(InstrInfo &target_iinfo, Metrics &target_metrics); + int get_top_target(InstrInfo & target_iinfo, Metrics & target_metrics); - void normalize_stats(Metrics &target_metrics); + void normalize_stats(Metrics & target_metrics); - bool handle_2nd_pass_trace_entry(const InstrAddrAdapter &ia, - Metrics &gather_metrics, Metrics &scatter_metrics, - addr_t &iaddr, int64_t &maddr, uint64_t &mcnt, - addr_t *gather_base, addr_t *scatter_base); + bool handle_2nd_pass_trace_entry(const InstrAddrAdapter & ia, + Metrics & gather_metrics, Metrics & scatter_metrics, + addr_t & iaddr, int64_t & maddr, uint64_t & mcnt, + addr_t * gather_base, addr_t * scatter_base); - void create_metrics_file(FILE *fp, - FILE *fp2, - const std::string &file_prefix, - Metrics &target_metrics, - bool &first_spatter); + void create_metrics_file(FILE * fp, + FILE * fp2, + const std::string & file_prefix, + Metrics & target_metrics, + bool & first_spatter); - void create_spatter_file(MemPatterns &mp, const std::string &file_prefix); + void create_spatter_file(MemPatterns & mp, const std::string & file_prefix); } // gs_patterns_core diff --git 
a/gs_patterns_main.cpp b/gs_patterns_main.cpp index a23b745..478e538 100644 --- a/gs_patterns_main.cpp +++ b/gs_patterns_main.cpp @@ -23,7 +23,7 @@ void usage (const std::string & prog_name) << " " << prog_name << " -nv [-v]" << std::endl; } -int main(int argc, char **argv) +int main(int argc, char ** argv) { try { diff --git a/nvbit_tracing/gsnv_trace/gsnv_trace.cu b/nvbit_tracing/gsnv_trace/gsnv_trace.cu index 6208e79..237f0af 100644 --- a/nvbit_tracing/gsnv_trace/gsnv_trace.cu +++ b/nvbit_tracing/gsnv_trace/gsnv_trace.cu @@ -188,8 +188,8 @@ void instrument_function_if_needed(CUcontext ctx, CUfunction func) { // Line to Line_ID /* Get line information for a particular instruction offset if available, */ /* binary must be compiled with --generate-line-info (-lineinfo) */ - char *line_str; - char *dir_str; + char * line_str; + char * dir_str; uint32_t line_num; bool status = nvbit_get_line_info(ctx, func, instr->getOffset(), &line_str, &dir_str, &line_num); diff --git a/utils.cpp b/utils.cpp index 9b6e828..9d02c4f 100644 --- a/utils.cpp +++ b/utils.cpp @@ -1,7 +1,3 @@ -// -// Created by christopher on 4/2/24. 
-// - #include #include #include @@ -25,13 +21,13 @@ static inline int popcount(uint64_t x) { } //string tools -int startswith(const char *a, const char *b) { +int startswith(const char* a, const char* b) { if (strncmp(b, a, strlen(b)) == 0) return 1; return 0; } -int endswith(const char *a, const char *b) { +int endswith(const char* a, const char* b) { int idx = strlen(a); int preidx = strlen(b); @@ -43,10 +39,10 @@ int endswith(const char *a, const char *b) { } //https://stackoverflow.com/questions/779875/what-function-is-to-replace-a-substring-from-a-string-in-c -const char *str_replace(const char *orig, const char *rep, const char *with) { - char *result; // the return string - char *ins; // the next insert point - char *tmp; // varies +const char* str_replace(const char* orig, const char* rep, const char* with) { + char* result; // the return string + char* ins; // the next insert point + char* tmp; // varies int len_rep; // length of rep (the string to remove) int len_with; // length of with (the string to replace rep with) int len_front; // distance between rep and end of last rep @@ -93,9 +89,9 @@ const char *str_replace(const char *orig, const char *rep, const char *with) { return result; } -char *get_str(char *line, char *bparse, char *aparse) { +char* get_str(char* line, char* bparse, char* aparse) { - char *sline; + char* sline; sline = (char*)str_replace(line, bparse, ""); sline = (char*)str_replace(sline, aparse, ""); @@ -103,7 +99,7 @@ char *get_str(char *line, char *bparse, char *aparse) { return sline; } -int cnt_str(char *line, char c) { +int cnt_str(char* line, char c) { int cnt = 0; for (int i = 0; line[i] != '\0'; i++) { diff --git a/utils.h b/utils.h index 640cc95..644bfbf 100644 --- a/utils.h +++ b/utils.h @@ -1,7 +1,3 @@ -// -// Created by christopher on 4/2/24. 
-// - #pragma once #include @@ -14,15 +10,15 @@ namespace gs_patterns namespace gs_patterns_core { -int startswith(const char *a, const char *b); +int startswith(const char* a, const char* b); -int endswith(const char *a, const char *b); +int endswith(const char* a, const char* b); -const char *str_replace(const char *orig, const char *rep, const char *with); +const char* str_replace(const char* orig, const char* rep, const char* with); -char *get_str(char *line, char *bparse, char *aparse); +char* get_str(char* line, char* bparse, char* aparse); -int cnt_str(char *line, char c); +int cnt_str(char* line, char c); gzFile open_trace_file(const std::string & trace_file_name); From 2cb3778bd0bbe7e29cf6db4378e104884cdd30e8 Mon Sep 17 00:00:00 2001 From: christopher Date: Fri, 21 Jun 2024 13:30:22 -0400 Subject: [PATCH 67/76] Remove unneeded/unused allocation. --- gs_patterns_main.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/gs_patterns_main.cpp b/gs_patterns_main.cpp index 478e538..c05e460 100644 --- a/gs_patterns_main.cpp +++ b/gs_patterns_main.cpp @@ -41,8 +41,6 @@ int main(int argc, char ** argv) size_t pos = std::string(argv[0]).find_last_of("/"); std::string prog_name = std::string(argv[0]).substr(pos+1); - std::unique_ptr mp (use_gs_nv ? (MemPatterns *) new MemPatternsForNV : (MemPatterns *) new MemPatternsForPin); - if (argc < 3) { usage(prog_name); throw GSError("Invalid program arguments"); From 7f112d7705ecadf60f0c48171c718d58e338e9fe Mon Sep 17 00:00:00 2001 From: christopher Date: Thu, 4 Jul 2024 15:23:50 -0400 Subject: [PATCH 68/76] Maximum memory access size is now templatized, can be different between Pin and NVBit. InstrWindow is now dynamically allocated to increase max supported size(required to support 2k). Reset and intialization now within InstrWindow class (rather than in gs_patters_core). 
--- gs_patterns.h | 74 +++++++++---- gs_patterns_core.cpp | 243 +------------------------------------------ gs_patterns_core.h | 239 +++++++++++++++++++++++++++++++++++++++++- gsnv_patterns.cpp | 5 +- gsnv_patterns.h | 10 +- gspin_patterns.cpp | 4 +- gspin_patterns.h | 12 ++- 7 files changed, 312 insertions(+), 275 deletions(-) diff --git a/gs_patterns.h b/gs_patterns.h index fbad257..a3fc1c9 100644 --- a/gs_patterns.h +++ b/gs_patterns.h @@ -15,7 +15,6 @@ //info #define CLSIZE (64) -#define VBITS (512) #define NBUFS (1LL<<10) #define IWINDOW (1024) #define NGS (8096) @@ -27,9 +26,6 @@ #define NTOP (10) #define PSIZE (1<<28) // Was 2^23 (8mb) -//DONT CHANGE -#define VBYTES (VBITS/8) - #define MAX_LINE_LENGTH 1024 namespace gs_patterns @@ -197,34 +193,73 @@ namespace gs_patterns uint64_t mcnt = 0; }; + template class InstrWindow { public: InstrWindow() { - //init window arrays + // First dimension is 0=GATHER/1=SCATTER + _w_iaddrs = new int64_t[2][IWINDOW]; + _w_bytes = new int64_t[2][IWINDOW]; + _w_maddr = new int64_t[2][IWINDOW][MAX_ACCESS_SIZE]; + _w_cnt = new int64_t[2][IWINDOW]; + + init(); + } + + virtual ~InstrWindow() { + delete [] _w_iaddrs; + delete [] _w_bytes; + delete [] _w_maddr; + delete [] _w_cnt; + } + + void init() { for (int w = 0; w < 2; w++) { for (int i = 0; i < IWINDOW; i++) { - w_iaddrs[w][i] = -1; - w_bytes[w][i] = 0; - w_cnt[w][i] = 0; - for (int j = 0; j < VBYTES; j++) - w_maddr[w][i][j] = -1; + _w_iaddrs[w][i] = -1; + _w_bytes[w][i] = 0; + _w_cnt[w][i] = 0; + for (int j = 0; j < MAX_ACCESS_SIZE; j++) + _w_maddr[w][i][j] = -1; } } } - ~InstrWindow() { } + void reset(int w) { + for (int i = 0; i < IWINDOW; i++) { + _w_iaddrs[w][i] = -1; + _w_bytes[w][i] = 0; + _w_cnt[w][i] = 0; + for (int j = 0; j < MAX_ACCESS_SIZE; j++) + _w_maddr[w][i][j] = -1; + } + } + + void reset() { + for (int w = 0; w < 2; w++) { + reset(w); + } + } InstrWindow(const InstrWindow &) = delete; InstrWindow & operator=(const InstrWindow & right) = delete; - // moved 
from static storage to instance variables (watch out for stack overflow) - // Revisit and move to heap if an issue - estimate of 2k*3 + 128k + int64_t & w_iaddrs(int32_t i, int32_t j) { return _w_iaddrs[i][j]; } + int64_t & w_bytes(int32_t i, int32_t j) { return _w_bytes[i][j]; } + int64_t & w_maddr(int32_t i, int32_t j, int32_t k) { return _w_maddr[i][j][k]; } + int64_t & w_cnt(int32_t i, int32_t j) { return _w_cnt[i][j]; } + + addr_t & get_iaddr() { return iaddr; } + int64_t & get_maddr_prev() { return maddr_prev; } + int64_t & get_maddr() { return maddr; } + + private: // First dimension is 0=GATHER/1=SCATTER - int64_t w_iaddrs[2][IWINDOW]; - int64_t w_bytes[2][IWINDOW]; - int64_t w_maddr[2][IWINDOW][VBYTES]; - int64_t w_cnt[2][IWINDOW]; + int64_t (*_w_iaddrs)[IWINDOW]; + int64_t (*_w_bytes)[IWINDOW]; + int64_t (*_w_maddr)[IWINDOW][MAX_ACCESS_SIZE]; + int64_t (*_w_cnt)[IWINDOW]; // State which must be carried with each call to handle a trace addr_t iaddr; @@ -232,6 +267,7 @@ namespace gs_patterns int64_t maddr; }; + template class MemPatterns { public: @@ -252,8 +288,8 @@ namespace gs_patterns virtual InstrInfo & get_gather_iinfo() = 0; virtual InstrInfo & get_scatter_iinfo() = 0; virtual TraceInfo & get_trace_info() = 0; - virtual InstrWindow & get_instr_window() = 0; - + virtual InstrWindow & + get_instr_window() = 0; virtual void set_log_level(int8_t ll) = 0; virtual int8_t get_log_level() = 0; }; diff --git a/gs_patterns_core.cpp b/gs_patterns_core.cpp index 42bd714..1c85c7c 100644 --- a/gs_patterns_core.cpp +++ b/gs_patterns_core.cpp @@ -42,8 +42,7 @@ namespace gs_patterns_core return; } - - static void create_metrics_file(FILE * fp, FILE * fp2, const std::string & file_prefix, Metrics & target_metrics, bool & first_spatter) + void create_metrics_file(FILE * fp, FILE * fp2, const std::string & file_prefix, Metrics & target_metrics, bool & first_spatter) { int i = 0; int j = 0; @@ -156,40 +155,6 @@ namespace gs_patterns_core } } - void 
create_spatter_file(MemPatterns & mp, const std::string & file_prefix) - { - // Create spatter file - FILE *fp, *fp2; - - if (file_prefix.empty()) throw GSFileError ("Empty file prefix provided."); - - std::string json_name = file_prefix + ".json"; - fp = fopen(json_name.c_str(), "w"); - if (NULL == fp) { - throw GSFileError("Could not open " + json_name + "!"); - } - - std::string gs_info = file_prefix + ".txt"; - fp2 = fopen(gs_info.c_str(), "w"); - if (NULL == fp2) { - throw GSFileError("Could not open " + gs_info + "!"); - } - - //Header - fprintf(fp, "[ "); - fprintf(fp2, "#sourceline, g/s, indices, percentage of g/s in trace\n"); - - bool first_spatter = true; - create_metrics_file(fp, fp2, file_prefix, mp.get_gather_metrics(), first_spatter); - - create_metrics_file(fp, fp2, file_prefix, mp.get_scatter_metrics(), first_spatter); - - //Footer - fprintf(fp, " ]"); - fclose(fp); - fclose(fp2); - } - void normalize_stats(Metrics & target_metrics) { //Normalize @@ -212,212 +177,6 @@ namespace gs_patterns_core } } - void handle_trace_entry(MemPatterns & mp, const InstrAddrAdapter & ia) - { - int i, j, k, w = 0; - int w_rw_idx; // Index into instruction window first dimension (RW: 0=Gather(R) or 1=Scatter(W)) - int w_idx; - int gs; - - auto & trace_info = mp.get_trace_info(); - auto & gather_iinfo = mp.get_gather_iinfo(); - auto & scatter_iinfo = mp.get_scatter_iinfo(); - auto & gather_metrics = mp.get_gather_metrics(); - auto & scatter_metrics = mp.get_scatter_metrics(); - auto & iw = mp.get_instr_window(); - - if (!ia.is_valid()) { - std::ostringstream os; - os << "Invalid " << ia; - throw GSDataError(os.str()); - } - - /*****************************/ - /** INSTR 0xa-0x10 and 0x1e **/ - /*****************************/ - if (ia.is_other_instr()) { - - iw.iaddr = ia.get_iaddr(); // was get_address in orig code -> get_iaddr() - - //nops - trace_info.opcodes++; - trace_info.did_opcode = true; - - /***********************/ - /** MEM 0x00 and 0x01 **/ - 
/***********************/ - } else if (ia.is_mem_instr()) { - - if (CTA == ia.get_mem_instr_type() && ia.get_iaddr() == ia.get_address()) { - iw.iaddr = ia.get_iaddr(); - trace_info.opcodes++; - trace_info.did_opcode = true; - } - - w_rw_idx = ia.get_type(); - - //printf("M DRTRACE -- iaddr: %016lx addr: %016lx cl_start: %d bytes: %d\n", - // iw.iaddr, ia.get_address(), ia.get_address() % 64, ia.get_size()); - - if ((++trace_info.mcnt % PERSAMPLE) == 0) { - #if SAMPLE - break; - #endif - printf("."); - fflush(stdout); - } - - //is iaddr in window - w_idx = -1; - for (i = 0; i < IWINDOW; i++) { - - //new iaddr - if (iw.w_iaddrs[w_rw_idx][i] == -1) { - w_idx = i; - break; - - //iaddr exists - } else if (iw.w_iaddrs[w_rw_idx][i] == iw.iaddr) { - w_idx = i; - break; - } - } - - //new window - if ((w_idx == -1) || (iw.w_bytes[w_rw_idx][w_idx] >= ia.min_size()) || // was >= VBYTES - (iw.w_cnt[w_rw_idx][w_idx] >= ia.min_size())) { // was >= VBYTES - - /***************************/ - //do analysis - /***************************/ - //i = each window - for (w = 0; w < 2; w++) { // 2 - - for (i = 0; i < IWINDOW; i++) { // 1024 - - if (iw.w_iaddrs[w][i] == -1) - break; - - int byte = iw.w_bytes[w][i] / iw.w_cnt[w][i]; - - //First pass - //Determine - //gather/scatter? - gs = -1; - for (j = 0; j < iw.w_cnt[w][i]; j++) { - - //address and cl - iw.maddr = iw.w_maddr[w][i][j]; - assert(iw.maddr > -1); - - //previous addr - if (j == 0) - iw.maddr_prev = iw.maddr - 1; - - //gather / scatter - if (iw.maddr != iw.maddr_prev) { - if ((gs == -1) && (abs(iw.maddr - iw.maddr_prev) > 1)) // ? > 1 stride (non-contiguous) <-------------------- - gs = w; - } - iw.maddr_prev = iw.maddr; - } - - // Update other_cnt - if (gs == -1) trace_info.other_cnt += iw.w_cnt[w][i]; - - // GATHER or SCATTER handling - if (gs == 0 || gs == 1) { - InstrInfo & target_iinfo = (gs == 0) ? 
gather_iinfo : scatter_iinfo; - - if (gs == 0) { - trace_info.gather_occ_avg += iw.w_cnt[w][i]; - gather_metrics.cnt += 1.0; - } - else { - trace_info.scatter_occ_avg += iw.w_cnt[w][i]; - scatter_metrics.cnt += 1.0; - } - - for (k = 0; k < NGS; k++) { - if (target_iinfo.get_iaddrs()[k] == 0) { - target_iinfo.get_iaddrs()[k] = iw.w_iaddrs[w][i]; - (target_iinfo.get_icnt()[k])++; - target_iinfo.get_occ()[k] += iw.w_cnt[w][i]; - break; - } - - if (target_iinfo.get_iaddrs()[k] == iw.w_iaddrs[w][i]) { - (target_iinfo.get_icnt()[k])++; - target_iinfo.get_occ()[k] += iw.w_cnt[w][i]; - break; - } - } - } - } //WINDOW i - - w_idx = 0; - - //reset windows - for (i = 0; i < IWINDOW; i++) { - iw.w_iaddrs[w][i] = -1; - iw.w_bytes[w][i] = 0; - iw.w_cnt[w][i] = 0; - for (j = 0; j < VBYTES; j++) - iw.w_maddr[w][i][j] = -1; - } - } // rw w - } //analysis - - //Set window values - iw.w_iaddrs[w_rw_idx][w_idx] = iw.iaddr; - iw.w_maddr[w_rw_idx][w_idx][iw.w_cnt[w_rw_idx][w_idx]] = ia.get_maddr(); - iw.w_bytes[w_rw_idx][w_idx] += ia.get_size(); - - //num access per iaddr in loop - iw.w_cnt[w_rw_idx][w_idx]++; - - if (trace_info.did_opcode) { - - trace_info.opcodes_mem++; - trace_info.addrs++; - trace_info.did_opcode = false; - - } else { - trace_info.addrs++; - } - - /***********************/ - /** SOMETHING ELSE **/ - /***********************/ - } else { - trace_info.other++; - } - - trace_info.trace_lines++; - } - - void display_stats(MemPatterns & mp) - { - printf("\n RESULTS \n"); - - printf("DRTRACE STATS\n"); - printf("DRTRACE LINES: %16lu\n", mp.get_trace_info().trace_lines); - printf("OPCODES: %16lu\n", mp.get_trace_info().opcodes); - printf("MEMOPCODES: %16lu\n", mp.get_trace_info().opcodes_mem); - printf("LOAD/STORES: %16lu\n", mp.get_trace_info().addrs); - printf("OTHER: %16lu\n", mp.get_trace_info().other); - - printf("\n"); - - printf("GATHER/SCATTER STATS: \n"); - printf("LOADS per GATHER: %16.3f\n", mp.get_trace_info().gather_occ_avg); - printf("STORES per SCATTER: 
%16.3f\n", mp.get_trace_info().scatter_occ_avg); - printf("GATHER COUNT: %16.3f (log2)\n", log(mp.get_gather_metrics().cnt) / log(2.0)); - printf("SCATTER COUNT: %16.3f (log2)\n", log(mp.get_scatter_metrics().cnt) / log(2.0)); - printf("OTHER COUNT: %16.3f (log2)\n", log(mp.get_trace_info().other_cnt) / log(2.0)); - } - - int get_top_target(InstrInfo & target_iinfo, Metrics & target_metrics) { int target_ntop = 0; diff --git a/gs_patterns_core.h b/gs_patterns_core.h index c918532..11b24c3 100644 --- a/gs_patterns_core.h +++ b/gs_patterns_core.h @@ -2,6 +2,8 @@ #pragma once #include +#include /// TODO: use cassert instead +#include #include #include "gs_patterns.h" @@ -12,9 +14,207 @@ namespace gs_patterns_core { void translate_iaddr(const std::string & binary, char * source_line, addr_t iaddr); - void handle_trace_entry(MemPatterns & mp, const InstrAddrAdapter & ia); + template + void handle_trace_entry(MemPatterns & mp, const InstrAddrAdapter & ia) + { + int i, j, k, w = 0; + int w_rw_idx; // Index into instruction window first dimension (RW: 0=Gather(R) or 1=Scatter(W)) + int w_idx; + int gs; - void display_stats(MemPatterns & mp); + auto & trace_info = mp.get_trace_info(); + auto & gather_iinfo = mp.get_gather_iinfo(); + auto & scatter_iinfo = mp.get_scatter_iinfo(); + auto & gather_metrics = mp.get_gather_metrics(); + auto & scatter_metrics = mp.get_scatter_metrics(); + auto & iw = mp.get_instr_window(); + + if (!ia.is_valid()) { + std::ostringstream os; + os << "Invalid " << ia; + throw GSDataError(os.str()); + } + + /*****************************/ + /** INSTR 0xa-0x10 and 0x1e **/ + /*****************************/ + if (ia.is_other_instr()) { + + iw.get_iaddr() = ia.get_iaddr(); // was get_address in orig code -> get_iaddr() + + //nops + trace_info.opcodes++; + trace_info.did_opcode = true; + + /***********************/ + /** MEM 0x00 and 0x01 **/ + /***********************/ + } else if (ia.is_mem_instr()) { + + if (CTA == ia.get_mem_instr_type() && 
ia.get_iaddr() == ia.get_address()) { + iw.get_iaddr() = ia.get_iaddr(); + trace_info.opcodes++; + trace_info.did_opcode = true; + } + + w_rw_idx = ia.get_type(); + + //printf("M DRTRACE -- iaddr: %016lx addr: %016lx cl_start: %d bytes: %d\n", + // iw.iaddr, ia.get_address(), ia.get_address() % 64, ia.get_size()); + + if ((++trace_info.mcnt % PERSAMPLE) == 0) { +#if SAMPLE + break; +#endif + printf("."); + fflush(stdout); + } + + //is iaddr in window + w_idx = -1; + for (i = 0; i < IWINDOW; i++) { + + //new iaddr + if (iw.w_iaddrs(w_rw_idx, i) == -1) { + w_idx = i; + break; + + //iaddr exists + } else if (iw.w_iaddrs(w_rw_idx, i) == iw.get_iaddr()) { + w_idx = i; + break; + } + } + + //new window + if ((w_idx == -1) || (iw.w_bytes(w_rw_idx, w_idx) >= ia.min_size()) || // was >= VBYTES + (iw.w_cnt(w_rw_idx, w_idx) >= ia.min_size())) { // was >= VBYTES + + /***************************/ + //do analysis + /***************************/ + //i = each window + for (w = 0; w < 2; w++) { // 2 + + for (i = 0; i < IWINDOW; i++) { // 1024 + + if (iw.w_iaddrs(w,i) == -1) + break; + + int byte = iw.w_bytes(w, i) / iw.w_cnt(w, i); + + //First pass + //Determine + //gather/scatter? + gs = -1; + for (j = 0; j < iw.w_cnt(w, i); j++) { + + //address and cl + iw.get_maddr() = iw.w_maddr(w, i, j); + assert(iw.get_maddr() > -1); + + //previous addr + if (j == 0) + iw.get_maddr_prev() = iw.get_maddr() - 1; + + //gather / scatter + if (iw.get_maddr() != iw.get_maddr_prev()) { + if ((gs == -1) && (abs(iw.get_maddr() - iw.get_maddr_prev()) > 1)) // ? > 1 stride (non-contiguous) <-------------------- + gs = w; + } + iw.get_maddr_prev() = iw.get_maddr(); + } + + // Update other_cnt + if (gs == -1) trace_info.other_cnt += iw.w_cnt(w, i); + + // GATHER or SCATTER handling + if (gs == 0 || gs == 1) { + InstrInfo & target_iinfo = (gs == 0) ? 
gather_iinfo : scatter_iinfo; + + if (gs == 0) { + trace_info.gather_occ_avg += iw.w_cnt(w, i); + gather_metrics.cnt += 1.0; + } + else { + trace_info.scatter_occ_avg += iw.w_cnt(w, i); + scatter_metrics.cnt += 1.0; + } + + for (k = 0; k < NGS; k++) { + if (target_iinfo.get_iaddrs()[k] == 0) { + target_iinfo.get_iaddrs()[k] = iw.w_iaddrs(w, i); + (target_iinfo.get_icnt()[k])++; + target_iinfo.get_occ()[k] += iw.w_cnt(w, i); + break; + } + + if (target_iinfo.get_iaddrs()[k] == iw.w_iaddrs(w, i)) { + (target_iinfo.get_icnt()[k])++; + target_iinfo.get_occ()[k] += iw.w_cnt(w, i); + break; + } + } + } + } //WINDOW i + + w_idx = 0; + + //reset windows + iw.reset(w); + } // rw w + } //analysis + + //Set window values + iw.w_iaddrs(w_rw_idx, w_idx) = iw.get_iaddr(); + //iw.w_maddr(w_rw_idx, w_idx, iw.w_cnt[w_rw_idx][w_idx]]) = ia.get_maddr(); + iw.w_maddr(w_rw_idx, w_idx, iw.w_cnt(w_rw_idx, w_idx)) = ia.get_maddr(); + iw.w_bytes(w_rw_idx, w_idx) += ia.get_size(); + + //num access per iaddr in loop + iw.w_cnt(w_rw_idx, w_idx)++; + + if (trace_info.did_opcode) { + + trace_info.opcodes_mem++; + trace_info.addrs++; + trace_info.did_opcode = false; + + } else { + trace_info.addrs++; + } + + /***********************/ + /** SOMETHING ELSE **/ + /***********************/ + } else { + trace_info.other++; + } + + trace_info.trace_lines++; + } + + template + void display_stats(MemPatterns & mp) + { + printf("\n RESULTS \n"); + + printf("DRTRACE STATS\n"); + printf("DRTRACE LINES: %16lu\n", mp.get_trace_info().trace_lines); + printf("OPCODES: %16lu\n", mp.get_trace_info().opcodes); + printf("MEMOPCODES: %16lu\n", mp.get_trace_info().opcodes_mem); + printf("LOAD/STORES: %16lu\n", mp.get_trace_info().addrs); + printf("OTHER: %16lu\n", mp.get_trace_info().other); + + printf("\n"); + + printf("GATHER/SCATTER STATS: \n"); + printf("LOADS per GATHER: %16.3f\n", mp.get_trace_info().gather_occ_avg); + printf("STORES per SCATTER: %16.3f\n", mp.get_trace_info().scatter_occ_avg); + 
printf("GATHER COUNT: %16.3f (log2)\n", log(mp.get_gather_metrics().cnt) / log(2.0)); + printf("SCATTER COUNT: %16.3f (log2)\n", log(mp.get_scatter_metrics().cnt) / log(2.0)); + printf("OTHER COUNT: %16.3f (log2)\n", log(mp.get_trace_info().other_cnt) / log(2.0)); + } int get_top_target(InstrInfo & target_iinfo, Metrics & target_metrics); @@ -31,7 +231,40 @@ namespace gs_patterns_core Metrics & target_metrics, bool & first_spatter); - void create_spatter_file(MemPatterns & mp, const std::string & file_prefix); + template + void create_spatter_file(MemPatterns & mp, const std::string & file_prefix) + { + // Create spatter file + FILE *fp, *fp2; + + if (file_prefix.empty()) throw GSFileError ("Empty file prefix provided."); + + std::string json_name = file_prefix + ".json"; + fp = fopen(json_name.c_str(), "w"); + if (NULL == fp) { + throw GSFileError("Could not open " + json_name + "!"); + } + + std::string gs_info = file_prefix + ".txt"; + fp2 = fopen(gs_info.c_str(), "w"); + if (NULL == fp2) { + throw GSFileError("Could not open " + gs_info + "!"); + } + + //Header + fprintf(fp, "[ "); + fprintf(fp2, "#sourceline, g/s, indices, percentage of g/s in trace\n"); + + bool first_spatter = true; + create_metrics_file(fp, fp2, file_prefix, mp.get_gather_metrics(), first_spatter); + + create_metrics_file(fp, fp2, file_prefix, mp.get_scatter_metrics(), first_spatter); + + //Footer + fprintf(fp, " ]"); + fclose(fp); + fclose(fp2); + } } // gs_patterns_core diff --git a/gsnv_patterns.cpp b/gsnv_patterns.cpp index ac925e6..a055a49 100644 --- a/gsnv_patterns.cpp +++ b/gsnv_patterns.cpp @@ -160,7 +160,7 @@ void MemPatternsForNV::generate_patterns() // ----------------- Create Spatter File ----------------- - create_spatter_file(*this, get_file_prefix()); + create_spatter_file(*this, get_file_prefix()); } @@ -276,7 +276,6 @@ void MemPatternsForNV::process_traces() { int iret = 0; mem_access_t * t_line; - InstrWindow iw; gzFile fp_trace; try @@ -368,7 +367,7 @@ void 
MemPatternsForNV::process_traces() get_trace_info().gather_occ_avg /= get_gather_metrics().cnt; get_trace_info().scatter_occ_avg /= get_scatter_metrics().cnt; - display_stats(*this); + display_stats(*this); } diff --git a/gsnv_patterns.h b/gsnv_patterns.h index 4c8aa1e..92a83f6 100644 --- a/gsnv_patterns.h +++ b/gsnv_patterns.h @@ -33,6 +33,8 @@ namespace gs_patterns { namespace gsnv_patterns { + constexpr std::size_t MEMORY_ACCESS_SIZE = 2048 / 8; + struct _trace_entry_t { unsigned short type; // 2 bytes: trace_type_t unsigned short size; @@ -99,7 +101,7 @@ namespace gsnv_patterns const trace_entry_t _te; }; - class MemPatternsForNV : public MemPatterns + class MemPatternsForNV : public MemPatterns { public: static const uint8_t CTA_LENGTH = 32; @@ -134,7 +136,9 @@ namespace gsnv_patterns InstrInfo & get_gather_iinfo () override { return _iinfo.first; } InstrInfo & get_scatter_iinfo () override { return _iinfo.second; } TraceInfo & get_trace_info() override { return _trace_info; } - InstrWindow & get_instr_window() override { return _iw; } + + InstrWindow & + get_instr_window() override { return _iw; } void set_log_level(int8_t level) override { _log_level = level; } int8_t get_log_level() override { return _log_level; } @@ -213,7 +217,7 @@ namespace gsnv_patterns std::pair _metrics; std::pair _iinfo; TraceInfo _trace_info; - InstrWindow _iw; + InstrWindow _iw; std::string _trace_file_name; // Input compressed nvbit trace file std::string _file_prefix; // Used by gs_patterns_core to write out pattern files diff --git a/gspin_patterns.cpp b/gspin_patterns.cpp index a76a04e..1ab3dc2 100644 --- a/gspin_patterns.cpp +++ b/gspin_patterns.cpp @@ -98,7 +98,7 @@ void MemPatternsForPin::generate_patterns() // ----------------- Create Spatter File ----------------- - create_spatter_file(*this, get_file_prefix()); + create_spatter_file(*this, get_file_prefix()); } @@ -208,7 +208,7 @@ void MemPatternsForPin::process_traces() get_trace_info().gather_occ_avg /= 
get_gather_metrics().cnt; get_trace_info().scatter_occ_avg /= get_scatter_metrics().cnt; - display_stats(*this); + display_stats(*this); } diff --git a/gspin_patterns.h b/gspin_patterns.h index a4c3caa..3dffff2 100644 --- a/gspin_patterns.h +++ b/gspin_patterns.h @@ -26,10 +26,15 @@ #define ADDREND (0xFFFFFFFFFFFFFFFFUL) #define ADDRUSYNC (0xFFFFFFFFFFFFFFFEUL) +#define VBITS (512) +#define VBYTES (VBITS/8) //DONT CHANGE + namespace gs_patterns { namespace gspin_patterns { + constexpr std::size_t MEMORY_ACCESS_SIZE = VBYTES; + //FROM DR SOURCE //DR trace struct _trace_entry_t { @@ -84,7 +89,7 @@ namespace gspin_patterns trace_entry_t _te; }; - class MemPatternsForPin : public MemPatterns + class MemPatternsForPin : public MemPatterns { public: MemPatternsForPin() : _metrics(GATHER, SCATTER), @@ -102,7 +107,8 @@ namespace gspin_patterns InstrInfo & get_gather_iinfo () override { return _iinfo.first; } InstrInfo & get_scatter_iinfo () override { return _iinfo.second; } TraceInfo & get_trace_info() override { return _trace_info; } - InstrWindow & get_instr_window() override { return _iw; } + InstrWindow & + get_instr_window() override { return _iw; } void set_log_level(int8_t level) override { _log_level = level; } int8_t get_log_level() override { return _log_level; } @@ -126,7 +132,7 @@ namespace gspin_patterns std::pair _metrics; std::pair _iinfo; TraceInfo _trace_info; - InstrWindow _iw; + InstrWindow _iw; int8_t _log_level = 0; From 4046f356f9efb1f4eda64df7f2874509448a670e Mon Sep 17 00:00:00 2001 From: christopher Date: Sat, 6 Jul 2024 16:16:31 -0400 Subject: [PATCH 69/76] Added determining of instruction address (iaddr) to traces generated by nvgs_trace. Use iaddr field rather than base_addr for CTAs. Use base_addr comparison to addr to determine mem instructions which are part of warp for CTAs. Removed remaining malloc calls in gs_patterns.h (a couple are still left in unused functions in utils.h). 
Added partial string match to kernel name lookup to make it easier to specify kernel to trace in config. Fixed name of config variable. Renamed min_size to max_access_size in InstrAddrAdapter to be consistent. --- gs_patterns.h | 19 ++++++++------ gs_patterns_core.cpp | 2 +- gs_patterns_core.h | 6 ++--- gs_patterns_main.cpp | 4 +-- gsnv_patterns.cpp | 28 ++++++++++++++++++--- gsnv_patterns.h | 32 +++++++++++------------- gspin_patterns.h | 21 ++++++++-------- nvbit_tracing/README.md | 6 ++--- nvbit_tracing/gsnv_trace/common.h | 21 ++++++++-------- nvbit_tracing/gsnv_trace/gsnv_trace.cu | 28 +++++++++++++-------- nvbit_tracing/gsnv_trace/inject_funcs.cu | 3 ++- 11 files changed, 101 insertions(+), 69 deletions(-) diff --git a/gs_patterns.h b/gs_patterns.h index a3fc1c9..9ef75a8 100644 --- a/gs_patterns.h +++ b/gs_patterns.h @@ -79,11 +79,12 @@ namespace gs_patterns virtual mem_instr_type get_mem_instr_type() const = 0; virtual size_t get_size() const = 0; + virtual addr_t get_base_addr() const = 0; virtual addr_t get_address() const = 0; virtual addr_t get_iaddr() const = 0; virtual addr_t get_maddr() const = 0; virtual unsigned short get_type() const = 0; // must be 0 for GATHER, 1 for SCATTER !! - virtual int64_t min_size() const = 0; + virtual int64_t max_access_size() const = 0; virtual bool is_gather() const { return (is_valid() && is_mem_instr() && GATHER == get_mem_access_type()) ? true : false; } @@ -102,20 +103,22 @@ namespace gs_patterns public: Metrics(mem_access_type mType) : _mType(mType) { - /// TODO: Convert to new/delete - for (int j = 0; j < NTOP; j++) { - patterns[j] = (int64_t *) calloc(PSIZE, sizeof(int64_t)); - if (patterns[j] == NULL) { - throw GSAllocError("Could not allocate patterns for " + type_as_string() + "!"); + try + { + for (int j = 0; j < NTOP; j++) { + patterns[j] = new int64_t[PSIZE]; } } + catch (const std::exception & ex) + { + throw GSAllocError("Could not allocate patterns for " + type_as_string() + "! 
due to: " + ex.what()); + } } ~Metrics() { - /// TODO: Convert to new/delete for (int i = 0; i < NTOP; i++) { - free(patterns[i]); + delete [] patterns[i]; } delete [] srcline; diff --git a/gs_patterns_core.cpp b/gs_patterns_core.cpp index 1c85c7c..d160be6 100644 --- a/gs_patterns_core.cpp +++ b/gs_patterns_core.cpp @@ -250,7 +250,7 @@ namespace gs_patterns_core maddr = ia.get_maddr(); - if (CTA == ia.get_mem_instr_type() && ia.get_address() == ia.get_iaddr()) { + if (CTA == ia.get_mem_instr_type() && ia.get_address() == ia.get_base_addr()) { iaddr = ia.get_iaddr(); } diff --git a/gs_patterns_core.h b/gs_patterns_core.h index 11b24c3..ab7d23c 100644 --- a/gs_patterns_core.h +++ b/gs_patterns_core.h @@ -51,7 +51,7 @@ namespace gs_patterns_core /***********************/ } else if (ia.is_mem_instr()) { - if (CTA == ia.get_mem_instr_type() && ia.get_iaddr() == ia.get_address()) { + if (CTA == ia.get_mem_instr_type() && ia.get_base_addr() == ia.get_address()) { iw.get_iaddr() = ia.get_iaddr(); trace_info.opcodes++; trace_info.did_opcode = true; @@ -87,8 +87,8 @@ namespace gs_patterns_core } //new window - if ((w_idx == -1) || (iw.w_bytes(w_rw_idx, w_idx) >= ia.min_size()) || // was >= VBYTES - (iw.w_cnt(w_rw_idx, w_idx) >= ia.min_size())) { // was >= VBYTES + if ((w_idx == -1) || (iw.w_bytes(w_rw_idx, w_idx) >= ia.max_access_size()) || // was >= VBYTES + (iw.w_cnt(w_rw_idx, w_idx) >= ia.max_access_size())) { // was >= VBYTES /***************************/ //do analysis diff --git a/gs_patterns_main.cpp b/gs_patterns_main.cpp index c05e460..83679da 100644 --- a/gs_patterns_main.cpp +++ b/gs_patterns_main.cpp @@ -10,7 +10,7 @@ #include "gsnv_patterns.h" #include "utils.h" -#define NVGS_CONFIG_FILE "NVGS_CONFIG_FILE" +#define GSNV_CONFIG_FILE "GSNV_CONFIG_FILE" using namespace gs_patterns; using namespace gs_patterns::gs_patterns_core; @@ -52,7 +52,7 @@ int main(int argc, char ** argv) mp.set_trace_file(argv[1]); - const char * config_file = std::getenv(NVGS_CONFIG_FILE); + 
const char * config_file = std::getenv(GSNV_CONFIG_FILE); if (config_file) { mp.set_config_file(config_file); } diff --git a/gsnv_patterns.cpp b/gsnv_patterns.cpp index a055a49..9487781 100644 --- a/gsnv_patterns.cpp +++ b/gsnv_patterns.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -146,6 +147,11 @@ void MemPatternsForNV::handle_trace_entry(const InstrAddrAdapter & ia) void MemPatternsForNV::generate_patterns() { + if (_traces_handled < 1) { + std::cout << "No traces match criteria, skipping pattern generation" << std::endl; + return; + } + // ----------------- Write out Trace Files (if requested ) ----------------- write_trace_out_file(); @@ -508,7 +514,7 @@ bool MemPatternsForNV::convert_to_trace_entry(const mem_access_t & ma, { if (ma.addrs[i] != 0) { - trace_entry_t te { mem_type_code, mem_size, ma.addrs[i], base_addr }; + trace_entry_t te { mem_type_code, mem_size, ma.addrs[i], base_addr, ma.iaddr }; te_list.push_back(te); if (_addr_to_line_id.find(base_addr) == _addr_to_line_id.end()) { @@ -548,12 +554,14 @@ void MemPatternsForNV::handle_cta_memory_access(const mem_access_t * ma) _traces_written++; } - if (_log_level >= 2) { + if (_log_level >= 3) { std::stringstream ss; //ss << "CTX " << HEX(ctx) << " - grid_launch_id " ss << "GSNV_TRACE: CTX " << " - grid_launch_id " << ma->grid_launch_id << " - CTA " << ma->cta_id_x << "," << ma->cta_id_y << "," << ma->cta_id_z - << " - warp " << ma->warp_id << " - " << get_opcode(ma->opcode_id) + << " - warp " << ma->warp_id << " - " + << " - iaddr: " << HEX(ma->iaddr) + << " - " << get_opcode(ma->opcode_id) << " - shortOpcode: " << ma->opcode_short_id << " isLoad: " << ma->is_load << " isStore: " << ma->is_store << " size: " << ma->size << " - "; @@ -824,14 +832,26 @@ bool MemPatternsForNV::should_instrument(const std::string & kernel_name) } auto itr = _target_kernels.find (kernel_name); - if ( itr != _target_kernels.end()) // Hard code for now + if ( itr != _target_kernels.end()) { if 
(_log_level >= 1) { std::cout << "Instrumenting: " << kernel_name << std::endl; } return true; } + else { + // Try substring match + auto itr = std::find_if(_target_kernels.begin(), _target_kernels.end(), + [kernel_name](const std::string & t_kernel) { + return (t_kernel.compare(kernel_name.substr(0, t_kernel.length())) == 0); } ); + if (itr != _target_kernels.end()) + return true; + } + + if (_log_level >= 2) { + std::cout << "Not Instrumenting: " << kernel_name << std::endl; + } return false; } diff --git a/gsnv_patterns.h b/gsnv_patterns.h index 92a83f6..ef45a3b 100644 --- a/gsnv_patterns.h +++ b/gsnv_patterns.h @@ -23,10 +23,6 @@ // Enable to use a vector for storing trace data for use by second pass (if not defined data is stored to a temp file //#define USE_VECTOR_FOR_SECOND_PASS 1 -#define HEX(x) \ - "0x" << std::setfill('0') << std::setw(16) << std::hex << (uint64_t)x \ - << std::dec - #include "nvbit_tracing/gsnv_trace/common.h" namespace gs_patterns @@ -43,6 +39,7 @@ namespace gsnv_patterns unsigned char length[sizeof(addr_t)]; }; addr_t base_addr; + addr_t iaddr; char padding[4]; } __attribute__((packed)); typedef struct _trace_entry_t trace_entry_t; @@ -51,8 +48,8 @@ namespace gsnv_patterns #define MAP_VALUE_SIZE 22 #define MAP_VALUE_LONG_SIZE 94 #define NUM_MAPS 3 - // Setting this to fit within a 4k page e.g 170 * 24 bytes <= 4k - #define TRACE_BUFFER_LENGTH 170 + // Setting this to fit within a 4k page e.g. 
170 * 32 bytes <= 4k + #define TRACE_BUFFER_LENGTH 128 struct _trace_map_entry_t { @@ -79,23 +76,24 @@ namespace gsnv_patterns virtual ~InstrAddrAdapterForNV() { } - virtual inline bool is_valid() const override { return true; } - virtual inline bool is_mem_instr() const override { return true; } - virtual inline bool is_other_instr() const override { return false; } + virtual inline bool is_valid() const override { return true; } + virtual inline bool is_mem_instr() const override { return true; } + virtual inline bool is_other_instr() const override { return false; } virtual inline mem_access_type get_mem_access_type() const override { return (_te.type == 0) ? GATHER : SCATTER; } - virtual inline mem_instr_type get_mem_instr_type() const override { return CTA; } + virtual inline mem_instr_type get_mem_instr_type() const override { return CTA; } - virtual inline size_t get_size() const override { return _te.size; } // in bytes - virtual inline addr_t get_address() const override { return _te.addr; } - virtual inline addr_t get_iaddr () const override { return _te.base_addr; } - virtual inline addr_t get_maddr () const override { return _te.addr; } // was _base_addr - virtual inline unsigned short get_type() const override { return _te.type; } // must be 0 for GATHER, 1 for SCATTER !! - virtual inline int64_t min_size() const override { return 256; } // 32 * 8 bytes + virtual inline size_t get_size() const override { return _te.size; } // in bytes + virtual inline addr_t get_base_addr() const override { return _te.base_addr; } + virtual inline addr_t get_address() const override { return _te.addr; } + virtual inline addr_t get_iaddr () const override { return _te.iaddr; } + virtual inline addr_t get_maddr () const override { return _te.addr; } // was _base_addr + virtual inline unsigned short get_type() const override { return _te.type; } // must be 0 for GATHER, 1 for SCATTER !! 
+ virtual inline int64_t max_access_size() const override { return MEMORY_ACCESS_SIZE; } // 32 * 8 bytes virtual void output(std::ostream & os) const override { os << "InstrAddrAdapterForNV: trace entry: type: [" << _te.type << "] size: [" << _te.size << "]"; } - const trace_entry_t & get_trace_entry() const { return _te; } + const trace_entry_t & get_trace_entry() const { return _te; } private: const trace_entry_t _te; diff --git a/gspin_patterns.h b/gspin_patterns.h index 3dffff2..1ea0131 100644 --- a/gspin_patterns.h +++ b/gspin_patterns.h @@ -62,11 +62,11 @@ namespace gspin_patterns virtual ~InstrAddrAdapterForPin() { } - virtual bool is_valid() const override { return !(0 == _te.type && 0 == _te.size); } - virtual bool is_mem_instr() const override { return ((_te.type == 0x0) || (_te.type == 0x1)); } - virtual bool is_other_instr() const override { return ((_te.type >= 0xa) && (_te.type <= 0x10)) || (_te.type == 0x1e); } + virtual inline bool is_valid() const override { return !(0 == _te.type && 0 == _te.size); } + virtual inline bool is_mem_instr() const override { return ((_te.type == 0x0) || (_te.type == 0x1)); } + virtual inline bool is_other_instr() const override { return ((_te.type >= 0xa) && (_te.type <= 0x10)) || (_te.type == 0x1e); } - virtual mem_access_type get_mem_access_type() const override { + virtual mem_access_type get_mem_access_type() const override { if (!is_mem_instr()) throw GSDataError("Not a Memory Instruction - unable to determine Access Type"); // Must be 0x0 or 0x1 if (_te.type == 0x0) return GATHER; @@ -74,12 +74,13 @@ namespace gspin_patterns } virtual inline mem_instr_type get_mem_instr_type() const override { return VECTOR; } - virtual size_t get_size() const override { return _te.size; } - virtual addr_t get_address() const override { return _te.addr; } - virtual addr_t get_iaddr() const override { return _te.addr; } - virtual addr_t get_maddr() const override { return _te.addr / _te.size; } - virtual unsigned short get_type() 
const override { return _te.type; } // must be 0 for GATHER, 1 for SCATTER !! - virtual int64_t min_size() const override { return VBYTES; } + virtual inline size_t get_size() const override { return _te.size; } + virtual inline addr_t get_base_addr() const override { return _te.addr; } + virtual inline addr_t get_address() const override { return _te.addr; } + virtual inline addr_t get_iaddr() const override { return _te.addr; } + virtual inline addr_t get_maddr() const override { return _te.addr / _te.size; } + virtual inline unsigned short get_type() const override { return _te.type; } // must be 0 for GATHER, 1 for SCATTER !! + virtual inline int64_t max_access_size() const override { return MEMORY_ACCESS_SIZE; } virtual void output(std::ostream & os) const override { os << "InstrAddrAdapterForPin: trace entry: type: [" << _te.type << "] size: [" << _te.size << "]"; diff --git a/nvbit_tracing/README.md b/nvbit_tracing/README.md index 5da91d9..55ac9f4 100644 --- a/nvbit_tracing/README.md +++ b/nvbit_tracing/README.md @@ -86,7 +86,7 @@ The gzip command will compress the resulting trace file for use by gs_patterns i ### Generating Memory Patterns using an existing trace file. -In the previous section on Instrumnenting an application, we used nvgs_trace.so to instrument an application, the resulting trace file was then compressed. +In the previous section on Instrumnenting an application, we used gsnv_trace.so to instrument an application, the resulting trace file was then compressed. The instrumentation run also generated pattern files. If we want to rerun the pattern generation we can do so using the generated (and compressed) trace file without re-instrumenting the application as this is much faster. To do this we just need to run the gs_pattern binary with the trace file and the "-nv " option. The "-nv" option indicates that the trace file is an nvbit trace. 
@@ -103,8 +103,8 @@ $GS_PATTERNS_DIR/gs_patterns -nv As of NVBit 1.5.5, when building gsnv_trace within the NVBit source tree it *may* be required to specify a version of the CUDA which is older in order to enable NVBit to correctly emit the runtime instructions. Without this the gsnv_trace libary will still be built but will be unable to instrument CUDA kernels. -For instance we were able to build a working nvgs_trace using CUDA api version 11.7 and lower and use that on higher versions of the CUDA environment such as CUDA 12.3. -However as of NVBit 1.5.5 it was not possible to get a working version of nvgs_trace when we build it using 12.3 directly. +For instance we were able to build a working gsnv_trace using CUDA api version 11.7 and lower and use that on higher versions of the CUDA environment such as CUDA 12.3. +However as of NVBit 1.5.5 it was not possible to get a working version of gsnv_trace when we build it using 12.3 directly. Example: diff --git a/nvbit_tracing/gsnv_trace/common.h b/nvbit_tracing/gsnv_trace/common.h index 148a8cb..cff31b7 100644 --- a/nvbit_tracing/gsnv_trace/common.h +++ b/nvbit_tracing/gsnv_trace/common.h @@ -33,15 +33,16 @@ * on the channel from the GPU to the CPU */ typedef struct { uint64_t grid_launch_id; - int cta_id_x; - int cta_id_y; - int cta_id_z; - int warp_id; - int opcode_id; - int opcode_short_id; - int is_load; - int is_store; - int size; - int line_id; + int cta_id_x; + int cta_id_y; + int cta_id_z; + int warp_id; + int opcode_id; + int opcode_short_id; + int is_load; + int is_store; + int size; + int line_id; + uint64_t iaddr; uint64_t addrs[32]; } mem_access_t; diff --git a/nvbit_tracing/gsnv_trace/gsnv_trace.cu b/nvbit_tracing/gsnv_trace/gsnv_trace.cu index 237f0af..c0843b2 100644 --- a/nvbit_tracing/gsnv_trace/gsnv_trace.cu +++ b/nvbit_tracing/gsnv_trace/gsnv_trace.cu @@ -84,7 +84,7 @@ bool skip_callback_flag = false; uint32_t instr_begin_interval = 0; uint32_t instr_end_interval = UINT32_MAX; int verbose = 0; 
-std::string nvgs_config_file; +std::string gsnv_config_file; /* opcode to id map and reverse map */ std::map opcode_to_id_map; @@ -109,7 +109,7 @@ void nvbit_at_init() { "End of the instruction interval where to apply instrumentation"); GET_VAR_INT(verbose, "TOOL_VERBOSE", 0, "Enable verbosity inside the tool"); - GET_VAR_STR(nvgs_config_file, "NVGS_CONFIG_FILE", "Specify a NVGS config file"); + GET_VAR_STR(gsnv_config_file, "GSNV_CONFIG_FILE", "Specify a GSNV config file"); std::string pad(100, '-'); printf("%s\n", pad.c_str()); @@ -151,11 +151,14 @@ void instrument_function_if_needed(CUcontext ctx, CUfunction func) { if (verbose) { printf( - "NVGS_TRACE: CTX %p, Inspecting CUfunction %p name %s at address " + "GSNV_TRACE: CTX %p, Inspecting CUfunction %p name %s at address " "0x%lx\n", ctx, f, nvbit_get_func_name(ctx, f), nvbit_get_func_addr(f)); } + // Get address of function PC + uint64_t func_addr = nvbit_get_func_addr(f); + uint32_t cnt = 0; /* iterate on all the static instructions in the function */ for (auto instr : instrs) { @@ -213,6 +216,8 @@ void instrument_function_if_needed(CUcontext ctx, CUfunction func) { mp->add_or_update_opcode_short(opcode_short_id, instr->getOpcodeShort()); if (status) { mp->add_or_update_line(line_id, line); } + // Compute instruction address (function address + instruction offset) + uint64_t instr_addr = func_addr + instr->getOffset(); int mref_idx = 0; /* iterate on the operands */ @@ -240,6 +245,9 @@ void instrument_function_if_needed(CUcontext ctx, CUfunction func) { /* line number id */ nvbit_add_call_arg_const_val32(instr, line_id); + /* Memory instruction address */ + nvbit_add_call_arg_const_val64(instr, instr_addr); + /* memory reference 64 bit address */ nvbit_add_call_arg_mref_addr64(instr, mref_idx); /* add "space" for kernel function pointer that will be set @@ -317,7 +325,7 @@ void nvbit_at_cuda_event(CUcontext ctx, int is_exit, nvbit_api_cuda_t cbid, nvbit_enable_instrumented(ctx, p->f, true); printf( - 
"NVGS_TRACE: CTX 0x%016lx - LAUNCH - Kernel pc 0x%016lx - Kernel " + "GSNV_TRACE: CTX 0x%016lx - LAUNCH - Kernel pc 0x%016lx - Kernel " "name %s - grid launch id %ld - grid size %d,%d,%d - block " "size %d,%d,%d - nregs %d - shmem %d - cuda stream id %ld\n", (uint64_t)ctx, pc, func_name, grid_launch_id, p->gridDimX, @@ -366,13 +374,13 @@ void* recv_thread_fun(void* args) { << ma->grid_launch_id << " - CTA " << ma->cta_id_x << "," << ma->cta_id_y << "," << ma->cta_id_z << " - warp " << ma->warp_id << " - " << id_to_opcode_map[ma->opcode_id] - << " - "; + << " - iaddr " << HEX(ma->iaddr) << " - "; for (int i = 0; i < 32; i++) { ss << HEX(ma->addrs[i]) << " "; } - printf("NVGS_TRACE: %s\n", ss.str().c_str()); + printf("GSNV_TRACE: %s\n", ss.str().c_str()); #endif num_processed_bytes += sizeof(mem_access_t); @@ -396,7 +404,7 @@ void nvbit_at_ctx_init(CUcontext ctx) { pthread_mutex_lock(&mutex); //if (verbose) { if (1) { - printf("NVGS_TRACE: STARTING CONTEXT %p\n", ctx); + printf("GSNV_TRACE: STARTING CONTEXT %p\n", ctx); } CTXstate* ctx_state = new CTXstate; assert(ctx_state_map.find(ctx) == ctx_state_map.end()); @@ -409,8 +417,8 @@ void nvbit_at_ctx_init(CUcontext ctx) { // -- init #2 - whats the difference try { - if (!nvgs_config_file.empty()) { - mp->set_config_file(nvgs_config_file); + if (!gsnv_config_file.empty()) { + mp->set_config_file(gsnv_config_file); } } catch (const std::exception & ex) { @@ -423,7 +431,7 @@ void nvbit_at_ctx_term(CUcontext ctx) { skip_callback_flag = true; //if (verbose) { if (1) { - printf("NVGS_TRACE: TERMINATING CONTEXT %p\n", ctx); + printf("GSNV_TRACE: TERMINATING CONTEXT %p\n", ctx); } /* get context state from map */ assert(ctx_state_map.find(ctx) != ctx_state_map.end()); diff --git a/nvbit_tracing/gsnv_trace/inject_funcs.cu b/nvbit_tracing/gsnv_trace/inject_funcs.cu index 35b375b..8998ce3 100644 --- a/nvbit_tracing/gsnv_trace/inject_funcs.cu +++ b/nvbit_tracing/gsnv_trace/inject_funcs.cu @@ -43,6 +43,7 @@ extern "C" __device__ 
__noinline__ void instrument_mem(int pred, int is_store, int size, int line_id, + uint64_t iaddr, uint64_t addr, uint64_t grid_launch_id, uint64_t pchannel_dev) { @@ -74,7 +75,7 @@ extern "C" __device__ __noinline__ void instrument_mem(int pred, ma.is_store = is_store; ma.size = size; ma.line_id = line_id; - + ma.iaddr = iaddr; /* first active lane pushes information on the channel */ if (first_laneid == laneid) { From 1b2b15086916c98997e4605ff935151476d98999 Mon Sep 17 00:00:00 2001 From: christopher Date: Sun, 7 Jul 2024 17:31:21 -0400 Subject: [PATCH 70/76] Rename a method. --- gs_patterns.h | 14 +++++++------- gs_patterns_core.h | 4 ++-- gsnv_patterns.h | 24 ++++++++++++------------ gspin_patterns.h | 26 +++++++++++++------------- 4 files changed, 34 insertions(+), 34 deletions(-) diff --git a/gs_patterns.h b/gs_patterns.h index 9ef75a8..4f1775f 100644 --- a/gs_patterns.h +++ b/gs_patterns.h @@ -78,13 +78,13 @@ namespace gs_patterns virtual mem_access_type get_mem_access_type() const = 0; virtual mem_instr_type get_mem_instr_type() const = 0; - virtual size_t get_size() const = 0; - virtual addr_t get_base_addr() const = 0; - virtual addr_t get_address() const = 0; - virtual addr_t get_iaddr() const = 0; - virtual addr_t get_maddr() const = 0; - virtual unsigned short get_type() const = 0; // must be 0 for GATHER, 1 for SCATTER !! - virtual int64_t max_access_size() const = 0; + virtual size_t get_size() const = 0; + virtual addr_t get_base_addr() const = 0; + virtual addr_t get_address() const = 0; + virtual addr_t get_iaddr() const = 0; + virtual addr_t get_maddr() const = 0; + virtual unsigned short get_type() const = 0; // must be 0 for GATHER, 1 for SCATTER !! + virtual int64_t get_max_access_size() const = 0; virtual bool is_gather() const { return (is_valid() && is_mem_instr() && GATHER == get_mem_access_type()) ? 
true : false; } diff --git a/gs_patterns_core.h b/gs_patterns_core.h index ab7d23c..1c87dd6 100644 --- a/gs_patterns_core.h +++ b/gs_patterns_core.h @@ -87,8 +87,8 @@ namespace gs_patterns_core } //new window - if ((w_idx == -1) || (iw.w_bytes(w_rw_idx, w_idx) >= ia.max_access_size()) || // was >= VBYTES - (iw.w_cnt(w_rw_idx, w_idx) >= ia.max_access_size())) { // was >= VBYTES + if ((w_idx == -1) || (iw.w_bytes(w_rw_idx, w_idx) >= ia.get_max_access_size()) || // was >= VBYTES + (iw.w_cnt(w_rw_idx, w_idx) >= ia.get_max_access_size())) { // was >= VBYTES /***************************/ //do analysis diff --git a/gsnv_patterns.h b/gsnv_patterns.h index ef45a3b..c304c80 100644 --- a/gsnv_patterns.h +++ b/gsnv_patterns.h @@ -76,24 +76,24 @@ namespace gsnv_patterns virtual ~InstrAddrAdapterForNV() { } - virtual inline bool is_valid() const override { return true; } - virtual inline bool is_mem_instr() const override { return true; } - virtual inline bool is_other_instr() const override { return false; } + virtual inline bool is_valid() const override { return true; } + virtual inline bool is_mem_instr() const override { return true; } + virtual inline bool is_other_instr() const override { return false; } virtual inline mem_access_type get_mem_access_type() const override { return (_te.type == 0) ? 
GATHER : SCATTER; } - virtual inline mem_instr_type get_mem_instr_type() const override { return CTA; } + virtual inline mem_instr_type get_mem_instr_type() const override { return CTA; } - virtual inline size_t get_size() const override { return _te.size; } // in bytes - virtual inline addr_t get_base_addr() const override { return _te.base_addr; } - virtual inline addr_t get_address() const override { return _te.addr; } - virtual inline addr_t get_iaddr () const override { return _te.iaddr; } - virtual inline addr_t get_maddr () const override { return _te.addr; } // was _base_addr - virtual inline unsigned short get_type() const override { return _te.type; } // must be 0 for GATHER, 1 for SCATTER !! - virtual inline int64_t max_access_size() const override { return MEMORY_ACCESS_SIZE; } // 32 * 8 bytes + virtual inline size_t get_size() const override { return _te.size; } // in bytes + virtual inline addr_t get_base_addr() const override { return _te.base_addr; } + virtual inline addr_t get_address() const override { return _te.addr; } + virtual inline addr_t get_iaddr () const override { return _te.iaddr; } + virtual inline addr_t get_maddr () const override { return _te.addr; } // was _base_addr + virtual inline unsigned short get_type() const override { return _te.type; } // must be 0 for GATHER, 1 for SCATTER !! 
+ virtual inline int64_t get_max_access_size() const override { return MEMORY_ACCESS_SIZE; } // 32 * 8 bytes virtual void output(std::ostream & os) const override { os << "InstrAddrAdapterForNV: trace entry: type: [" << _te.type << "] size: [" << _te.size << "]"; } - const trace_entry_t & get_trace_entry() const { return _te; } + const trace_entry_t & get_trace_entry() const { return _te; } private: const trace_entry_t _te; diff --git a/gspin_patterns.h b/gspin_patterns.h index 1ea0131..f943a41 100644 --- a/gspin_patterns.h +++ b/gspin_patterns.h @@ -62,25 +62,25 @@ namespace gspin_patterns virtual ~InstrAddrAdapterForPin() { } - virtual inline bool is_valid() const override { return !(0 == _te.type && 0 == _te.size); } - virtual inline bool is_mem_instr() const override { return ((_te.type == 0x0) || (_te.type == 0x1)); } - virtual inline bool is_other_instr() const override { return ((_te.type >= 0xa) && (_te.type <= 0x10)) || (_te.type == 0x1e); } + virtual inline bool is_valid() const override { return !(0 == _te.type && 0 == _te.size); } + virtual inline bool is_mem_instr() const override { return ((_te.type == 0x0) || (_te.type == 0x1)); } + virtual inline bool is_other_instr() const override { return ((_te.type >= 0xa) && (_te.type <= 0x10)) || (_te.type == 0x1e); } - virtual mem_access_type get_mem_access_type() const override { + virtual mem_access_type get_mem_access_type() const override { if (!is_mem_instr()) throw GSDataError("Not a Memory Instruction - unable to determine Access Type"); // Must be 0x0 or 0x1 if (_te.type == 0x0) return GATHER; else return SCATTER; } - virtual inline mem_instr_type get_mem_instr_type() const override { return VECTOR; } - - virtual inline size_t get_size() const override { return _te.size; } - virtual inline addr_t get_base_addr() const override { return _te.addr; } - virtual inline addr_t get_address() const override { return _te.addr; } - virtual inline addr_t get_iaddr() const override { return _te.addr; } - virtual 
inline addr_t get_maddr() const override { return _te.addr / _te.size; } - virtual inline unsigned short get_type() const override { return _te.type; } // must be 0 for GATHER, 1 for SCATTER !! - virtual inline int64_t max_access_size() const override { return MEMORY_ACCESS_SIZE; } + virtual inline mem_instr_type get_mem_instr_type() const override { return VECTOR; } + + virtual inline size_t get_size() const override { return _te.size; } + virtual inline addr_t get_base_addr() const override { return _te.addr; } + virtual inline addr_t get_address() const override { return _te.addr; } + virtual inline addr_t get_iaddr() const override { return _te.addr; } + virtual inline addr_t get_maddr() const override { return _te.addr / _te.size; } + virtual inline unsigned short get_type() const override { return _te.type; } // must be 0 for GATHER, 1 for SCATTER !! + virtual inline int64_t get_max_access_size() const override { return MEMORY_ACCESS_SIZE; } virtual void output(std::ostream & os) const override { os << "InstrAddrAdapterForPin: trace entry: type: [" << _te.type << "] size: [" << _te.size << "]"; From 3ded852fbf851a7f91e65ad41d8b7ea911bf4dc7 Mon Sep 17 00:00:00 2001 From: christopher Date: Thu, 11 Jul 2024 17:04:19 -0400 Subject: [PATCH 71/76] When converting mem_access_t to trace_entry ensure addr_to_line uses iaddr for lookup. 
--- gs_patterns_core.cpp | 52 +++++++++++++++++++++++------------------ gs_patterns_core.h | 55 ++++++++++++++++++++++---------------------- gsnv_patterns.cpp | 9 ++++---- 3 files changed, 62 insertions(+), 54 deletions(-) diff --git a/gs_patterns_core.cpp b/gs_patterns_core.cpp index d160be6..4e0bd5a 100644 --- a/gs_patterns_core.cpp +++ b/gs_patterns_core.cpp @@ -182,14 +182,14 @@ namespace gs_patterns_core int target_ntop = 0; int bestcnt; - for (int j = 0; j < NTOP; j++) { - + for (int j = 0; j < NTOP; j++) + { int bestcnt = 0; addr_t best_iaddr = 0; int bestidx = -1; - for (int k = 0; k < NGS; k++) { - + for (int k = 0; k < NGS; k++) + { if (target_iinfo.get_icnt()[k] == 0) continue; @@ -204,9 +204,12 @@ namespace gs_patterns_core } } - if (best_iaddr == 0) { + if (best_iaddr == 0) + { break; - } else { + } + else + { target_ntop++; target_metrics.top[j] = best_iaddr; target_metrics.top_idx[j] = bestidx; @@ -215,7 +218,7 @@ namespace gs_patterns_core //printf("%sIADDR -- %016lx: %16lu -- %s\n", target_metrics.getShortName().c_str(), target_metrics.top[j], target_metrics.tot[j], target_metrics.get_srcline()[bestidx]); } - } + } // for return target_ntop; } @@ -239,14 +242,15 @@ namespace gs_patterns_core throw GSDataError(os.str()); } - if (ia.is_other_instr()) { + if (ia.is_other_instr()) + { iaddr = ia.get_iaddr(); // was get_address in orig code -> get_iaddr() - + } + else if (ia.is_mem_instr()) + { /***********************/ - /** MEM 0x00 and 0x01 **/ + /** MEM **/ /***********************/ - } - else if (ia.is_mem_instr()) { maddr = ia.get_maddr(); @@ -263,10 +267,10 @@ namespace gs_patterns_core } // gather ? - if (GATHER == ia.get_mem_access_type()) { - - for (i = 0; i < gather_metrics.ntop; i++) { - + if (GATHER == ia.get_mem_access_type()) + { + for (i = 0; i < gather_metrics.ntop; i++) + { //found it if (iaddr == gather_metrics.top[i]) { @@ -286,13 +290,14 @@ namespace gs_patterns_core } } // scatter ? 
- else if (SCATTER == ia.get_mem_access_type()) { - - for (i = 0; i < scatter_metrics.ntop; i++) { + else if (SCATTER == ia.get_mem_access_type()) + { + for (i = 0; i < scatter_metrics.ntop; i++) + { //found it - if (iaddr == scatter_metrics.top[i]) { - + if (iaddr == scatter_metrics.top[i]) + { //set base if (scatter_base[i] == 0) scatter_base[i] = maddr; @@ -307,7 +312,8 @@ namespace gs_patterns_core } } } - else { // belt and suspenders, yep = but helps to validate correct logic in children of InstrAddresInfo + else + { // belt and suspenders, yep = but helps to validate correct logic in children of InstrAddresInfo throw GSDataError("Unknown Memory Access Type: " + std::to_string(ia.get_mem_access_type())); } } // MEM @@ -323,6 +329,6 @@ std::ostream & operator<<(std::ostream & os, const gs_patterns::InstrAddrAdapter return os; } -} //namespace gs_patterns +} // namespace gs_patterns diff --git a/gs_patterns_core.h b/gs_patterns_core.h index 1c87dd6..3b67f5e 100644 --- a/gs_patterns_core.h +++ b/gs_patterns_core.h @@ -35,21 +35,23 @@ namespace gs_patterns_core throw GSDataError(os.str()); } - /*****************************/ - /** INSTR 0xa-0x10 and 0x1e **/ - /*****************************/ - if (ia.is_other_instr()) { + if (ia.is_other_instr()) + { + /*****************************/ + /** INSTR **/ + /*****************************/ iw.get_iaddr() = ia.get_iaddr(); // was get_address in orig code -> get_iaddr() //nops trace_info.opcodes++; trace_info.did_opcode = true; - + } + else if (ia.is_mem_instr()) + { /***********************/ - /** MEM 0x00 and 0x01 **/ + /** MEM instruction **/ /***********************/ - } else if (ia.is_mem_instr()) { if (CTA == ia.get_mem_instr_type() && ia.get_base_addr() == ia.get_address()) { iw.get_iaddr() = ia.get_iaddr(); @@ -91,9 +93,9 @@ namespace gs_patterns_core (iw.w_cnt(w_rw_idx, w_idx) >= ia.get_max_access_size())) { // was >= VBYTES /***************************/ - //do analysis + // do analysis 
/***************************/ - //i = each window + // i = each window for (w = 0; w < 2; w++) { // 2 for (i = 0; i < IWINDOW; i++) { // 1024 @@ -103,21 +105,19 @@ namespace gs_patterns_core int byte = iw.w_bytes(w, i) / iw.w_cnt(w, i); - //First pass - //Determine - //gather/scatter? + // First pass - Determine gather/scatter? gs = -1; for (j = 0; j < iw.w_cnt(w, i); j++) { - //address and cl + // address and cl iw.get_maddr() = iw.w_maddr(w, i, j); assert(iw.get_maddr() > -1); - //previous addr + // previous addr if (j == 0) iw.get_maddr_prev() = iw.get_maddr() - 1; - //gather / scatter + // gather / scatter if (iw.get_maddr() != iw.get_maddr_prev()) { if ((gs == -1) && (abs(iw.get_maddr() - iw.get_maddr_prev()) > 1)) // ? > 1 stride (non-contiguous) <-------------------- gs = w; @@ -155,23 +155,22 @@ namespace gs_patterns_core break; } } - } - } //WINDOW i + } // - if + } //WINDOW i - for w_idx = 0; - //reset windows + // reset windows iw.reset(w); - } // rw w - } //analysis + } // rw w - for + } // analysis - if - //Set window values + // Set window values iw.w_iaddrs(w_rw_idx, w_idx) = iw.get_iaddr(); - //iw.w_maddr(w_rw_idx, w_idx, iw.w_cnt[w_rw_idx][w_idx]]) = ia.get_maddr(); iw.w_maddr(w_rw_idx, w_idx, iw.w_cnt(w_rw_idx, w_idx)) = ia.get_maddr(); iw.w_bytes(w_rw_idx, w_idx) += ia.get_size(); - //num access per iaddr in loop + // num access per iaddr in loop iw.w_cnt(w_rw_idx, w_idx)++; if (trace_info.did_opcode) { @@ -183,11 +182,13 @@ namespace gs_patterns_core } else { trace_info.addrs++; } - + } + else + { /***********************/ /** SOMETHING ELSE **/ /***********************/ - } else { + trace_info.other++; } @@ -251,7 +252,7 @@ namespace gs_patterns_core throw GSFileError("Could not open " + gs_info + "!"); } - //Header + // Header fprintf(fp, "[ "); fprintf(fp2, "#sourceline, g/s, indices, percentage of g/s in trace\n"); @@ -260,7 +261,7 @@ namespace gs_patterns_core create_metrics_file(fp, fp2, file_prefix, mp.get_scatter_metrics(), 
first_spatter); - //Footer + // Footer fprintf(fp, " ]"); fclose(fp); fclose(fp2); diff --git a/gsnv_patterns.cpp b/gsnv_patterns.cpp index 9487781..aa92fff 100644 --- a/gsnv_patterns.cpp +++ b/gsnv_patterns.cpp @@ -518,7 +518,7 @@ bool MemPatternsForNV::convert_to_trace_entry(const mem_access_t & ma, te_list.push_back(te); if (_addr_to_line_id.find(base_addr) == _addr_to_line_id.end()) { - _addr_to_line_id[base_addr] = ma.line_id; + _addr_to_line_id[ma.iaddr] = ma.line_id; } } else if (ignore_partial_warps) @@ -557,10 +557,11 @@ void MemPatternsForNV::handle_cta_memory_access(const mem_access_t * ma) if (_log_level >= 3) { std::stringstream ss; //ss << "CTX " << HEX(ctx) << " - grid_launch_id " - ss << "GSNV_TRACE: CTX " << " - grid_launch_id " - << ma->grid_launch_id << " - CTA " << ma->cta_id_x << "," << ma->cta_id_y << "," << ma->cta_id_z - << " - warp " << ma->warp_id << " - " + ss << "GSNV_TRACE: grid_launch_id: " + << ma->grid_launch_id << " - CTA: " << ma->cta_id_x << "," << ma->cta_id_y << "," << ma->cta_id_z + << " - warp: " << ma->warp_id << " - iaddr: " << HEX(ma->iaddr) + << " line_id: " << ma->line_id << " - " << get_opcode(ma->opcode_id) << " - shortOpcode: " << ma->opcode_short_id << " isLoad: " << ma->is_load << " isStore: " << ma->is_store From 011c45da3a857d3f5612763fdd7027cd0228ca49 Mon Sep 17 00:00:00 2001 From: christopher Date: Sat, 20 Jul 2024 00:25:42 -0400 Subject: [PATCH 72/76] Bug fix: Use iaddr for mapping addr to line. Add support for optional one_warp mode (handles traces from warp 0 only) which can be set via cmd line or config setting. Pattern size length (PSIZE) can now dynamically grow between INITIAL_PSIZE(2^15) and MAX_PSIZE(2^30) on each pattern array. 
--- gs_patterns.h | 38 +++++++++++++++++++++++++++++++++++--- gs_patterns_core.cpp | 23 ++++++++++++----------- gs_patterns_main.cpp | 7 ++++++- gsnv_patterns.cpp | 13 ++++++++++--- gsnv_patterns.h | 7 +++++-- nvbit_tracing/README.md | 29 +++++++++++++++-------------- 6 files changed, 83 insertions(+), 34 deletions(-) diff --git a/gs_patterns.h b/gs_patterns.h index 4f1775f..db0af28 100644 --- a/gs_patterns.h +++ b/gs_patterns.h @@ -3,6 +3,8 @@ #include #include +#include +#include #define MAX(X, Y) (((X) < (Y)) ? Y : X) #define MIN(X, Y) (((X) > (Y)) ? Y : X) @@ -24,7 +26,8 @@ #define NSTRIDES 15 //Threshold for number of unique distances #define OUTTHRESH (0.5) //Threshold for percentage of distances at boundaries of histogram #define NTOP (10) -#define PSIZE (1<<28) // Was 2^23 (8mb) +#define INITIAL_PSIZE (1<<15) +#define MAX_PSIZE (1<<30) #define MAX_LINE_LENGTH 1024 @@ -101,12 +104,13 @@ namespace gs_patterns class Metrics { public: - Metrics(mem_access_type mType) : _mType(mType) + Metrics(mem_access_type mType) : _mType(mType), _pattern_sizes(NTOP) { try { for (int j = 0; j < NTOP; j++) { - patterns[j] = new int64_t[PSIZE]; + patterns[j] = new int64_t[INITIAL_PSIZE]; + _pattern_sizes[j] = INITIAL_PSIZE; } } catch (const std::exception & ex) @@ -124,6 +128,32 @@ namespace gs_patterns delete [] srcline; } + size_t get_pattern_size(int pattern_index) { + return _pattern_sizes[pattern_index]; + } + + bool grow(int pattern_index) { + try { + size_t old_size = _pattern_sizes[pattern_index]; + size_t new_size = old_size * 2; + if (new_size > MAX_PSIZE) { + return false; + } + + int64_t *tmp = new int64_t[new_size]; + memcpy(tmp, patterns[pattern_index], old_size * sizeof(int64_t)); + + delete [] patterns[pattern_index]; + patterns[pattern_index] = tmp; + _pattern_sizes[pattern_index] = new_size; + + return true; + } + catch (...) 
{ + return false; + } + } + Metrics(const Metrics &) = delete; Metrics & operator=(const Metrics & right) = delete; @@ -147,6 +177,8 @@ namespace gs_patterns char (*srcline)[NGS][MAX_LINE_LENGTH] = new char[2][NGS][MAX_LINE_LENGTH]; mem_access_type _mType; + + std::vector _pattern_sizes; }; diff --git a/gs_patterns_core.cpp b/gs_patterns_core.cpp index 4e0bd5a..dbaa08b 100644 --- a/gs_patterns_core.cpp +++ b/gs_patterns_core.cpp @@ -272,19 +272,19 @@ namespace gs_patterns_core for (i = 0; i < gather_metrics.ntop; i++) { //found it - if (iaddr == gather_metrics.top[i]) { - + if (iaddr == gather_metrics.top[i]) + { if (gather_base[i] == 0) gather_base[i] = maddr; //Add index - if (gather_metrics.offset[i] >= PSIZE) { - printf("WARNING: Need to increase PSIZE. Truncating trace...\n"); - breakout = true; + if (gather_metrics.offset[i] >= gather_metrics.get_pattern_size(i)) { + if (!gather_metrics.grow(i)) { + printf("WARNING: Unable to increase PSIZE. Truncating trace...\n"); + breakout = true; + } } - //printf("g -- %d % d\n", i, gather_offset[i]); fflush(stdout); gather_metrics.patterns[i][gather_metrics.offset[i]++] = (int64_t) (maddr - gather_base[i]); - break; } } @@ -292,7 +292,6 @@ namespace gs_patterns_core // scatter ? else if (SCATTER == ia.get_mem_access_type()) { - for (i = 0; i < scatter_metrics.ntop; i++) { //found it @@ -303,9 +302,11 @@ namespace gs_patterns_core scatter_base[i] = maddr; //Add index - if (scatter_metrics.offset[i] >= PSIZE) { - printf("WARNING: Need to increase PSIZE. Truncating trace...\n"); - breakout = true; + if (scatter_metrics.offset[i] >= scatter_metrics.get_pattern_size(i)) { + if (!scatter_metrics.grow(i)) { + printf("WARNING: Unable to increase PSIZE. 
Truncating trace...\n"); + breakout = true; + } } scatter_metrics.patterns[i][scatter_metrics.offset[i]++] = (int64_t) (maddr - scatter_base[i]); break; diff --git a/gs_patterns_main.cpp b/gs_patterns_main.cpp index 83679da..3794c28 100644 --- a/gs_patterns_main.cpp +++ b/gs_patterns_main.cpp @@ -20,7 +20,7 @@ using namespace gs_patterns::gspin_patterns; void usage (const std::string & prog_name) { std::cerr << "Usage: " << prog_name << " \n" - << " " << prog_name << " -nv [-v]" << std::endl; + << " " << prog_name << " -nv [-ow] [-v]" << std::endl; } int main(int argc, char ** argv) @@ -29,6 +29,7 @@ int main(int argc, char ** argv) { bool use_gs_nv = false; bool verbose = false; + bool one_warp = false; for (int i = 0; i < argc; i++) { if (std::string(argv[i]) == "-nv") { use_gs_nv = true; @@ -36,6 +37,9 @@ int main(int argc, char ** argv) else if (std::string(argv[i]) == "-v") { verbose = true; } + else if (std::string(argv[i]) == "-ow") { + one_warp = true; + } } size_t pos = std::string(argv[0]).find_last_of("/"); @@ -57,6 +61,7 @@ int main(int argc, char ** argv) mp.set_config_file(config_file); } if (verbose) mp.set_log_level(1); + if (one_warp) mp.set_one_warp_mode(one_warp); // ----------------- Process Traces ----------------- diff --git a/gsnv_patterns.cpp b/gsnv_patterns.cpp index aa92fff..713f2fc 100644 --- a/gsnv_patterns.cpp +++ b/gsnv_patterns.cpp @@ -490,6 +490,10 @@ bool MemPatternsForNV::convert_to_trace_entry(const mem_access_t & ma, bool ignore_partial_warps, std::vector & te_list) { + // Optionally, use traces from warp_id 0 only + if (_one_warp_mode && ma.warp_id != 0 ) + return false; + uint16_t mem_size = ma.size; uint16_t mem_type_code; @@ -517,7 +521,7 @@ bool MemPatternsForNV::convert_to_trace_entry(const mem_access_t & ma, trace_entry_t te { mem_type_code, mem_size, ma.addrs[i], base_addr, ma.iaddr }; te_list.push_back(te); - if (_addr_to_line_id.find(base_addr) == _addr_to_line_id.end()) { + if (_addr_to_line_id.find(ma.iaddr) == 
_addr_to_line_id.end()) { _addr_to_line_id[ma.iaddr] = ma.line_id; } } @@ -698,7 +702,6 @@ void MemPatternsForNV::write_trace_out_file() } strncpy(m_entry.map_name, ID_TO_OPCODE_SHORT, MAP_NAME_SIZE-1); - //uint64_t opcode_short_map_len = _id_to_opcode_short_map.size(); for (auto itr = _id_to_opcode_short_map.begin(); itr != _id_to_opcode_short_map.end(); itr++) { m_entry.id = itr->first; @@ -707,7 +710,6 @@ void MemPatternsForNV::write_trace_out_file() } strncpy(m_entry.map_name, ID_TO_LINE, MAP_NAME_SIZE-1); - //uint64_t line_map_len = _id_to_line_map.size(); for (auto itr = _id_to_line_map.begin(); itr != _id_to_line_map.end(); itr++) { m_entry.id = itr->first; @@ -804,6 +806,11 @@ void MemPatternsForNV::set_config_file(const std::string & config_file) int8_t level = atoi(value.c_str()); set_log_level(level); } + else if (GSNV_ONE_WARP_MODE == name) { + int8_t val = atoi(value.c_str()); + bool mode = val ? true : false; + set_one_warp_mode(mode); + } else { std::cerr << "Unknown setting <" << name << "> with value <" << value << "> " << "specified in config file: " << _config_file_name << " ignoring ..." << std::endl; diff --git a/gsnv_patterns.h b/gsnv_patterns.h index c304c80..133beb9 100644 --- a/gsnv_patterns.h +++ b/gsnv_patterns.h @@ -48,7 +48,7 @@ namespace gsnv_patterns #define MAP_VALUE_SIZE 22 #define MAP_VALUE_LONG_SIZE 94 #define NUM_MAPS 3 - // Setting this to fit within a 4k page e.g. 170 * 32 bytes <= 4k + // Setting this to fit within a 4k page e.g. 
128 * 32 bytes <= 4k #define TRACE_BUFFER_LENGTH 128 struct _trace_map_entry_t @@ -114,6 +114,7 @@ namespace gsnv_patterns static constexpr const char * GSNV_FILE_PREFIX = "GSNV_FILE_PREFIX"; static constexpr const char * GSNV_MAX_TRACE_COUNT = "GSNV_MAX_TRACE_COUNT"; static constexpr const char * GSNV_LOG_LEVEL = "GSNV_LOG_LEVEL"; + static constexpr const char * GSNV_ONE_WARP_MODE = "GSNV_ONE_WARP_MODE"; MemPatternsForNV(): _metrics(GATHER, SCATTER), @@ -147,6 +148,8 @@ namespace gsnv_patterns inline void set_file_prefix(const std::string & prefix) { _file_prefix = prefix; } std::string get_file_prefix(); + void set_one_warp_mode(bool val) { _one_warp_mode = val; } + void set_max_trace_count(int64_t max_trace_count); inline bool exceed_max_count() const { if (_limit_trace_count && (_trace_info.trace_lines >= _max_trace_count)) { @@ -158,7 +161,6 @@ namespace gsnv_patterns // Mainly Called by nvbit kernel void set_config_file (const std::string & config_file); - void update_metrics(); void process_traces(); @@ -233,6 +235,7 @@ namespace gsnv_patterns bool _first_trace_seen = false; int8_t _log_level = 0; + bool _one_warp_mode = false; /* The output stream used to temporarily hold raw trace warp data (mem_access_t) before being writen to _trace_out_file_name */ std::fstream _ofs_tmp; diff --git a/nvbit_tracing/README.md b/nvbit_tracing/README.md index 55ac9f4..de7cae3 100644 --- a/nvbit_tracing/README.md +++ b/nvbit_tracing/README.md @@ -1,5 +1,5 @@ # Setup -Download NVBit from the folliwing locations: +Download NVBit from the following locations: https://github.com/NVlabs/NVBit @@ -44,13 +44,14 @@ The config file should have 1 configuration setting per line. 
Configuration set The following are a list of configuration items currently supported: -| Config | Description | possible values | -|---------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------| -| GSNV_LOG_LEVEL | Sets the log level (only 0-2 are currently supported) | 0 to 255 | -| GSNV_TARGET_KERNEL | Specifies the names of Kernels which will be instrumented seperated by space, it none is provided all Kernels will be intrumented. | A String | -| GSNV_FILE_PREFIX | Can be used if specify the prefix of output files e.g if prefix is "trace_file" then output files will be names trace_file.json, etc. If non is provided one will be infered from the output file if that is provided | A String | -| GSNV_TRACE_OUT_FILE | Specifies the name of the output file which will be written with trace data. | A String | -| GSNV_MAX_TRACE_COUNT| Specifies the maximum number of memory traces which are processed, once this number of traces are seen instrumentation is disabled (Can be useful to produce a small trace file for testing) | An Integer e.g 1000000 | +| Config | Description | possible values | +|----------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------| +| GSNV_LOG_LEVEL | Sets the log level (only 0-2 are currently supported) | 0 to 255 | +| GSNV_TARGET_KERNEL | Specifies the names of Kernels which will be instrumented seperated by space, it none is provided all Kernels will be instrumented. If no exact match found, Will match all kernels which starts with the string provided. 
| A String | +| GSNV_FILE_PREFIX | Can be used if specify the prefix of output files e.g if prefix is "trace_file" then output files will be names trace_file.json, etc. If non is provided one will be infered from the output file if that is provided | A String | +| GSNV_TRACE_OUT_FILE | Specifies the name of the output file which will be written with trace data. | A String | +| GSNV_MAX_TRACE_COUNT | Specifies the maximum number of memory traces which are processed, once this number of traces are seen instrumentation is disabled (Can be useful to produce a small trace file for testing) | An Integer e.g 1000000 | +| GSNV_ONE_WARP_MODE | Enable handling traces for a single warp (defaults to warp 0 if enabled). Analogous to trace of first thread in CPU mode. | 1 (on) or 0 (off) the default) | @@ -68,7 +69,7 @@ export GSNV_CONFIG_FILE=./gsnv_config.txt Additional settings which are supported by NVBit can also be set via additional environment variables. To see these please visit the NVBit documentation. Setting covered here are specific to the gsnv_trace tool. -NOTE: It is highly recommended to specify a target kernel using GSNV_TARGET_KERNEL as this alows the tool to be used more efficiently also results in smaller trace files +NOTE: It is highly recommended to specify a target kernel using GSNV_TARGET_KERNEL as this allows the tool to be used more efficiently also results in smaller trace files. ### Instrumenting an application @@ -86,10 +87,10 @@ The gzip command will compress the resulting trace file for use by gs_patterns i ### Generating Memory Patterns using an existing trace file. -In the previous section on Instrumnenting an application, we used gsnv_trace.so to instrument an application, the resulting trace file was then compressed. +In the previous section on Instrumenting an application, we used gsnv_trace.so to instrument an application, the resulting trace file was then compressed. The instrumentation run also generated pattern files. 
If we want to rerun the pattern generation we can do so using the generated (and compressed) trace file without re-instrumenting the application as this is much faster. -To do this we just need to run the gs_pattern binary with the trace file and the "-nv " option. The "-nv" option indicates that the trace file is an nvbit trace. +To do this we just need to run the gs_pattern binary with the trace file and the "-nv " option. The "-nv" option indicates that the trace file is a NVBit trace. Example: @@ -101,10 +102,10 @@ $GS_PATTERNS_DIR/gs_patterns -nv ### Important Notes As of NVBit 1.5.5, when building gsnv_trace within the NVBit source tree it *may* be required to specify a version of the CUDA which is older -in order to enable NVBit to correctly emit the runtime instructions. Without this the gsnv_trace libary will still be built but will be unable to instrument CUDA kernels. +in order to enable NVBit to correctly emit the runtime instructions. Without this the gsnv_trace library will still be built but will be unable to instrument CUDA kernels. -For instance we were able to build a working gsnv_trace using CUDA api version 11.7 and lower and use that on higher versions of the CUDA environment such as CUDA 12.3. -However as of NVBit 1.5.5 it was not possible to get a working version of gsnv_trace when we build it using 12.3 directly. +For instance, we were able to build a working gsnv_trace using CUDA api version 11.7 and lower and use that on higher versions of the CUDA environment such as CUDA 12.3. +However, as of NVBit 1.5.5 it was not possible to get a working version of gsnv_trace when we build it using 12.3 directly. 
Example: From 60a9672ce1821f08003dc6cbdf0b677e700dbd11 Mon Sep 17 00:00:00 2001 From: christopher Date: Sat, 20 Jul 2024 17:00:13 -0400 Subject: [PATCH 73/76] Fix compiler warnings (unused variables, unreachable stmts etc) --- gs_patterns_core.cpp | 10 +++------- gsnv_patterns.cpp | 6 ------ gspin_patterns.cpp | 4 ---- 3 files changed, 3 insertions(+), 17 deletions(-) diff --git a/gs_patterns_core.cpp b/gs_patterns_core.cpp index dbaa08b..db36838 100644 --- a/gs_patterns_core.cpp +++ b/gs_patterns_core.cpp @@ -16,8 +16,6 @@ namespace gs_patterns_core void translate_iaddr(const std::string & binary, char * source_line, addr_t iaddr) { - int i = 0; - int ntranslated = 0; char path[MAX_LINE_LENGTH]; char cmd[MAX_LINE_LENGTH]; FILE *fp; @@ -50,9 +48,8 @@ namespace gs_patterns_core //Create stride histogram and create spatter int sidx; int unique_strides; - int64_t idx, pidx; int64_t n_stride[1027]; - double outbounds; +// double outbounds; if (file_prefix.empty()) throw GSFileError ("Empty file prefix provided."); @@ -79,10 +76,10 @@ namespace gs_patterns_core } } - outbounds = (double) (n_stride[0] + n_stride[1026]) / (double) target_metrics.offset[i]; + //outbounds = (double) (n_stride[0] + n_stride[1026]) / (double) target_metrics.offset[i]; //if ( ( (unique_strides > NSTRIDES) || (outbounds > OUTTHRESH) ) && (gather_offset[i] > USTRIDES ) ){ - if (1) { + if (true) { //create a binary file FILE *fp_bin; @@ -180,7 +177,6 @@ namespace gs_patterns_core int get_top_target(InstrInfo & target_iinfo, Metrics & target_metrics) { int target_ntop = 0; - int bestcnt; for (int j = 0; j < NTOP; j++) { diff --git a/gsnv_patterns.cpp b/gsnv_patterns.cpp index 713f2fc..11e84e5 100644 --- a/gsnv_patterns.cpp +++ b/gsnv_patterns.cpp @@ -108,9 +108,7 @@ Metrics & MemPatternsForNV::get_metrics(mem_access_type m) switch (m) { case GATHER : return _metrics.first; - break; case SCATTER : return _metrics.second; - break; default: throw GSError("Unable to get Metrics - Invalid Metrics Type: 
" + std::to_string(m)); } @@ -121,9 +119,7 @@ InstrInfo & MemPatternsForNV::get_iinfo(mem_access_type m) switch (m) { case GATHER : return _iinfo.first; - break; case SCATTER : return _iinfo.second; - break; default: throw GSError("Unable to get InstrInfo - Invalid Metrics Type: " + std::to_string(m)); } @@ -425,7 +421,6 @@ double MemPatternsForNV::update_source_lines_from_binary(mem_access_type mType) void MemPatternsForNV::process_second_pass() { uint64_t mcnt = 0; // used our own local mcnt while iterating over file in this method. - int iret = 0; // State carried thru addr_t iaddr; @@ -597,7 +592,6 @@ void MemPatternsForNV::handle_cta_memory_access(const mem_access_t * ma) bool MemPatternsForNV::valid_gs_stride(const std::vector & te_list, const uint32_t min_stride) { - bool valid_stride = false; uint32_t min_stride_found = INT32_MAX; uint64_t last_addr = 0; bool first = true; diff --git a/gspin_patterns.cpp b/gspin_patterns.cpp index 1ab3dc2..a475b75 100644 --- a/gspin_patterns.cpp +++ b/gspin_patterns.cpp @@ -59,9 +59,7 @@ Metrics & MemPatternsForPin::get_metrics(mem_access_type m) switch (m) { case GATHER : return _metrics.first; - break; case SCATTER : return _metrics.second; - break; default: throw GSError("Unable to get Metrics - Invalid Metrics Type: " + std::to_string(m)); } @@ -72,9 +70,7 @@ InstrInfo & MemPatternsForPin::get_iinfo(mem_access_type m) switch (m) { case GATHER : return _iinfo.first; - break; case SCATTER : return _iinfo.second; - break; default: throw GSError("Unable to get InstrInfo - Invalid Metrics Type: " + std::to_string(m)); } From d95be9d28f48aa9c493e2481724bd28cd9056b76 Mon Sep 17 00:00:00 2001 From: christopher Date: Fri, 2 Aug 2024 22:45:52 -0400 Subject: [PATCH 74/76] Support for building NVBit 1.7 --- nvbit_tracing/README.md | 41 +++--- nvbit_tracing/gsnv_trace/Makefile | 25 +++- nvbit_tracing/gsnv_trace/gsnv_trace.cu | 170 ++++++++++++++++--------- 3 files changed, 143 insertions(+), 93 deletions(-) diff --git 
a/nvbit_tracing/README.md b/nvbit_tracing/README.md index de7cae3..b12303c 100644 --- a/nvbit_tracing/README.md +++ b/nvbit_tracing/README.md @@ -3,25 +3,27 @@ Download NVBit from the following locations: https://github.com/NVlabs/NVBit -#### Tested with version 1.5.5 +#### Tested with version 1.7 -https://github.com/NVlabs/NVBit/releases/tag/1.5.5 +https://github.com/NVlabs/NVBit/releases/tag/1.7 + +#### From the parent directory of the gs_patterns distribution ``` -# or (for example for Linux x86_64) +# For example for Linux x86_64) -wget https://github.com/NVlabs/NVBit/releases/download/1.5.5/nvbit-Linux-x86_64-1.5.5.tar.bz2 +wget https://github.com/NVlabs/NVBit/releases/download/1.7/nvbit-Linux-aarch64-1.7.tar.bz2 ``` ``` module load gcc #or make sure you have gcc. Tested with 8.5.0 and 11.4.0 -tar zxvf +tar xvf -export NVBIT_DIR= # full path +export NVBIT_DIR= # full path -cp -rv nvbit_tracing/gsnv_trace $NVBIT_DIR/tools/ +cp -rv gs_patterns/nvbit_tracing/gsnv_trace $NVBIT_DIR/tools/ cd $NVBIT_DIR @@ -48,8 +50,8 @@ The following are a list of configuration items currently supported: |----------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------| | GSNV_LOG_LEVEL | Sets the log level (only 0-2 are currently supported) | 0 to 255 | | GSNV_TARGET_KERNEL | Specifies the names of Kernels which will be instrumented seperated by space, it none is provided all Kernels will be instrumented. If no exact match found, Will match all kernels which starts with the string provided. | A String | -| GSNV_FILE_PREFIX | Can be used if specify the prefix of output files e.g if prefix is "trace_file" then output files will be names trace_file.json, etc. 
If non is provided one will be infered from the output file if that is provided | A String | -| GSNV_TRACE_OUT_FILE | Specifies the name of the output file which will be written with trace data. | A String | +| GSNV_FILE_PREFIX | Can be used to specify the prefix of output files e.g. if prefix is "trace_file" then output files will be named trace_file.json, etc. If none is provided one will be inferred from the input trace file if provided. | A String | +| GSNV_TRACE_OUT_FILE | Specifies the name of the output file which will be written with trace data. Trace file will not be written if this is not provided. | A String | | GSNV_MAX_TRACE_COUNT | Specifies the maximum number of memory traces which are processed, once this number of traces are seen instrumentation is disabled (Can be useful to produce a small trace file for testing) | An Integer e.g 1000000 | | GSNV_ONE_WARP_MODE | Enable handling traces for a single warp (defaults to warp 0 if enabled). Analogous to trace of first thread in CPU mode. | 1 (on) or 0 (off) the default) | @@ -73,7 +75,7 @@ Setting covered here are specific to the gsnv_trace tool. ### Instrumenting an application -To starat instrumenting a CUDA application using gsnv_trace. The gsnv_trace.so libary previously built will need to be specified using LD_PRELOAD. +To start instrumenting a CUDA application using gsnv_trace, the gsnv_trace.so library previously built will need to be specified using LD_PRELOAD. Example: @@ -101,28 +103,15 @@ $GS_PATTERNS_DIR/gs_patterns -nv ### Important Notes -As of NVBit 1.5.5, when building gsnv_trace within the NVBit source tree it *may* be required to specify a version of the CUDA which is older -in order to enable NVBit to correctly emit the runtime instructions. Without this the gsnv_trace library will still be built but will be unable to instrument CUDA kernels. 
- -For instance, we were able to build a working gsnv_trace using CUDA api version 11.7 and lower and use that on higher versions of the CUDA environment such as CUDA 12.3. -However, as of NVBit 1.5.5 it was not possible to get a working version of gsnv_trace when we build it using 12.3 directly. +This version of gsnv_trace works with NVBit >= 1.7 Example: -``` -export LD_LIBARY_PATH=/path/to/cuda/11.7/lib:$LD_LIBRARY_PATH -export PATH=/path/to/cuda/11.7/bin:$PATH -cd $NVBIT_DIR -make -``` - -Then in another shell simply load the desired CUDA library version using module load or manually, e.g: - ``` export LD_LIBARY_PATH=/path/to/new/cuda/12.3/lib:$LD_LIBRARY_PATH export PATH=/path/to/new/cuda/12.3/bin:$PATH -# point to where you build gsnv_trace.so (We can now instrument under CUDA 12.3) -LD_PRELOAD=$NVBIT_DIR/tools/gsnv_trace/gsnv_trace.so +# Point to where you built gsnv_trace.so and invoke the application with its command line arguments +LD_PRELOAD=$NVBIT_DIR/tools/gsnv_trace/gsnv_trace.so gzip trace_file.nvbit.bin ``` diff --git a/nvbit_tracing/gsnv_trace/Makefile b/nvbit_tracing/gsnv_trace/Makefile index 5971afa..1a0020e 100644 --- a/nvbit_tracing/gsnv_trace/Makefile +++ b/nvbit_tracing/gsnv_trace/Makefile @@ -1,4 +1,5 @@ NVCC=nvcc -ccbin=$(CXX) -D_FORCE_INLINES +PTXAS=ptxas NVCC_VER_REQ=10.1 NVCC_VER=$(shell $(NVCC) --version | grep release | cut -f2 -d, | cut -f3 -d' ') @@ -8,10 +9,23 @@ ifeq ($(NVCC_VER_CHECK),0) $(error ERROR: nvcc version >= $(NVCC_VER_REQ) required to compile an nvbit tool! Instrumented applications can still use lower versions of nvcc.) 
endif +PTXAS_VER_ADD_FLAG=12.3 +PTXAS_VER=$(shell $(PTXAS) --version | grep release | cut -f2 -d, | cut -f3 -d' ') +PTXAS_VER_CHECK=$(shell echo "${PTXAS_VER} >= $(PTXAS_VER_ADD_FLAG)" | bc) + +ifeq ($(PTXAS_VER_CHECK), 0) +MAXRREGCOUNT_FLAG=-maxrregcount=24 +else +MAXRREGCOUNT_FLAG= +endif + NVBIT_PATH=../../core GSPATTERNS_CORE_PATH=../../../gs_patterns INCLUDES=-I$(NVBIT_PATH) -I$(GSPATTERNS_CORE_PATH) +NVBIT_PATH=../../core +GSPATTERNS_CORE_PATH=../../../gs_patterns +INCLUDES=-I$(NVBIT_PATH) -I$(GSPATTERNS_CORE_PATH) LIBS=-L$(NVBIT_PATH) -lnvbit -L$(GSPATTERNS_CORE_PATH)/build -lgs_patterns_core NVCC_PATH=-L $(subst bin/nvcc,lib64,$(shell which nvcc | tr -s /)) @@ -19,7 +33,7 @@ NVCC_PATH=-L $(subst bin/nvcc,lib64,$(shell which nvcc | tr -s /)) SOURCES=$(wildcard *.cu) OBJECTS=$(SOURCES:.cu=.o) -ARCH?=35 +ARCH?=all mkfile_path := $(abspath $(lastword $(MAKEFILE_LIST))) current_dir := $(notdir $(patsubst %/,%,$(dir $(mkfile_path)))) @@ -29,16 +43,13 @@ NVBIT_TOOL=$(current_dir).so all: $(NVBIT_TOOL) $(NVBIT_TOOL): $(OBJECTS) $(NVBIT_PATH)/libnvbit.a - $(NVCC) -arch=sm_$(ARCH) -O3 $(OBJECTS) $(LIBS) $(NVCC_PATH) -lcuda -lcudart_static -shared -o $@ -# $(NVCC) -arch=sm_$(ARCH) --device-debug -O3 -g $(OBJECTS) $(LIBS) $(NVCC_PATH) -lcuda -lcudart_static -shared -o $@ + $(NVCC) -arch=$(ARCH) -O3 $(OBJECTS) $(LIBS) $(NVCC_PATH) -lcuda -lcudart_static -shared -o $@ %.o: %.cu - $(NVCC) -dc -c -std=c++17 $(INCLUDES) -Xptxas -cloning=no -Xcompiler -Wall -arch=sm_$(ARCH) -O3 -Xcompiler -fPIC $< -o $@ -# $(NVCC) -dc -c -std=c++17 $(INCLUDES) -Xptxas -cloning=no -Xcompiler -Wall -arch=sm_$(ARCH) -O3 -Xcompiler -fPIC $< -o $@ + $(NVCC) -dc -c -std=c++17 $(INCLUDES) -Xptxas -cloning=no -Xcompiler -Wall -arch=$(ARCH) -O3 -Xcompiler -fPIC $< -o $@ inject_funcs.o: inject_funcs.cu - $(NVCC) $(INCLUDES) -maxrregcount=16 -Xptxas -astoolspatch --keep-device-functions -arch=sm_$(ARCH) -Xcompiler -Wall -Xcompiler -fPIC -c $< -o $@ -# $(NVCC) $(INCLUDES) -maxrregcount=24 -Xptxas 
-astoolspatch --keep-device-functions -arch=sm_$(ARCH) -Xcompiler -Wall -Xcompiler -fPIC -c $< -o $@ + $(NVCC) $(INCLUDES) $(MAXRREGCOUNT_FLAG) -Xptxas -astoolspatch --keep-device-functions -Xcompiler -Wall -arch=$(ARCH) -Xcompiler -fPIC -c $< -o $@ clean: rm -f *.so *.o diff --git a/nvbit_tracing/gsnv_trace/gsnv_trace.cu b/nvbit_tracing/gsnv_trace/gsnv_trace.cu index c0843b2..40872d5 100644 --- a/nvbit_tracing/gsnv_trace/gsnv_trace.cu +++ b/nvbit_tracing/gsnv_trace/gsnv_trace.cu @@ -26,6 +26,7 @@ */ #include +#include #include #include #include @@ -45,7 +46,7 @@ #include "utils/channel.hpp" /* contains definition of the mem_access_t structure */ -//#include "common.h" +#include "common.h" #include #include @@ -68,10 +69,13 @@ struct CTXstate { /* Channel used to communicate from GPU to CPU receiving thread */ ChannelDev* channel_dev; ChannelHost channel_host; + + volatile bool recv_thread_done = false; }; /* lock */ pthread_mutex_t mutex; +pthread_mutex_t cuda_event_mutex; /* map to store context state */ std::unordered_map ctx_state_map; @@ -99,6 +103,8 @@ std::unique_ptr mp(new MemPatternsForNV); /* grid launch id, incremented at every launch */ uint64_t grid_launch_id = 0; +void* recv_thread_fun(void* args); + void nvbit_at_init() { setenv("CUDA_MANAGED_FORCE_DEVICE_ALLOC", "1", 1); GET_VAR_INT( @@ -120,7 +126,7 @@ void nvbit_at_init() { pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE); pthread_mutex_init(&mutex, &attr); - // -- init #1 + pthread_mutex_init(&cuda_event_mutex, &attr); } /* Set used to avoid re-instrumenting the same functions multiple times */ @@ -178,6 +184,7 @@ void instrument_function_if_needed(CUcontext ctx, CUfunction func) { opcode_to_id_map[instr->getOpcode()] = opcode_id; id_to_opcode_map[opcode_id] = std::string(instr->getOpcode()); } + int opcode_id = opcode_to_id_map[instr->getOpcode()]; // Opcode_Short to OpCode_Short_ID @@ -265,77 +272,125 @@ void instrument_function_if_needed(CUcontext ctx, CUfunction func) { } } 
-__global__ void flush_channel(ChannelDev* ch_dev) { - /* set a CTA id = -1 to indicate communication thread that this is the - * termination flag */ - mem_access_t ma; - ma.cta_id_x = -1; - ch_dev->push(&ma, sizeof(mem_access_t)); - /* flush channel */ - ch_dev->flush(); +/* flush channel */ +__global__ void flush_channel(ChannelDev* ch_dev) { ch_dev->flush(); } + +void init_context_state(CUcontext ctx) { + CTXstate* ctx_state = ctx_state_map[ctx]; + ctx_state->recv_thread_done = false; + cudaMallocManaged(&ctx_state->channel_dev, sizeof(ChannelDev)); + ctx_state->channel_host.init((int)ctx_state_map.size() - 1, CHANNEL_SIZE, + ctx_state->channel_dev, recv_thread_fun, ctx); + nvbit_set_tool_pthread(ctx_state->channel_host.get_thread()); } void nvbit_at_cuda_event(CUcontext ctx, int is_exit, nvbit_api_cuda_t cbid, const char* name, void* params, CUresult* pStatus) { - pthread_mutex_lock(&mutex); + pthread_mutex_lock(&cuda_event_mutex); /* we prevent re-entry on this callback when issuing CUDA functions inside * this function */ if (skip_callback_flag) { - pthread_mutex_unlock(&mutex); + pthread_mutex_unlock(&cuda_event_mutex); return; } skip_callback_flag = true; - assert(ctx_state_map.find(ctx) != ctx_state_map.end()); - CTXstate* ctx_state = ctx_state_map[ctx]; - if (cbid == API_CUDA_cuLaunchKernel_ptsz || - cbid == API_CUDA_cuLaunchKernel) { - cuLaunchKernel_params* p = (cuLaunchKernel_params*)params; - - /* Make sure GPU is idle */ - cudaDeviceSynchronize(); - assert(cudaGetLastError() == cudaSuccess); + cbid == API_CUDA_cuLaunchKernel || + cbid == API_CUDA_cuLaunchCooperativeKernel || + cbid == API_CUDA_cuLaunchCooperativeKernel_ptsz || + cbid == API_CUDA_cuLaunchKernelEx || + cbid == API_CUDA_cuLaunchKernelEx_ptsz) { + CTXstate* ctx_state = ctx_state_map[ctx]; + + CUfunction func; + if (cbid == API_CUDA_cuLaunchKernelEx_ptsz || + cbid == API_CUDA_cuLaunchKernelEx) { + cuLaunchKernelEx_params* p = (cuLaunchKernelEx_params*)params; + func = p->f; + } else { + 
cuLaunchKernel_params* p = (cuLaunchKernel_params*)params; + func = p->f; + } - if (!is_exit && mp->should_instrument(nvbit_get_func_name(ctx, p->f))) + if (!is_exit && mp->should_instrument(nvbit_get_func_name(ctx, func))) { + /* Make sure GPU is idle */ + cudaDeviceSynchronize(); + assert(cudaGetLastError() == cudaSuccess); + /* instrument */ - instrument_function_if_needed(ctx, p->f); + instrument_function_if_needed(ctx, func); int nregs = 0; CUDA_SAFECALL( - cuFuncGetAttribute(&nregs, CU_FUNC_ATTRIBUTE_NUM_REGS, p->f)); + cuFuncGetAttribute(&nregs, CU_FUNC_ATTRIBUTE_NUM_REGS, func)); int shmem_static_nbytes = 0; CUDA_SAFECALL( cuFuncGetAttribute(&shmem_static_nbytes, - CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, p->f)); + CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, func)); /* get function name and pc */ - const char* func_name = nvbit_get_func_name(ctx, p->f); - uint64_t pc = nvbit_get_func_addr(p->f); + const char* func_name = nvbit_get_func_name(ctx, func); + uint64_t pc = nvbit_get_func_addr(func); /* set grid launch id at launch time */ - nvbit_set_at_launch(ctx, p->f, &grid_launch_id, sizeof(uint64_t)); - /* increment grid launch id for next launch */ - grid_launch_id++; + nvbit_set_at_launch(ctx, func, (uint64_t)&grid_launch_id); /* enable instrumented code to run */ - nvbit_enable_instrumented(ctx, p->f, true); + nvbit_enable_instrumented(ctx, func, true); + + if (cbid == API_CUDA_cuLaunchKernelEx_ptsz || + cbid == API_CUDA_cuLaunchKernelEx) + { + cuLaunchKernelEx_params *p = (cuLaunchKernelEx_params *) params; + printf( + "GSNV_TRACE: CTX 0x%016lx - LAUNCH - Kernel pc 0x%016lx - " + "Kernel name %s - grid launch id %ld - grid size %d,%d,%d " + "- block size %d,%d,%d - nregs %d - shmem %d - cuda stream " + "id %ld\n", + (uint64_t)ctx, pc, func_name, grid_launch_id, + p->config->gridDimX, p->config->gridDimY, + p->config->gridDimZ, p->config->blockDimX, + p->config->blockDimY, p->config->blockDimZ, nregs, + shmem_static_nbytes + p->config->sharedMemBytes, + 
(uint64_t)p->config->hStream); + } + else + { + cuLaunchKernel_params* p = (cuLaunchKernel_params*)params; + printf( + "GSNV_TRACE: CTX 0x%016lx - LAUNCH - Kernel pc 0x%016lx - " + "Kernel name %s - grid launch id %ld - grid size %d,%d,%d " + "- block size %d,%d,%d - nregs %d - shmem %d - cuda stream " + "id %ld\n", + (uint64_t)ctx, pc, func_name, grid_launch_id, p->gridDimX, + p->gridDimY, p->gridDimZ, p->blockDimX, p->blockDimY, + p->blockDimZ, nregs, + shmem_static_nbytes + p->sharedMemBytes, + (uint64_t)p->hStream); + } - printf( - "GSNV_TRACE: CTX 0x%016lx - LAUNCH - Kernel pc 0x%016lx - Kernel " - "name %s - grid launch id %ld - grid size %d,%d,%d - block " - "size %d,%d,%d - nregs %d - shmem %d - cuda stream id %ld\n", - (uint64_t)ctx, pc, func_name, grid_launch_id, p->gridDimX, - p->gridDimY, p->gridDimZ, p->blockDimX, p->blockDimY, - p->blockDimZ, nregs, shmem_static_nbytes + p->sharedMemBytes, - (uint64_t)p->hStream); + } + else + { + // make sure user kernel finishes to avoid deadlock + cudaDeviceSynchronize(); + /* push a flush channel kernel */ + flush_channel<<<1, 1>>>(ctx_state->channel_dev); + + /* Make sure GPU is idle */ + cudaDeviceSynchronize(); + assert(cudaGetLastError() == cudaSuccess); + + /* increment grid launch id for next launch */ + grid_launch_id++; } } skip_callback_flag = false; - pthread_mutex_unlock(&mutex); + pthread_mutex_unlock(&cuda_event_mutex); } void* recv_thread_fun(void* args) { @@ -350,8 +405,7 @@ void* recv_thread_fun(void* args) { pthread_mutex_unlock(&mutex); char* recv_buffer = (char*)malloc(CHANNEL_SIZE); - bool done = false; - while (!done) { + while (!ctx_state->recv_thread_done) { /* receive buffer from channel */ uint32_t num_recv_bytes = ch_host->recv(recv_buffer, CHANNEL_SIZE); if (num_recv_bytes > 0) { @@ -360,14 +414,6 @@ void* recv_thread_fun(void* args) { mem_access_t* ma = (mem_access_t*)&recv_buffer[num_processed_bytes]; - /* when we receive a CTA_id_x it means all the kernels - * completed, this is the 
special token we receive from the - * flush channel kernel that is issues at the end of the - * context */ - if (ma->cta_id_x == -1) { - done = true; - break; - } #if 0 std::stringstream ss; ss << "CTX " << HEX(ctx) << " - grid_launch_id " @@ -396,6 +442,7 @@ void* recv_thread_fun(void* args) { } } } + ctx_state->recv_thread_done = false; free(recv_buffer); return NULL; } @@ -406,13 +453,9 @@ void nvbit_at_ctx_init(CUcontext ctx) { if (1) { printf("GSNV_TRACE: STARTING CONTEXT %p\n", ctx); } - CTXstate* ctx_state = new CTXstate; assert(ctx_state_map.find(ctx) == ctx_state_map.end()); + CTXstate* ctx_state = new CTXstate; ctx_state_map[ctx] = ctx_state; - cudaMallocManaged(&ctx_state->channel_dev, sizeof(ChannelDev)); - ctx_state->channel_host.init((int)ctx_state_map.size() - 1, CHANNEL_SIZE, - ctx_state->channel_dev, recv_thread_fun, ctx); - nvbit_set_tool_pthread(ctx_state->channel_host.get_thread()); pthread_mutex_unlock(&mutex); // -- init #2 - whats the difference @@ -426,6 +469,13 @@ void nvbit_at_ctx_init(CUcontext ctx) { } } +void nvbit_tool_init(CUcontext ctx) { + pthread_mutex_lock(&mutex); + assert(ctx_state_map.find(ctx) != ctx_state_map.end()); + init_context_state(ctx); + pthread_mutex_unlock(&mutex); +} + void nvbit_at_ctx_term(CUcontext ctx) { pthread_mutex_lock(&mutex); skip_callback_flag = true; @@ -437,11 +487,11 @@ void nvbit_at_ctx_term(CUcontext ctx) { assert(ctx_state_map.find(ctx) != ctx_state_map.end()); CTXstate* ctx_state = ctx_state_map[ctx]; - /* flush channel */ - flush_channel<<<1, 1>>>(ctx_state->channel_dev); - /* Make sure flush of channel is complete */ - cudaDeviceSynchronize(); - assert(cudaGetLastError() == cudaSuccess); + /* Notify receiver thread and wait for receiver thread to + * notify back */ + ctx_state->recv_thread_done = true; + while (!ctx_state->recv_thread_done) + ; ctx_state->channel_host.destroy(false); cudaFree(ctx_state->channel_dev); From 8c9606e2e81e329055fded158edf39a9a2e7efb0 Mon Sep 17 00:00:00 2001 From: 
christopher Date: Sun, 4 Aug 2024 13:28:25 -0400 Subject: [PATCH 75/76] Remove duplicate include settings --- nvbit_tracing/gsnv_trace/Makefile | 4 ---- 1 file changed, 4 deletions(-) diff --git a/nvbit_tracing/gsnv_trace/Makefile b/nvbit_tracing/gsnv_trace/Makefile index 1a0020e..1a6a9ec 100644 --- a/nvbit_tracing/gsnv_trace/Makefile +++ b/nvbit_tracing/gsnv_trace/Makefile @@ -23,10 +23,6 @@ NVBIT_PATH=../../core GSPATTERNS_CORE_PATH=../../../gs_patterns INCLUDES=-I$(NVBIT_PATH) -I$(GSPATTERNS_CORE_PATH) -NVBIT_PATH=../../core -GSPATTERNS_CORE_PATH=../../../gs_patterns -INCLUDES=-I$(NVBIT_PATH) -I$(GSPATTERNS_CORE_PATH) - LIBS=-L$(NVBIT_PATH) -lnvbit -L$(GSPATTERNS_CORE_PATH)/build -lgs_patterns_core NVCC_PATH=-L $(subst bin/nvcc,lib64,$(shell which nvcc | tr -s /)) From 9316c5fefb18c45a9342e459d3e23966efc0a0a1 Mon Sep 17 00:00:00 2001 From: christopher Date: Sun, 4 Aug 2024 20:01:45 -0400 Subject: [PATCH 76/76] Fix another compiler warning --- gs_patterns.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gs_patterns.h b/gs_patterns.h index db0af28..c81c75c 100644 --- a/gs_patterns.h +++ b/gs_patterns.h @@ -255,7 +255,7 @@ namespace gs_patterns _w_iaddrs[w][i] = -1; _w_bytes[w][i] = 0; _w_cnt[w][i] = 0; - for (int j = 0; j < MAX_ACCESS_SIZE; j++) + for (uint64_t j = 0; j < MAX_ACCESS_SIZE; j++) _w_maddr[w][i][j] = -1; } } @@ -266,7 +266,7 @@ namespace gs_patterns _w_iaddrs[w][i] = -1; _w_bytes[w][i] = 0; _w_cnt[w][i] = 0; - for (int j = 0; j < MAX_ACCESS_SIZE; j++) + for (uint64_t j = 0; j < MAX_ACCESS_SIZE; j++) _w_maddr[w][i][j] = -1; } }