/* dss.h -- forked from databricks/tpch-dbgen */
/*
* $Id: dss.h,v 1.10 2008/03/21 18:07:13 jms Exp $
*
* Revision History
* ===================
* $Log: dss.h,v $
* Revision 1.10 2008/03/21 18:07:13 jms
* update copyright date
*
* Revision 1.9 2006/07/31 17:23:09 jms
* fix to parallelism problem
*
* Revision 1.8 2006/03/09 18:55:29 jms
* remove vestigial cvs merge marker
*
* Revision 1.7 2005/10/28 03:05:05 jms
* up maximum scale to 100TB
*
* Revision 1.6 2005/10/28 02:55:26 jms
* add release.h changes
*
* Revision 1.5 2005/10/27 18:13:03 jms
* a_rnd() prototype correction
*
* Revision 1.4 2005/10/25 17:58:59 jms
* update version stamp
*
* Revision 1.3 2005/03/04 19:48:39 jms
* Changes from Doug Johnson to address very large scale factors
*
* Revision 1.2 2005/01/03 20:08:58 jms
* change line terminations
*
* Revision 1.1.1.1 2004/11/24 23:31:46 jms
* re-establish external server
*
* Revision 1.5 2004/04/08 17:34:15 jms
* cleanup SOLARIS/SUN ifdefs; now all use SUN
*
* Revision 1.4 2004/04/07 20:17:29 jms
* bug #58 (join fails between order/lineitem)
*
* Revision 1.3 2004/03/16 14:37:53 jms
* update version and copyright date; correct comment typo
*
* Revision 1.2 2004/02/18 14:07:20 jms
* change to version 2.1.0
*
* Revision 1.1.1.1 2003/08/08 21:50:33 jms
* recreation after CVS crash
*
* Revision 1.3 2003/08/08 21:35:26 jms
* first integration of rng64 for o_custkey and l_partkey
*
* Revision 1.2 2003/08/07 17:58:34 jms
* Convery RNG to 64bit space as preparation for new large scale RNG
*
* Revision 1.1.1.1 2003/04/03 18:54:21 jms
* initial checkin
*
*
*/
/*
* general definitions and control information for the DSS code
* generator; if it controls the data set, it's here
*/
#ifndef DSS_H
#define DSS_H
/*
 * Benchmark identification strings. NAME is chosen by whichever of TPCH
 * or TPCR is already defined at this point; if neither is, compilation
 * stops with #error.
 * NOTE(review): this check runs before "config.h" is included below, so
 * the selecting macro presumably has to arrive from the compiler command
 * line or an earlier header, despite the #error wording -- confirm.
 */
#ifdef TPCH
#define NAME "TPC-H"
#endif
#ifdef TPCR
#define NAME "TPC-R"
#endif
#ifndef NAME
#error Benchmark version must be defined in config.h
#endif
/* organisation name and copyright date range, as string literals */
#define TPC "Transaction Processing Performance Council"
#define C_DATES "1994 - 2010"
#include "config.h"
#include "shared.h"
#include <stdio.h>
#include <stdlib.h>
/*
 * Integer identifiers for the tables (and table pairs) known to the
 * generator. NONE is a "no table" marker; MAX_TABLE is one greater than
 * the largest identifier.
 * NOTE(review): these presumably index tdefs[] (see tdefs[SUPP],
 * tdefs[CUST] and tdefs[PART] further down) -- confirm against the
 * tdefs[] definition.
 */
#define NONE -1
#define PART 0
#define PSUPP 1
#define SUPP 2
#define CUST 3
#define ORDER 4
#define LINE 5
#define ORDER_LINE 6
#define PART_PSUPP 7
#define NATION 8
#define REGION 9
#define UPDATE 10
#define MAX_TABLE 11
/* NOTE(review): consumers of these two values are not visible in this header */
#define ONE_STREAM 1
#define ADD_AT_END 2
/*
 * Local MAX/MIN; any definition inherited from an earlier header is
 * discarded first so the two always expand the same way.
 *
 * Every use of an argument and the whole expansion are parenthesized, so
 * operands built from lower-precedence operators (for example
 * MAX(a | b, c) or MAX(x, flag ? p : q)) are compared and selected as
 * complete expressions.
 *
 * Each argument is evaluated twice: do not pass expressions with side
 * effects.
 */
#ifdef MAX
#undef MAX
#endif
#ifdef MIN
#undef MIN
#endif
#define MAX(a,b) (((a) > (b)) ? (a) : (b))
#define MIN(A,B) (((A) < (B)) ? (A) : (B))
/*
 * Write the string p to stderr and terminate the process with abort().
 * Expands to a brace-enclosed block, not a single expression.
 */
#define INTERNAL_ERROR(p) {fprintf(stderr,"%s", p);abort();}
/*
 * Progress indicator. When verbose is positive, LIFENOISE writes one of
 * the LN_CNT characters held in lnoise[] to stderr, chosen by var modulo
 * LN_CNT, followed by a backspace.
 *
 * n   - accepted but not used by the expansion
 * var - counter selecting the character; parenthesized so that an
 *       expression argument such as (i + 1) is reduced modulo LN_CNT as
 *       a whole. Must be non-negative, otherwise the index is negative.
 *
 * The expansion is a bare if statement: terminate it with ';' and do not
 * place it directly in front of an else.
 */
#define LN_CNT 4
static char lnoise[LN_CNT] = {'|', '/', '-', '\\' };
#define LIFENOISE(n, var) \
if (verbose > 0) fprintf(stderr, "%c\b", lnoise[((var)%LN_CNT)])
/*
 * If var compares equal to NULL, print "Malloc failed" together with the
 * file and line of the expansion site to stderr and call exit(1).
 * Expands to an if statement with a braced body, not to an expression.
 */
#define MALLOC_CHECK(var) \
if ((var) == NULL) \
{ \
fprintf(stderr, "Malloc failed at %s:%d\n", \
__FILE__, __LINE__); \
exit(1);\
}
/*
 * If var compares equal to NULL, print "Open failed for <path>" together
 * with the file and line of the expansion site to stderr and call exit(1).
 * path is printed with %s, so it must be a NUL-terminated string.
 * Expands to an if statement with a braced body, not to an expression.
 */
#define OPEN_CHECK(var, path) \
if ((var) == NULL) \
{ \
fprintf(stderr, "Open failed for %s at %s:%d\n", \
path, __FILE__, __LINE__); \
exit(1);\
}
/*
 * Default for MAX_CHILDREN; an earlier definition (for example from the
 * build configuration) takes precedence.
 * NOTE(review): presumably the upper bound for the 'children' setting
 * declared further down -- confirm at the point of use.
 */
#ifndef MAX_CHILDREN
#define MAX_CHILDREN 1000
#endif
/*
 * macros that control sparse keys
 *
 * refer to Porting.Notes for a complete explanation
 */
/* fallback word size and largest long value when the platform headers supply none */
#ifndef BITS_PER_LONG
#define BITS_PER_LONG 32
#define MAX_LONG 0x7FFFFFFF
#endif /* BITS_PER_LONG */
#define SPARSE_BITS 2 /* width of the field inserted by MK_SPARSE */
#define SPARSE_KEEP 3 /* low-order key bits left in place by MK_SPARSE */
/*
 * MK_SPARSE(key, seq) builds a key by keeping the low SPARSE_KEEP bits
 * of key where they are, inserting the low SPARSE_BITS bits of seq
 * directly above them, and moving the remaining high bits of key up by
 * SPARSE_BITS positions.
 *
 * Both arguments are parenthesized at every use so that expression
 * arguments (e.g. MK_SPARSE(base | 1, n)) are shifted and masked as a
 * whole. key is evaluated twice.
 */
#define MK_SPARSE(key, seq) \
((((((key) >> SPARSE_KEEP) << SPARSE_BITS) \
| ((seq) & ((1 << SPARSE_BITS) - 1))) << SPARSE_KEEP) \
| ((key) & ((1 << SPARSE_KEEP) - 1)))
/*
 * Convenience wrappers: pass the address of tgt, together with the
 * bounds and stream, to dss_random() / dss_random64().
 * tgt must therefore be an lvalue. dss_random() is prototyped further
 * down; dss_random64() is not declared in this header.
 */
#define RANDOM(tgt, lower, upper, stream) dss_random(&tgt, lower, upper, stream)
#define RANDOM64(tgt, lower, upper, stream) dss_random64(&tgt, lower, upper, stream)
/* One entry of a distribution: a text value and its associated weight. */
typedef struct
{
    long weight;
    char *text;
} set_member;
/*
 * A set of set_member entries.
 * NOTE(review): count/max look like "entries in use" and "capacity", and
 * permute like an optional index permutation -- confirm against the code
 * that fills these in.
 */
typedef struct
{
    int count;
    int max;
    set_member *list;
    long *permute;
} distribution;
/*
 * some handy access functions
 *
 * d is a pointer to a distribution, i an index. Arguments and whole
 * expansions are parenthesized, so address-of expressions such as
 * DIST_SIZE(&nations) and computed indexes such as DIST_MEMBER(d, i + 1)
 * expand correctly. All three still yield lvalues.
 */
#define DIST_SIZE(d) ((d)->count)
#define DIST_MEMBER(d, i) (((d)->list + (i))->text)
#define DIST_PERMUTE(d, i) ((d)->permute[(i)])
/*
 * Per-table descriptor; the tdefs[] array declared further down holds
 * elements of this type.
 */
typedef struct
{
char *name; /* NOTE(review): presumably the table/file name -- confirm */
char *comment; /* NOTE(review): presumably a human-readable description -- confirm */
DSS_HUGE base; /* multiplied by 'scale' in the *_MAX key macros below */
int (*loader) (); /* function pointer declared without a parameter list */
long (*gen_seed)(); /* function pointer declared without a parameter list */
int child; /* NOTE(review): presumably the id of a related table -- confirm */
DSS_HUGE vtotal; /* running value updated by the VRF_* verification macros */
} tdef;
/*
 * State record for one random-number stream.
 * NOTE(review): field meanings are not established in this header;
 * 'table' presumably holds one of the table ids defined above and
 * 'value' the current seed -- confirm in rnd.c.
 */
typedef struct SEED_T {
long table;
DSS_HUGE value;
DSS_HUGE usage;
DSS_HUGE boundary;
#ifdef RNG_TEST
DSS_HUGE nCalls; /* only present in RNG_TEST builds */
#endif
} seed_t;
/*
 * PROTO((args)) yields the full parameter list when the compiler defines
 * __STDC__, and an empty list "()" otherwise, so the declarations that
 * use it also compile with pre-ANSI compilers. Callers pass the
 * parameter list in double parentheses.
 */
#if defined(__STDC__)
#define PROTO(s) s
#else
#define PROTO(s) ()
#endif
/* bm_utils.c */
/*
 * Prototypes are wrapped in PROTO(( )), so the parameter lists are only
 * visible to compilers that define __STDC__. Parameter names are
 * documentation only.
 */
char *env_config PROTO((char *var, char *dflt));
long yes_no PROTO((char *prompt));
void a_rnd PROTO((int min, int max, int column, char *dest));
int tx_rnd PROTO((long min, long max, long column, char *tgt));
long julian PROTO((long date));
long unjulian PROTO((long date));
FILE *tbl_open PROTO((int tbl, char *mode));
long dssncasecmp PROTO((char *s1, char *s2, int n));
long dsscasecmp PROTO((char *s1, char *s2));
int pick_str PROTO((distribution * s, int c, char *target));
void agg_str PROTO((distribution *set, long count, long col, char *dest));
void read_dist PROTO((char *path, char *name, distribution * target));
void embed_str PROTO((distribution *d, int min, int max, int stream, char *dest));
/* only declared when the C library is not flagged as providing getopt() */
#ifndef STDLIB_HAS_GETOPT
int getopt PROTO((int arg_cnt, char **arg_vect, char *options));
#endif /* STDLIB_HAS_GETOPT */
DSS_HUGE set_state PROTO((int t, long scale, long procs, long step, DSS_HUGE *e));
/* rnd.c */
DSS_HUGE NextRand PROTO((DSS_HUGE nSeed));
DSS_HUGE UnifInt PROTO((DSS_HUGE nLow, DSS_HUGE nHigh, long nStream));
/*
 * The next four declarations do not go through PROTO, so they are full
 * prototypes even when __STDC__ is not defined. dss_random() is the
 * target of the RANDOM() macro and stores its result through tgt.
 */
void dss_random(DSS_HUGE *tgt, DSS_HUGE min, DSS_HUGE max, long seed);
void row_start(int t);
void row_stop(int t);
void dump_seeds(int t);
/* text.c */
/* size limits used by the text generator */
#define MAX_GRAMMAR_LEN 12 /* max length of grammar component */
#define MAX_SENT_LEN 256 /* max length of populated sentence */
#define RNG_PER_SENT 27 /* max number of RNG calls per sentence */
void dbg_text PROTO((char * t, int min, int max, int s));
/*
 * Shared global objects. EXTERN expands to nothing when DECLARER is
 * defined and to 'extern' otherwise, so a translation unit that defines
 * DECLARER before including this header provides the definitions and
 * every other one sees declarations only.
 */
#ifdef DECLARER
#define EXTERN
#else
#define EXTERN extern
#endif /* DECLARER */
/* distributions for column values */
EXTERN distribution nations;
EXTERN distribution nations2;
EXTERN distribution regions;
EXTERN distribution o_priority_set;
EXTERN distribution l_instruct_set;
EXTERN distribution l_smode_set;
EXTERN distribution l_category_set;
EXTERN distribution l_rflag_set;
EXTERN distribution c_mseg_set;
EXTERN distribution colors;
EXTERN distribution p_types_set;
EXTERN distribution p_cntr_set;
/* distributions that control text generation */
EXTERN distribution articles;
EXTERN distribution nouns;
EXTERN distribution adjectives;
EXTERN distribution adverbs;
EXTERN distribution prepositions;
EXTERN distribution verbs;
EXTERN distribution terminators;
EXTERN distribution auxillaries;
EXTERN distribution np;
EXTERN distribution vp;
EXTERN distribution grammar;
/*
 * run-time settings; 'scale' is the multiplier used by the *_MAX key
 * macros below and 'verbose' gates LIFENOISE output
 */
EXTERN long scale;
EXTERN int refresh;
EXTERN int resume;
EXTERN long verbose;
EXTERN long force;
EXTERN long updates;
EXTERN long table;
EXTERN long children;
EXTERN int step;
EXTERN int set_seeds;
EXTERN char *d_path;
/* added for segmented updates */
EXTERN int insert_segments;
EXTERN int delete_segments;
EXTERN int insert_orders_segment;
EXTERN int insert_lineitem_segment;
EXTERN int delete_segment;
/*
 * tdefs[] is only declared here; no definition is emitted through this
 * header, even when DECLARER is defined.
 */
#ifndef DECLARER
extern tdef tdefs[];
#endif /* DECLARER */
/*****************************************************************
** table level defines use the following naming convention: t_ccc_xxx
** with: t, a table identifier
** ccc, a column identifier
** xxx, a limit type
****************************************************************
*/
/*
 * defines which control the parts table
 *
 * The *_FMT strings are templates for a format string: formatting one
 * with an int and a string turns "%%s%%0%d%s" into "%s%0<int><string>".
 * The *_SCL values of 100.0 are floating-point divisors/multipliers.
 * NOTE(review): the *_MIN/*_MAX pairs appear to be inclusive bounds for
 * generated values -- confirm where they are used.
 */
#define P_SIZE 126
#define P_NAME_SCL 5
#define P_MFG_TAG "Manufacturer#"
#define P_MFG_FMT "%%s%%0%d%s"
#define P_MFG_MIN 1
#define P_MFG_MAX 5
#define P_BRND_TAG "Brand#"
#define P_BRND_FMT "%%s%%0%d%s"
#define P_BRND_MIN 1
#define P_BRND_MAX 5
#define P_SIZE_MIN 1
#define P_SIZE_MAX 50
#define P_MCST_MIN 100
#define P_MCST_MAX 99900
#define P_MCST_SCL 100.0
#define P_RCST_MIN 90000
#define P_RCST_MAX 200000
#define P_RCST_SCL 100.0
/*
 * defines which control the suppliers table
 *
 * S_NAME_FMT is a template for a format string (see "%%s%%0%d%s").
 * The BBB_* lengths match the literals: BBB_BASE is 9 characters,
 * BBB_COMPLAIN and BBB_COMMEND are 10 each, and BBB_CMNT_LEN (19) is
 * their sum.
 */
#define S_SIZE 145
#define S_NAME_TAG "Supplier#"
#define S_NAME_FMT "%%s%%0%d%s"
#define S_ABAL_MIN -99999
#define S_ABAL_MAX 999999
#define S_CMNT_MAX 101
#define S_CMNT_BBB 10 /* number of BBB comments/SF */
#define BBB_DEADBEATS 50 /* % that are complaints */
#define BBB_BASE "Customer "
#define BBB_COMPLAIN "Complaints"
#define BBB_COMMEND "Recommends"
#define BBB_CMNT_LEN 19
#define BBB_BASE_LEN 9
#define BBB_TYPE_LEN 10
/*
 * defines which control the partsupp table
 *
 * PS_SKEY_MAX is not a constant: it reads tdefs[SUPP].base and the
 * global 'scale' each time it is expanded.
 */
#define PS_SIZE 145
#define PS_SKEY_MIN 0
#define PS_SKEY_MAX ((tdefs[SUPP].base - 1) * scale)
#define PS_SCST_MIN 100
#define PS_SCST_MAX 100000
#define PS_QTY_MIN 1
#define PS_QTY_MAX 9999
/*
 * defines which control the customers table
 *
 * C_NAME_FMT is a template for a format string (see "%%s%%0%d%s").
 * C_ABAL_MIN/C_ABAL_MAX carry the same values as S_ABAL_MIN/S_ABAL_MAX.
 */
#define C_SIZE 165
#define C_NAME_TAG "Customer#"
#define C_NAME_FMT "%%s%%0%d%s"
#define C_MSEG_MAX 5
#define C_ABAL_MIN -99999
#define C_ABAL_MAX 999999
/*
 * defines which control the order table
 *
 * O_CKEY_MAX reads tdefs[CUST].base and the global 'scale' each time it
 * is expanded. O_ODATE_MAX leaves L_SDTE_MAX + L_RDTE_MAX + 1 days
 * between the latest order date and STARTDATE + TOTDATE; it relies on
 * macros defined later in this header, which is fine because expansion
 * happens at the point of use.
 */
#define O_SIZE 109
#define O_CKEY_MIN 1
#define O_CKEY_MAX (tdefs[CUST].base * scale)
#define O_ODATE_MIN STARTDATE
#define O_ODATE_MAX (STARTDATE + TOTDATE - \
(L_SDTE_MAX + L_RDTE_MAX) - 1)
#define O_CLRK_TAG "Clerk#"
#define O_CLRK_FMT "%%s%%0%d%s"
#define O_CLRK_SCL 1000
#define O_LCNT_MIN 1
#define O_LCNT_MAX 7
/*
 * defines which control the lineitem table
 *
 * L_SIZE is the only *_SIZE constant with a long (L) suffix.
 * L_PKEY_MAX reads tdefs[PART].base and the global 'scale' each time it
 * is expanded. L_SDTE_MAX and L_RDTE_MAX also feed O_ODATE_MAX.
 */
#define L_SIZE 144L
#define L_QTY_MIN 1
#define L_QTY_MAX 50
#define L_TAX_MIN 0
#define L_TAX_MAX 8
#define L_DCNT_MIN 0
#define L_DCNT_MAX 10
#define L_PKEY_MIN 1
#define L_PKEY_MAX (tdefs[PART].base * scale)
#define L_SDTE_MIN 1
#define L_SDTE_MAX 121
#define L_CDTE_MIN 30
#define L_CDTE_MAX 90
#define L_RDTE_MIN 1
#define L_RDTE_MAX 30
/*
 * defines which control the time table
 */
#define T_SIZE 30
#define T_START_DAY 3 /* wednesday ? */
/*
 * LEAP(y) evaluates to 1 when y is divisible by 4 and not by 100, and to
 * 0 otherwise. y is parenthesized at each use so that an expression
 * argument (e.g. LEAP(base + offset)) is tested as a whole; it is
 * evaluated twice.
 *
 * The divisible-by-400 rule is intentionally not applied, which keeps
 * results identical for all existing callers.
 * NOTE(review): years elsewhere in this header look two-digit
 * (STARTDATE 92001, the "19%02d" in PR_DATE) -- confirm the caller's
 * year representation before extending the rule.
 */
#define LEAP(y) ((!((y) % 4) && ((y) % 100))?1:0)
/*******************************************************************
*******************************************************************
***
*** general or inter table defines
***
*******************************************************************
*******************************************************************/
#define SUPP_PER_PART 4
#define ORDERS_PER_CUST 10 /* sync this with CUST_MORTALITY */
#define CUST_MORTALITY 3 /* portion which have no orders */
#define NATIONS_MAX 90 /* limited by country codes in phone numbers */
#define PHONE_FMT "%02d-%03d-%03d-%04d"
/*
 * NOTE(review): the three date constants look like YYDDD values (two-digit
 * year followed by day of year); TOTDATE equals the number of days from
 * 92001 through 98365 under that reading -- confirm against julian().
 */
#define STARTDATE 92001
#define CURRENTDATE 95168
#define ENDDATE 98365
#define TOTDATE 2557
#define UPD_PCT 10
#define MAX_STREAM 47 /* same value as the highest seed index, BBB_OFFSET_SD */
#define V_STR_LOW 0.4
#define PENNIES 100 /* for scaled int money arithmetic */
#define Q11_FRACTION (double)0.0001
/*
 * max and min SF in GB; Larger SF will require changes to the build routines
 *
 * All three limits are floating-point constants.
 */
#define MIN_SCALE 1.0
#define MAX_SCALE 100000.0
/*
 * beyond this point we need to allow for BCD calculations
 */
#define MAX_32B_SCALE 1000.0
/*
 * Arithmetic helpers for DSS_HUGE values.
 *
 * Calling convention (unchanged): arguments named dst, and op1 in every
 * macro, are POINTERS and are dereferenced; src in LONG2HUGE/HUGE2LONG
 * and op2 in MUL/DIV/ADD/SUB/MOD are VALUES; HUGE_SET and HUGE_CMP take
 * pointers for both arguments.
 *
 * Every argument and every expansion is parenthesized so that
 * expression arguments (e.g. HUGE_MOD(&a, b + 1)) are applied as a
 * whole. HUGE_CMP yields 0 when *op1 == *op2, -1 when *op1 < *op2 and 1
 * otherwise; its previous definition lacked a '?' before the -1 and
 * could not be compiled where it was used.
 */
#define LONG2HUGE(src, dst) (*(dst) = (DSS_HUGE)(src))
#define HUGE2LONG(src, dst) (*(dst) = (long)(src))
#define HUGE_SET(src, dst) (*(dst) = *(src))
#define HUGE_MUL(op1, op2) (*(op1) *= (op2))
#define HUGE_DIV(op1, op2) (*(op1) /= (op2))
#define HUGE_ADD(op1, op2, dst) (*(dst) = *(op1) + (op2))
#define HUGE_SUB(op1, op2, dst) (*(dst) = *(op1) - (op2))
#define HUGE_MOD(op1, op2) (*(op1) % (op2))
#define HUGE_CMP(op1, op2) \
((*(op1) == *(op2)) ? 0 : ((*(op1) < *(op2)) ? -1 : 1))
/******** environmental variables and defaults ***************/
/*
 * Each *_TAG is the name of an environment variable and the matching
 * *_DFLT is the value used when that variable does not override it.
 */
#define DIST_TAG "DSS_DIST" /* environment var to override ... */
#define DIST_DFLT "dists.dss" /* default file to hold distributions */
#define PATH_TAG "DSS_PATH" /* environment var to override ... */
#define PATH_DFLT "." /* default directory to hold tables */
#define CONFIG_TAG "DSS_CONFIG" /* environment var to override ... */
#define CONFIG_DFLT "." /* default directory to config files */
#define ADHOC_TAG "DSS_ADHOC" /* environment var to override ... */
#define ADHOC_DFLT "adhoc.dss" /* default file name for adhoc vars */
/******* output macros ********/
/* SEPARATOR may be predefined by the build; '|' is the fallback */
#ifndef SEPARATOR
#define SEPARATOR '|' /* field separator for generated flat files */
#endif
/* Data type flags for a single print routine */
#define DT_STR 0
/* DT_VSTR is an alias of DT_STR unless MVS is defined, where it is distinct */
#ifndef MVS
#define DT_VSTR DT_STR
#else
#define DT_VSTR 1
#endif /* MVS */
#define DT_INT 2
#define DT_HUGE 3
#define DT_KEY 4
#define DT_MONEY 5
#define DT_CHR 6
/*
 * Single print routine; dt is one of the DT_* flags above. The PR_*
 * wrappers below fix dt and the last argument (eol): 1 for the ordinary
 * forms, 0 for the *_LAST forms and -1 for PR_KEY. Wrappers without a
 * len parameter pass 0 for len. Each wrapper casts its value argument to
 * void *, so callers pass a pointer.
 * NOTE(review): the meaning of the eol values is defined by dbg_print()
 * itself and is not visible here -- presumably it controls whether a
 * separator follows the field; confirm in the implementation.
 */
int dbg_print(int dt, FILE *tgt, void *data, int len, int eol);
#define PR_STR(f, str, len) dbg_print(DT_STR, f, (void *)str, len, 1)
#define PR_VSTR(f, str, len) dbg_print(DT_VSTR, f, (void *)str, len, 1)
#define PR_VSTR_LAST(f, str, len) dbg_print(DT_VSTR, f, (void *)str, len, 0)
#define PR_INT(f, str) dbg_print(DT_INT, f, (void *)str, 0, 1)
#define PR_HUGE(f, str) dbg_print(DT_HUGE, f, (void *)str, 0, 1)
#define PR_HUGE_LAST(f, str) dbg_print(DT_HUGE, f, (void *)str, 0, 0)
#define PR_KEY(f, str) dbg_print(DT_KEY, f, (void *)str, 0, -1)
#define PR_MONEY(f, str) dbg_print(DT_MONEY, f, (void *)str, 0, 1)
#define PR_CHR(f, str) dbg_print(DT_CHR, f, (void *)str, 0, 1)
/* PR_STRT expands to nothing; PR_END writes a newline to fp */
#define PR_STRT(fp) /* any line prep for a record goes here */
#define PR_END(fp) fprintf(fp, "\n") /* finish the record here */
/*
 * PR_DATE formats a date into the buffer tgt with sprintf(). With
 * MDY_DATE defined the layout is MM-DD-19YY, otherwise 19YY-MM-DD. The
 * century "19" is a literal in both layouts, so yr supplies only the
 * year within the century. No buffer size is passed: tgt must hold at
 * least 11 bytes when all three arguments have at most two digits.
 */
#ifdef MDY_DATE
#define PR_DATE(tgt, yr, mn, dy) \
sprintf(tgt, "%02d-%02d-19%02d", mn, dy, yr)
#else
#define PR_DATE(tgt, yr, mn, dy) \
sprintf(tgt, "19%02d-%02d-%02d", yr, mn, dy)
#endif /* MDY_DATE */
/*
 * verification macros
 *
 * Each macro updates tdefs[t].vtotal from the value d. VRF_STR adds
 * every character of the NUL-terminated string d; VRF_INT and VRF_CHR
 * add d itself. VRF_STRT and VRF_END expand to nothing.
 *
 * NOTE(review): VRF_HUGE and VRF_MONEY differ from the others in two
 * ways that look unintended:
 *   - they assign (=) to vtotal instead of accumulating (+=);
 *   - the second operand reads through (long *)(&d + 1), i.e. the
 *     storage immediately AFTER d, not the second long-sized word
 *     inside d.
 * Both also access d through a long pointer regardless of d's type.
 * Confirm the intended checksum before relying on these two macros.
 */
#define VRF_STR(t, d) {char *xx = d; while (*xx) tdefs[t].vtotal += *xx++;}
#define VRF_INT(t,d) tdefs[t].vtotal += d
#define VRF_HUGE(t,d) tdefs[t].vtotal = *((long *)&d) + *((long *)(&d + 1))
/* assume float is a 64 bit quantity */
#define VRF_MONEY(t,d) tdefs[t].vtotal = *((long *)&d) + *((long *)(&d + 1))
#define VRF_CHR(t,d) tdefs[t].vtotal += d
#define VRF_STRT(t)
#define VRF_END(t)
/*********** distributions currently defined *************/
#define UNIFORM 0
/*
 * seed indexes; used to separate the generation of individual columns
 *
 * The prefix names the table (P_, PS_, O_, L_, C_, S_, N_, R_, BBB_) and
 * the middle part the column. The numbering has gaps: 6, 9, 12, 25, 26,
 * 31, 32 and 36 are not assigned here. The highest index, BBB_OFFSET_SD,
 * equals MAX_STREAM (47).
 */
#define P_MFG_SD 0
#define P_BRND_SD 1
#define P_TYPE_SD 2
#define P_SIZE_SD 3
#define P_CNTR_SD 4
#define P_RCST_SD 5
#define PS_QTY_SD 7
#define PS_SCST_SD 8
#define O_SUPP_SD 10
#define O_CLRK_SD 11
#define O_ODATE_SD 13
#define L_QTY_SD 14
#define L_DCNT_SD 15
#define L_TAX_SD 16
#define L_SHIP_SD 17
#define L_SMODE_SD 18
#define L_PKEY_SD 19
#define L_SKEY_SD 20
#define L_SDTE_SD 21
#define L_CDTE_SD 22
#define L_RDTE_SD 23
#define L_RFLG_SD 24
#define C_NTRG_SD 27
#define C_PHNE_SD 28
#define C_ABAL_SD 29
#define C_MSEG_SD 30
#define S_NTRG_SD 33
#define S_PHNE_SD 34
#define S_ABAL_SD 35
#define P_NAME_SD 37
#define O_PRIO_SD 38
#define HVAR_SD 39
#define O_CKEY_SD 40
#define N_CMNT_SD 41
#define R_CMNT_SD 42
#define O_LCNT_SD 43
#define BBB_JNK_SD 44
#define BBB_TYPE_SD 45
#define BBB_CMNT_SD 46
#define BBB_OFFSET_SD 47
#endif /* DSS_H */