- client/scheduler: standardize the FLOPS estimate between NVIDIA and ATI.

    Make them both peak FLOPS,
    according to the formula supplied by the manufacturer.

    The impact on the client is minor:
    - the startup message describing the GPU
    - the weight of the resource type in computing long-term debt

    On the server, I changed the example app_plan() function
    to assume that app FLOPS is 20% of peak FLOPS
    (that's about what it is for SETI@home)

svn path=/trunk/boinc/; revision=19310
davidpanderson committed Oct 16, 2009
1 parent 40c159c commit fe2a18f
Showing 8 changed files with 60 additions and 47 deletions.
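
For orientation, here is a minimal standalone sketch of the two manufacturer peak-FLOPS formulas this commit standardizes on, plus the 20%-of-peak assumption used in the example app_plan(). The helper names are mine and the device properties are hypothetical illustration values, not anything reported in this commit.

    #include <cstdio>

    // NVIDIA: peak FLOPS = clock (Hz) * multiprocessor count * 8 cores per MP * 2 ops per clock.
    // The CUDA-reported clock rate is in kHz, hence the factor of 1000 (as in COPROC_CUDA::peak_flops()).
    static double cuda_peak_flops(double clock_rate_khz, int multiprocessor_count) {
        return (1000. * clock_rate_khz) * multiprocessor_count * 8. * 2.;
    }

    // ATI: peak FLOPS = SIMD count * wavefront size * 2.5 ops * engine clock (MHz) * 1e6
    // (as in COPROC_ATI::peak_flops()).
    static double ati_peak_flops(int number_of_simd, int wavefront_size, double engine_clock_mhz) {
        return number_of_simd * wavefront_size * 2.5 * engine_clock_mhz * 1e6;
    }

    int main() {
        // Hypothetical example devices; substitute values reported by the driver.
        double nvidia_peak = cuda_peak_flops(1296000., 30);  // 1.296 GHz clock, 30 multiprocessors
        double ati_peak = ati_peak_flops(10, 64, 750.);      // 10 SIMDs, wavefront size 64, 750 MHz
        printf("NVIDIA peak: %.0f GFLOPS\n", nvidia_peak/1e9);
        printf("ATI peak: %.0f GFLOPS\n", ati_peak/1e9);
        // The example app_plan() assumes app FLOPS is about 20% of peak:
        printf("assumed NVIDIA app FLOPS: %.0f GFLOPS\n", 0.2*nvidia_peak/1e9);
        return 0;
    }

Compiled and run, this prints roughly 622 and 1200 GFLOPS peak for the two hypothetical cards, and about 124 GFLOPS as the assumed NVIDIA app speed.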
29 changes: 25 additions & 4 deletions checkin_notes
@@ -377,7 +377,7 @@ David 14 Jan 2009
app_control.cpp

David 14 Jan 2009
- client: clamp long term debts tp +- 1 week
- client: clamp long term debts to +- 1 week
- client: fix CUDA debt calculation
- client: don't accumulate debt if project->dont_request_more_work
- client: improves messages
@@ -1027,7 +1027,7 @@ David 30 Jan 2009
scheduler_op.cpp

David 31 Jan 2009
- client: there was a problem with how the round simulator
- client: there was a problem with how the round-robin simulator
worked in the presence of coprocessors.
The simulator maintained per-project queues of pending jobs.
When a job finished (in the simulation) it would get
@@ -5236,7 +5236,7 @@ David 10 June 2009
sched_result.cpp

David 10 June 2009
- web: allow projects to account email addresses in certain domains.
- web: allow projects to ban email addresses in certain domains.
Add the following to html/project/project.inc:

$banned_email_domains = array(
@@ -6455,7 +6455,7 @@ David 23 July 2009

David 24 July 2009
- client: in get_project_config_poll() GUI RPC,
return ERR_IN_PROGRESS is the reference site check is in progress.
return ERR_IN_PROGRESS if the reference site check is in progress.
This hopefully fixes a bug where:
- the user is connected via a proxy
- the manager is run for the first time, and an attach is tried
@@ -8674,3 +8674,24 @@ David 14 Oct 2009

David 14 Oct 2009
- undo the above

David 15 Oct 2009
- client/scheduler: standardize the FLOPS estimate between NVIDIA and ATI.
Make them both peak FLOPS,
according to the formula supplied by the manufacturer.

The impact on the client is minor:
- the startup message describing the GPU
- the weight of the resource type in computing long-term debt

On the server, I changed the example app_plan() function
to assume that app FLOPS is 20% of peak FLOPS
(that's about what it is for SETI@home)

client/
client_state.cpp
work_fetch.cpp
lib/
coproc.cpp,h
sched/
sched_customize.cpp
4 changes: 2 additions & 2 deletions checkin_notes_2008
@@ -9253,7 +9253,7 @@ David 5 Nov 2008
David 5 Nov 2008
- client: add OS name into the hash for host CPID
(for multi-OS hosts)
- scheduler: use sqrt(x) instead of x in stop-checking
- scheduler: use sqrt(x) instead of x in spot-checking
for single redundancy.

client/
@@ -9285,7 +9285,7 @@ Charlie 5 Nov 2008
project.pbxproj

David 6 Nov
- API: remove debugging printf from trickly down code
- API: remove debugging printf from trickle down code
- API: use non-verbose option to zip
- scheduler: if multiple_client_per_host is set,
don't mark results as over if get repeat CPID
2 changes: 0 additions & 2 deletions client/client_state.cpp
@@ -322,11 +322,9 @@ int CLIENT_STATE::init() {
// assume app will run at peak CPU speed, not peak GPU
//
if (avp->ncudas) {
//avp->flops += avp->ncudas * coproc_cuda->flops_estimate();
avp->flops += avp->ncudas * host_info.p_fpops;
}
if (avp->natis) {
//avp->flops += avp->natis * coproc_ati->flops_estimate();
avp->flops += avp->natis * host_info.p_fpops;
}
}
6 changes: 4 additions & 2 deletions client/work_fetch.cpp
@@ -899,17 +899,19 @@ void WORK_FETCH::set_initial_work_request() {
void WORK_FETCH::init() {
cpu_work_fetch.init(RSC_TYPE_CPU, gstate.ncpus, 1);

// use 20% as a rough estimate of GPU efficiency

if (coproc_cuda) {
cuda_work_fetch.init(
RSC_TYPE_CUDA, coproc_cuda->count,
coproc_cuda->flops_estimate()/gstate.host_info.p_fpops
0.2*coproc_cuda->peak_flops()/gstate.host_info.p_fpops
);
}
if (coproc_ati) {
ati_work_fetch.init(
RSC_TYPE_ATI,
coproc_ati->count,
coproc_ati->flops_estimate()/gstate.host_info.p_fpops
0.2*coproc_ati->peak_flops()/gstate.host_info.p_fpops
);
}

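
The second argument to init() above is the resource type's speed relative to a single CPU, now taken as 20% of peak rather than the old flops_estimate(); per the commit message, this ratio is what weights the GPU resource type when long-term debt is accumulated. A rough illustration with hypothetical numbers, not taken from this commit:

    #include <cstdio>

    int main() {
        double gpu_peak_flops = 6.2e11;  // hypothetical GPU, ~620 GFLOPS peak
        double p_fpops = 3.0e9;          // hypothetical CPU benchmark (p_fpops), 3 GFLOPS
        // Relative speed passed to the GPU resource's init() above:
        double speed = 0.2*gpu_peak_flops/p_fpops;
        printf("one GPU counts as about %.0f CPUs when accumulating long-term debt\n", speed);
        return 0;
    }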
1 change: 1 addition & 0 deletions doc/links.php
@@ -295,6 +295,7 @@ function site($url, $name) {
site("http://www.boinc.prv.pl", "BOINC@Kolobrzeg"),
site("http://www.boincatpoland.org", "BOINC@Poland"),
//site("http://www.boinc.pl", "www.boinc.pl"),
site("http://www.tomaszpawel.republika.pl/", "TomaszPawelTeam"),
));
language("Portuguese", array(
site( "http://portugalathome.pt.vu/", "Portugal@home"),
14 changes: 7 additions & 7 deletions lib/coproc.cpp
@@ -191,8 +191,8 @@ int cuda_compare(COPROC_CUDA& c1, COPROC_CUDA& c2, bool loose) {
}
if (c1.prop.totalGlobalMem > c2.prop.totalGlobalMem) return 1;
if (c1.prop.totalGlobalMem < c2.prop.totalGlobalMem) return -1;
double s1 = c1.flops_estimate();
double s2 = c2.flops_estimate();
double s1 = c1.peak_flops();
double s2 = c2.peak_flops();
if (s1 > s2) return 1;
if (s1 < s2) return -1;
return 0;
@@ -488,9 +488,9 @@ void COPROC_CUDA::description(char* buf) {
} else {
strcpy(vers, "unknown");
}
sprintf(buf, "%s (driver version %s, CUDA version %d, compute capability %d.%d, %.0fMB, est. %.0fGFLOPS)",
sprintf(buf, "%s (driver version %s, CUDA version %d, compute capability %d.%d, %.0fMB, %.0f GFLOPS peak)",
prop.name, vers, cuda_version, prop.major, prop.minor,
prop.totalGlobalMem/(1024.*1024.), flops_estimate()/1e9
prop.totalGlobalMem/(1024.*1024.), peak_flops()/1e9
);
}

@@ -918,7 +918,7 @@ void COPROC_ATI::get(COPROCS& coprocs,
char buf[256], buf2[256];
if (i == 0) {
best = gpus[i];
} else if (gpus[i].flops_estimate() > best.flops_estimate()) {
} else if (gpus[i].peak_flops() > best.peak_flops()) {
best = gpus[i];
}
gpus[i].description(buf);
@@ -1102,8 +1102,8 @@ int COPROC_ATI::parse(FILE* fin) {
}

void COPROC_ATI::description(char* buf) {
sprintf(buf, "%s (CAL version %s, %.0fMB, %.0fGFLOPS)",
name, version, attribs.localRAM/1024.*1024., flops_estimate()/1.e9
sprintf(buf, "%s (CAL version %s, %.0fMB, %.0f GFLOPS peak)",
name, version, attribs.localRAM/1024.*1024., peak_flops()/1.e9
);
}

17 changes: 10 additions & 7 deletions lib/coproc.h
@@ -256,13 +256,16 @@ struct COPROC_CUDA : public COPROC {
int parse(FILE*);
virtual bool is_usable();

// rough estimate of FLOPS
// The following is based on SETI@home CUDA,
// which gets 50 GFLOPS on a Quadro FX 3700,
// which has 14 MPs and a clock rate of 1.25 MHz
// Estimate of peak FLOPS.
// FLOPS for a given app may be much less;
// e.g. for SETI@home it's about 0.18 of the peak
//
inline double flops_estimate() {
double x = (prop.clockRate * prop.multiProcessorCount)*5e10/(14*1.25e6);
inline double peak_flops() {
// clock rate is scaled down by 1000;
// each processor has 8 cores;
// each core can do 2 ops per clock
//
double x = (1000.*prop.clockRate) * prop.multiProcessorCount * 8. * 2.;
return x?x:5e10;
}

@@ -314,7 +317,7 @@ struct COPROC_ATI : public COPROC {
void clear();
int parse(FILE*);
virtual bool is_usable();
inline double flops_estimate() {
inline double peak_flops() {
double x = attribs.numberOfSIMD * attribs.wavefrontSize * 2.5 * attribs.engineClock * 1.e6;
// clock is in MHz
return x?x:5e10;
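
To see what the rename from flops_estimate() to peak_flops() changes numerically, here is a hedged side-by-side sketch: the removed NVIDIA estimate scaled SETI@home's measured ~50 GFLOPS on a 14-multiprocessor Quadro FX 3700, while the new value is the manufacturer peak formula. The card specs below are a hypothetical example, not one referenced in this commit.

    #include <cstdio>

    // Removed estimate: scale SETI@home's measured ~50 GFLOPS on a Quadro FX 3700
    // (14 multiprocessors, 1.25e6 kHz clock) by clock rate times multiprocessor count.
    static double old_flops_estimate(double clock_rate_khz, int mp_count) {
        return (clock_rate_khz * mp_count) * 5e10 / (14 * 1.25e6);
    }

    // New estimate: manufacturer peak (clock in kHz scaled to Hz, 8 cores per MP, 2 ops per clock).
    static double new_peak_flops(double clock_rate_khz, int mp_count) {
        return (1000. * clock_rate_khz) * mp_count * 8. * 2.;
    }

    int main() {
        // Hypothetical 1.296 GHz, 30-multiprocessor NVIDIA GPU (not referenced in this commit).
        double clock_khz = 1296000.;
        int mps = 30;
        printf("old empirical estimate: %.0f GFLOPS\n", old_flops_estimate(clock_khz, mps)/1e9);
        printf("new peak estimate:      %.0f GFLOPS\n", new_peak_flops(clock_khz, mps)/1e9);
        printf("20%% of new peak:        %.0f GFLOPS\n", 0.2*new_peak_flops(clock_khz, mps)/1e9);
        return 0;
    }

For such a card the peak figure comes out several times the old empirical estimate, which lines up with the server-side plan functions below now dividing peak by 5 or 10 instead of using the estimate directly.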
34 changes: 11 additions & 23 deletions sched/sched_customize.cpp
@@ -71,9 +71,9 @@ bool wu_is_infeasible_custom(WORKUNIT& wu, APP& app, BEST_APP_VERSION& bav) {
//
if (bav.host_usage.ncudas) {
if (!strstr(wu.name, "slow")) {
bav.host_usage.flops = g_request->coproc_cuda->flops_estimate()/2;
bav.host_usage.flops = g_request->coproc_cuda->peak_flops()/10;
} else {
bav.host_usage.flops = g_request->coproc_cuda->flops_estimate();
bav.host_usage.flops = g_request->coproc_cuda->peak_flops()/5;
}
}
#endif
@@ -217,7 +217,7 @@ bool app_plan(SCHEDULER_REQUEST& sreq, char* plan_class, HOST_USAGE& hu) {
// 2. ati13ati
// 3. ati13amd
// 4. ati
hu.flops = cp->flops_estimate();
hu.flops = cp->peak_flops()/5;
if (!strcmp(plan_class, "ati13amd")) {
hu.flops *= 1.01;
}
@@ -273,6 +273,8 @@ bool app_plan(SCHEDULER_REQUEST& sreq, char* plan_class, HOST_USAGE& hu) {
return false;
}

double min_ram;

// for CUDA 2.3, we need to check the CUDA RT version.
// Old BOINC clients report display driver version;
// newer ones report CUDA RT version
@@ -298,22 +300,7 @@ bool app_plan(SCHEDULER_REQUEST& sreq, char* plan_class, HOST_USAGE& hu) {
add_no_work_message("CUDA version 2.3 needed");
return false;
}
#ifdef PLAN_CUDA23_MIN_RAM
if (cp->prop.dtotalGlobalMem < PLAN_CUDA23_MIN_RAM) {
if (config.debug_version_select) {
log_messages.printf(MSG_NORMAL,
"[version] CUDA23 mem %d < %d\n",
cp->prop.dtotalGlobalMem, PLAN_CUDA23_MIN_RAM
);
}
sprintf(buf,
"Your NVIDIA GPU has insufficient memory (need %.0fMB)",
PLAN_CUDA23_MIN_RAM/MEGA
);
add_no_work_message(buf);
return false;
}
#endif
min_ram = PLAN_CUDA23_MIN_RAM;
} else {
if (cp->display_driver_version && cp->display_driver_version < PLAN_CUDA_MIN_DRIVER_VERSION) {
if (config.debug_version_select) {
@@ -328,24 +315,25 @@ bool app_plan(SCHEDULER_REQUEST& sreq, char* plan_class, HOST_USAGE& hu) {
add_no_work_message(buf);
return false;
}
min_ram = PLAN_CUDA_MIN_RAM;
}

if (cp->prop.dtotalGlobalMem < PLAN_CUDA_MIN_RAM) {
if (cp->prop.dtotalGlobalMem < min_ram) {
if (config.debug_version_select) {
log_messages.printf(MSG_NORMAL,
"[version] CUDA mem %d < %d\n",
cp->prop.dtotalGlobalMem, PLAN_CUDA_MIN_RAM
cp->prop.dtotalGlobalMem, min_ram
);
}
sprintf(buf,
"Your NVIDIA GPU has insufficient memory (need %.0fMB)",
PLAN_CUDA_MIN_RAM/MEGA
min_ram/MEGA
);
add_no_work_message(buf);
return false;
}

hu.flops = cp->flops_estimate();
hu.flops = cp->peak_flops()/5;
if (!strcmp(plan_class, "cuda23")) {
hu.flops *= 1.01;
}
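
Putting the sched_customize.cpp changes together, the CUDA branch of app_plan() now performs a single RAM check with a per-plan-class threshold and takes app FLOPS as one fifth of peak. The sketch below follows that shape under stated assumptions: the RAM constant values, the GPU struct, and the plan_cuda() wrapper are hypothetical simplifications, not the file's actual definitions.

    #include <cstdio>
    #include <cstring>

    #define MEGA (1024.*1024.)
    // Hypothetical placeholder limits; the real thresholds come from the project's configuration.
    #define PLAN_CUDA_MIN_RAM   (254.*MEGA)
    #define PLAN_CUDA23_MIN_RAM (384.*MEGA)

    struct GPU { double dtotalGlobalMem; double peak_flops; };

    // Simplified sketch of the CUDA branch of app_plan() after this commit.
    static bool plan_cuda(const char* plan_class, const GPU& cp, double& flops) {
        // One RAM threshold, selected by plan class, checked in a single place.
        double min_ram = strcmp(plan_class, "cuda23") ? PLAN_CUDA_MIN_RAM : PLAN_CUDA23_MIN_RAM;
        if (cp.dtotalGlobalMem < min_ram) {
            printf("Your NVIDIA GPU has insufficient memory (need %.0fMB)\n", min_ram/MEGA);
            return false;
        }
        // App FLOPS assumed to be 20% of peak (roughly SETI@home's observed fraction).
        flops = cp.peak_flops / 5;
        if (!strcmp(plan_class, "cuda23")) flops *= 1.01;  // slight preference for cuda23
        return true;
    }

    int main() {
        GPU gpu = { 512*MEGA, 6.2e11 };  // hypothetical 512 MB, ~620 GFLOPS-peak card
        double flops;
        if (plan_cuda("cuda23", gpu, flops)) {
            printf("estimated app speed: %.0f GFLOPS\n", flops/1e9);
        }
        return 0;
    }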
