# ex: set sts=4 ts=4 sw=4 noet:
# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
#
# See COPYING file distributed along with the datalad package for the
# copyright and license terms.
#
# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
"""Benchmarks for some use cases, typically at datalad.api level"""

import os.path as osp
import tempfile
from os.path import join as opj

from datalad.api import create
from datalad.utils import (
    create_tree,
    get_tempfile_kwargs,
    rmtree,
)

from .common import SuprocBenchmarks


class study_forrest(SuprocBenchmarks):
    """
    Benchmarks for Study Forrest use cases
    """

    # especially with profiling, a run might take longer than the default 60s
    timeout = 180

    def setup(self):
        self.path = tempfile.mkdtemp(
            **get_tempfile_kwargs({}, prefix='bm_forrest'))

    def teardown(self):
        if osp.exists(self.path):
            rmtree(self.path)
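
    # NOTE: only the body of the ``time_*`` method below is measured;
    # temporary-directory handling happens in ``setup``/``teardown`` above,
    # outside of the timing.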
    def time_make_studyforrest_mockup(self):
        path = self.path
        # Carries a copy of
        # datalad.tests.utils_testdatasets.py:make_studyforrest_mockup
        # as of 0.12.0rc2-76-g6ba6d53b.
        # A copy is made so that later changes to that helper's code are not
        # reflected in the benchmark results. This copy also exercises only 2,
        # not 3, analysis subdatasets.
        public = create(opj(path, 'public'), description="umbrella dataset")
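        # The clones below pass reckless=True: reckless clones favor cheap and
        # fast setup over data-integrity safeguards, which suits these
        # throw-away benchmark datasets (see the datalad clone documentation).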
        # the following tries to capture the evolution of the project
        phase1 = public.create('phase1',
                               description='old-style, no connection to RAW')
        structural = public.create('structural', description='anatomy')
        tnt = public.create('tnt', description='image templates')
        tnt.clone(source=phase1.path, path=opj('src', 'phase1'), reckless=True)
        tnt.clone(source=structural.path, path=opj('src', 'structural'),
                  reckless=True)
        aligned = public.create('aligned', description='aligned image data')
        aligned.clone(source=phase1.path, path=opj('src', 'phase1'),
                      reckless=True)
        aligned.clone(source=tnt.path, path=opj('src', 'tnt'), reckless=True)
        # new acquisition
        labet = create(opj(path, 'private', 'labet'),
                       description="raw data ET")
        phase2_dicoms = create(opj(path, 'private', 'p2dicoms'),
                               description="raw data P2MRI")
        phase2 = public.create('phase2',
                               description='new-style, RAW connection')
        phase2.clone(source=labet.path, path=opj('src', 'labet'),
                     reckless=True)
        phase2.clone(source=phase2_dicoms.path, path=opj('src', 'dicoms'),
                     reckless=True)
        # add to derivatives
        tnt.clone(source=phase2.path, path=opj('src', 'phase2'), reckless=True)
        aligned.clone(source=phase2.path, path=opj('src', 'phase2'),
                      reckless=True)
        # never to be published media files
        media = create(opj(path, 'private', 'media'),
                       description="raw data ET")
        # assuming all annotations are in one dataset (in reality this is
        # also a superdataset with about 10 subdatasets)
        annot = public.create('annotations', description='stimulus annotation')
        annot.clone(source=media.path, path=opj('src', 'media'), reckless=True)
        # a few typical analysis datasets
        # (just doing 2, the actual status quo is just shy of 10)
        # and also the real goal -> meta analysis
        metaanalysis = public.create('metaanalysis',
                                     description="analysis of analyses")
        for i in range(1, 3):
            ana = public.create('analysis{}'.format(i),
                                description='analysis{}'.format(i))
            ana.clone(source=annot.path, path=opj('src', 'annot'),
                      reckless=True)
            ana.clone(source=aligned.path, path=opj('src', 'aligned'),
                      reckless=True)
            ana.clone(source=tnt.path, path=opj('src', 'tnt'), reckless=True)
            # link to metaanalysis
            metaanalysis.clone(source=ana.path,
                               path=opj('src', 'ana{}'.format(i)),
                               reckless=True)
            # simulate a change in an input (but not raw) dataset
            create_tree(
                aligned.path,
                {'modification{}.txt'.format(i): 'unique{}'.format(i)})
            aligned.save('.')
        # finally aggregate data
        aggregate = public.create('aggregate', description='aggregate data')
        aggregate.clone(source=aligned.path, path=opj('src', 'aligned'),
                        reckless=True)
        # the toplevel dataset is intentionally left dirty, to reflect the
        # most likely condition for the joint dataset to be in at any given
        # point in time
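        # A quick way to eyeball the resulting hierarchy by hand (hypothetical
        # usage, not part of the benchmark):
        #   datalad subdatasets -d <tmpdir>/public -r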