# process_baseline_values.py
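# Reduce the volume of a MeasurementSet by averaging visibilities in time
# on a per-baseline basis: short baselines are averaged heavily and long
# baselines lightly or not at all, so the data compress well while
# long-baseline resolution is preserved. Baseline lengths and per-baseline
# row indices are read from a pickle file supplied on the command line.
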
import os
import time
import sys
import numpy
import pyrap.tables
import math
from multiprocessing import Process, Queue

try:
    import cPickle as pickle
except ImportError:
    import pickle

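# Worker process: pull (function, args) tasks from the task queue until a
# 'STOP' sentinel arrives, run each task, and push the result to the done queue.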
def baseline_worker(input_queue, output_queue):
    for func, args in iter(input_queue.get, 'STOP'):
        result = func(*args)
        output_queue.put(result)
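
# Average each block of num_avg consecutive samples on a single baseline:
# every visibility (and matching UVW coordinate) in a block is replaced by
# the block mean, so the row count is unchanged but the data compress well.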
def process_baseline(num_avg, array_to_process, UVWs, key):
    num_points = 0
    max_i = array_to_process.shape[0]
    for i in range(0, max_i, num_avg):
        num_points = num_points + 1
        data_sum = 0.0
        sum_uvw = 0.0
        num_sum = 0
        for j in range(i, i + num_avg):
            if j < max_i:
                data_sum = data_sum + array_to_process[j, :, :]
                sum_uvw = sum_uvw + UVWs[j, :]
                num_sum = num_sum + 1
        avg = data_sum / num_sum
        avg_uvw = sum_uvw / num_sum
        for j in range(i, i + num_avg):
            try:
                if j < max_i:
                    array_to_process[j, :, :] = avg[:, :]
                    UVWs[j, :] = avg_uvw[:]
            except Exception:
                pass
#       print('processed array ', key, num_avg, num_points)
#       print('shapes ', avg.shape, avg_uvw.shape)
    return max_i, num_points, key, array_to_process, UVWs

def analyse_baselines(data_ms, data_values, data_flag, salvini_flag):
    data_flag = int(data_flag)
    salvini_flag = int(salvini_flag)
    # read in pickle file with information about baseline lengths
    with open(data_values, 'rb') as infile:
        baseline_data = pickle.load(infile, encoding='bytes')
    print('number of keys ', len(baseline_data))
    min_value = 1.0e6
    max_value = 0.0
    keys = baseline_data.keys()
    for key in keys:
        if baseline_data[key][0] < min_value:
            min_value = baseline_data[key][0]
            min_key = key
        if baseline_data[key][0] > max_value:
            max_value = baseline_data[key][0]
            max_key = key
    print('min and max keys ', min_key, max_key)
    print('min and max baselines ', min_value, max_value)
    # Candidate (zone boundary in km, averaging factor) tables tried during
    # development; only the final pair of assignments takes effect.
    # baselines 160, 80, 40, 20, 10, 5 km zones
    # avg         0   2   4   8  16  32
    baseline_zones = [30, 15, 7.5, 3.75, 1.875, 0.9375, 0.46875, 0.234375]
    baseline_avg = [0, 6, 12, 24, 48, 96, 192, 384, 768]
    # tried the following - the result gives larger residuals than the
    # previous table and not much more compression
    # baseline_avg = [0, 7, 15, 31, 62, 112, 225, 450]
    # checking on this (averaging method 1)
    baseline_zones = [80, 40, 20, 10, 5, 2.5, 1.25, 0.625, 0.3125]
    baseline_avg = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512]
    baseline_zones = [80, 40, 30, 20, 15, 10, 7.5, 5, 3.75, 2.5, 1.875, 1.25, 0.9375, 0.625, 0.5625, 0.375, 0.28125]
    baseline_avg = [1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192, 256, 384, 512]
    L = len(baseline_zones)
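    # Example of the zone lookup below: a 12 km baseline first satisfies
    # baseline_length > baseline_zones[5] (10 km), so it is averaged in
    # blocks of num_avg = baseline_avg[5] = 8 samples; a baseline shorter
    # than every zone boundary gets the largest factor, baseline_avg[L] = 512.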
    # open the MS and read the column selected by data_flag
    t = pyrap.tables.table(data_ms, readonly=False)
    if data_flag == 2:
        print(' ++++ averaging CORRECTED_DATA')
        data = t.getcol('CORRECTED_DATA')
    if data_flag == 1:
        print(' ++++ averaging MODEL_DATA')
        data = t.getcol('MODEL_DATA')
    if data_flag == 0:
        print(' ++++ averaging DATA')
        data = t.getcol('DATA')
    # store output in MODEL_DATA column
    # data_flag = 1
    uvws = t.getcol('UVW')
    print('uvws shape ', uvws.shape)
    print('salvini flag is ', salvini_flag)
    process_num = 0
    initial_points = 0
    final_points = 0
    num_long_baselines = 0
    eps = numpy.spacing(1)
    # cap on the number of points averaged per block; earlier trial values
    # were 125, 500, 63 and 32
    maxbmult = 250
    bmax = max_value / 1000.0  # maximum baseline length in km
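    # Build one averaging task per baseline; a baseline whose averaging
    # factor comes out as 1 is left untouched and counted as 'long'.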
    TASKS = []
    for key in keys:
        initial_points = initial_points + len(baseline_data[key]) - 1
        baseline_length = baseline_data[key][0] / 1000.0  # baseline length in km
        if salvini_flag == 0:
            # Willis method: take the averaging factor of the first zone the
            # baseline exceeds; baselines shorter than every zone boundary
            # get the largest factor
            num_avg = baseline_avg[L]
            for i in range(L):
                if baseline_length > baseline_zones[i]:
                    num_avg = baseline_avg[i]
                    break
        else:
            # Stef Salvini algorithm: average a number of points inversely
            # proportional to the baseline length, capped at maxbmult
            num_avg = min(maxbmult, int(numpy.fix((bmax + 100 * eps) / baseline_length)))
            print('key salvini num_avg ', key, num_avg)
        if num_avg > 1:
            array_to_process = data[baseline_data[key][1:]]
            UVWs = uvws[baseline_data[key][1:]]
            TASKS.append((process_baseline, (num_avg, array_to_process, UVWs, key)))
        else:
            num_long_baselines = num_long_baselines + 1
            process_num = process_num + 1
            num_points = len(baseline_data[key]) - 1
            final_points = final_points + num_points
    print('number of long baselines ', num_long_baselines)
    print('number of averaging tasks ', len(TASKS))
    # Create queues
    task_queue = Queue()
    done_queue = Queue()
    # Submit tasks
    for task in TASKS:
        task_queue.put(task)
    # Start one worker process per available CPU core
    num_processors = 1
    try:
        import multiprocessing
        processors = multiprocessing.cpu_count()
        if processors > num_processors:
            num_processors = processors
            print('*** setting number of processors to', num_processors)
    except (ImportError, NotImplementedError):
        num_processors = 1
    for i in range(num_processors):
        Process(target=baseline_worker, args=(task_queue, done_queue)).start()
    # Collect the results and write each averaged baseline back into the
    # in-memory data and uvw arrays
    for i in range(len(TASKS)):
        init_points, num_points, key, modified_array, modified_uvws = done_queue.get(timeout=300)
        # print('for baseline ', key, 'initial and final number of points ', init_points, num_points, float(init_points) / num_points)
        data[baseline_data[key][1:]] = modified_array
        uvws[baseline_data[key][1:]] = modified_uvws
        final_points = final_points + num_points
        process_num = process_num + 1
        if process_num % 1000 == 0:
            print('processed baseline number ', process_num)
    # Tell child processes to stop
    for i in range(num_processors):
        task_queue.put('STOP')
    if data_flag == 2:
        print(' ++++ storing CORRECTED_DATA')
        t.putcol('CORRECTED_DATA', data)
    if data_flag == 1:
        print(' ++++ storing MODEL_DATA')
        t.putcol('MODEL_DATA', data)
    if data_flag == 0:
        print(' ++++ storing DATA')
        t.putcol('DATA', data)
    print(' ++++ storing averaged UVWs')
    t.putcol('UVW', uvws)
    t.flush()
    t.close()
    print('finished processing')
    print('initial and final points ', initial_points, final_points)
    if salvini_flag:
        print('Salvini percentage decrease ', ((initial_points - final_points) * 100.0) / initial_points)
    else:
        print('Willis percentage decrease ', ((initial_points - final_points) * 100.0) / initial_points)

def main(argv):
    # argv[1] = name of MS
    # argv[2] = name of pickle file with baseline info
    # argv[3] = column to average: 0 = DATA, 1 = MODEL_DATA, 2 = CORRECTED_DATA
    # argv[4] = averaging algorithm: 0 = Willis zone table, 1 = Salvini
    startime = time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime())
    print("Start at %s" % startime)
    analyse_baselines(argv[1], argv[2], argv[3], argv[4])
    endtime = time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime())
    print("End at %s" % endtime)
    return

#=============================
if __name__ == "__main__":
    main(sys.argv)
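
# Example invocation (file names are hypothetical):
#   python process_baseline_values.py mydata.ms baseline_info.p 0 1
# which averages the DATA column of mydata.ms with the Salvini algorithm,
# reading per-baseline row indices from the pickle file baseline_info.p.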