Skip to content

Commit

Permalink
Add percentile filter for abs & PL
Browse files Browse the repository at this point in the history
  • Loading branch information
XPD Operator committed May 29, 2024
1 parent 70680e8 commit 2d1791b
Show file tree
Hide file tree
Showing 4 changed files with 70 additions and 13 deletions.
65 changes: 59 additions & 6 deletions scripts/_data_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,57 @@ def rate_unit_converter(r0 = 'ul/min', r1 = 'ul/min'):
return ruc



### Filter out bad absorption data due to PF oil ###
def percentile_abs(wavelength, absorbance, w_range=(210, 700), percent_range=(15, 85)):
    """Filter absorbance spectra, keeping only rows whose score lies in a percentile band.

    Each spectrum (row of ``absorbance``) is scored by the mean of
    (absorbance * wavelength) inside the ``w_range`` window; spectra whose
    score falls outside the [low, high] percentile band are dropped.
    Intended to reject spectra contaminated by the PF oil phase.

    Parameters
    ----------
    wavelength : 2D array, one wavelength axis per spectrum
        (only the first row is used to locate the window, so all rows are
        assumed to share the same axis -- TODO confirm).
    absorbance : 2D array, one spectrum per row; NaNs are treated as 0.
    w_range : (low, high) wavelength window used for scoring.
    percent_range : (low, high) percentiles of the score to keep.

    Returns
    -------
    2D array containing only the surviving full-width spectra.
    """
    absorbance = np.nan_to_num(absorbance, nan=0)
    # Locate the scoring window on the first spectrum's wavelength axis.
    idx1, _ = find_nearest(wavelength[0], w_range[0])
    idx2, _ = find_nearest(wavelength[0], w_range[1])

    abs_window = absorbance[:, idx1:idx2]
    w_window = wavelength[:, idx1:idx2]

    # Wavelength-weighted mean intensity per spectrum (the original called
    # this "iqr", but it is a score, not an interquartile range).
    score = np.multiply(abs_window, w_window).mean(axis=1)
    q_lo, q_hi = np.percentile(score, percent_range)

    # Boolean-mask fancy indexing replaces the original manual copy loop.
    keep = np.logical_and(score >= q_lo, score <= q_hi)
    return absorbance[keep]



### Filter out bad fluorescence PL data due to PF oil ###
def percentile_PL(wavelength, fluorescence, w_range=(400, 800), percent_range=(30, 100)):
    """Filter PL spectra, keeping only rows whose peak height lies in a percentile band.

    Each spectrum (row of ``fluorescence``) is scored by its maximum
    intensity over the FULL wavelength axis; spectra whose score falls
    outside the [low, high] percentile band are dropped.  Intended to
    reject spectra contaminated by the PF oil phase.

    Parameters
    ----------
    wavelength : 2D array, one wavelength axis per spectrum.
        NOTE(review): currently unused -- the original computed a window
        slice from it but scored on the full array; kept for interface
        compatibility.
    fluorescence : 2D array, one spectrum per row; NaNs are treated as 0.
    w_range : (low, high) wavelength window.
        NOTE(review): currently unused for the same reason as ``wavelength``.
    percent_range : (low, high) percentiles of the score to keep.

    Returns
    -------
    2D array containing only the surviving full-width spectra.
    """
    fluorescence = np.nan_to_num(fluorescence, nan=0)

    # Peak height per spectrum.  (The original also sliced a w_range window
    # and computed find_nearest indices, but never used them -- dead code
    # removed here; behavior is unchanged.)
    score = np.max(fluorescence, axis=1)
    q_lo, q_hi = np.percentile(score, percent_range)

    # Boolean-mask fancy indexing replaces the original manual copy loop.
    keep = np.logical_and(score >= q_lo, score <= q_hi)
    return fluorescence[keep]




#### Criteria to identify a bad fluorescence peak ####
# c1. PL peak height < 2000 --> execute in scipy.find_peaks
# c2. PL peak wavelength < 560 nm, and the difference of peak integration (PL minus LED) < 100000
Expand Down Expand Up @@ -293,8 +344,8 @@ def _2peak_fit_good_PL(x0, y0, fit_function, peak=False, maxfev=100000, fit_boun
initial_guess = [y.max(), x[y.argmax()], sigma, y[find_nearest(x, second_peak)[0]], second_peak, sigma]
except (TypeError, IndexError):
initial_guess = [y0[peak[0]], x0[peak[0]], sigma, y0[peak[-1]], x0[peak[-1]], sigma]


try:
bnd = ((0,200,0,0,200,0),(y.max()*1.15,1000, np.inf, y.max()*1.15,1000, np.inf))
popt, pcov = curve_fit(fit_function, x, y, p0=initial_guess, bounds=bnd, maxfev=maxfev)
Expand Down Expand Up @@ -358,7 +409,7 @@ def _identify_one_in_kafka(qepro_dic, metadata_dic, key_height=200, distance=100



def _identify_multi_in_kafka(qepro_dic, metadata_dic, key_height=200, distance=100, height=50, dummy_test=False):
def _identify_multi_in_kafka(qepro_dic, metadata_dic, key_height=200, distance=100, height=50, dummy_test=False, w_range=[400, 800], percent_range=[30, 100]):
t0 = de._readable_time(metadata_dic['time'])
data_id = f'{t0[0]}_{t0[1]}_{metadata_dic["uid"][:8]}'
_for_average = pd.DataFrame()
Expand All @@ -369,10 +420,12 @@ def _identify_multi_in_kafka(qepro_dic, metadata_dic, key_height=200, distance=1
if (type(p1) is np.ndarray) and (type(p2) is dict):
_for_average[f'{data_id}_{i:03d}'] = y_i

_for_average[f'{data_id}_mean'] = _for_average.mean(axis=1)
# _for_average[f'{data_id}_mean'] = _for_average.mean(axis=1)

x0 = x_i
y0 = _for_average[f'{data_id}_mean'].values
PL_per = percentile_PL(x0, _for_average.to_numpy().T, w_range=w_range, percent_range=percent_range)
# y0 = _for_average[f'{data_id}_mean'].values
y0 = PL_per.mean(axis=0)

peak, prop = good_bad_data(x0, y0, key_height=key_height, data_id = f'{data_id}_average', distance=distance, height=height, dummy_test=dummy_test)
return x0, y0, data_id, peak, prop
Expand Down
2 changes: 1 addition & 1 deletion scripts/_pdf_calculator.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def _set_CsPbBr3_constrain(PDF_calculator_object, phase_idx=1, fix_APD=True):

# Refine diameter for the spherical particle
pf.constrain(pf.spdiameter, '@133')
pf.setpar(133, 80)
pf.setpar(133, 100)
# pf.fixpar(133)

# Set temperature factors isotropic to each atom
Expand Down
8 changes: 6 additions & 2 deletions scripts/kafka_consumer_iterate_RM.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,16 +283,20 @@ def print_message(consumer, doctype, doc,
u.plot_data(clear_fig=clear_fig)
print(f'\n** Plot {stream_name} in uid: {uid[0:8]} complete **\n')

## Idenfify good/bad data if it is a fluorescence sacn in 'primary'
## Identify good/bad data if it is a fluorescence scan in 'primary'
if qepro_dic['QEPro_spectrum_type'][0]==2 and stream_name=='primary':
print(f'\n*** start to identify good/bad data in stream: {stream_name} ***\n')
x0, y0, data_id, peak, prop = da._identify_one_in_kafka(qepro_dic, metadata_dic, key_height=kh, distance=dis, height=hei, dummy_test=dummy_test)


## Apply an offset to zero baseline of absorption spectra
elif stream_name == 'absorbance':
print(f'\n*** start to flter absorbance within 15%-85% due to PF oil phase***\n')
abs_per = da.percentile_abs(qepro_dic['QEPro_x_axis'], qepro_dic['QEPro_output'], percent_range=[15, 85])

print(f'\n*** start to check absorbance at 365b nm in stream: {stream_name} is positive or not***\n')
abs_array = qepro_dic['QEPro_output'][1:].mean(axis=0)
# abs_array = qepro_dic['QEPro_output'][1:].mean(axis=0)
abs_array = abs_per.mean(axis=0)
wavelength = qepro_dic['QEPro_x_axis'][0]

popt_abs01, _ = da.fit_line_2D(wavelength, abs_array, da.line_2D, x_range=[205, 240], plot=False)
Expand Down
8 changes: 4 additions & 4 deletions scripts/kafka_consumer_publisher.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ def print_message(consumer, doctype, doc,
kh = key_height[0]
hei = height[0]
dis = distance[0]

'''
## obtain phase fraction & particle size from g(r)
if 'scattering' in stream_list:
if fitting_pdf:
Expand All @@ -204,7 +204,7 @@ def print_message(consumer, doctype, doc,
pdf_property={'Br_ratio': np.nan, 'Br_size': np.nan}
## remove 'scattering' from stream_list to avoid redundant work in next for loop
stream_list.remove('scattering')
## Export, plotting, fitting, calculate # of good/bad data, add queue item
for stream_name in stream_list:
## Read data from databroker and turn into dic
Expand Down Expand Up @@ -258,7 +258,7 @@ def print_message(consumer, doctype, doc,
# u.plot_average_good(x0, y0, color=cmap(color_idx[sub_idx]), label=label_uid)
# sub_idx = sample.index(metadata_dic['sample_type'])
u.plot_average_good(x0, y0, label=label_uid, clf_limit=14)
## Skip peak fitting if qepro type is absorbance
if qepro_dic['QEPro_spectrum_type'][0] == 3:
print(f"\n*** No need to carry out fitting for {stream_name} in uid: {uid[:8]} ***\n")
Expand Down Expand Up @@ -367,7 +367,7 @@ def print_message(consumer, doctype, doc,
print(f'*** Accumulated num of good data: {len(good_data)} ***\n')
print(f'good_data = {good_data}\n')
print(f'*** Accumulated num of bad data: {len(bad_data)} ***\n')
print(f'*** Accumulated num of bad data: {len(bad_data)} ***\n') '''
print('########### Events printing division ############\n')


Expand Down

0 comments on commit 2d1791b

Please sign in to comment.