Skip to content

Commit

Permalink
kwargs parameter added for clustering
Browse files Browse the repository at this point in the history
  • Loading branch information
warunawickramasingha committed Jan 2, 2025
1 parent fa55698 commit 661c29e
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 17 deletions.
44 changes: 28 additions & 16 deletions diffraction/WISH/bragg-detect/cnn/BraggDetectCNN.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,59 +47,71 @@ def __init__(self, model_weights_path, batch_size=64, workers=0, iou_threshold=0
self.iou_threshold = iou_threshold


def find_bragg_peaks(self, workspace, output_ws_name="CNN_Peaks", conf_threshold=0.0, clustering=Clustering.QLab.name, q_tol=0.05):
def find_bragg_peaks(self, workspace, output_ws_name="CNN_Peaks", conf_threshold=0.0, **kwargs):
"""
Find Bragg peaks using the pre-trained FasterRCNN model and create a peaks workspace
:param workspace: Workspace name or the object of Workspace from WISH, ex: "WISH0042730"
:param output_ws_name: Name of the peaks workspace
:param conf_threshold: Confidence threshold to filter peaks inferred from RCNN
:param clustering: Clustering method to filter and merge the peaks ex: QLab, HDBSCAN, KMeans
:param q_tol: QLab tolerance to remove duplicate peaks; it is only used when clustering is QLab
:param kwargs: Keyword parameters for clustering; they are merged over the defaults {"name": "QLab", "q_tol": 0.05}
Ex: {"name": "HDBSCAN", "keep_ignored_labels": True}
"""
clustering_params = {"name": "QLab", "q_tol": 0.05 }
clustering_params.update(kwargs)

start_time = time.time()
data_set, predicted_indices = self._do_cnn_inferencing(workspace)

filtered_indices = predicted_indices[predicted_indices[:, -1] > conf_threshold]

#Do Clustering
print(f"Starting peak clustering with {clustering} method..")
clustered_peaks = self._do_peak_clustering(filtered_indices, clustering)
print(f"Starting peak clustering with { clustering_params['name'] } method..")
clustered_peaks = self._do_peak_clustering(filtered_indices, clustering_params)
print(f"Number of peaks after clustering is={len(clustered_peaks)}")

cluster_indices_rounded = np.round(clustered_peaks[:, :3]).astype(int)
peaksws = createPeaksWorkspaceFromIndices(data_set.get_workspace(), output_ws_name, cluster_indices_rounded, data_set.get_ws_as_3d_array())
for ipk, pk in enumerate(peaksws):
pk.setIntensity(clustered_peaks[ipk, -1])

if clustering == Clustering.QLab.name:
if clustering_params["name"] == Clustering.QLab.name:
#Filter peaks by qlab
BaseSX.remove_duplicate_peaks_by_qlab(peaksws, q_tol)
BaseSX.remove_duplicate_peaks_by_qlab(peaksws, clustering_params["q_tol"])

data_set.delete_rebunched_ws()
print(f"Bragg peaks finding from FasterRCNN model is completed in {time.time()-start_time} seconds!")


def _do_peak_clustering(self, detected_peaks, clustering):
def _do_peak_clustering(self, detected_peaks, params):
print(f"Number of peaks before clustering={len(detected_peaks)}")
if clustering == Clustering.HDBSCAN.name:
return self._do_hdbscan_clustering(detected_peaks)
elif clustering == Clustering.KMeans.name:
if params["name"] == Clustering.HDBSCAN.name:
return self._do_hdbscan_clustering(detected_peaks, params)
elif params["name"] == Clustering.KMeans.name:
return self._do_kmeans_clustering(detected_peaks)
else:
return detected_peaks


def _do_hdbscan_clustering(self, peakdata):
def _do_hdbscan_clustering(self, peakdata, params):
data = np.delete(peakdata, [3,4], axis=1)

hdbscan = HDBSCAN(min_cluster_size=2, store_centers="medoid")
hdbscan = HDBSCAN(min_cluster_size=2,
min_samples=2,
store_centers="medoid",
algorithm="auto",
cluster_selection_method="eom",
metric="euclidean")
hdbscan.fit(data)
print(f"Silhouette score of the clusters={silhouette_score(data, hdbscan.labels_)}")

if ("keep_ignored_labels" in params) and params["keep_ignored_labels"]:
selected_peaks = np.concatenate((hdbscan.medoids_, data[np.where(hdbscan.labels_==-1)]), axis=0)
else:
selected_peaks = hdbscan.medoids_
confidence = []
for medoid in hdbscan.medoids_:
confidence.append(peakdata[np.where((data == medoid).all(axis=1))[0].item(), -1])
return np.column_stack((hdbscan.medoids_, confidence))
for peak in selected_peaks:
confidence.append(peakdata[np.where((data == peak).all(axis=1))[0].item(), -1])
return np.column_stack((selected_peaks, confidence))


def _do_kmeans_clustering(self, peakdata):
Expand Down
3 changes: 2 additions & 1 deletion diffraction/WISH/bragg-detect/cnn/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ In order to use the pre-trained Faster RCNN model inside mantid using an IDAaaS i
from cnn.BraggDetectCNN import BraggDetectCNN
model_weights = r'/mnt/ceph/auxiliary/wish/BraggDetect_FasterRCNN_Resnet50_Weights_v1.pt'
cnn_peaks_detector = BraggDetectCNN(model_weights_path=model_weights, batch_size=64)
cnn_peaks_detector.find_bragg_peaks(workspace='WISH00042730', output_ws_name="CNN_Peaks", conf_threshold=0.0, clustering="QLab", q_tol=0.05)
clustering_params = {"name":"QLab", "q_tol": 0.05}
cnn_peaks_detector.find_bragg_peaks(workspace='WISH00042730', output_ws_name="CNN_Peaks", conf_threshold=0.0, **clustering_params)
```
* If the above import is not working, check whether the `<local path>\diffraction\WISH` path is listed under the `Python Script Directories` tab of `File->Manage User Directories`.

0 comments on commit 661c29e

Please sign in to comment.