From 741df7b05864705977f3b3dc8d71c58aaf59ea13 Mon Sep 17 00:00:00 2001 From: Waruna Wickramasingha Date: Wed, 18 Dec 2024 16:32:14 +0000 Subject: [PATCH] KMeans removed and updated to pass kwargs --- .../WISH/bragg-detect/cnn/BraggDetectCNN.py | 88 +++++++------------ diffraction/WISH/bragg-detect/cnn/README.md | 5 +- 2 files changed, 35 insertions(+), 58 deletions(-) diff --git a/diffraction/WISH/bragg-detect/cnn/BraggDetectCNN.py b/diffraction/WISH/bragg-detect/cnn/BraggDetectCNN.py index c18bc94..13974b6 100644 --- a/diffraction/WISH/bragg-detect/cnn/BraggDetectCNN.py +++ b/diffraction/WISH/bragg-detect/cnn/BraggDetectCNN.py @@ -9,14 +9,12 @@ from Diffraction.single_crystal.base_sx import BaseSX import time from enum import Enum -from sklearn.cluster import KMeans, HDBSCAN -from sklearn.preprocessing import StandardScaler, MinMaxScaler +from sklearn.cluster import HDBSCAN from sklearn.metrics import silhouette_score class Clustering(Enum): QLab = 1 HDBSCAN = 2 - KMeans = 3 class BraggDetectCNN: @@ -47,94 +45,74 @@ def __init__(self, model_weights_path, batch_size=64, workers=0, iou_threshold=0 self.iou_threshold = iou_threshold - def find_bragg_peaks(self, workspace, output_ws_name="CNN_Peaks", conf_threshold=0.0, **kwargs): + def find_bragg_peaks(self, workspace, output_ws_name="CNN_Peaks", conf_threshold=0.0, clustering=Clustering.QLab.name, **kwargs): """ Find bragg peaks using the pre trained FasterRCNN model and create a peaks workspace :param workspace: Workspace name or the object of Workspace from WISH, ex: "WISH0042730" :param output_ws_name: Name of the peaks workspace :param conf_threshold: Confidence threshold to filter peaks inferred from RCNN - :param kwargs: variable keyword params for clustering. default is {"name": "Qlab", "q_tol": 0.05} - Ex: {"name": "HDBSCAN", "keep_ignored_labels": True} + :param clustering: name of clustering method. Default is QLab and allowed + :param kwargs: variable keyword params for clustering methods """ - clustering_params = {"name": "QLab", "q_tol": 0.05 } - clustering_params.update(kwargs) - start_time = time.time() data_set, predicted_indices = self._do_cnn_inferencing(workspace) filtered_indices = predicted_indices[predicted_indices[:, -1] > conf_threshold] #Do Clustering - print(f"Starting peak clustering with { clustering_params['name'] } method..") - clustered_peaks = self._do_peak_clustering(filtered_indices, clustering_params) - print(f"Number of peaks after clustering is={len(clustered_peaks)}") - + print(f"Starting peak clustering with {clustering} method..") + clustered_peaks = self._do_peak_clustering(filtered_indices, clustering, **kwargs) cluster_indices_rounded = np.round(clustered_peaks[:, :3]).astype(int) peaksws = createPeaksWorkspaceFromIndices(data_set.get_workspace(), output_ws_name, cluster_indices_rounded, data_set.get_ws_as_3d_array()) for ipk, pk in enumerate(peaksws): pk.setIntensity(clustered_peaks[ipk, -1]) - if clustering_params["name"] == Clustering.QLab.name: + if clustering == Clustering.QLab.name: #Filter peaks by qlab - BaseSX.remove_duplicate_peaks_by_qlab(peaksws, clustering_params["q_tol"]) + clustering_params = {"q_tol": 0.05 } + clustering_params.update(kwargs) + BaseSX.remove_duplicate_peaks_by_qlab(peaksws, **clustering_params) + + print(f"Number of peaks after clustering is = {len(peaksws)}") data_set.delete_rebunched_ws() - print(f"Bragg peaks finding from FasterRCNN model is completed in {time.time()-start_time} seconds!") + print(f"Bragg peaks finding from FasterRCNN model is completed in {time.time()-start_time:.2f} seconds!") - def _do_peak_clustering(self, detected_peaks, params): - print(f"Number of peaks before clustering={len(detected_peaks)}") - if params["name"] == Clustering.HDBSCAN.name: - return self._do_hdbscan_clustering(detected_peaks, params) - elif params["name"] == Clustering.KMeans.name: - return self._do_kmeans_clustering(detected_peaks) + def _do_peak_clustering(self, detected_peaks, clustering, **kwargs): + print(f"Number of peaks before clustering = {len(detected_peaks)}") + if clustering == Clustering.HDBSCAN.name: + return self._do_hdbscan_clustering(detected_peaks, **kwargs) else: return detected_peaks - def _do_hdbscan_clustering(self, peakdata, params): + def _do_hdbscan_clustering(self, peakdata, keep_ignored_labels=True, **kwargs): data = np.delete(peakdata, [3,4], axis=1) - - hdbscan = HDBSCAN(min_cluster_size=2, - min_samples=2, - store_centers="medoid", - algorithm="auto", - cluster_selection_method="eom", - metric="euclidean") + if ("keep_ignored_labels" in kwargs): + keep_ignored_labels = kwargs.pop("keep_ignored_labels") + + hdbscan_params = {"min_cluster_size": 2, + "min_samples": 2, + "store_centers" : "medoid", + "algorithm": "auto", + "cluster_selection_method": "eom", + "metric": "euclidean" + } + hdbscan_params.update(kwargs) + hdbscan = HDBSCAN(**hdbscan_params) hdbscan.fit(data) print(f"Silhouette score of the clusters={silhouette_score(data, hdbscan.labels_)}") - if ("keep_ignored_labels" in params) and params["keep_ignored_labels"]: - selected_peaks = np.concatenate((hdbscan.medoids_, data[np.where(hdbscan.labels_==-1)]), axis=0) + if keep_ignored_labels: + selected_peaks = np.concatenate((hdbscan.medoids_, data[np.where(hdbscan.labels_==-1)]), axis=0) else: selected_peaks = hdbscan.medoids_ confidence = [] for peak in selected_peaks: confidence.append(peakdata[np.where((data == peak).all(axis=1))[0].item(), -1]) return np.column_stack((selected_peaks, confidence)) - - - def _do_kmeans_clustering(self, peakdata): - stdScaler = StandardScaler() - peakdata[:, 3] = stdScaler.fit_transform(peakdata[:, 3].reshape(-1,1)).flatten() - minmaxScaler = MinMaxScaler() - peakdata[:, 4] = minmaxScaler.fit_transform(peakdata[:, 4].reshape(-1, 1)).flatten() - - WCSS = [] - cluster_range = range(1, len(peakdata), 2) - for i in cluster_range: - model = KMeans(n_clusters = i, init = 'k-means++') - model.fit(peakdata) - WCSS.append(model.inertia_) - - first_derivative = np.diff(WCSS, n=1) - elbow_point = np.argmax(first_derivative) + 1 - print(f"Selected elbow point={elbow_point} for KMeans clustering") - finalmodel = KMeans(n_clusters = elbow_point, init = "k-means++", max_iter = 500, n_init = 10, random_state = 0) - finalmodel.fit_predict(peakdata) - print(f"Silhouette score of the clusters={silhouette_score(peakdata, finalmodel.labels_)}") - return finalmodel.cluster_centers_ - + def _do_cnn_inferencing(self, workspace): data_set = WISHWorkspaceDataSet(workspace) diff --git a/diffraction/WISH/bragg-detect/cnn/README.md b/diffraction/WISH/bragg-detect/cnn/README.md index 338427a..6760580 100644 --- a/diffraction/WISH/bragg-detect/cnn/README.md +++ b/diffraction/WISH/bragg-detect/cnn/README.md @@ -7,12 +7,11 @@ Inorder to use the pre-trained Faster RCNN model inside mantid using an IDAaaS i * Launch Mantid workbench nightly from Applications->Software->Mantid->Mantid Workbench Nightly * Download `scriptrepository\diffraction\WISH` directory from mantid's script repository as instructed here https://docs.mantidproject.org/nightly/workbench/scriptrepository.html * Check whether `\diffraction\WISH` path is listed under `Python Script Directories` tab from `File->Manage User Directories` of Mantid workbench. -* Below is an example code snippet to test the code. It will create a peaks workspace with the inferred peaks from the cnn. The valid values for the clustering are QLab, HDBSCAN, KMeans. +* Below is an example code snippet to test the code. It will create a peaks workspace with the inferred peaks from the cnn. The valid values for the clustering are QLab or HDBSCAN. ```python from cnn.BraggDetectCNN import BraggDetectCNN model_weights = r'/mnt/ceph/auxiliary/wish/BraggDetect_FasterRCNN_Resnet50_Weights_v1.pt' cnn_peaks_detector = BraggDetectCNN(model_weights_path=model_weights, batch_size=64) -clustering_params = {"name":"QLab", "q_tol": 0.05} -cnn_peaks_detector.find_bragg_peaks(workspace='WISH00042730', output_ws_name="CNN_Peaks", conf_threshold=0.0, **clustering_params) +cnn_peaks_detector.find_bragg_peaks(workspace='WISH00042730', output_ws_name="CNN_Peaks", conf_threshold=0.0, clustering="QLab") ``` * If the above import is not working, check whether the `\diffraction\WISH` path is listed under `Python Script Directories` tab from `File->Manage User Directories`.