diff --git a/diffraction/WISH/bragg-detect/cnn/BraggDetectCNN.py b/diffraction/WISH/bragg-detect/cnn/BraggDetectCNN.py
index 83b7794..c18bc94 100644
--- a/diffraction/WISH/bragg-detect/cnn/BraggDetectCNN.py
+++ b/diffraction/WISH/bragg-detect/cnn/BraggDetectCNN.py
@@ -47,23 +47,26 @@ def __init__(self, model_weights_path, batch_size=64, workers=0, iou_threshold=0
         self.iou_threshold = iou_threshold
 
-    def find_bragg_peaks(self, workspace, output_ws_name="CNN_Peaks", conf_threshold=0.0, clustering=Clustering.QLab.name, q_tol=0.05):
+    def find_bragg_peaks(self, workspace, output_ws_name="CNN_Peaks", conf_threshold=0.0, **kwargs):
         """
         Find bragg peaks using the pre trained FasterRCNN model and create a peaks workspace
         :param workspace: Workspace name or the object of Workspace from WISH, ex: "WISH0042730"
         :param output_ws_name: Name of the peaks workspace
         :param conf_threshold: Confidence threshold to filter peaks inferred from RCNN
-        :param clustering: Clustering method to filter and merge the peaks ex: QLab, HDBSCAN, KMeans
-        :param q_tol: QLab tolerance to remove duplicate peaks, it will onlye be useful when clustering is QLab
+        :param kwargs: Variable keyword arguments for clustering. Default is {"name": "QLab", "q_tol": 0.05}.
+            Ex: {"name": "HDBSCAN", "keep_ignored_labels": True}
         """
+        clustering_params = {"name": "QLab", "q_tol": 0.05}
+        clustering_params.update(kwargs)
+
         start_time = time.time()
         data_set, predicted_indices = self._do_cnn_inferencing(workspace)
         filtered_indices = predicted_indices[predicted_indices[:, -1] > conf_threshold]
 
         #Do Clustering
-        print(f"Starting peak clustering with {clustering} method..")
-        clustered_peaks = self._do_peak_clustering(filtered_indices, clustering)
+        print(f"Starting peak clustering with {clustering_params['name']} method..")
+        clustered_peaks = self._do_peak_clustering(filtered_indices, clustering_params)
         print(f"Number of peaks after clustering is={len(clustered_peaks)}")
         cluster_indices_rounded = np.round(clustered_peaks[:, :3]).astype(int)
@@ -71,35 +74,44 @@ def find_bragg_peaks(self, workspace, output_ws_name="CNN_Peaks", conf_threshold
         for ipk, pk in enumerate(peaksws):
             pk.setIntensity(clustered_peaks[ipk, -1])
 
-        if clustering == Clustering.QLab.name:
+        if clustering_params["name"] == Clustering.QLab.name:
             #Filter peaks by qlab
-            BaseSX.remove_duplicate_peaks_by_qlab(peaksws, q_tol)
+            BaseSX.remove_duplicate_peaks_by_qlab(peaksws, clustering_params["q_tol"])
         data_set.delete_rebunched_ws()
         print(f"Bragg peaks finding from FasterRCNN model is completed in {time.time()-start_time} seconds!")
 
-    def _do_peak_clustering(self, detected_peaks, clustering):
+    def _do_peak_clustering(self, detected_peaks, params):
         print(f"Number of peaks before clustering={len(detected_peaks)}")
-        if clustering == Clustering.HDBSCAN.name:
-            return self._do_hdbscan_clustering(detected_peaks)
-        elif clustering == Clustering.KMeans.name:
+        if params["name"] == Clustering.HDBSCAN.name:
+            return self._do_hdbscan_clustering(detected_peaks, params)
+        elif params["name"] == Clustering.KMeans.name:
             return self._do_kmeans_clustering(detected_peaks)
         else:
             return detected_peaks
 
-    def _do_hdbscan_clustering(self, peakdata):
+    def _do_hdbscan_clustering(self, peakdata, params):
         data = np.delete(peakdata, [3,4], axis=1)
-        hdbscan = HDBSCAN(min_cluster_size=2, store_centers="medoid")
+        hdbscan = HDBSCAN(min_cluster_size=2,
+                          min_samples=2,
+                          store_centers="medoid",
+                          algorithm="auto",
+                          cluster_selection_method="eom",
+                          metric="euclidean")
         hdbscan.fit(data)
         print(f"Silhouette score of the clusters={silhouette_score(data, hdbscan.labels_)}")
+        if ("keep_ignored_labels" in params) and params["keep_ignored_labels"]:
+            selected_peaks = np.concatenate((hdbscan.medoids_, data[np.where(hdbscan.labels_==-1)]), axis=0)
+        else:
+            selected_peaks = hdbscan.medoids_
         confidence = []
-        for medoid in hdbscan.medoids_:
-            confidence.append(peakdata[np.where((data == medoid).all(axis=1))[0].item(), -1])
-        return np.column_stack((hdbscan.medoids_, confidence))
+        for peak in selected_peaks:
+            confidence.append(peakdata[np.where((data == peak).all(axis=1))[0].item(), -1])
+        return np.column_stack((selected_peaks, confidence))
 
     def _do_kmeans_clustering(self, peakdata):
diff --git a/diffraction/WISH/bragg-detect/cnn/README.md b/diffraction/WISH/bragg-detect/cnn/README.md
index 4045c19..338427a 100644
--- a/diffraction/WISH/bragg-detect/cnn/README.md
+++ b/diffraction/WISH/bragg-detect/cnn/README.md
@@ -12,6 +12,7 @@ Inorder to use the pre-trained Faster RCNN model inside mantid using an IDAaaS i
 from cnn.BraggDetectCNN import BraggDetectCNN
 model_weights = r'/mnt/ceph/auxiliary/wish/BraggDetect_FasterRCNN_Resnet50_Weights_v1.pt'
 cnn_peaks_detector = BraggDetectCNN(model_weights_path=model_weights, batch_size=64)
-cnn_peaks_detector.find_bragg_peaks(workspace='WISH00042730', output_ws_name="CNN_Peaks", conf_threshold=0.0, clustering="QLab", q_tol=0.05)
+clustering_params = {"name": "QLab", "q_tol": 0.05}
+cnn_peaks_detector.find_bragg_peaks(workspace='WISH00042730', output_ws_name="CNN_Peaks", conf_threshold=0.0, **clustering_params)
 ```
 * If the above import is not working, check whether the `\diffraction\WISH` path is listed under `Python Script Directories` tab from `File->Manage User Directories`.
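
Reviewer note: the updated README only shows the default QLab clustering. Below is a minimal sketch (not part of this diff) of calling the new keyword-based interface with HDBSCAN instead, reusing the weights path and workspace name from the README; the `{"name": "HDBSCAN", "keep_ignored_labels": True}` example comes straight from the new docstring.

```python
# Sketch: HDBSCAN clustering via the new kwargs-based interface, reusing the
# README's weights path and workspace name (adjust both for your session).
from cnn.BraggDetectCNN import BraggDetectCNN

model_weights = r'/mnt/ceph/auxiliary/wish/BraggDetect_FasterRCNN_Resnet50_Weights_v1.pt'
cnn_peaks_detector = BraggDetectCNN(model_weights_path=model_weights, batch_size=64)

# keep_ignored_labels=True keeps HDBSCAN noise points (label -1) in the
# result in addition to the cluster medoids.
clustering_params = {"name": "HDBSCAN", "keep_ignored_labels": True}
cnn_peaks_detector.find_bragg_peaks(
    workspace="WISH00042730",
    output_ws_name="CNN_Peaks",
    conf_threshold=0.0,
    **clustering_params,
)
```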
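
For reference, a self-contained sketch of the medoid/noise selection that `_do_hdbscan_clustering` now performs, run on synthetic 3-D peak positions (assumes scikit-learn >= 1.3, which provides `sklearn.cluster.HDBSCAN` with `store_centers="medoid"`). The variable names and data here are illustrative only.

```python
# Self-contained sketch of the selection logic in _do_hdbscan_clustering.
import numpy as np
from sklearn.cluster import HDBSCAN

rng = np.random.default_rng(seed=1)
# Three tight clusters of candidate peak positions plus two isolated points
# that HDBSCAN should label as noise (-1).
clusters = [rng.normal(loc=c, scale=0.05, size=(5, 3))
            for c in ([0, 0, 0], [5, 5, 5], [10, 0, 5])]
outliers = np.array([[20.0, 20.0, 20.0], [-15.0, 3.0, 8.0]])
data = np.vstack(clusters + [outliers])

hdbscan = HDBSCAN(min_cluster_size=2, min_samples=2, store_centers="medoid")
hdbscan.fit(data)

keep_ignored_labels = True
if keep_ignored_labels:
    # Medoid of each cluster plus the unclustered (noise) points.
    selected_peaks = np.concatenate((hdbscan.medoids_, data[hdbscan.labels_ == -1]), axis=0)
else:
    selected_peaks = hdbscan.medoids_

print(selected_peaks.shape)  # cluster medoids, plus any noise points kept
```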