Add test for calculating means

Cleaned function for calculating means in the reduction analysis and added unit test to check the filtering of widths and intensities is working. I removed all of the assertions because instead of stoping the code it's best to at least keep going and produce the table for means so that at least the user can have a clue for what happened.
mantidproject · Nov 29, 2024 · 52d7417 · 52d7417
1 parent 91e635f
commit 52d7417
Show file tree

Hide file tree

Showing 2 changed files with 60 additions and 85 deletions.
diff --git a/src/mvesuvio/analysis_reduction.py b/src/mvesuvio/analysis_reduction.py
@@ -413,34 +413,43 @@ def _create_summed_workspaces(self):
             )
 
     def _set_means_and_std(self):
-        """Calculate mean widths and intensities from tableWorkspace"""
-
-        fitParsTable = self._table_fit_results
-        widths = np.zeros((self._profiles_table.rowCount(), fitParsTable.rowCount()))
+        widths = np.zeros((self._profiles_table.rowCount(), self._table_fit_results.rowCount()))
         intensities = np.zeros(widths.shape)
+
         for i, label in enumerate(self._profiles_table.column("label")):
-            widths[i] = fitParsTable.column(f"{label} width")
-            intensities[i] = fitParsTable.column(f"{label} intensity")
-        (
-            meanWidths,
-            stdWidths,
-            meanIntensityRatios,
-            stdIntensityRatios,
-        ) = self.calculateMeansAndStds(widths, intensities)
-
-        assert (
-            len(meanWidths) == self._profiles_table.rowCount()
-        ), "Number of mean widths must match number of profiles!"
-
-        self._mean_widths = meanWidths
-        self._std_widths = stdWidths
-        self._mean_intensity_ratios = meanIntensityRatios
-        self._std_intensity_ratios = stdIntensityRatios
+            widths[i] = self._table_fit_results.column(f"{label} width")
+            intensities[i] = self._table_fit_results.column(f"{label} intensity")
+            self._set_means_and_std_arrays(widths, intensities)
 
         self._create_means_table()
         return
 
 
+    def _set_means_and_std_arrays(self, widths, intensities):
+        # Remove failed fits and masked spectra
+        non_zero_columns = np.any(widths!=0, axis=0)
+        widths = widths[:, non_zero_columns]
+        intensities = intensities[:, non_zero_columns]
+
+        widths_mean = np.mean(widths, axis=1).reshape(-1, 1)
+        widths_std = np.std(widths, axis=1).reshape(-1, 1)
+
+        widths_deviations = np.abs(widths - widths_mean)
+
+        # Remove width outliers
+        widths[widths_deviations > widths_std] = np.nan
+        intensities[widths_deviations > widths_std] = np.nan
+
+        # Use sum instead of nansum to propagate nans
+        intensities = intensities / intensities.sum(axis=0)
+
+        self._mean_widths = np.nanmean(widths, axis=1) 
+        self._std_widths = np.nanstd(widths, axis=1) 
+        self._mean_intensity_ratios = np.nanmean(intensities, axis=1) 
+        self._std_intensity_ratios = np.nanstd(intensities, axis=1) 
+        return
+
+
     def _create_means_table(self):
         table = CreateEmptyTableWorkspace(
             OutputWorkspace=self._workspace_being_fit.name() + "_means"
@@ -470,69 +479,6 @@ def _create_means_table(self):
         return table
 
 
-    def calculateMeansAndStds(self, widthsIn, intensitiesIn):
-        betterWidths, betterIntensities = self.filterWidthsAndIntensities(widthsIn, intensitiesIn)
-
-        meanWidths = np.nanmean(betterWidths, axis=1)
-        stdWidths = np.nanstd(betterWidths, axis=1)
-
-        meanIntensityRatios = np.nanmean(betterIntensities, axis=1)
-        stdIntensityRatios = np.nanstd(betterIntensities, axis=1)
-
-        return meanWidths, stdWidths, meanIntensityRatios, stdIntensityRatios
-
-
-    def filterWidthsAndIntensities(self, widthsIn, intensitiesIn):
-        """Puts nans in places to be ignored"""
-
-        widths = widthsIn.copy()  # Copy to avoid accidental changes in arrays
-        intensities = intensitiesIn.copy()
-
-        zeroSpecs = np.all(
-            widths == 0, axis=0
-        )  # Catches all failed fits, not just masked spectra
-        widths[:, zeroSpecs] = np.nan
-        intensities[:, zeroSpecs] = np.nan
-
-        meanWidths = np.nanmean(widths, axis=1)[:, np.newaxis]
-
-        widthDeviation = np.abs(widths - meanWidths)
-        stdWidths = np.nanstd(widths, axis=1)[:, np.newaxis]
-
-        # Put nan in places where width deviation is bigger than std
-        filterMask = widthDeviation > stdWidths
-        betterWidths = np.where(filterMask, np.nan, widths)
-
-        maskedIntensities = np.where(filterMask, np.nan, intensities)
-        betterIntensities = maskedIntensities / np.sum(
-            maskedIntensities, axis=0
-        )  # Not nansum()
-
-        # Keep this around in case it is needed again
-        # When trying to estimate HToMassIdxRatio and normalization fails, skip normalization
-        # if np.all(np.isnan(betterIntensities)) & IC.runningPreliminary:
-        #     assert IC.noOfMSIterations == 0, (
-        #         "Calculation of mean intensities failed, cannot proceed with MS correction."
-        #         "Try to run again with noOfMSIterations=0."
-        #     )
-        #     betterIntensities = maskedIntensities
-        # else:
-        #     pass
-
-        assert np.all(meanWidths != np.nan), "At least one mean of widths is nan!"
-        assert np.sum(filterMask) >= 1, "No widths survive filtering condition"
-        assert not (np.all(np.isnan(betterWidths))), "All filtered widths are nan"
-        assert not (np.all(np.isnan(betterIntensities))), "All filtered intensities are nan"
-        assert np.nanmax(betterWidths) != np.nanmin(
-            betterWidths
-        ), f"All filtered widths have the same value: {np.nanmin(betterWidths)}"
-        assert np.nanmax(betterIntensities) != np.nanmin(
-            betterIntensities
-        ), f"All filtered intensities have the same value: {np.nanmin(betterIntensities)}"
-
-        return betterWidths, betterIntensities
-
-
     def _fit_neutron_compton_profiles_to_row(self):
 
         if np.all(self._dataY[self._row_being_fit] == 0):

diff --git a/tests/unit/analysis/test_analysis_reduction.py b/tests/unit/analysis/test_analysis_reduction.py
@@ -350,6 +350,35 @@ def test_get_lorentzian_resolution(self):
         centers = np.array([11.3, 9.6, 14.7]).reshape(-1, 1)
         np.testing.assert_allclose(alg._get_lorentzian_resolution(centers), np.array([11, 12, 15]).reshape(-1, 1))
 
-
+
+    def test_set_means_and_std(self):
+        alg = VesuvioAnalysisRoutine()
+        alg._create_means_table = MagicMock()
+        alg._profiles_table = MagicMock(rowCount=MagicMock(return_value=2), column=MagicMock(return_value=['1.0', '12.0']))
+
+        def pick_column(arg):
+            table = {
+                '1.0 width': [5.6, 5.1, 0, 2, 5.4],
+                '12.0 width': [2.1, 1, 0, 2.3, 1.9],
+                '1.0 intensity': [7.8, 7.6, 0, 5, 7.3],
+                '12.0 intensity': [3.1, 2, 0, 3.2, 3.1],
+            }
+            return table[arg]
+
+        alg._table_fit_results = MagicMock(rowCount=MagicMock(return_value=5), column=MagicMock(side_effect=pick_column))
+
+        alg._set_means_and_std()
+
+        self.assertEqual(alg._table_fit_results.column.call_count, 4)
+        self.assertEqual(alg._mean_widths[0], np.mean([5.6, 5.1, 5.4]))
+        self.assertEqual(alg._std_widths[0], np.std([5.6, 5.1, 5.4]))
+        self.assertEqual(alg._mean_widths[1], np.mean([2.1, 2.3, 1.9]))
+        self.assertEqual(alg._std_widths[1], np.std([2.1, 2.3, 1.9]))
+        self.assertEqual(alg._mean_intensity_ratios[0], np.nanmean(np.array([7.8, 7.6, np.nan, np.nan, 7.3]) / np.array([7.8+3.1, np.nan, np.nan, np.nan, 7.3+3.1])))
+        self.assertEqual(alg._std_intensity_ratios[0], np.nanstd(np.array([7.8, 7.6, np.nan, np.nan, 7.3]) / np.array([7.8+3.1, np.nan, np.nan, np.nan, 7.3+3.1])))
+        self.assertEqual(alg._mean_intensity_ratios[1], np.nanmean(np.array([3.1, np.nan, np.nan, 3.2, 3.1]) / np.array([7.8+3.1, np.nan, np.nan, np.nan, 7.3+3.1])))
+        self.assertEqual(alg._std_intensity_ratios[1], np.nanstd(np.array([3.1, np.nan, np.nan, 3.2, 3.1]) / np.array([7.8+3.1, np.nan, np.nan, np.nan, 7.3+3.1])))
+
+
 if __name__ == "__main__":
     unittest.main()