-
Notifications
You must be signed in to change notification settings - Fork 34
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[WIP] Added KZ spectrum smoothing and tests #631
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -636,6 +636,14 @@ public void ReplaceXbyApplyingFunction(Func<MzPeak, double> convertor) | |
Buffer.BlockCopy(YArray, 0, data, size * Size, size * Size); | ||
return data; | ||
} | ||
public virtual double[,] CopyTo2DArray(double[] xArray, double[] yArray) | ||
{ | ||
double[,] data = new double[2, Size]; | ||
const int size = sizeof(double); | ||
Buffer.BlockCopy(xArray, 0, data, 0, size * Size); | ||
Buffer.BlockCopy(yArray, 0, data, size * Size, size * Size); | ||
return data; | ||
} | ||
|
||
public double? GetClosestPeakXvalue(double x) | ||
{ | ||
|
@@ -811,5 +819,80 @@ private MzPeak GeneratePeak(int index) | |
{ | ||
return new MzPeak(XArray[index], YArray[index]); | ||
} | ||
/// <summary> | ||
/// This method smooths a mass spectrum by using a Kolmogorov–Zurbenko (KZ). | ||
/// High quality spectral smoothing is the first step in my deconvolultion algorithm. | ||
/// Algorithm employed is the Kolmogorov-Zurbenko algorithm originally written in C for R: | ||
/// https://github.com/cran/kza/blob/master/src/kz.c | ||
/// </summary> | ||
public double[,] SmoothSpectrumKZ(int window, int iterations) | ||
{ | ||
double[] smoothedIntArray = KZ1D(YArray, window, iterations); | ||
if(smoothedIntArray.Length < YArray.Length) | ||
{ | ||
throw new ArgumentException("output length is unequal to input length"); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. let's change this to an mzlib exception with a message. add a unit test for that exception to make sure it happens and to make sure the message is correct. this gets passed to metamorpheus and are really helpful |
||
} | ||
return CopyTo2DArray(XArray, smoothedIntArray); | ||
} | ||
/// <summary> | ||
/// Implements the KZ filter for one-dimensional data. | ||
/// </summary> | ||
/// <param name="x"></param><summary>double array (double[]) of the intensity values.</summary> | ||
/// <param name="window"></param><summary>The window size over which to perform the filtering.</summary> | ||
/// <param name="iterations"></param><summary>The number of smoothering iterations to perform.</summary> | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. smoothering? |
||
/// <returns></returns> | ||
public static double[] KZ1D(double[] x, int window, int iterations) | ||
{ | ||
int m = 2 * window + 1; | ||
int p = (m - 1) / 2; | ||
|
||
// iterations are performed in this function. The result from the first iteration is | ||
// copied and the next iteration is performed on the result of the first one. | ||
// Note that the code I ported this function from was initially written in R, | ||
// which intentionally doesn't use reference semantics. So I'm sure there's a way to | ||
// optimize this procedure, and I'm open to suggestions. | ||
|
||
double[] result = new double[x.Length]; | ||
double[] xCopy = new double[x.Length]; | ||
// remember that block copy requires the total number of bytes to copy as the | ||
// last parameter. | ||
Buffer.BlockCopy(x, 0, xCopy, 0, x.Length * sizeof(double)); | ||
|
||
for(int k = 0; k < iterations; k++) | ||
{ | ||
for(int i = 0; i < x.Length; i++) | ||
{ | ||
result[i] = Mavg1D(xCopy, i, p); | ||
} | ||
Buffer.BlockCopy(result, 0, xCopy, 0, result.Length * sizeof(double)); | ||
} | ||
return result; | ||
} | ||
/// <summary> | ||
/// Computes the moving average for a given window. Used in conjuction with the KZ1D method. | ||
/// </summary> | ||
/// <param name="v"></param><summary>The source data array.</summary> | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. let's use more informative variable names here for v and w at least |
||
/// <param name="col"></param><summary>The starting index in the source data.</summary> | ||
/// <param name="w"></param><summary>The length of the window over which to calculate the moving average.</summary> | ||
/// <returns></returns> | ||
private static double Mavg1D(double[] v, int col, int w) | ||
{ | ||
int startcol = col - w > 0 ? col - w : 0; | ||
int endcol = col + w < v.Length - 1 ? col + w + 1 : v.Length - 1; | ||
|
||
int z = 0; | ||
double s = 0.0; | ||
|
||
for(int i = startcol; i < endcol; i++) | ||
{ | ||
z++; | ||
s += v[i]; | ||
} | ||
if (z == 0) | ||
{ | ||
return double.NaN; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. please cover this with a unit test |
||
} | ||
return s / (double)z; | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
using System; | ||
using NUnit.Framework; | ||
using System.Diagnostics.CodeAnalysis; | ||
using MassSpectrometry; | ||
using System.Linq; | ||
using MathNet; | ||
using System.Collections.Generic; | ||
|
||
namespace Test | ||
{ | ||
[TestFixture] | ||
[ExcludeFromCodeCoverage] | ||
public class TestSpectrumSmoothing | ||
{ | ||
[Test] | ||
// Standard case. | ||
[TestCase(3, 2)] | ||
// These next two cases are to show what happens when window is as long or | ||
// longer than signal. They effectively return the average of the whole scan. | ||
[TestCase(50, 2)] | ||
[TestCase(51, 2)] | ||
public void TestKZ1D(int windowSize, int iterations) | ||
{ | ||
// fuzzy square wave. i = 1 from 25 to 34 and 0 at all other points. | ||
double[] testArray = new double[50]; | ||
double[] squareWave = new double[10]; | ||
// populate squareWave with 1's. | ||
for(int i = 0; i < squareWave.Length; i++) | ||
{ | ||
squareWave[i] = 1D; | ||
} | ||
Buffer.BlockCopy(squareWave, 0, testArray, sizeof(double)*24, sizeof(double) * squareWave.Length); | ||
|
||
// add noise to the square wave: | ||
// use a set seed to make things reproducible | ||
Random rnd = new(1551); | ||
for(int i = 0; i < testArray.Length; i++) | ||
{ | ||
testArray[i] += rnd.NextDouble()/10; | ||
} | ||
double[] smoothedData = MzSpectrum.KZ1D(testArray, windowSize, iterations); | ||
|
||
// Testing length of output. | ||
Assert.AreEqual(50, smoothedData.Length); | ||
|
||
// A better test might be to perform a cross-correlation with the original | ||
// square wave and see a high cross-correlation value from indices 24-35. | ||
} | ||
[Test] | ||
[TestCase(5, 2)] | ||
public void TestSmoothSpectra(int windowSize, int iterations) | ||
{ | ||
// construct an mzspectrum object that is just a square wave. | ||
// square wave construction for the intensity values | ||
double[] testArray = new double[50]; | ||
double[] squareWave = new double[10]; | ||
// populate squareWave with 1's. | ||
for (int i = 0; i < squareWave.Length; i++) | ||
{ | ||
squareWave[i] = 1D; | ||
} | ||
Buffer.BlockCopy(squareWave, 0, testArray, sizeof(double) * 24, sizeof(double) * squareWave.Length); | ||
// m/z values will just be the sequence from 0 to 49. | ||
double[] mzVals = new double[testArray.Length]; | ||
for(int i = 0; i < mzVals.Length; i++) | ||
{ | ||
mzVals[i] = (double)i; | ||
} | ||
|
||
MzSpectrum testMzSpectrum = new(mzVals, testArray, true); | ||
|
||
double[,] result = testMzSpectrum.SmoothSpectrumKZ(windowSize, iterations); | ||
// scan average should be zero based on window | ||
double shouldBeZero = result[1,0]; | ||
// original scan has a zero at 23, but the filtered scan should | ||
// have a non-zero value. | ||
double shouldNotBeZero = result[1,23]; | ||
Assert.AreEqual(50, result.GetLength(1)); | ||
Assert.IsTrue(shouldBeZero == 0); | ||
Assert.IsTrue(shouldNotBeZero > 0); | ||
} | ||
} | ||
|
||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
let's use an informative name for window and have variable explanations for window and iterations before the method with recommendations.