Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Added KZ spectrum smoothing and tests #631

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 83 additions & 0 deletions mzLib/MassSpectrometry/MzSpectra/MzSpectrum.cs
Original file line number Diff line number Diff line change
Expand Up @@ -636,6 +636,14 @@ public void ReplaceXbyApplyingFunction(Func<MzPeak, double> convertor)
Buffer.BlockCopy(YArray, 0, data, size * Size, size * Size);
return data;
}
public virtual double[,] CopyTo2DArray(double[] xArray, double[] yArray)
{
double[,] data = new double[2, Size];
const int size = sizeof(double);
Buffer.BlockCopy(xArray, 0, data, 0, size * Size);
Buffer.BlockCopy(yArray, 0, data, size * Size, size * Size);
return data;
}

public double? GetClosestPeakXvalue(double x)
{
Expand Down Expand Up @@ -811,5 +819,80 @@ private MzPeak GeneratePeak(int index)
{
return new MzPeak(XArray[index], YArray[index]);
}
/// <summary>
/// This method smooths a mass spectrum by using a Kolmogorov–Zurbenko (KZ).
/// High quality spectral smoothing is the first step in my deconvolultion algorithm.
/// Algorithm employed is the Kolmogorov-Zurbenko algorithm originally written in C for R:
/// https://github.com/cran/kza/blob/master/src/kz.c
/// </summary>
public double[,] SmoothSpectrumKZ(int window, int iterations)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

let's use an informative name for window and have variable explanations for window and iterations before the method with recommendations.

{
double[] smoothedIntArray = KZ1D(YArray, window, iterations);
if(smoothedIntArray.Length < YArray.Length)
{
throw new ArgumentException("output length is unequal to input length");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

let's change this to an mzlib exception with a message. add a unit test for that exception to make sure it happens and to make sure the message is correct. this gets passed to metamorpheus and are really helpful

}
return CopyTo2DArray(XArray, smoothedIntArray);
}
/// <summary>
/// Implements the KZ filter for one-dimensional data.
/// </summary>
/// <param name="x"></param><summary>double array (double[]) of the intensity values.</summary>
/// <param name="window"></param><summary>The window size over which to perform the filtering.</summary>
/// <param name="iterations"></param><summary>The number of smoothering iterations to perform.</summary>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

smoothering?

/// <returns></returns>
public static double[] KZ1D(double[] x, int window, int iterations)
{
int m = 2 * window + 1;
int p = (m - 1) / 2;

// iterations are performed in this function. The result from the first iteration is
// copied and the next iteration is performed on the result of the first one.
// Note that the code I ported this function from was initially written in R,
// which intentionally doesn't use reference semantics. So I'm sure there's a way to
// optimize this procedure, and I'm open to suggestions.

double[] result = new double[x.Length];
double[] xCopy = new double[x.Length];
// remember that block copy requires the total number of bytes to copy as the
// last parameter.
Buffer.BlockCopy(x, 0, xCopy, 0, x.Length * sizeof(double));

for(int k = 0; k < iterations; k++)
{
for(int i = 0; i < x.Length; i++)
{
result[i] = Mavg1D(xCopy, i, p);
}
Buffer.BlockCopy(result, 0, xCopy, 0, result.Length * sizeof(double));
}
return result;
}
/// <summary>
/// Computes the moving average for a given window. Used in conjuction with the KZ1D method.
/// </summary>
/// <param name="v"></param><summary>The source data array.</summary>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

let's use more informative variable names here for v and w at least

/// <param name="col"></param><summary>The starting index in the source data.</summary>
/// <param name="w"></param><summary>The length of the window over which to calculate the moving average.</summary>
/// <returns></returns>
private static double Mavg1D(double[] v, int col, int w)
{
int startcol = col - w > 0 ? col - w : 0;
int endcol = col + w < v.Length - 1 ? col + w + 1 : v.Length - 1;

int z = 0;
double s = 0.0;

for(int i = startcol; i < endcol; i++)
{
z++;
s += v[i];
}
if (z == 0)
{
return double.NaN;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please cover this with a unit test

}
return s / (double)z;
}
}
}
84 changes: 84 additions & 0 deletions mzLib/Test/TestSpectrumSmoothing.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
using System;
using NUnit.Framework;
using System.Diagnostics.CodeAnalysis;
using MassSpectrometry;
using System.Linq;
using MathNet;
using System.Collections.Generic;

namespace Test
{
[TestFixture]
[ExcludeFromCodeCoverage]
public class TestSpectrumSmoothing
{
[Test]
// Standard case.
[TestCase(3, 2)]
// These next two cases are to show what happens when window is as long or
// longer than signal. They effectively return the average of the whole scan.
[TestCase(50, 2)]
[TestCase(51, 2)]
public void TestKZ1D(int windowSize, int iterations)
{
// fuzzy square wave. i = 1 from 25 to 34 and 0 at all other points.
double[] testArray = new double[50];
double[] squareWave = new double[10];
// populate squareWave with 1's.
for(int i = 0; i < squareWave.Length; i++)
{
squareWave[i] = 1D;
}
Buffer.BlockCopy(squareWave, 0, testArray, sizeof(double)*24, sizeof(double) * squareWave.Length);

// add noise to the square wave:
// use a set seed to make things reproducible
Random rnd = new(1551);
for(int i = 0; i < testArray.Length; i++)
{
testArray[i] += rnd.NextDouble()/10;
}
double[] smoothedData = MzSpectrum.KZ1D(testArray, windowSize, iterations);

// Testing length of output.
Assert.AreEqual(50, smoothedData.Length);

// A better test might be to perform a cross-correlation with the original
// square wave and see a high cross-correlation value from indices 24-35.
}
[Test]
[TestCase(5, 2)]
public void TestSmoothSpectra(int windowSize, int iterations)
{
// construct an mzspectrum object that is just a square wave.
// square wave construction for the intensity values
double[] testArray = new double[50];
double[] squareWave = new double[10];
// populate squareWave with 1's.
for (int i = 0; i < squareWave.Length; i++)
{
squareWave[i] = 1D;
}
Buffer.BlockCopy(squareWave, 0, testArray, sizeof(double) * 24, sizeof(double) * squareWave.Length);
// m/z values will just be the sequence from 0 to 49.
double[] mzVals = new double[testArray.Length];
for(int i = 0; i < mzVals.Length; i++)
{
mzVals[i] = (double)i;
}

MzSpectrum testMzSpectrum = new(mzVals, testArray, true);

double[,] result = testMzSpectrum.SmoothSpectrumKZ(windowSize, iterations);
// scan average should be zero based on window
double shouldBeZero = result[1,0];
// original scan has a zero at 23, but the filtered scan should
// have a non-zero value.
double shouldNotBeZero = result[1,23];
Assert.AreEqual(50, result.GetLength(1));
Assert.IsTrue(shouldBeZero == 0);
Assert.IsTrue(shouldNotBeZero > 0);
}
}

}