From 985caeef2e4ea0a4b5fbfa5e04be503a3c42e403 Mon Sep 17 00:00:00 2001 From: Luc Patiny Date: Tue, 1 Oct 2024 13:55:41 +0200 Subject: [PATCH] feat: add xBoxPlotWithOutliers method to deal with possible outliers (#260) This removes the previous option in xBoxPlot that would have yielded different types --- .../__snapshots__/index.test.ts.snap | 1 + src/x/__tests__/xBoxPlot.test.ts | 20 --- src/x/__tests__/xBoxPlotWithOutliers.test.ts | 116 ++++++++++++++++++ src/x/index.ts | 1 + src/x/xBoxPlot.ts | 26 +--- src/x/xBoxPlotWithOutliers.ts | 43 +++++++ 6 files changed, 162 insertions(+), 45 deletions(-) create mode 100644 src/x/__tests__/xBoxPlotWithOutliers.test.ts create mode 100644 src/x/xBoxPlotWithOutliers.ts diff --git a/src/__tests__/__snapshots__/index.test.ts.snap b/src/__tests__/__snapshots__/index.test.ts.snap index 68ec1d0b..bbef2f6a 100644 --- a/src/__tests__/__snapshots__/index.test.ts.snap +++ b/src/__tests__/__snapshots__/index.test.ts.snap @@ -14,6 +14,7 @@ exports[`test existence of exported functions 1`] = ` "xApplyFunctionStr", "xAutoCorrelation", "xBoxPlot", + "xBoxPlotWithOutliers", "xCheck", "xCheckLengths", "xCorrelation", diff --git a/src/x/__tests__/xBoxPlot.test.ts b/src/x/__tests__/xBoxPlot.test.ts index 8b0e18ac..b80f23e6 100644 --- a/src/x/__tests__/xBoxPlot.test.ts +++ b/src/x/__tests__/xBoxPlot.test.ts @@ -10,7 +10,6 @@ test('test xBoxPlot even', () => { q3: 8.5, min: 0, max: 11, - outliers: [], }); }); @@ -22,7 +21,6 @@ test('test xBoxPlot even small', () => { q3: 4, min: 0, max: 5, - outliers: [], }); }); @@ -34,7 +32,6 @@ test('test xBoxPlot odd', () => { q3: 8, min: 0, max: 10, - outliers: [], }); }); @@ -46,7 +43,6 @@ test('test xBoxPlot odd small', () => { q3: 3.5, min: 0, max: 4, - outliers: [], }); }); @@ -73,7 +69,6 @@ test('test xBoxPlot with one element', () => { q3: 42, min: 42, max: 42, - outliers: [], }); }); @@ -86,7 +81,6 @@ test('test xBoxPlot with 2 elements', () => { q3: 44, min: 40, max: 44, - outliers: [], }); }); @@ 
-99,7 +93,6 @@ test('test xBoxPlot with 3 elements', () => { q3: 44, min: 40, max: 44, - outliers: [], }); }); @@ -111,18 +104,5 @@ test('outliers', () => { q3: 8, min: 0, max: 100, - outliers: [], - }); -}); - -test('outliers', () => { - const array = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 100]; - expect(xBoxPlot(array, { calculateOutliers: true })).toStrictEqual({ - q1: 2, - median: 5, - q3: 8, - min: 0, - max: 9, - outliers: [100], }); }); diff --git a/src/x/__tests__/xBoxPlotWithOutliers.test.ts b/src/x/__tests__/xBoxPlotWithOutliers.test.ts new file mode 100644 index 00000000..19b7f220 --- /dev/null +++ b/src/x/__tests__/xBoxPlotWithOutliers.test.ts @@ -0,0 +1,116 @@ +import { expect, test } from 'vitest'; + +import { xBoxPlotWithOutliers } from '../xBoxPlotWithOutliers'; + +test('test xBoxPlotWithOutliers even', () => { + const array = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]; + expect(xBoxPlotWithOutliers(array)).toStrictEqual({ + q1: 2.5, + median: 5.5, + q3: 8.5, + min: 0, + max: 11, + outliers: [], + }); +}); + +test('test xBoxPlotWithOutliers even small', () => { + const array = [0, 1, 2, 3, 4, 5]; + expect(xBoxPlotWithOutliers(array)).toStrictEqual({ + q1: 1, + median: 2.5, + q3: 4, + min: 0, + max: 5, + outliers: [], + }); +}); + +test('test xBoxPlotWithOutliers odd', () => { + const array = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; + expect(xBoxPlotWithOutliers(array)).toStrictEqual({ + q1: 2, + median: 5, + q3: 8, + min: 0, + max: 10, + outliers: [], + }); +}); + +test('test xBoxPlotWithOutliers odd small', () => { + const array = [0, 1, 2, 3, 4]; + expect(xBoxPlotWithOutliers(array)).toStrictEqual({ + q1: 0.5, + median: 2, + q3: 3.5, + min: 0, + max: 4, + outliers: [], + }); +}); + +test('test xBoxPlotWithOutliers too small', () => { + const array = [0, 1, 2, 3]; + expect(() => xBoxPlotWithOutliers(array)).toThrow( + 'can not calculate info if array contains less than 5 elements', + ); +}); + +test('test xBoxPlotWithOutliers with one element', () => { + const array 
= [42]; + expect(() => xBoxPlotWithOutliers(array)).toThrow( + 'can not calculate info if array contains less than 5 elements', + ); +}); + +test('test xBoxPlotWithOutliers with one element', () => { + const array = [42]; + const result = xBoxPlotWithOutliers(array, { allowSmallArray: true }); + expect(result).toStrictEqual({ + q1: 42, + median: 42, + q3: 42, + min: 42, + max: 42, + outliers: [], + }); +}); + +test('test xBoxPlotWithOutliers with 2 elements', () => { + const array = [40, 44]; + const result = xBoxPlotWithOutliers(array, { allowSmallArray: true }); + expect(result).toStrictEqual({ + q1: 40, + median: 42, + q3: 44, + min: 40, + max: 44, + outliers: [], + }); +}); + +test('test xBoxPlotWithOutliers with 3 elements', () => { + const array = [40, 42, 44]; + const result = xBoxPlotWithOutliers(array, { allowSmallArray: true }); + expect(result).toStrictEqual({ + q1: 40, + median: 42, + q3: 44, + min: 40, + max: 44, + outliers: [], + }); +}); + +test('outliers', () => { + const array = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 100]; + expect(xBoxPlotWithOutliers(array)).toStrictEqual({ + q1: 2, + median: 5, + q3: 8, + min: 0, + max: 9, + outliers: [100], + }); +}); diff --git a/src/x/index.ts b/src/x/index.ts index 6d298e77..67dd2d45 100644 --- a/src/x/index.ts +++ b/src/x/index.ts @@ -6,6 +6,7 @@ export * from './xAdd'; export * from './xApplyFunctionStr'; export * from './xAutoCorrelation'; export * from './xBoxPlot'; +export * from './xBoxPlotWithOutliers'; export * from './xCheck'; export * from './xCheckLengths'; export * from './xCorrelation'; diff --git a/src/x/xBoxPlot.ts b/src/x/xBoxPlot.ts index 6eeef07d..b6925322 100644 --- a/src/x/xBoxPlot.ts +++ b/src/x/xBoxPlot.ts @@ -6,12 +6,6 @@ export interface XBoxPlotOptions { * @default false */ allowSmallArray?: boolean; - - /** - * Calculate outliers (value < min-1.5IQR or value > max+1.5IQR). The min and max are recalculated without the outliers. 
- * @default false - */ - calculateOutliers?: boolean; } export interface XBoxPlot { @@ -20,7 +14,6 @@ export interface XBoxPlot { q3: number; min: number; max: number; - outliers: number[]; } /** @@ -32,7 +25,7 @@ export function xBoxPlot( array: NumberArray, options: XBoxPlotOptions = {}, ): XBoxPlot { - const { allowSmallArray = false, calculateOutliers = false } = options; + const { allowSmallArray = false } = options; if (array.length < 5) { if (allowSmallArray) { if (array.length === 0) { @@ -53,7 +46,6 @@ export function xBoxPlot( q3: 0, min: array[0], max: array.at(-1) as number, - outliers: [], }; let q1max, q3min; if (array.length % 2 === 1) { @@ -77,21 +69,5 @@ export function xBoxPlot( info.q3 = (array[middleOver] + array[middleOver - 1]) / 2; } - if (calculateOutliers) { - const iqr = info.q3 - info.q1; - const min = info.q1 - 1.5 * iqr; - const max = info.q3 + 1.5 * iqr; - // we need to recalculate the min and the max because they could be outliers - info.min = info.median; - info.max = info.median; - for (const value of array) { - if (value < min || value > max) { - info.outliers.push(value); - } else { - if (value < info.min) info.min = value; - if (value > info.max) info.max = value; - } - } - } return info; } diff --git a/src/x/xBoxPlotWithOutliers.ts b/src/x/xBoxPlotWithOutliers.ts new file mode 100644 index 00000000..c3cb25cd --- /dev/null +++ b/src/x/xBoxPlotWithOutliers.ts @@ -0,0 +1,43 @@ +import { NumberArray } from 'cheminfo-types'; + +import { xBoxPlot, XBoxPlotOptions } from './xBoxPlot'; + +export interface XBoxPlotWithOutliers { + q1: number; + median: number; + q3: number; + min: number; + max: number; + outliers: number[]; +} + +/** + * Calculating the box plot of the array + * @param array - data + * @param options + */ +export function xBoxPlotWithOutliers( + array: NumberArray, + options: XBoxPlotOptions = {}, +): XBoxPlotWithOutliers { + const info: XBoxPlotWithOutliers = { + ...xBoxPlot(array, options), + outliers: [], + }; + + 
const iqr = info.q3 - info.q1; + const min = info.q1 - 1.5 * iqr; + const max = info.q3 + 1.5 * iqr; + // we need to recalculate the min and the max because they could be outliers + info.min = info.median; + info.max = info.median; + for (const value of array) { + if (value < min || value > max) { + info.outliers.push(value); + } else { + if (value < info.min) info.min = value; + if (value > info.max) info.max = value; + } + } + return info; +}