From b33b3cd583c5af63de62a69ed3887608892cd9d9 Mon Sep 17 00:00:00 2001 From: Torfinn Berset Date: Sat, 17 Feb 2024 15:13:05 +0100 Subject: [PATCH] Create arm_fir_decimate_f64 based on arm_fir_decimate_f32, but without Neon or Helium vectorizations --- Include/dsp/filtering_functions.h | 49 +- .../FilteringFunctions/arm_fir_decimate_f64.c | 454 ++++++++++++++++++ .../arm_fir_decimate_init_f64.c | 105 ++++ 3 files changed, 606 insertions(+), 2 deletions(-) create mode 100644 Source/FilteringFunctions/arm_fir_decimate_f64.c create mode 100644 Source/FilteringFunctions/arm_fir_decimate_init_f64.c diff --git a/Include/dsp/filtering_functions.h b/Include/dsp/filtering_functions.h index 3b0a2d3b..c24d9d56 100755 --- a/Include/dsp/filtering_functions.h +++ b/Include/dsp/filtering_functions.h @@ -820,7 +820,7 @@ extern "C" } arm_fir_decimate_instance_q31; /** - @brief Instance structure for floating-point FIR decimator. + @brief Instance structure for single precision floating-point FIR decimator. */ typedef struct { @@ -830,8 +830,53 @@ typedef struct float32_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ } arm_fir_decimate_instance_f32; + /** + @brief Instance structure for double precision floating-point FIR decimator. + */ + typedef struct + { + uint8_t M; /**< decimation factor. */ + uint16_t numTaps; /**< number of coefficients in the filter. */ + const float64_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps.*/ + float64_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ + } arm_fir_decimate_instance_f64; -/** + /** + @brief Processing function for floating-point FIR decimator. + @param[in] S points to an instance of the floating-point FIR decimator structure + @param[in] pSrc points to the block of input data + @param[out] pDst points to the block of output data + @param[in] blockSize number of samples to process + */ + void arm_fir_decimate_f64( + const arm_fir_decimate_instance_f64 * S, + const float64_t * pSrc, + float64_t * pDst, + uint32_t blockSize); + + + /** + @brief Initialization function for the floating-point FIR decimator. + @param[in,out] S points to an instance of the floating-point FIR decimator structure + @param[in] numTaps number of coefficients in the filter + @param[in] M decimation factor + @param[in] pCoeffs points to the filter coefficients + @param[in] pState points to the state buffer + @param[in] blockSize number of input samples to process per call + @return execution status + - \ref ARM_MATH_SUCCESS : Operation successful + - \ref ARM_MATH_LENGTH_ERROR : blockSize is not a multiple of M + */ + arm_status arm_fir_decimate_init_f64( + arm_fir_decimate_instance_f64 * S, + uint16_t numTaps, + uint8_t M, + const float64_t * pCoeffs, + float64_t * pState, + uint32_t blockSize); + + + /** @brief Processing function for floating-point FIR decimator. @param[in] S points to an instance of the floating-point FIR decimator structure @param[in] pSrc points to the block of input data diff --git a/Source/FilteringFunctions/arm_fir_decimate_f64.c b/Source/FilteringFunctions/arm_fir_decimate_f64.c new file mode 100644 index 00000000..3dcb9320 --- /dev/null +++ b/Source/FilteringFunctions/arm_fir_decimate_f64.c @@ -0,0 +1,454 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_fir_decimate_f64.c + * Description: FIR decimation for floating-point sequences + * + * $Date: 17 February 2024 + * $Revision: V1.16.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2024 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/filtering_functions.h" + +/** + @ingroup groupFilters + */ + +/** + @defgroup FIR_decimate Finite Impulse Response (FIR) Decimator + + These functions combine an FIR filter together with a decimator. + They are used in multirate systems for reducing the sample rate of a signal without introducing aliasing distortion. + Conceptually, the functions are equivalent to the block diagram below: + \image html FIRDecimator.gif "Components included in the FIR Decimator functions" + When decimating by a factor of M, the signal should be prefiltered by a lowpass filter with a normalized + cutoff frequency of 1/M in order to prevent aliasing distortion. + The user of the function is responsible for providing the filter coefficients. + + The FIR decimator functions provided in the CMSIS DSP Library combine the FIR filter and the decimator in an efficient manner. + Instead of calculating all of the FIR filter outputs and discarding M-1 out of every M, only the + samples output by the decimator are computed. + The functions operate on blocks of input and output data. + pSrc points to an array of blockSize input values and + pDst points to an array of blockSize/M output values. + In order to have an integer number of output samples blockSize + must always be a multiple of the decimation factor M. + + The library provides separate functions for Q15, Q31 and floating-point data types. + + @par Algorithm: + The FIR portion of the algorithm uses the standard form filter: +
+      y[n] = b[0] * x[n] + b[1] * x[n-1] + b[2] * x[n-2] + ...+ b[numTaps-1] * x[n-numTaps+1]
+  
+ where, b[n] are the filter coefficients. + @par + The pCoeffs points to a coefficient array of size numTaps. + Coefficients are stored in time reversed order. + @par +
+      {b[numTaps-1], b[numTaps-2], b[N-2], ..., b[1], b[0]}
+  
+ @par + pState points to a state array of size numTaps + blockSize - 1. + Samples in the state buffer are stored in the order: + @par +
+      {x[n-numTaps+1], x[n-numTaps], x[n-numTaps-1], x[n-numTaps-2]....x[0], x[1], ..., x[blockSize-1]}
+  
+ The state variables are updated after each block of data is processed, the coefficients are untouched. + + @par Instance Structure + The coefficients and state variables for a filter are stored together in an instance data structure. + A separate instance structure must be defined for each filter. + Coefficient arrays may be shared among several instances while state variable array should be allocated separately. + There are separate instance structure declarations for each of the 3 supported data types. + + @par Initialization Functions + There is also an associated initialization function for each data type. + The initialization function performs the following operations: + - Sets the values of the internal structure fields. + - Zeros out the values in the state buffer. + - Checks to make sure that the size of the input is a multiple of the decimation factor. + To do this manually without calling the init function, assign the follow subfields of the instance structure: + numTaps, pCoeffs, M (decimation factor), pState. Also set all of the values in pState to zero. + @par + Use of the initialization function is optional. + However, if the initialization function is used, then the instance structure cannot be placed into a const data section. + To place an instance structure into a const data section, the instance structure must be manually initialized. + The code below statically initializes each of the 3 different data type filter instance structures +
+      arm_fir_decimate_instance_f64 S = {M, numTaps, pCoeffs, pState};
+      arm_fir_decimate_instance_q31 S = {M, numTaps, pCoeffs, pState};
+      arm_fir_decimate_instance_q15 S = {M, numTaps, pCoeffs, pState};
+  
+ where M is the decimation factor; numTaps is the number of filter coefficients in the filter; + pCoeffs is the address of the coefficient buffer; + pState is the address of the state buffer. + Be sure to set the values in the state buffer to zeros when doing static initialization. + + @par Fixed-Point Behavior + Care must be taken when using the fixed-point versions of the FIR decimate filter functions. + In particular, the overflow and saturation behavior of the accumulator used in each function must be considered. + Refer to the function specific documentation below for usage guidelines. + */ + +/** + @addtogroup FIR_decimate + @{ + */ + +/** + @brief Processing function for floating-point FIR decimator. + @param[in] S points to an instance of the floating-point FIR decimator structure + @param[in] pSrc points to the block of input data + @param[out] pDst points to the block of output data + @param[in] blockSize number of input samples to process + */ + +void arm_fir_decimate_f64( + const arm_fir_decimate_instance_f64 * S, + const float64_t * pSrc, + float64_t * pDst, + uint32_t blockSize) +{ + float64_t *pState = S->pState; /* State pointer */ + const float64_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ + float64_t *pStateCur; /* Points to the current sample of the state */ + float64_t *px0; /* Temporary pointer for state buffer */ + const float64_t *pb; /* Temporary pointer for coefficient buffer */ + float64_t x0, c0; /* Temporary variables to hold state and coefficient values */ + float64_t acc0; /* Accumulator */ + uint32_t numTaps = S->numTaps; /* Number of filter coefficients in the filter */ + uint32_t i, tapCnt, blkCnt, outBlockSize = blockSize / S->M; /* Loop counters */ + +#if defined (ARM_MATH_LOOPUNROLL) + float64_t *px1, *px2, *px3; + float64_t x1, x2, x3; + float64_t acc1, acc2, acc3; +#endif + + /* S->pState buffer contains previous frame (numTaps - 1) samples */ + /* pStateCur points to the location where the new input data should be written */ + pStateCur = S->pState + (numTaps - 1U); + +#if defined (ARM_MATH_LOOPUNROLL) + + /* Loop unrolling: Compute 4 samples at a time */ + blkCnt = outBlockSize >> 2U; + + /* Samples loop unrolled by 4 */ + while (blkCnt > 0U) + { + /* Copy 4 * decimation factor number of new input samples into the state buffer */ + i = S->M * 4; + + do + { + *pStateCur++ = *pSrc++; + + } while (--i); + + /* Set accumulators to zero */ + acc0 = 0.0f; + acc1 = 0.0f; + acc2 = 0.0f; + acc3 = 0.0f; + + /* Initialize state pointer for all the samples */ + px0 = pState; + px1 = pState + S->M; + px2 = pState + 2 * S->M; + px3 = pState + 3 * S->M; + + /* Initialize coeff pointer */ + pb = pCoeffs; + + /* Loop unrolling: Compute 4 taps at a time */ + tapCnt = numTaps >> 2U; + + while (tapCnt > 0U) + { + /* Read the b[numTaps-1] coefficient */ + c0 = *(pb++); + + /* Read x[n-numTaps-1] sample for acc0 */ + x0 = *(px0++); + /* Read x[n-numTaps-1] sample for acc1 */ + x1 = *(px1++); + /* Read x[n-numTaps-1] sample for acc2 */ + x2 = *(px2++); + /* Read x[n-numTaps-1] sample for acc3 */ + x3 = *(px3++); + + /* Perform the multiply-accumulate */ + acc0 += x0 * c0; + acc1 += x1 * c0; + acc2 += x2 * c0; + acc3 += x3 * c0; + + /* Read the b[numTaps-2] coefficient */ + c0 = *(pb++); + + /* Read x[n-numTaps-2] sample for acc0, acc1, acc2, acc3 */ + x0 = *(px0++); + x1 = *(px1++); + x2 = *(px2++); + x3 = *(px3++); + + /* Perform the multiply-accumulate */ + acc0 += x0 * c0; + acc1 += x1 * c0; + acc2 += x2 * c0; + acc3 += x3 * c0; + + /* Read the b[numTaps-3] coefficient */ + c0 = *(pb++); + + /* Read x[n-numTaps-3] sample acc0, acc1, acc2, acc3 */ + x0 = *(px0++); + x1 = *(px1++); + x2 = *(px2++); + x3 = *(px3++); + + /* Perform the multiply-accumulate */ + acc0 += x0 * c0; + acc1 += x1 * c0; + acc2 += x2 * c0; + acc3 += x3 * c0; + + /* Read the b[numTaps-4] coefficient */ + c0 = *(pb++); + + /* Read x[n-numTaps-4] sample acc0, acc1, acc2, acc3 */ + x0 = *(px0++); + x1 = *(px1++); + x2 = *(px2++); + x3 = *(px3++); + + /* Perform the multiply-accumulate */ + acc0 += x0 * c0; + acc1 += x1 * c0; + acc2 += x2 * c0; + acc3 += x3 * c0; + + /* Decrement loop counter */ + tapCnt--; + } + + /* Loop unrolling: Compute remaining taps */ + tapCnt = numTaps % 0x4U; + + while (tapCnt > 0U) + { + /* Read coefficients */ + c0 = *(pb++); + + /* Fetch state variables for acc0, acc1, acc2, acc3 */ + x0 = *(px0++); + x1 = *(px1++); + x2 = *(px2++); + x3 = *(px3++); + + /* Perform the multiply-accumulate */ + acc0 += x0 * c0; + acc1 += x1 * c0; + acc2 += x2 * c0; + acc3 += x3 * c0; + + /* Decrement loop counter */ + tapCnt--; + } + + /* Advance the state pointer by the decimation factor + * to process the next group of decimation factor number samples */ + pState = pState + S->M * 4; + + /* The result is in the accumulator, store in the destination buffer. */ + *pDst++ = acc0; + *pDst++ = acc1; + *pDst++ = acc2; + *pDst++ = acc3; + + /* Decrement loop counter */ + blkCnt--; + } + + /* Loop unrolling: Compute remaining samples */ + blkCnt = outBlockSize % 0x4U; + +#else + + /* Initialize blkCnt with number of samples */ + blkCnt = outBlockSize; + +#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ + + while (blkCnt > 0U) + { + /* Copy decimation factor number of new input samples into the state buffer */ + i = S->M; + + do + { + *pStateCur++ = *pSrc++; + + } while (--i); + + /* Set accumulator to zero */ + acc0 = 0.0f; + + /* Initialize state pointer */ + px0 = pState; + + /* Initialize coeff pointer */ + pb = pCoeffs; + +#if defined (ARM_MATH_LOOPUNROLL) + + /* Loop unrolling: Compute 4 taps at a time */ + tapCnt = numTaps >> 2U; + + while (tapCnt > 0U) + { + /* Read the b[numTaps-1] coefficient */ + c0 = *pb++; + + /* Read x[n-numTaps-1] sample */ + x0 = *px0++; + + /* Perform the multiply-accumulate */ + acc0 += x0 * c0; + + /* Read the b[numTaps-2] coefficient */ + c0 = *pb++; + + /* Read x[n-numTaps-2] sample */ + x0 = *px0++; + + /* Perform the multiply-accumulate */ + acc0 += x0 * c0; + + /* Read the b[numTaps-3] coefficient */ + c0 = *pb++; + + /* Read x[n-numTaps-3] sample */ + x0 = *px0++; + + /* Perform the multiply-accumulate */ + acc0 += x0 * c0; + + /* Read the b[numTaps-4] coefficient */ + c0 = *pb++; + + /* Read x[n-numTaps-4] sample */ + x0 = *px0++; + + /* Perform the multiply-accumulate */ + acc0 += x0 * c0; + + /* Decrement loop counter */ + tapCnt--; + } + + /* Loop unrolling: Compute remaining taps */ + tapCnt = numTaps % 0x4U; + +#else + + /* Initialize tapCnt with number of taps */ + tapCnt = numTaps; + +#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ + + while (tapCnt > 0U) + { + /* Read coefficients */ + c0 = *pb++; + + /* Fetch 1 state variable */ + x0 = *px0++; + + /* Perform the multiply-accumulate */ + acc0 += x0 * c0; + + /* Decrement loop counter */ + tapCnt--; + } + + /* Advance the state pointer by the decimation factor + * to process the next group of decimation factor number samples */ + pState = pState + S->M; + + /* The result is in the accumulator, store in the destination buffer. */ + *pDst++ = acc0; + + /* Decrement loop counter */ + blkCnt--; + } + + /* Processing is complete. + Now copy the last numTaps - 1 samples to the satrt of the state buffer. + This prepares the state buffer for the next function call. */ + + /* Points to the start of the state buffer */ + pStateCur = S->pState; + +#if defined (ARM_MATH_LOOPUNROLL) + + /* Loop unrolling: Compute 4 taps at a time */ + tapCnt = (numTaps - 1U) >> 2U; + + /* Copy data */ + while (tapCnt > 0U) + { + *pStateCur++ = *pState++; + *pStateCur++ = *pState++; + *pStateCur++ = *pState++; + *pStateCur++ = *pState++; + + /* Decrement loop counter */ + tapCnt--; + } + + /* Loop unrolling: Compute remaining taps */ + tapCnt = (numTaps - 1U) % 0x04U; + +#else + + /* Initialize tapCnt with number of taps */ + tapCnt = (numTaps - 1U); + +#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ + + /* Copy data */ + while (tapCnt > 0U) + { + *pStateCur++ = *pState++; + + /* Decrement loop counter */ + tapCnt--; + } + +} +/** + @} end of FIR_decimate group + */ diff --git a/Source/FilteringFunctions/arm_fir_decimate_init_f64.c b/Source/FilteringFunctions/arm_fir_decimate_init_f64.c new file mode 100644 index 00000000..2fe50ac9 --- /dev/null +++ b/Source/FilteringFunctions/arm_fir_decimate_init_f64.c @@ -0,0 +1,105 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_fir_decimate_init_f64.c + * Description: Floating-point FIR Decimator initialization function + * + * $Date: 17 February 2024 + * $Revision: V1.16.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2024 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/filtering_functions.h" + +/** + @ingroup groupFilters + */ + +/** + @addtogroup FIR_decimate + @{ + */ + +/** + @brief Initialization function for the floating-point FIR decimator. + @param[in,out] S points to an instance of the floating-point FIR decimator structure + @param[in] numTaps number of coefficients in the filter + @param[in] M decimation factor + @param[in] pCoeffs points to the filter coefficients + @param[in] pState points to the state buffer + @param[in] blockSize number of input samples to process per call + @return execution status + - \ref ARM_MATH_SUCCESS : Operation successful + - \ref ARM_MATH_LENGTH_ERROR : blockSize is not a multiple of M + + @par Details + pCoeffs points to the array of filter coefficients stored in time reversed order: +
+      {b[numTaps-1], b[numTaps-2], b[N-2], ..., b[1], b[0]}
+  
+ @par + pState points to the array of state variables. + pState is of length numTaps+blockSize-1 words where blockSize is the number of input samples passed to arm_fir_decimate_f64(). + M is the decimation factor. + */ + +arm_status arm_fir_decimate_init_f64( + arm_fir_decimate_instance_f64 * S, + uint16_t numTaps, + uint8_t M, + const float64_t * pCoeffs, + float64_t * pState, + uint32_t blockSize) +{ + arm_status status; + + /* The size of the input block must be a multiple of the decimation factor */ + if ((blockSize % M) != 0U) + { + /* Set status as ARM_MATH_LENGTH_ERROR */ + status = ARM_MATH_LENGTH_ERROR; + } + else + { + /* Assign filter taps */ + S->numTaps = numTaps; + + /* Assign coefficient pointer */ + S->pCoeffs = pCoeffs; + + /* Clear the state buffer. The size is always (blockSize + numTaps - 1) */ + memset(pState, 0, (numTaps + (blockSize - 1U)) * sizeof(float64_t)); + + /* Assign state pointer */ + S->pState = pState; + + /* Assign Decimation Factor */ + S->M = M; + + status = ARM_MATH_SUCCESS; + } + + return (status); + +} + +/** + @} end of FIR_decimate group + */