diff --git a/fastdoubleparser-dev/src/main/java/ch.randelshofer.fastdoubleparser/ch/randelshofer/fastdoubleparser/FftMultiplier.java b/fastdoubleparser-dev/src/main/java/ch.randelshofer.fastdoubleparser/ch/randelshofer/fastdoubleparser/FftMultiplier.java index 494720d7..445e21b8 100644 --- a/fastdoubleparser-dev/src/main/java/ch.randelshofer.fastdoubleparser/ch/randelshofer/fastdoubleparser/FftMultiplier.java +++ b/fastdoubleparser-dev/src/main/java/ch.randelshofer.fastdoubleparser/ch/randelshofer/fastdoubleparser/FftMultiplier.java @@ -780,6 +780,14 @@ void add(int idxa, MutableComplex c) { a[imagIdx(idxa)] += c.imag; } + void addReal(int idxa, double real) { + a[realIdx(idxa)] += real; + } + + void addImag(int idxa, double imag) { + a[imagIdx(idxa)] += imag; + } + void addInto(int idxa, ComplexVector c, int idxc, MutableComplex destination) { destination.real = a[realIdx(idxa)] + c.real(idxc); destination.imag = a[imagIdx(idxa)] + c.imag(idxc); @@ -985,6 +993,20 @@ void timesTwoToThe(int idxa, int n) { a[ri] = fastScalb(real, n); a[ii] = fastScalb(imag, n); } + + public void scale(double scale) { + for (int i = 0; i < length << COMPLEX_SIZE_SHIFT; i++) { + a[i] *= scale; + } + } + public int length() { + return length; + } + + @Override + public String toString() { + return "V=" + length; + } } final static class MutableComplex { diff --git a/fastdoubleparser-dev/src/test/java/ch/randelshofer/fastdoubleparser/FftMultiplierFft3Test.java b/fastdoubleparser-dev/src/test/java/ch/randelshofer/fastdoubleparser/FftMultiplierFft3Test.java new file mode 100644 index 00000000..dc40e8fc --- /dev/null +++ b/fastdoubleparser-dev/src/test/java/ch/randelshofer/fastdoubleparser/FftMultiplierFft3Test.java @@ -0,0 +1,66 @@ +/* + * @(#)FftMultiplierFft3Test.java + * Copyright © 2023 Werner Randelshofer, Switzerland. MIT License. + */ +package ch.randelshofer.fastdoubleparser; + +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; +import ch.randelshofer.fastdoubleparser.JmhFftMultiplierFft3.Fft3Algorithm; +import ch.randelshofer.fastdoubleparser.FftMultiplier.ComplexVector; + +import java.util.ArrayList; +import java.util.List; +import java.util.Random; +import java.util.stream.Stream; + +public class FftMultiplierFft3Test { + private static final double SCALE = 1.0 / 3; + private static final int NUMBER_OF_TESTED_RANDOM_VALUES = 10; + + @ParameterizedTest + @MethodSource("someRandomParameters") + public void compareWithOriginal(Fft3Algorithm algorithm, + ComplexVector a0, + ComplexVector a1, + ComplexVector a2, + int sign) { + algorithm.fft3(a0, a1, a2, sign, SCALE); + } + + private static Stream someRandomParameters() { + List testCases = new ArrayList<>(); + Random rng = new Random(2023); + for (int i = 0; i < NUMBER_OF_TESTED_RANDOM_VALUES; i++) { + int n = 1 << (5 + i * 4 / NUMBER_OF_TESTED_RANDOM_VALUES); + ComplexVector a0 = new ComplexVector(n); + ComplexVector a1 = new ComplexVector(n); + ComplexVector a2 = new ComplexVector(n); + for (int j = 0; j < n; j++) { + a0.set(j, rng.nextDouble(), rng.nextDouble()); + a1.set(j, rng.nextDouble(), rng.nextDouble()); + a2.set(j, rng.nextDouble(), rng.nextDouble()); + } + int sign = i % 2 == 0 ? -1 : 1; + testCases.add(new Object[]{a0, a1, a2, sign}); + } + return combineWithAlgorithms(testCases); + } + + private static Stream combineWithAlgorithms(List testCases) { + List args = new ArrayList<>(); + for (JmhFftMultiplierFft3.Fft3Algorithm algorithm : JmhFftMultiplierFft3.Fft3Algorithm.values()) { + if (!algorithm.equals(JmhFftMultiplierFft3.Fft3Algorithm.ORIGINAL)) { + for (Object[] params : testCases) { + ComplexVector a0 = (ComplexVector) params[0]; + ComplexVector a1 = (ComplexVector) params[1]; + ComplexVector a2 = (ComplexVector) params[2]; + int sign = (int) params[3]; + args.add(Arguments.of(algorithm, a0, a1, a2, sign)); + } + } + } + return args.stream(); + } +} \ No newline at end of file diff --git a/fastdoubleparser-dev/src/test/java/ch/randelshofer/fastdoubleparser/JmhFftMultiplierFft3.java b/fastdoubleparser-dev/src/test/java/ch/randelshofer/fastdoubleparser/JmhFftMultiplierFft3.java new file mode 100644 index 00000000..c22ccb9d --- /dev/null +++ b/fastdoubleparser-dev/src/test/java/ch/randelshofer/fastdoubleparser/JmhFftMultiplierFft3.java @@ -0,0 +1,503 @@ +/* + * @(#)JmhFftMultiplierFft3.java + * Copyright © 2023 Werner Randelshofer, Switzerland. MIT License. + */ +package ch.randelshofer.fastdoubleparser; + +import org.openjdk.jmh.annotations.*; +import org.openjdk.jmh.infra.Blackhole; + +import java.util.concurrent.TimeUnit; + +@Fork(value = 1, jvmArgs = { + "-XX:+UnlockExperimentalVMOptions", "--add-modules", "jdk.incubator.vector" + , "--enable-preview" + , "-Xmx4g" +}) +@Measurement(iterations = 5, time = 1) +@Warmup(iterations = 4, time = 1) +@OutputTimeUnit(TimeUnit.NANOSECONDS) +@BenchmarkMode(Mode.AverageTime) +@State(Scope.Benchmark) +public class JmhFftMultiplierFft3 { + + private static final int N = 1 << 10; + private static final double SCALE = 1.0 / 3; + + public int sign = -1; + FftMultiplier.ComplexVector a0 = new FftMultiplier.ComplexVector(N); + FftMultiplier.ComplexVector a1 = new FftMultiplier.ComplexVector(N); + FftMultiplier.ComplexVector a2 = new FftMultiplier.ComplexVector(N); + + @Setup(Level.Iteration) + public void setUp() { + for (int i = 0; i < a0.length(); i++) { + // just some values, but zeroes + a0.real(i, i); + a0.imag(i, i * 200 >>> 8); + a1.real(i, i + 4); + a1.imag(i, i * 250 >>> 7); + a2.real(i, 2 * i + 3); + a2.imag(i, i * 230 >>> 8); + } + } + + public void run(Fft3Algorithm algorithm, Blackhole blackhole) { + algorithm.fft3(a0, a1, a2, sign, SCALE); + blackhole.consume(a0); + blackhole.consume(a1); + blackhole.consume(a2); + } + + /** + * Dummy work, because any benchmark which run first was slower for any JMH settings. + */ + // @formatter:off + @Benchmark + public void a2isZero(Blackhole blackhole) { run(Fft3Algorithm.A2_IS_ZERO, blackhole); } + @Benchmark + public void original(Blackhole blackhole) { run(Fft3Algorithm.ORIGINAL, blackhole); } + @Benchmark + public void scaleStandalone(Blackhole blackhole) { run(Fft3Algorithm.SCALE_STANDALONE, blackhole); } + @Benchmark + public void scaleStandaloneSums(Blackhole blackhole) { run(Fft3Algorithm.SCALE_STANDALONE_SUMS, blackhole); } + @Benchmark + public void scaleStandaloneSumsOneIntermediateVariableCdMiddle(Blackhole blackhole) { run(Fft3Algorithm.SCALE_STANDALONE_SUMS_ONE_INTERMEDIATE_VARIABLE_CD_MIDDLE, blackhole); } + @Benchmark + public void scaleStandaloneSumsOneIntermediateVariableCdLast(Blackhole blackhole) { run(Fft3Algorithm.SCALE_STANDALONE_SUMS_ONE_INTERMEDIATE_VARIABLE_CD_LAST, blackhole); } + @Benchmark + public void scaleStandaloneSumsOneIntermediateVariableCdFirst(Blackhole blackhole) { run(Fft3Algorithm.SCALE_STANDALONE_SUMS_ONE_INTERMEDIATE_VARIABLE_CD_FIRST, blackhole); } + @Benchmark + public void scaleStandaloneOneIntermediateVariable(Blackhole blackhole) { run(Fft3Algorithm.SCALE_STANDALONE_ONE_INTERMEDIATE_VARIABLE, blackhole); } + @Benchmark + public void sums(Blackhole blackhole) { run(Fft3Algorithm.SUMS, blackhole); } + @Benchmark + public void sums2(Blackhole blackhole) { run(Fft3Algorithm.SUMS2, blackhole); } + @Benchmark + public void sums3(Blackhole blackhole) { run(Fft3Algorithm.SUMS3, blackhole); } + @Benchmark + public void sumsReused1Var(Blackhole blackhole) { run(Fft3Algorithm.SUMS_REUSED_1VAR, blackhole); } + @Benchmark + public void oneIntermediateVariable(Blackhole blackhole) { run(Fft3Algorithm.ONE_INTERMEDIATE_VARIABLE, blackhole); } + @Benchmark + public void sumsReusedOneIntermediateVar(Blackhole blackhole) { run(Fft3Algorithm.SUMS_REUSED_ONE_INTERMEDIATE_VAR, blackhole); } + @Benchmark + public void sumsReused2Var(Blackhole blackhole) { run(Fft3Algorithm.SUMS_REUSED_2VAR, blackhole); } + // @formatter:on + + enum Fft3Algorithm { + A2_IS_ZERO { + @Override + void fft3(FftMultiplier.ComplexVector a0, FftMultiplier.ComplexVector a1, FftMultiplier.ComplexVector a2, int sign, double scale) { + double omegaImag = sign * -0.5 * Math.sqrt(3); // imaginary part of omega for n=3: sin(sign*(-2)*pi*1/3) + for (int i = 0; i < a0.length(); i++) { + double c = -omegaImag * a1.imag(i); + double d = omegaImag * a1.real(i); + + double e = 0.5 * a1.real(i); + double f = 0.5 * a1.imag(i); + + double a0Real = a0.real(i) + a1.real(i); + double a0Imag = a0.imag(i) + a1.imag(i); + double a1Real = a0.real(i) - e + c; + double a1Imag = a0.imag(i) + d - f; + double a2Real = a0.real(i) - e - c; + double a2Imag = a0.imag(i) - d - f; + + a0.real(i, a0Real * scale); + a0.imag(i, a0Imag * scale); + a1.real(i, a1Real * scale); + a1.imag(i, a1Imag * scale); + a2.real(i, a2Real * scale); + a2.imag(i, a2Imag * scale); + } + } + }, + SUMS { + @Override + void fft3(FftMultiplier.ComplexVector a0, FftMultiplier.ComplexVector a1, FftMultiplier.ComplexVector a2, int sign, double scale) { + double omegaImag = sign * -0.5 * Math.sqrt(3); // imaginary part of omega for n=3: sin(sign*(-2)*pi*1/3) + for (int i = 0; i < a0.length(); i++) { + double c = omegaImag * (a2.imag(i) - a1.imag(i)); + double d = omegaImag * (a1.real(i) - a2.real(i)); + + double a12real = a1.real(i) + a2.real(i); + double a12imag = a1.imag(i) + a2.imag(i); + double a02Real = a0.real(i) + a12real; + double a02Imag = a0.imag(i) + a12imag; + + double e = 0.5 * a12real; + double f = 0.5 * a12imag; + double a1Real = a0.real(i) - e + c; + double a1Imag = a0.imag(i) + d - f; + double a2Real = a0.real(i) - e - c; + double a2Imag = a0.imag(i) - d - f; + + a0.real(i, a02Real * scale); + a0.imag(i, a02Imag * scale); + a1.real(i, a1Real * scale); + a1.imag(i, a1Imag * scale); + a2.real(i, a2Real * scale); + a2.imag(i, a2Imag * scale); + } + } + }, + SUMS2 { + @Override + void fft3(FftMultiplier.ComplexVector a0, FftMultiplier.ComplexVector a1, FftMultiplier.ComplexVector a2, int sign, double scale) { + double omegaImag = sign * -0.5 * Math.sqrt(3); // imaginary part of omega for n=3: sin(sign*(-2)*pi*1/3) + for (int i = 0; i < a0.length(); i++) { + double c = omegaImag * (a2.imag(i) - a1.imag(i)); + double d = omegaImag * (a1.real(i) - a2.real(i)); + + double a12real = a1.real(i) + a2.real(i); + double a12imag = a1.imag(i) + a2.imag(i); + + double e = 0.5 * a12real; + double f = 0.5 * a12imag; + double a1Real = a0.real(i) - e + c; + double a1Imag = a0.imag(i) + d - f; + double a2Real = a0.real(i) - e - c; + double a2Imag = a0.imag(i) - d - f; + + a0.real(i, (a0.real(i) + a12real) * scale); + a0.imag(i, (a0.imag(i) + a12imag) * scale); + a1.real(i, a1Real * scale); + a1.imag(i, a1Imag * scale); + a2.real(i, a2Real * scale); + a2.imag(i, a2Imag * scale); + } + } + }, + SUMS3 { + @Override + void fft3(FftMultiplier.ComplexVector a0, FftMultiplier.ComplexVector a1, FftMultiplier.ComplexVector a2, int sign, double scale) { + double omegaImag = sign * -0.5 * Math.sqrt(3); // imaginary part of omega for n=3: sin(sign*(-2)*pi*1/3) + for (int i = 0; i < a0.length(); i++) { + double c = omegaImag * (a2.imag(i) - a1.imag(i)); + double d = omegaImag * (a1.real(i) - a2.real(i)); + + double a12real = a1.real(i) + a2.real(i); + double a12imag = a1.imag(i) + a2.imag(i); + + double e = 0.5 * a12real; + double f = 0.5 * a12imag; + a12real += a0.real(i); + a12imag += a0.imag(i); + double a1Real = a0.real(i) - e + c; + double a1Imag = a0.imag(i) + d - f; + double a2Real = a0.real(i) - e - c; + double a2Imag = a0.imag(i) - d - f; + + a0.real(i, a12real * scale); + a0.imag(i, a12imag * scale); + a1.real(i, a1Real * scale); + a1.imag(i, a1Imag * scale); + a2.real(i, a2Real * scale); + a2.imag(i, a2Imag * scale); + } + } + }, + SUMS_REUSED_1VAR { + @Override + void fft3(FftMultiplier.ComplexVector a0, FftMultiplier.ComplexVector a1, FftMultiplier.ComplexVector a2, int sign, double scale) { + double omegaImag = sign * -0.5 * Math.sqrt(3); // imaginary part of omega for n=3: sin(sign*(-2)*pi*1/3) + for (int i = 0; i < a0.length(); i++) { + double c = omegaImag * (a2.imag(i) - a1.imag(i)); + double d = omegaImag * (a1.real(i) - a2.real(i)); + + double e = a1.real(i) + a2.real(i); + double f = a1.imag(i) + a2.imag(i); + double a02Real = a0.real(i) + e; + double a02Imag = a0.imag(i) + f; + + e *= 0.5; + f *= 0.5; + double a1Real = a0.real(i) - e + c; + double a1Imag = a0.imag(i) + d - f; + double a2Real = a0.real(i) - e - c; + double a2Imag = a0.imag(i) - d - f; + + a0.real(i, a02Real * scale); + a0.imag(i, a02Imag * scale); + a1.real(i, a1Real * scale); + a1.imag(i, a1Imag * scale); + a2.real(i, a2Real * scale); + a2.imag(i, a2Imag * scale); + } + } + }, + SUMS_REUSED_2VAR { + @Override + void fft3(FftMultiplier.ComplexVector a0, FftMultiplier.ComplexVector a1, FftMultiplier.ComplexVector a2, int sign, double scale) { + double omegaImag = sign * -0.5 * Math.sqrt(3); // imaginary part of omega for n=3: sin(sign*(-2)*pi*1/3) + for (int i = 0; i < a0.length(); i++) { + double c = omegaImag * (a2.imag(i) - a1.imag(i)); + double d = omegaImag * (a1.real(i) - a2.real(i)); + double e = a1.real(i) + a2.real(i); + double f = a1.imag(i) + a2.imag(i); + + double a02Real = a0.real(i) + e; + double a02Imag = a0.imag(i) + f; + + + e *= 0.5; + f *= 0.5; + double a1Real = a0.real(i) - e + c; + double a1Imag = a0.imag(i) + d - f; + e = a0.real(i) - e - c; + f = a0.imag(i) - d - f; + + a0.real(i, a02Real * scale); + a0.imag(i, a02Imag * scale); + a1.real(i, a1Real * scale); + a1.imag(i, a1Imag * scale); + a2.real(i, e * scale); + a2.imag(i, f * scale); + } + } + }, + SUMS_REUSED_ONE_INTERMEDIATE_VAR { + @Override + void fft3(FftMultiplier.ComplexVector a0, FftMultiplier.ComplexVector a1, FftMultiplier.ComplexVector a2, int sign, double scale) { + double omegaImag = sign * -0.5 * Math.sqrt(3); // imaginary part of omega for n=3: sin(sign*(-2)*pi*1/3) + for (int i = 0; i < a0.length(); i++) { + double c = omegaImag * (a2.imag(i) - a1.imag(i)); + double d = omegaImag * (a1.real(i) - a2.real(i)); + double e = a1.real(i) + a2.real(i); + double f = a1.imag(i) + a2.imag(i); + + double a0real = a0.real(i) + e; + double a0imag = a0.imag(i) + f; + + e *= 0.5; + f *= 0.5; + + a1.real(i, (a0.real(i) - e + c) * scale); + a1.imag(i, (a0.imag(i) + d - f) * scale); + a2.real(i, (a0.real(i) - e - c) * scale); + a2.imag(i, (a0.imag(i) - d - f) * scale); + + a0.real(i, a0real * scale); + a0.imag(i, a0imag * scale); + } + } + }, + + ORIGINAL { + @Override + void fft3(FftMultiplier.ComplexVector a0, FftMultiplier.ComplexVector a1, FftMultiplier.ComplexVector a2, int sign, double scale) { + double omegaImag = sign * -0.5 * Math.sqrt(3); // imaginary part of omega for n=3: sin(sign*(-2)*pi*1/3) + for (int i = 0; i < a0.length(); i++) { + double c = omegaImag * (a2.imag(i) - a1.imag(i)); + double d = omegaImag * (a1.real(i) - a2.real(i)); + double e = 0.5 * (a1.real(i) + a2.real(i)); + double f = 0.5 * (a1.imag(i) + a2.imag(i)); + double a02Real = a0.real(i) + a1.real(i) + a2.real(i); + double a02Imag = a0.imag(i) + a1.imag(i) + a2.imag(i); + double a1Real = a0.real(i) - e + c; + double a1Imag = a0.imag(i) + d - f; + double a2Real = a0.real(i) - e - c; + double a2Imag = a0.imag(i) - d - f; + a0.real(i, a02Real * scale); + a0.imag(i, a02Imag * scale); + a1.real(i, a1Real * scale); + a1.imag(i, a1Imag * scale); + a2.real(i, a2Real * scale); + a2.imag(i, a2Imag * scale); + } + } + }, + + SCALE_STANDALONE { + @Override + void fft3(FftMultiplier.ComplexVector a0, FftMultiplier.ComplexVector a1, FftMultiplier.ComplexVector a2, int sign, double scale) { + double omegaImag = sign * -0.5 * Math.sqrt(3); // imaginary part of omega for n=3: sin(sign*(-2)*pi*1/3) + for (int i = 0; i < a0.length(); i++) { + double c = omegaImag * (a2.imag(i) - a1.imag(i)); + double d = omegaImag * (a1.real(i) - a2.real(i)); + double e = 0.5 * (a1.real(i) + a2.real(i)); + double f = 0.5 * (a1.imag(i) + a2.imag(i)); + + double a02Real = a0.real(i) + a1.real(i) + a2.real(i); + double a02Imag = a0.imag(i) + a1.imag(i) + a2.imag(i); + double a1Real = a0.real(i) - e + c; + double a1Imag = a0.imag(i) + d - f; + double a2Real = a0.real(i) - e - c; + double a2Imag = a0.imag(i) - d - f; + + a0.real(i, a02Real); + a0.imag(i, a02Imag); + a1.real(i, a1Real); + a1.imag(i, a1Imag); + a2.real(i, a2Real); + a2.imag(i, a2Imag); + } + a0.scale(scale); + a1.scale(scale); + a2.scale(scale); + } + }, + + SCALE_STANDALONE_SUMS { + @Override + void fft3(FftMultiplier.ComplexVector a0, FftMultiplier.ComplexVector a1, FftMultiplier.ComplexVector a2, + int sign, double scale) { + double omegaImag = sign * -0.5 * Math.sqrt(3); // imaginary part of omega for n=3: sin(sign*(-2)*pi*1/3) + for (int i = 0; i < a0.length(); i++) { + double a12real = a1.real(i) + a2.real(i); + double a12imag = a1.imag(i) + a2.imag(i); + double c = omegaImag * (a2.imag(i) - a1.imag(i)); + double d = omegaImag * (a1.real(i) - a2.real(i)); + double e = 0.5 * a12real; + double f = 0.5 * a12imag; + + double a02Real = a0.real(i) + a12real; + double a02Imag = a0.imag(i) + a12imag; + double a1Real = a0.real(i) - e + c; + double a1Imag = a0.imag(i) + d - f; + double a2Real = a0.real(i) - e - c; + double a2Imag = a0.imag(i) - d - f; + + a0.real(i, a02Real); + a0.imag(i, a02Imag); + a1.real(i, a1Real); + a1.imag(i, a1Imag); + a2.real(i, a2Real); + a2.imag(i, a2Imag); + } + a0.scale(scale); + a1.scale(scale); + a2.scale(scale); + } + }, + + SCALE_STANDALONE_ONE_INTERMEDIATE_VARIABLE { + @Override + void fft3(FftMultiplier.ComplexVector a0, FftMultiplier.ComplexVector + a1, FftMultiplier.ComplexVector a2, int sign, double scale) { + double omegaImag = sign * -0.5 * Math.sqrt(3); // imaginary part of omega for n=3: sin(sign*(-2)*pi*1/3) + for (int i = 0; i < a0.length(); i++) { + double c = omegaImag * (a2.imag(i) - a1.imag(i)); + double d = omegaImag * (a1.real(i) - a2.real(i)); + double e = 0.5 * (a1.real(i) + a2.real(i)); + double f = 0.5 * (a1.imag(i) + a2.imag(i)); + + double a12real = a0.real(i) + a1.real(i) + a2.real(i); + double a12imag = a0.imag(i) + a1.imag(i) + a2.imag(i); + a1.real(i, a0.real(i) - e + c); + a1.imag(i, a0.imag(i) + d - f); + a2.real(i, a0.real(i) - e - c); + a2.imag(i, a0.imag(i) - d - f); + a0.real(i, a12real); + a0.imag(i, a12imag); + } + a0.scale(scale); + a1.scale(scale); + a2.scale(scale); + } + }, + + SCALE_STANDALONE_SUMS_ONE_INTERMEDIATE_VARIABLE_CD_MIDDLE { + @Override + void fft3(FftMultiplier.ComplexVector a0, FftMultiplier.ComplexVector a1, FftMultiplier.ComplexVector a2, int sign, double scale) { + double omegaImag = sign * -0.5 * Math.sqrt(3); // imaginary part of omega for n=3: sin(sign*(-2)*pi*1/3) + for (int i = 0; i < a0.length(); i++) { + double a12real = a1.real(i) + a2.real(i); + double a12imag = a1.imag(i) + a2.imag(i); + double c = omegaImag * (a2.imag(i) - a1.imag(i)); + double d = omegaImag * (a1.real(i) - a2.real(i)); + double e = 0.5 * a12real; + double f = 0.5 * a12imag; + + a2.real(i, a0.real(i) - e - c); + a2.imag(i, a0.imag(i) - d - f); + a1.real(i, a0.real(i) - e + c); + a1.imag(i, a0.imag(i) + d - f); + a0.addReal(i, a12real); + a0.addImag(i, a12imag); + } + a0.scale(scale); + a1.scale(scale); + a2.scale(scale); + } + }, + + SCALE_STANDALONE_SUMS_ONE_INTERMEDIATE_VARIABLE_CD_FIRST { + @Override + void fft3(FftMultiplier.ComplexVector a0, FftMultiplier.ComplexVector a1, FftMultiplier.ComplexVector a2, int sign, double scale) { + double omegaImag = sign * -0.5 * Math.sqrt(3); // imaginary part of omega for n=3: sin(sign*(-2)*pi*1/3) + for (int i = 0; i < a0.length(); i++) { + double c = omegaImag * (a2.imag(i) - a1.imag(i)); + double d = omegaImag * (a1.real(i) - a2.real(i)); + double a12real = a1.real(i) + a2.real(i); + double a12imag = a1.imag(i) + a2.imag(i); + double e = 0.5 * a12real; + double f = 0.5 * a12imag; + + a2.real(i, a0.real(i) - e - c); + a2.imag(i, a0.imag(i) - d - f); + a1.real(i, a0.real(i) - e + c); + a1.imag(i, a0.imag(i) + d - f); + a0.addReal(i, a12real); + a0.addImag(i, a12imag); + } + a0.scale(scale); + a1.scale(scale); + a2.scale(scale); + } + }, + + SCALE_STANDALONE_SUMS_ONE_INTERMEDIATE_VARIABLE_CD_LAST { + @Override + void fft3(FftMultiplier.ComplexVector a0, FftMultiplier.ComplexVector a1, FftMultiplier.ComplexVector a2, int sign, double scale) { + double omegaImag = sign * -0.5 * Math.sqrt(3); // imaginary part of omega for n=3: sin(sign*(-2)*pi*1/3) + for (int i = 0; i < a0.length(); i++) { + double a12real = a1.real(i) + a2.real(i); + double a12imag = a1.imag(i) + a2.imag(i); + double e = 0.5 * a12real; + double f = 0.5 * a12imag; + double c = omegaImag * (a2.imag(i) - a1.imag(i)); + double d = omegaImag * (a1.real(i) - a2.real(i)); + + a2.real(i, a0.real(i) - e - c); + a2.imag(i, a0.imag(i) - d - f); + a1.real(i, a0.real(i) - e + c); + a1.imag(i, a0.imag(i) + d - f); + a0.addReal(i, a12real); + a0.addImag(i, a12imag); + } + a0.scale(scale); + a1.scale(scale); + a2.scale(scale); + } + }, + + ONE_INTERMEDIATE_VARIABLE { + @Override + void fft3(FftMultiplier.ComplexVector a0, FftMultiplier.ComplexVector a1, FftMultiplier.ComplexVector a2, int sign, double scale) { + double omegaImag = sign * -0.5 * Math.sqrt(3); // imaginary part of omega for n=3: sin(sign*(-2)*pi*1/3) + for (int i = 0; i < a0.length(); i++) { + double c = omegaImag * (a2.imag(i) - a1.imag(i)); + double d = omegaImag * (a1.real(i) - a2.real(i)); + double a12real = a1.real(i) + a2.real(i); + double a12imag = a1.imag(i) + a2.imag(i); + double e = 0.5 * a12real; + double f = 0.5 * a12imag; + + a12real = a0.real(i) + a12real; + a12imag = a0.imag(i) + a12imag; + a1.real(i, (a0.real(i) - e + c) * scale); + a1.imag(i, (a0.imag(i) + d - f) * scale); + a2.real(i, (a0.real(i) - e - c) * scale); + a2.imag(i, (a0.imag(i) - d - f) * scale); + a0.real(i, a12real * scale); + a0.imag(i, a12imag * scale); + } + } + }; + + @SuppressWarnings("SameParameterValue") + abstract void fft3(FftMultiplier.ComplexVector a0, FftMultiplier.ComplexVector a1, FftMultiplier.ComplexVector a2, int sign, double scale); + } +} \ No newline at end of file