Commit

Added some Gaussian processes and kernel methods. Added a bunch of new optimization methods.
ooples committed Jan 7, 2025
1 parent 51d6be5 commit 9d3f9cb
Showing 73 changed files with 5,683 additions and 50 deletions.
1 change: 1 addition & 0 deletions src/AiDotNet.csproj
@@ -49,6 +49,7 @@
<ItemGroup>
<Folder Include="Evaluation\" />
<Folder Include="Caching\" />
<Folder Include="GaussianProcesses\" />
<Folder Include="TimeSeries\" />
</ItemGroup>

7 changes: 7 additions & 0 deletions src/Enums/AcquisitionFunctionType.cs
@@ -0,0 +1,7 @@
namespace AiDotNet.Enums;

public enum AcquisitionFunctionType
{
UpperConfidenceBound,
ExpectedImprovement
}
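
For context, this enum selects the acquisition function used to decide which point a Bayesian-optimization loop evaluates next. Below is a minimal sketch of how the two options are commonly scored from a Gaussian process's predictive mean and variance; the class, method signature, kappa parameter, and normal-CDF approximation are illustrative assumptions and are not part of this commit.

// Illustrative sketch only (not part of this commit).
public static class AcquisitionFunctionSketch
{
    // Scores a candidate point from a GP prediction; higher is better (maximization).
    public static double Evaluate(AcquisitionFunctionType type, double mean, double variance, double bestObserved, double kappa = 2.0)
    {
        double stdDev = Math.Sqrt(Math.Max(variance, 0.0));
        switch (type)
        {
            case AcquisitionFunctionType.UpperConfidenceBound:
                // Trade off exploitation (high mean) against exploration (high uncertainty).
                return mean + kappa * stdDev;
            case AcquisitionFunctionType.ExpectedImprovement:
                if (stdDev == 0.0) return 0.0;
                double z = (mean - bestObserved) / stdDev;
                // E[max(f - bestObserved, 0)] under the Gaussian predictive distribution.
                return (mean - bestObserved) * NormalCdf(z) + stdDev * NormalPdf(z);
            default:
                throw new ArgumentOutOfRangeException(nameof(type));
        }
    }

    private static double NormalPdf(double z) => Math.Exp(-0.5 * z * z) / Math.Sqrt(2.0 * Math.PI);

    private static double NormalCdf(double z)
    {
        // Abramowitz & Stegun 26.2.17 polynomial approximation to the standard normal CDF.
        double t = 1.0 / (1.0 + 0.2316419 * Math.Abs(z));
        double poly = t * (0.319381530 + t * (-0.356563782 + t * (1.781477937 + t * (-1.821255978 + t * 1.330274429))));
        double tail = NormalPdf(z) * poly;
        return z >= 0.0 ? 1.0 - tail : tail;
    }
}
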
123 changes: 123 additions & 0 deletions src/GaussianProcesses/MultiOutputGaussianProcess.cs
@@ -0,0 +1,123 @@
namespace AiDotNet.GaussianProcesses;

public class MultiOutputGaussianProcess<T> : IGaussianProcess<T>
{
private IKernelFunction<T> _kernel;
private Matrix<T> _X;
private Matrix<T> _Y;
private Matrix<T> _K;
private Matrix<T> _L;
private Matrix<T> _alpha;
private INumericOperations<T> _numOps;

public MultiOutputGaussianProcess(IKernelFunction<T> kernel)
{
_kernel = kernel;
_numOps = MathHelper.GetNumericOperations<T>();
_X = Matrix<T>.Empty();
_Y = Matrix<T>.Empty();
_K = Matrix<T>.Empty();
_L = Matrix<T>.Empty();
_alpha = Matrix<T>.Empty();
}

public void Fit(Matrix<T> X, Vector<T> y)
{
throw new InvalidOperationException("Use FitMultiOutput method for multi-output GP");
}

public void FitMultiOutput(Matrix<T> X, Matrix<T> Y)
{
_X = X;
_Y = Y;

// Calculate the kernel matrix
_K = CalculateKernelMatrix(X, X);

// Add a small value to the diagonal for numerical stability
for (int i = 0; i < _K.Rows; i++)
{
_K[i, i] = _numOps.Add(_K[i, i], _numOps.FromDouble(1e-8));
}

// Solve for alpha using Cholesky decomposition
_alpha = new Matrix<T>(Y.Rows, Y.Columns, _numOps);
for (int i = 0; i < Y.Columns; i++)
{
var y_col = Y.GetColumn(i);
var alpha_col = MatrixSolutionHelper.SolveLinearSystem(_K, y_col, MatrixDecompositionType.Cholesky);
for (int j = 0; j < Y.Rows; j++)
{
_alpha[j, i] = alpha_col[j];
}
}

// Cache the Cholesky factor of the kernel matrix
_L = new CholeskyDecomposition<T>(_K).L;
}

public (T mean, T variance) Predict(Vector<T> x)
{
throw new InvalidOperationException("Use PredictMultiOutput method for multi-output GP");
}

public (Vector<T> means, Matrix<T> covariance) PredictMultiOutput(Vector<T> x)
{
var k_star = CalculateKernelVector(_X, x);
var means = new Vector<T>(_Y.Columns, _numOps);

for (int i = 0; i < _Y.Columns; i++)
{
means[i] = _numOps.Zero;
for (int j = 0; j < k_star.Length; j++)
{
means[i] = _numOps.Add(means[i], _numOps.Multiply(k_star[j], _alpha[j, i]));
}
}

var v = MatrixSolutionHelper.SolveLinearSystem(_K, k_star, MatrixDecompositionType.Cholesky);
var variance = _numOps.Subtract(_kernel.Calculate(x, x), k_star.DotProduct(v));
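// Outputs are modeled as independent given the shared kernel: each output gets
// the same predictive variance and cross-output covariances remain zero.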
var covariance = new Matrix<T>(_Y.Columns, _Y.Columns, _numOps);

for (int i = 0; i < _Y.Columns; i++)
{
covariance[i, i] = variance;
}

return (means, covariance);
}

public void UpdateKernel(IKernelFunction<T> kernel)
{
_kernel = kernel;
if (!_X.IsEmpty && !_Y.IsEmpty)
{
FitMultiOutput(_X, _Y);
}
}

private Matrix<T> CalculateKernelMatrix(Matrix<T> X1, Matrix<T> X2)
{
var K = new Matrix<T>(X1.Rows, X2.Rows, _numOps);
for (int i = 0; i < X1.Rows; i++)
{
for (int j = 0; j < X2.Rows; j++)
{
K[i, j] = _kernel.Calculate(X1.GetRow(i), X2.GetRow(j));
}
}

return K;
}

private Vector<T> CalculateKernelVector(Matrix<T> X, Vector<T> x)
{
var k = new Vector<T>(X.Rows, _numOps);
for (int i = 0; i < X.Rows; i++)
{
k[i] = _kernel.Calculate(X.GetRow(i), x);
}

return k;
}
}
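
In equations, FitMultiOutput and PredictMultiOutput above perform independent GP regression on each output column with a single shared kernel: for output j,

\alpha_j = (K + 10^{-8} I)^{-1} y_j, \qquad \mu_j(x_*) = k_*^\top \alpha_j, \qquad \sigma^2(x_*) = k(x_*, x_*) - k_*^\top K^{-1} k_*,

where K = k(X, X) is the training kernel matrix and k_* = k(X, x_*). The same \sigma^2(x_*) is placed on every diagonal entry of the returned covariance matrix.
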
157 changes: 157 additions & 0 deletions src/GaussianProcesses/SparseGaussianProcess.cs
@@ -0,0 +1,157 @@
namespace AiDotNet.GaussianProcesses;

public class SparseGaussianProcess<T> : IGaussianProcess<T>
{
private IKernelFunction<T> _kernel;
private Matrix<T> _X;
private Vector<T> _y;
private Matrix<T> _inducingPoints;
private INumericOperations<T> _numOps;
private Matrix<T> _L;
private Matrix<T> _V;
private Vector<T> _D;
private Vector<T> _alpha;
private readonly MatrixDecompositionType _decompositionType;

public SparseGaussianProcess(IKernelFunction<T> kernel, MatrixDecompositionType decompositionType = MatrixDecompositionType.Cholesky)
{
_kernel = kernel;
_X = Matrix<T>.Empty();
_y = Vector<T>.Empty();
_L = Matrix<T>.Empty();
_V = Matrix<T>.Empty();
_D = Vector<T>.Empty();
_alpha = Vector<T>.Empty();
_inducingPoints = Matrix<T>.Empty();
_decompositionType = decompositionType;
_numOps = MathHelper.GetNumericOperations<T>();
}

public void Fit(Matrix<T> X, Vector<T> y)
{
_X = X;
_y = y;
_inducingPoints = SelectInducingPoints(X);

// Sparse GP training algorithm (Fully Independent Training Conditional - FITC)
var Kuu = CalculateKernelMatrix(_inducingPoints, _inducingPoints);
var Kuf = CalculateKernelMatrix(_inducingPoints, X);
var Kff_diag = CalculateKernelDiagonal(X);

var choleskyKuu = new CholeskyDecomposition<T>(Kuu);
var L = choleskyKuu.L;

// Solve for each column of Kuf separately
var V = new Matrix<T>(Kuu.Rows, Kuf.Columns);
for (int i = 0; i < Kuf.Columns; i++)
{
var column = Kuf.GetColumn(i);
var solvedColumn = choleskyKuu.Solve(column);
V.SetColumn(i, solvedColumn);
}

// Calculate the diagonal of Qff = Kfu * Kuu^{-1} * Kuf
var Qff_diag = new Vector<T>(Kuf.Columns);
for (int i = 0; i < Kuf.Columns; i++)
{
Qff_diag[i] = Kuf.GetColumn(i).DotProduct(V.GetColumn(i));
}

var Lambda = Kff_diag.Subtract(Qff_diag);

var noise = _numOps.FromDouble(1e-6); // Small noise term for numerical stability
var D = Lambda.Add(noise).Transform(v => Reciprocal(v));

var Ky = Kuu.Add(Kuf.Multiply(D.CreateDiagonal()).Multiply(Kuf.Transpose()));
var choleskyKy = new CholeskyDecomposition<T>(Ky);
var alpha = choleskyKy.Solve(Kuf.Multiply(D.CreateDiagonal()).Multiply(y));

// Store necessary components for prediction
_L = L;
_V = V;
_D = D;
_alpha = alpha;
}

private T Reciprocal(T value)
{
return _numOps.Divide(_numOps.One, value);
}

public (T mean, T variance) Predict(Vector<T> x)
{
var Kus = CalculateKernelVector(_inducingPoints, x);
var Kss = _kernel.Calculate(x, x);

var f_mean = Kus.DotProduct(_alpha);

// Approximate predictive variance: Kss - Kus^T * Kuu^{-1} * Kus
var Kuu = CalculateKernelMatrix(_inducingPoints, _inducingPoints);
var v = MatrixSolutionHelper.SolveLinearSystem(Kuu, Kus, _decompositionType);
var f_var = _numOps.Subtract(Kss, Kus.DotProduct(v));

return (f_mean, f_var);
}

public void UpdateKernel(IKernelFunction<T> kernel)
{
_kernel = kernel;
if (!_X.IsEmpty && !_y.IsEmpty)
{
Fit(_X, _y);
}
}

private Matrix<T> SelectInducingPoints(Matrix<T> X)
{
int m = Math.Min(X.Rows, 100); // Number of inducing points, capped at 100 or the number of data points
var indices = new List<int>();
var random = new Random();

while (indices.Count < m)
{
int index = random.Next(0, X.Rows);
if (!indices.Contains(index))
{
indices.Add(index);
}
}

return X.GetRows(indices);
}

private Matrix<T> CalculateKernelMatrix(Matrix<T> X1, Matrix<T> X2)
{
var K = new Matrix<T>(X1.Rows, X2.Rows);
for (int i = 0; i < X1.Rows; i++)
{
for (int j = 0; j < X2.Rows; j++)
{
K[i, j] = _kernel.Calculate(X1.GetRow(i), X2.GetRow(j));
}
}

return K;
}

private Vector<T> CalculateKernelVector(Matrix<T> X, Vector<T> x)
{
var k = new Vector<T>(X.Rows);
for (int i = 0; i < X.Rows; i++)
{
k[i] = _kernel.Calculate(X.GetRow(i), x);
}

return k;
}

private Vector<T> CalculateKernelDiagonal(Matrix<T> X)
{
var diag = new Vector<T>(X.Rows);
for (int i = 0; i < X.Rows; i++)
{
diag[i] = _kernel.Calculate(X.GetRow(i), X.GetRow(i));
}

return diag;
}
}
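
The Fit method above follows the FITC (Fully Independent Training Conditional) construction: with m inducing points u selected from the training data,

Q_{ff} = K_{fu} K_{uu}^{-1} K_{uf}, \qquad \Lambda = \operatorname{diag}(K_{ff} - Q_{ff}) + \sigma_n^2 I, \qquad \alpha = \left(K_{uu} + K_{uf} \Lambda^{-1} K_{fu}\right)^{-1} K_{uf} \Lambda^{-1} y,

and the predictive mean at a test point x_* is k_u(x_*)^\top \alpha. Only the diagonal of Q_{ff} is ever formed, so training costs roughly O(n m^2) instead of the O(n^3) of an exact GP.
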
77 changes: 77 additions & 0 deletions src/GaussianProcesses/StandardGaussianProcess.cs
@@ -0,0 +1,77 @@
namespace AiDotNet.GaussianProcesses;

public class StandardGaussianProcess<T> : IGaussianProcess<T>
{
private IKernelFunction<T> _kernel;
private Matrix<T> _X;
private Vector<T> _y;
private Matrix<T> _K;
private INumericOperations<T> _numOps;
private readonly MatrixDecompositionType _decompositionType;

public StandardGaussianProcess(IKernelFunction<T> kernel, MatrixDecompositionType decompositionType = MatrixDecompositionType.Cholesky)
{
_kernel = kernel;
_X = Matrix<T>.Empty();
_y = Vector<T>.Empty();
_K = Matrix<T>.Empty();
_numOps = MathHelper.GetNumericOperations<T>();
_decompositionType = decompositionType;
}

public void Fit(Matrix<T> X, Vector<T> y)
{
_X = X;
_y = y;
_K = CalculateKernelMatrix(X, X);

// Add a small value to the diagonal for numerical stability
for (int i = 0; i < _K.Rows; i++)
{
_K[i, i] = _numOps.Add(_K[i, i], _numOps.FromDouble(1e-8));
}
}

public (T mean, T variance) Predict(Vector<T> x)
{
var k = CalculateKernelVector(_X, x);

// Solve _K * alpha = _y
var alpha = MatrixSolutionHelper.SolveLinearSystem(_K, _y, _decompositionType);
var mean = k.DotProduct(alpha);

// Solve _K * v = k
var v = MatrixSolutionHelper.SolveLinearSystem(_K, k, _decompositionType);
var variance = _numOps.Subtract(_kernel.Calculate(x, x), k.DotProduct(v));

return (mean, variance);
}

public void UpdateKernel(IKernelFunction<T> kernel)
{
_kernel = kernel;
if (!_X.IsEmpty && !_y.IsEmpty)
{
Fit(_X, _y);
}
}

private Matrix<T> CalculateKernelMatrix(Matrix<T> X1, Matrix<T> X2)
{
var K = new Matrix<T>(X1.Rows, X2.Rows);
for (int i = 0; i < X1.Rows; i++)
{
for (int j = 0; j < X2.Rows; j++)
{
K[i, j] = _kernel.Calculate(X1.GetRow(i), X2.GetRow(j));
}
}

return K;
}

private Vector<T> CalculateKernelVector(Matrix<T> X, Vector<T> x)
{
var k = new Vector<T>(X.Rows);
for (int i = 0; i < X.Rows; i++)
{
k[i] = _kernel.Calculate(X.GetRow(i), x);
}

return k;
}
}
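
Predict above is the textbook GP regression posterior,

\mu(x_*) = k_*^\top K^{-1} y, \qquad \sigma^2(x_*) = k(x_*, x_*) - k_*^\top K^{-1} k_*,

where K = k(X, X) and k_* = k(X, x_*); both linear systems are solved with the configured decomposition (Cholesky by default).
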
