# data.py

import numpy as np
import torch
from scipy.stats import special_ortho_group as rot
from stats_utils import diagonal_matrix


def random_matrix_sv (singular_values, d_input=10, d_output=2, return_svd=False):
    '''
    Generates a random matrix with given singular values

    The matrix is assembled as the product U . S . Vh, where U and Vh are
    random rotations and S is built from `singular_values`.

    - singular_values: values forwarded to `diagonal_matrix`
    - d_input: number of columns of the generated matrix
    - d_output: number of rows of the generated matrix
    - return_svd: if True, also return the factors used to build the matrix

    Returns the (d_output x d_input) matrix, or the tuple
    `(matrix, (U, S, Vh))` when `return_svd` is True. In that case S is the
    matrix produced by `diagonal_matrix`, so that U . S . Vh reproduces the
    returned matrix.
    '''

    # generate random rotation matrices
    # to use as left and right singular vector basis
    # (for a dimension of 1 the 1x1 identity is used instead of sampling)
    U = rot.rvs(d_output) if d_output > 1 else np.array([[1.]])
    Vh = rot.rvs(d_input).T if d_input > 1 else np.array([[1.]])

    # presumably a (d_output x d_input) matrix carrying the singular values
    # on its diagonal -- confirm against stats_utils.diagonal_matrix
    _S = diagonal_matrix(singular_values, d_output, d_input)

    _mat = np.matmul(np.matmul(U, _S), Vh)

    if return_svd:
        # return the local factor `_S`; the bare name `S` is not defined in
        # this function and would only resolve if a global `S` happened to exist
        return _mat, (U, _S, Vh)
    return _mat


class LinearRegressionDataset(torch.utils.data.Dataset):
    '''
    Noise-free linear regression data, to be used through a data loader

    Inputs are drawn from a zero-mean Gaussian and the targets are the
    image of the inputs under the linear map `w_star`.
    - w_star: target linear map; promoted to 2D, its shape is read as
      (output dimension, input dimension) and stored in `self.d`, `self.N`
    - n_samples: number of samples in the dataset
    - cov: covariance of the inputs; identity when None
    '''
    def __init__ (self, w_star, n_samples, cov=None):

        self.w = np.atleast_2d(w_star)
        n_outputs, n_inputs = self.w.shape
        self.d = n_outputs
        self.N = n_inputs

        # fall back to isotropic, unit-variance inputs
        cov = np.eye(n_inputs) if cov is None else cov
        assert len(cov) == self.N, "covariance matrix must have the same dimensions as the input vector"

        zero_mean = np.zeros(n_inputs)
        inputs = np.random.multivariate_normal(zero_mean, cov, size=n_samples)
        outputs = inputs @ self.w.T

        # tensors keep the dtype of the numpy arrays they wrap
        self.X = torch.from_numpy(inputs)
        self.y = torch.from_numpy(outputs)

    @property
    def data(self):
        '''Tensor holding all the inputs, one sample per row'''
        return self.X

    @property
    def targets(self):
        '''Tensor holding all the targets, one sample per row'''
        return self.y

    def __len__ (self):
        return self.X.shape[0]

    def __getitem__ (self, i):
        sample = (self.X[i], self.y[i])
        return sample


class SemanticsDataset (torch.utils.data.Dataset):
    '''
    Gaussian inputs paired with targets from a fixed linear map

    The map is derived from a hard-coded 0/1 input-output covariance matrix
    and the input-input covariance `cov`.
    - n_samples: number of samples in the dataset
    - cov: input-input covariance; identity when None
    '''

    # meaning of each of the input dimensions
    # 0 - grow
    # 1 - move
    # 2 - roots
    # 3 - fly
    # 4 - swims
    # 5 - leaves
    # 6 - petals
    N = 7

    def __init__ (self, n_samples, cov=None):

        # input-input covariance matrix
        cov = np.eye(self.N) if cov is None else cov

        # input-output covariance matrix, transposed to shape (7, 4);
        # presumably one column of the literal per input dimension listed above
        io_rows = [
            [1,1,0,1,0,0,0],
            [1,1,0,0,1,0,0],
            [1,0,1,0,0,1,0],
            [1,0,1,0,0,0,1],
        ]
        IO_cov = np.array(io_rows, dtype=float).T

        # target input-output map: (cov^-1 . IO_cov)^T
        cov_inverse = np.linalg.inv(cov)
        self.w = np.dot(cov_inverse, IO_cov).T

        zero_mean = np.zeros(self.N)
        inputs = np.random.multivariate_normal(zero_mean, cov, size=n_samples)
        outputs = inputs @ self.w.T

        self.X = torch.from_numpy(inputs)
        self.y = torch.from_numpy(outputs)

    @property
    def data(self):
        '''Tensor holding all the inputs, one sample per row'''
        return self.X

    @property
    def targets(self):
        '''Tensor holding all the targets, one sample per row'''
        return self.y

    def __len__ (self):
        return self.X.shape[0]

    def __getitem__ (self, i):
        sample = (self.X[i], self.y[i])
        return sample


if __name__ == "__main__":

    # Smoke test: build a random (d_output x d_input) matrix from chosen
    # singular values and compare it with numpy's own SVD of the result.
    d_input, d_output = 3, 2

    S = np.array([2., 1.])

    w_star, factors = random_matrix_sv(S, d_input=d_input, d_output=d_output, return_svd=True)
    _U, _Vh = factors[0], factors[2]

    print("w_star")
    print(w_star.shape)
    print(w_star)

    # singular values as recovered by numpy (S is rebound here)
    U, S, Vh = np.linalg.svd(w_star)
    print("S")
    print(S)

    # overlap between numpy's left singular vectors and the generating ones
    left_overlap = U.T @ _U
    print(left_overlap)
    print(np.allclose(np.eye(d_output), left_overlap))

    # Gram matrix of the generating right basis
    right_gram = _Vh @ _Vh.T
    print(right_gram)
    print(np.allclose(np.eye(d_input), right_gram))