diff --git a/CHANGELOG.txt b/CHANGELOG.txt index edf2f68..aa3320e 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -1,8 +1,14 @@ Changelog ========= -0.1.4 (xx.xx.xxxx) - IN DEVELOPMENT +0.2.0 (xx.xx.xxxx) - IN DEVELOPMENT +~~~~~~~~~~~~~~~~~~ + +* Added derived parameters +* Added rejection sampling + +0.1.4 (xx.xx.xxxx) ~~~~~~~~~~~~~~~~~~ * Added patience parameter to stop training early when validation loss hasn't improved -* Fixed dynamical stepsize \ No newline at end of file +* Fix: Dynamical stepsize \ No newline at end of file diff --git a/examples/flow/gauss.py b/examples/flow/gauss.py new file mode 100644 index 0000000..d22f108 --- /dev/null +++ b/examples/flow/gauss.py @@ -0,0 +1,68 @@ +import os +import sys +import argparse + +import numpy as np +from scipy.stats import multivariate_normal +import torch +import scipy.special + +sys.path.append(os.getcwd()) + + +def main(args): + + from nnest.trainer import Trainer + from nnest.distributions import GeneralisedNormal + + def loglike(x): + return multivariate_normal.logpdf(x, mean=np.zeros(args.x_dim), cov=np.eye(args.x_dim) + args.corr * (1 - np.eye(args.x_dim))) + + def transform(x): + return 3. * x + + n_samples = args.num_live_points + fraction = args.fraction + + x = 2 * (np.random.uniform(size=(int(n_samples / fraction), 2)) - 0.5) + likes = loglike(transform(x)) + idx = np.argsort(-likes) + samples = x[idx[0:n_samples]] + + if args.base_dist == 'gen_normal': + base_dist = GeneralisedNormal(torch.zeros(args.x_dim), torch.ones(args.x_dim), torch.tensor(args.beta)) + else: + base_dist = None + + t = Trainer(args.x_dim, args.hidden_dim, log_dir=args.log_dir, num_blocks=args.num_blocks, + num_layers=args.num_layers, base_dist=base_dist, scale=args.scale) + t.train(samples, max_iters=args.train_iters) + + +if __name__ == '__main__': + + parser = argparse.ArgumentParser() + + parser.add_argument('--x_dim', type=int, default=2, + help="Dimensionality") + parser.add_argument('--train_iters', type=int, default=1000, + help="number of train iters") + parser.add_argument("--num_live_points", type=int, default=1000) + parser.add_argument('--hidden_dim', type=int, default=128) + parser.add_argument('--num_layers', type=int, default=1) + parser.add_argument('--batch_size', type=int, default=100) + parser.add_argument('-use_gpu', action='store_true') + parser.add_argument('--flow', type=str, default='nvp') + parser.add_argument('--num_blocks', type=int, default=5) + parser.add_argument('--noise', type=float, default=-1) + parser.add_argument('--run_num', type=str, default='') + parser.add_argument('--num_slow', type=int, default=0) + parser.add_argument('--corr', type=float, default=0.99) + parser.add_argument('--log_dir', type=str, default='logs/flow/gauss') + parser.add_argument('--beta', type=float, default=8.0) + parser.add_argument('--base_dist', type=str, default='') + parser.add_argument('--scale', type=str, default='constant') + parser.add_argument('--fraction', type=float, default=0.02) + + args = parser.parse_args() + main(args) diff --git a/examples/flow/himmelblau.py b/examples/flow/himmelblau.py new file mode 100644 index 0000000..81d7f56 --- /dev/null +++ b/examples/flow/himmelblau.py @@ -0,0 +1,68 @@ +import os +import sys +import argparse + +import numpy as np +import torch + +sys.path.append(os.getcwd()) + + +def main(args): + + from nnest.trainer import Trainer + from nnest.distributions import GeneralisedNormal + + def loglike(z): + z1 = z[:, 0] + z2 = z[:, 1] + return - (z1 ** 2 + z2 - 11.) ** 2 - (z1 + z2 ** 2 - 7.) 
** 2 + + def transform(x): + return 5. * x + + n_samples = args.num_live_points + fraction = args.fraction + + x = 2 * (np.random.uniform(size=(int(n_samples / fraction), 2)) - 0.5) + likes = loglike(transform(x)) + idx = np.argsort(-likes) + samples = x[idx[0:n_samples]] + + if args.base_dist == 'gen_normal': + base_dist = GeneralisedNormal(torch.zeros(args.x_dim), torch.ones(args.x_dim), torch.tensor(args.beta)) + else: + base_dist = None + + t = Trainer(args.x_dim, args.hidden_dim, log_dir=args.log_dir, num_blocks=args.num_blocks, + num_layers=args.num_layers, base_dist=base_dist, scale=args.scale) + t.train(samples, max_iters=args.train_iters) + + +if __name__ == '__main__': + + parser = argparse.ArgumentParser() + + parser.add_argument('--x_dim', type=int, default=2, + help="Dimensionality") + parser.add_argument('--train_iters', type=int, default=1000, + help="number of train iters") + parser.add_argument("--num_live_points", type=int, default=1000) + parser.add_argument('--hidden_dim', type=int, default=128) + parser.add_argument('--num_layers', type=int, default=1) + parser.add_argument('--batch_size', type=int, default=100) + parser.add_argument('-use_gpu', action='store_true') + parser.add_argument('--flow', type=str, default='nvp') + parser.add_argument('--num_blocks', type=int, default=5) + parser.add_argument('--noise', type=float, default=-1) + parser.add_argument('--run_num', type=str, default='') + parser.add_argument('--num_slow', type=int, default=0) + parser.add_argument('--corr', type=float, default=0.99) + parser.add_argument('--log_dir', type=str, default='logs/flow/himmelblau') + parser.add_argument('--beta', type=float, default=8.0) + parser.add_argument('--base_dist', type=str, default='') + parser.add_argument('--scale', type=str, default='constant') + parser.add_argument('--fraction', type=float, default=0.02) + + args = parser.parse_args() + main(args) diff --git a/examples/flow/mog4.py b/examples/flow/mog4.py new file mode 100644 index 0000000..544c126 --- /dev/null +++ b/examples/flow/mog4.py @@ -0,0 +1,121 @@ +import os +import sys +import argparse + +import numpy as np +import torch + +sys.path.append(os.getcwd()) + + +def log_gaussian_pdf(theta, sigma=1, mu=0, ndim=None): + if ndim is None: + try: + ndim = len(theta) + except TypeError: + assert isinstance(theta, (float, int)), theta + ndim = 1 + logl = -(np.sum((theta - mu) ** 2) / (2 * sigma ** 2)) + logl -= np.log(2 * np.pi * (sigma ** 2)) * ndim / 2.0 + return logl + + +class Gaussian(object): + + def __init__(self, sigma=1.0, nderived=0): + self.sigma = sigma + self.nderived = nderived + + def __call__(self, theta): + logl = log_gaussian_pdf(theta, sigma=self.sigma, mu=0) + return logl, [0.0] * self.nderived + + +class GaussianMix(object): + + def __init__(self, sep=4, weights=(0.4, 0.3, 0.2, 0.1), sigma=1, + nderived=0): + assert len(weights) in [2, 3, 4], ( + 'Weights must have 2, 3 or 4 components. Weights=' + str(weights)) + assert np.isclose(sum(weights), 1), ( + 'Weights must sum to 1! 
Weights=' + str(weights)) + self.nderived = nderived + self.weights = weights + self.sigmas = [sigma] * len(weights) + positions = [] + positions.append(np.asarray([0, sep])) + positions.append(np.asarray([0, -sep])) + positions.append(np.asarray([sep, 0])) + positions.append(np.asarray([-sep, 0])) + self.positions = positions[:len(weights)] + + def __call__(self, theta): + thetas = [] + for pos in self.positions: + thetas.append(copy.deepcopy(theta)) + thetas[-1][:2] -= pos + logls = [(Gaussian(sigma=self.sigmas[i])(thetas[i])[0] + + np.log(self.weights[i])) for i in range(len(self.weights))] + logl = scipy.special.logsumexp(logls) + return logl, [0.0] * self.nderived + + + +def main(args): + + from nnest.trainer import Trainer + from nnest.distributions import GeneralisedNormal + + g = GaussianMix() + + def loglike(z): + return np.array([g(x)[0] for x in z]) + + def transform(x): + return 10. * x + + n_samples = args.num_live_points + fraction = args.fraction + + x = 2 * (np.random.uniform(size=(int(n_samples / fraction), 2)) - 0.5) + likes = loglike(transform(x)) + idx = np.argsort(-likes) + samples = x[idx[0:n_samples]] + + if args.base_dist == 'gen_normal': + base_dist = GeneralisedNormal(torch.zeros(args.x_dim), torch.ones(args.x_dim), torch.tensor(args.beta)) + else: + base_dist = None + + t = Trainer(args.x_dim, args.hidden_dim, log_dir=args.log_dir, num_blocks=args.num_blocks, + num_layers=args.num_layers, base_dist=base_dist, scale=args.scale) + t.train(samples, max_iters=args.train_iters) + + +if __name__ == '__main__': + + parser = argparse.ArgumentParser() + + parser.add_argument('--x_dim', type=int, default=2, + help="Dimensionality") + parser.add_argument('--train_iters', type=int, default=1000, + help="number of train iters") + parser.add_argument("--num_live_points", type=int, default=1000) + parser.add_argument('--hidden_dim', type=int, default=128) + parser.add_argument('--num_layers', type=int, default=1) + parser.add_argument('--batch_size', type=int, default=100) + parser.add_argument('-use_gpu', action='store_true') + parser.add_argument('--flow', type=str, default='nvp') + parser.add_argument('--num_blocks', type=int, default=5) + parser.add_argument('--noise', type=float, default=-1) + parser.add_argument('--run_num', type=str, default='') + parser.add_argument('--num_slow', type=int, default=0) + parser.add_argument('--corr', type=float, default=0.99) + parser.add_argument('--log_dir', type=str, default='logs/flow/gauss') + parser.add_argument('--beta', type=float, default=8.0) + parser.add_argument('--base_dist', type=str, default='') + parser.add_argument('--scale', type=str, default='constant') + parser.add_argument('--fraction', type=float, default=0.02) + + args = parser.parse_args() + main(args) diff --git a/examples/flow/rosenbrock.py b/examples/flow/rosenbrock.py new file mode 100644 index 0000000..7af7bf4 --- /dev/null +++ b/examples/flow/rosenbrock.py @@ -0,0 +1,66 @@ +import os +import sys +import argparse + +import numpy as np +import torch + +sys.path.append(os.getcwd()) + + +def main(args): + + from nnest.trainer import Trainer + from nnest.distributions import GeneralisedNormal + + def loglike(z): + return np.array([-sum(100.0 * (x[1:] - x[:-1] ** 2.0) ** 2.0 + (1 - x[:-1]) ** 2.0) for x in z]) + + def transform(x): + return 5. 
* x + + n_samples = args.num_live_points + fraction = args.fraction + + x = 2 * (np.random.uniform(size=(int(n_samples / fraction), 2)) - 0.5) + likes = loglike(transform(x)) + idx = np.argsort(-likes) + samples = x[idx[0:n_samples]] + + if args.base_dist == 'gen_normal': + base_dist = GeneralisedNormal(torch.zeros(args.x_dim), torch.ones(args.x_dim), torch.tensor(args.beta)) + else: + base_dist = None + + t = Trainer(args.x_dim, args.hidden_dim, log_dir=args.log_dir, num_blocks=args.num_blocks, + num_layers=args.num_layers, base_dist=base_dist, scale=args.scale) + t.train(samples, max_iters=args.train_iters) + + +if __name__ == '__main__': + + parser = argparse.ArgumentParser() + + parser.add_argument('--x_dim', type=int, default=2, + help="Dimensionality") + parser.add_argument('--train_iters', type=int, default=1000, + help="number of train iters") + parser.add_argument("--num_live_points", type=int, default=1000) + parser.add_argument('--hidden_dim', type=int, default=128) + parser.add_argument('--num_layers', type=int, default=1) + parser.add_argument('--batch_size', type=int, default=100) + parser.add_argument('-use_gpu', action='store_true') + parser.add_argument('--flow', type=str, default='nvp') + parser.add_argument('--num_blocks', type=int, default=5) + parser.add_argument('--noise', type=float, default=-1) + parser.add_argument('--run_num', type=str, default='') + parser.add_argument('--num_slow', type=int, default=0) + parser.add_argument('--corr', type=float, default=0.99) + parser.add_argument('--log_dir', type=str, default='logs/flow/rosenbrock') + parser.add_argument('--beta', type=float, default=8.0) + parser.add_argument('--base_dist', type=str, default='') + parser.add_argument('--scale', type=str, default='constant') + parser.add_argument('--fraction', type=float, default=0.02) + + args = parser.parse_args() + main(args) diff --git a/examples/nested/gauss.py b/examples/nested/gauss.py index 0ec17f2..47b7035 100644 --- a/examples/nested/gauss.py +++ b/examples/nested/gauss.py @@ -4,6 +4,7 @@ import numpy as np from scipy.stats import multivariate_normal +import torch sys.path.append(os.getcwd()) @@ -11,6 +12,7 @@ def main(args): from nnest import NestedSampler + from nnest.distributions import GeneralisedNormal def loglike(x): return multivariate_normal.logpdf(x, mean=np.zeros(args.x_dim), cov=np.eye(args.x_dim) + args.corr * (1 - np.eye(args.x_dim))) @@ -18,9 +20,14 @@ def loglike(x): def transform(x): return 3. 
* x + if args.base_dist == 'gen_normal': + base_dist = GeneralisedNormal(torch.zeros(args.x_dim), torch.ones(args.x_dim), torch.tensor(args.beta)) + else: + base_dist = None + sampler = NestedSampler(args.x_dim, loglike, transform=transform, log_dir=args.log_dir, num_live_points=args.num_live_points, hidden_dim=args.hidden_dim, num_layers=args.num_layers, num_blocks=args.num_blocks, num_slow=args.num_slow, - use_gpu=args.use_gpu) + use_gpu=args.use_gpu, base_dist=base_dist, scale=args.scale) sampler.run(train_iters=args.train_iters, mcmc_steps=args.mcmc_steps, volume_switch=args.switch, noise=args.noise) @@ -46,6 +53,11 @@ def transform(x): parser.add_argument('--num_slow', type=int, default=0) parser.add_argument('--corr', type=float, default=0.99) parser.add_argument('--log_dir', type=str, default='logs/gauss') + parser.add_argument('--base_dist', type=str, default='') + parser.add_argument('--scale', type=str, default='constant') + parser.add_argument('--beta', type=float, default=8.0) args = parser.parse_args() main(args) + + print('Expected log Z: %5.4f' % (args.x_dim * np.log(6))) diff --git a/examples/nested/himmelblau.py b/examples/nested/himmelblau.py index 5755185..ac90505 100644 --- a/examples/nested/himmelblau.py +++ b/examples/nested/himmelblau.py @@ -2,12 +2,15 @@ import sys import argparse +import torch + sys.path.append(os.getcwd()) def main(args): from nnest import NestedSampler + from nnest.distributions import GeneralisedNormal def loglike(z): z1 = z[:, 0] @@ -17,11 +20,17 @@ def loglike(z): def transform(x): return 5. * x + if args.base_dist == 'gen_normal': + base_dist = GeneralisedNormal(torch.zeros(args.x_dim), torch.ones(args.x_dim), torch.tensor(args.beta)) + else: + base_dist = None + sampler = NestedSampler(args.x_dim, loglike, transform=transform, log_dir=args.log_dir, num_live_points=args.num_live_points, hidden_dim=args.hidden_dim, num_layers=args.num_layers, num_blocks=args.num_blocks, num_slow=args.num_slow, - use_gpu=args.use_gpu) + use_gpu=args.use_gpu, base_dist=base_dist, scale=args.scale) sampler.run(train_iters=args.train_iters, mcmc_steps=args.mcmc_steps, volume_switch=args.switch, noise=args.noise, - num_test_samples=args.test_samples, test_mcmc_steps=args.test_mcmc_steps) + num_test_mcmc_samples=args.test_samples, test_mcmc_steps=args.test_mcmc_steps) + if __name__ == '__main__': @@ -46,6 +55,9 @@ def transform(x): parser.add_argument('--run_num', type=str, default='') parser.add_argument('--num_slow', type=int, default=0) parser.add_argument('--log_dir', type=str, default='logs/himmelblau') + parser.add_argument('--base_dist', type=str, default='') + parser.add_argument('--scale', type=str, default='constant') + parser.add_argument('--beta', type=float, default=8.0) args = parser.parse_args() main(args) diff --git a/examples/nested/mog4.py b/examples/nested/mog4.py index d874e34..216a3bb 100644 --- a/examples/nested/mog4.py +++ b/examples/nested/mog4.py @@ -5,6 +5,7 @@ import numpy as np import scipy.special +import torch sys.path.append(os.getcwd()) @@ -34,12 +35,9 @@ def __call__(self, theta): class GaussianMix(object): - def __init__(self, sep=4, weights=(0.4, 0.3, 0.2, 0.1), sigma=1, - nderived=0): - assert len(weights) in [2, 3, 4], ( - 'Weights must have 2, 3 or 4 components. Weights=' + str(weights)) - assert np.isclose(sum(weights), 1), ( - 'Weights must sum to 1! 
Weights=' + str(weights)) + def __init__(self, sep=4, weights=(0.4, 0.3, 0.2, 0.1), sigma=1, nderived=0): + assert len(weights) in [2, 3, 4], ('Weights must have 2, 3 or 4 components. Weights=' + str(weights)) + assert np.isclose(sum(weights), 1), ('Weights must sum to 1! Weights=' + str(weights)) self.nderived = nderived self.weights = weights self.sigmas = [sigma] * len(weights) @@ -64,6 +62,7 @@ def __call__(self, theta): def main(args): from nnest import NestedSampler + from nnest.distributions import GeneralisedNormal g = GaussianMix() @@ -73,9 +72,14 @@ def loglike(z): def transform(x): return 10. * x + if args.base_dist == 'gen_normal': + base_dist = GeneralisedNormal(torch.zeros(args.x_dim), torch.ones(args.x_dim), torch.tensor(args.beta)) + else: + base_dist = None + sampler = NestedSampler(args.x_dim, loglike, transform=transform, log_dir=args.log_dir, num_live_points=args.num_live_points, hidden_dim=args.hidden_dim, num_layers=args.num_layers, num_blocks=args.num_blocks, num_slow=args.num_slow, - use_gpu=args.use_gpu) + use_gpu=args.use_gpu, base_dist=base_dist, scale=args.scale) sampler.run(train_iters=args.train_iters, mcmc_steps=args.mcmc_steps, volume_switch=args.switch, noise=args.noise) @@ -101,6 +105,11 @@ def transform(x): parser.add_argument('--run_num', type=str, default='') parser.add_argument('--num_slow', type=int, default=0) parser.add_argument('--log_dir', type=str, default='logs/mog4') + parser.add_argument('--base_dist', type=str, default='') + parser.add_argument('--scale', type=str, default='constant') + parser.add_argument('--beta', type=float, default=8.0) args = parser.parse_args() main(args) + + print('Expected log Z: %5.4f' % (args.x_dim * np.log(20))) diff --git a/examples/nested/mog4_derived.py b/examples/nested/mog4_derived.py new file mode 100644 index 0000000..dd962c1 --- /dev/null +++ b/examples/nested/mog4_derived.py @@ -0,0 +1,117 @@ +import os +import sys +import argparse +import copy + +import numpy as np +import scipy.special +import torch + +sys.path.append(os.getcwd()) + + +def log_gaussian_pdf(theta, sigma=1, mu=0, ndim=None): + if ndim is None: + try: + ndim = len(theta) + except TypeError: + assert isinstance(theta, (float, int)), theta + ndim = 1 + logl = -(np.sum((theta - mu) ** 2) / (2 * sigma ** 2)) + logl -= np.log(2 * np.pi * (sigma ** 2)) * ndim / 2.0 + return logl + + +class Gaussian(object): + + def __init__(self, sigma=1.0, nderived=0): + self.sigma = sigma + self.nderived = nderived + + def __call__(self, theta): + logl = log_gaussian_pdf(theta, sigma=self.sigma, mu=0) + return logl, [0.0] * self.nderived + + +class GaussianMix(object): + + def __init__(self, sep=4, weights=(0.4, 0.3, 0.2, 0.1), sigma=1, nderived=0): + assert len(weights) in [2, 3, 4], ('Weights must have 2, 3 or 4 components. Weights=' + str(weights)) + assert np.isclose(sum(weights), 1), ('Weights must sum to 1! 
Weights=' + str(weights)) + self.nderived = nderived + self.weights = weights + self.sigmas = [sigma] * len(weights) + positions = [] + positions.append(np.asarray([0, sep])) + positions.append(np.asarray([0, -sep])) + positions.append(np.asarray([sep, 0])) + positions.append(np.asarray([-sep, 0])) + self.positions = positions[:len(weights)] + + def __call__(self, theta): + thetas = [] + for pos in self.positions: + thetas.append(copy.deepcopy(theta)) + thetas[-1][:2] -= pos + logls = [(Gaussian(sigma=self.sigmas[i])(thetas[i])[0] + + np.log(self.weights[i])) for i in range(len(self.weights))] + logl = scipy.special.logsumexp(logls) + return logl, [0.0] * self.nderived + + +def main(args): + + from nnest import NestedSampler + from nnest.distributions import GeneralisedNormal + + g = GaussianMix(nderived=args.num_derived) + + def loglike(z): + return np.array([g(x)[0] for x in z]), np.array([g(x)[1] for x in z]) + + def transform(x): + return 10. * x + + if args.base_dist == 'gen_normal': + base_dist = GeneralisedNormal(torch.zeros(args.x_dim), torch.ones(args.x_dim), torch.tensor(args.beta)) + else: + base_dist = None + + sampler = NestedSampler(args.x_dim, loglike, transform=transform, log_dir=args.log_dir, num_live_points=args.num_live_points, + hidden_dim=args.hidden_dim, num_layers=args.num_layers, num_blocks=args.num_blocks, + num_slow=args.num_slow, num_derived=args.num_derived, + use_gpu=args.use_gpu, base_dist=base_dist, scale=args.scale) + sampler.run(train_iters=args.train_iters, mcmc_steps=args.mcmc_steps, volume_switch=args.switch, noise=args.noise) + + +if __name__ == '__main__': + + parser = argparse.ArgumentParser() + + parser.add_argument('--x_dim', type=int, default=5, + help="Dimensionality") + parser.add_argument('--train_iters', type=int, default=2000, + help="number of train iters") + parser.add_argument("--mcmc_steps", type=int, default=0) + parser.add_argument("--num_live_points", type=int, default=1000) + parser.add_argument('--switch', type=float, default=-1) + parser.add_argument('--hidden_dim', type=int, default=128) + parser.add_argument('--num_layers', type=int, default=2) + parser.add_argument('--batch_size', type=int, default=100) + parser.add_argument('-use_gpu', action='store_true') + parser.add_argument('--flow', type=str, default='nvp') + parser.add_argument('--num_blocks', type=int, default=5) + parser.add_argument('--noise', type=float, default=-1) + parser.add_argument("--test_samples", type=int, default=0) + parser.add_argument('--run_num', type=str, default='') + parser.add_argument('--num_slow', type=int, default=0) + parser.add_argument('--num_derived', type=int, default=1) + parser.add_argument('--log_dir', type=str, default='logs/mog4') + parser.add_argument('--base_dist', type=str, default='') + parser.add_argument('--scale', type=str, default='constant') + parser.add_argument('--beta', type=float, default=8.0) + + args = parser.parse_args() + main(args) + + print('Expected log Z: %5.4f' % (args.x_dim * np.log(20))) diff --git a/examples/nested/mog4_fast.py b/examples/nested/mog4_fast.py index 9066749..0cc2914 100644 --- a/examples/nested/mog4_fast.py +++ b/examples/nested/mog4_fast.py @@ -34,12 +34,9 @@ def __call__(self, theta): class GaussianMix(object): - def __init__(self, sep=4, weights=(0.4, 0.3, 0.2, 0.1), sigma=1, - nderived=0): - assert len(weights) in [2, 3, 4], ( - 'Weights must have 2, 3 or 4 components. Weights=' + str(weights)) - assert np.isclose(sum(weights), 1), ( - 'Weights must sum to 1! 
Weights=' + str(weights)) + def __init__(self, sep=4, weights=(0.4, 0.3, 0.2, 0.1), sigma=1, nderived=0): + assert len(weights) in [2, 3, 4], ('Weights must have 2, 3 or 4 components. Weights=' + str(weights)) + assert np.isclose(sum(weights), 1), ('Weights must sum to 1! Weights=' + str(weights)) self.nderived = nderived self.weights = weights self.sigmas = [sigma] * len(weights) diff --git a/examples/nested/rosenbrock.py b/examples/nested/rosenbrock.py index 4d9d7c3..b20bfca 100644 --- a/examples/nested/rosenbrock.py +++ b/examples/nested/rosenbrock.py @@ -3,12 +3,15 @@ import argparse import numpy as np +import torch sys.path.append(os.getcwd()) def main(args): + from nnest import NestedSampler + from nnest.distributions import GeneralisedNormal def loglike(z): return np.array([-sum(100.0 * (x[1:] - x[:-1] ** 2.0) ** 2.0 + (1 - x[:-1]) ** 2.0) for x in z]) @@ -16,11 +19,16 @@ def loglike(z): def transform(x): return 5. * x + if args.base_dist == 'gen_normal': + base_dist = GeneralisedNormal(torch.zeros(args.x_dim), torch.ones(args.x_dim), torch.tensor(args.beta)) + else: + base_dist = None + sampler = NestedSampler(args.x_dim, loglike, transform=transform, log_dir=args.log_dir, num_live_points=args.num_live_points, hidden_dim=args.hidden_dim, num_layers=args.num_layers, num_blocks=args.num_blocks, num_slow=args.num_slow, - use_gpu=args.use_gpu) + use_gpu=args.use_gpu, base_dist=base_dist, scale=args.scale) sampler.run(train_iters=args.train_iters, mcmc_steps=args.mcmc_steps, volume_switch=args.switch, noise=args.noise, - num_test_samples=args.test_samples, test_mcmc_steps=args.test_mcmc_steps) + num_test_mcmc_samples=args.test_samples, test_mcmc_steps=args.test_mcmc_steps) if __name__ == '__main__': @@ -44,6 +52,9 @@ def transform(x): parser.add_argument('--run_num', type=str, default='') parser.add_argument('--num_slow', type=int, default=0) parser.add_argument('--log_dir', type=str, default='logs/rosenbrock') + parser.add_argument('--base_dist', type=str, default='') + parser.add_argument('--scale', type=str, default='constant') + parser.add_argument('--beta', type=float, default=8.0) args = parser.parse_args() main(args) diff --git a/nnest/__init__.py b/nnest/__init__.py index 98ee16d..5e39b62 100644 --- a/nnest/__init__.py +++ b/nnest/__init__.py @@ -1,4 +1,4 @@ -__version__ = '0.1.4' +__version__ = '0.2.0' from nnest.nested import NestedSampler -from nnest.mcmc import MCMCSampler +from nnest.mcmc import MCMCSampler \ No newline at end of file diff --git a/nnest/distributions/__init__.py b/nnest/distributions/__init__.py new file mode 100644 index 0000000..5a460b2 --- /dev/null +++ b/nnest/distributions/__init__.py @@ -0,0 +1 @@ +from .generalised_normal import GeneralisedNormal \ No newline at end of file diff --git a/nnest/distributions/generalised_normal.py b/nnest/distributions/generalised_normal.py new file mode 100644 index 0000000..29cab4c --- /dev/null +++ b/nnest/distributions/generalised_normal.py @@ -0,0 +1,83 @@ +import math +from numbers import Number + +import torch +from torch.distributions import constraints +from torch.distributions.exp_family import ExponentialFamily +from torch.distributions.utils import _standard_normal, broadcast_all +from scipy.stats import gennorm +import numpy as np + + +class GeneralisedNormal(ExponentialFamily): + r""" + Creates a generalised normal (also called Gaussian) distribution parameterized by + :attr:`loc` and :attr:`scale` and :attr:`beta`. 
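+    The density is p(x) = beta / (2 * scale * Gamma(1 / beta)) * exp(-(|x - loc| / scale) ** beta);
+    it reduces to the normal distribution for beta = 2 and to the Laplace distribution for beta = 1.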
+ Example:: + >>> m = GeneralisedNormal(torch.tensor([0.0]), torch.tensor([1.0]), 2) + Args: + loc (float or Tensor): mean of the distribution (often referred to as mu) + scale (float or Tensor): + beta (float or Tensor): + """ + arg_constraints = {'loc': constraints.real, 'scale': constraints.positive, 'beta': constraints.real} + support = constraints.real + has_rsample = True + _mean_carrier_measure = 0 + + @property + def mean(self): + return self.loc + + @property + def stddev(self): + return self.scale + + @property + def variance(self): + return self.stddev.pow(2) + + def __init__(self, loc, scale, beta, validate_args=None): + self.loc, self.scale = broadcast_all(loc, scale) + self.beta = beta + if isinstance(loc, Number) and isinstance(scale, Number): + batch_shape = torch.Size() + else: + batch_shape = self.loc.size() + super(GeneralisedNormal, self).__init__(batch_shape, validate_args=validate_args) + + def sample(self, sample_shape=torch.Size()): + shape = self._extended_shape(sample_shape) + with torch.no_grad(): + return torch.tensor(gennorm.rvs(self.beta, size=shape), dtype=torch.float32, device=self.loc.device) + + def rsample(self, sample_shape=torch.Size()): + raise NotImplementedError + + def usample(self, sample_shape=torch.Size()): + shape = self._extended_shape(sample_shape) + return 2 * (np.random.uniform(size=shape) - 0.5) + + def log_prob(self, value): + if self._validate_args: + self._validate_sample(value) + log_scale = math.log(self.scale) if isinstance(self.scale, Number) else self.scale.log() + log_beta = math.log(self.beta) if isinstance(self.beta, Number) else self.beta.log() + log_gamma = math.log(1.0 / self.beta) if isinstance(self.beta, Number) else torch.mvlgamma(1.0 / self.beta, 1) + return -((torch.abs(value - self.loc) / (self.scale)) ** self.beta) + log_beta - log_scale - math.log(2) - log_gamma + + def cdf(self, value): + raise NotImplementedError + + def icdf(self, value): + raise NotImplementedError + + def entropy(self): + raise NotImplementedError + + @property + def _natural_params(self): + raise NotImplementedError + + def _log_normalizer(self, x, y): + raise NotImplementedError diff --git a/nnest/mcmc.py b/nnest/mcmc.py index c7cc00e..05f7623 100644 --- a/nnest/mcmc.py +++ b/nnest/mcmc.py @@ -34,15 +34,17 @@ def __init__(self, num_blocks=5, num_layers=2, log_dir='logs/test', + base_dist=None, + scale='', use_gpu=False): self.sampler = 'mcmc' super(MCMCSampler, self).__init__(x_dim, loglike, transform=transform, append_run_num=append_run_num, - run_num=run_num, hidden_dim=hidden_dim, num_slow=num_slow, + run_num=run_num, hidden_dim=hidden_dim, num_slow=num_slow, num_derived=num_derived, batch_size=batch_size, flow=flow, num_blocks=num_blocks, num_layers=num_layers, log_dir=log_dir, - use_gpu=use_gpu) + use_gpu=use_gpu, base_dist=base_dist, scale=scale) def _init_samples(self, mcmc_steps=5000, mcmc_batch_size=5, ignore_rows=0.3): u = 2 * (np.random.uniform(size=(mcmc_batch_size, self.x_dim)) - 0.5) @@ -128,7 +130,7 @@ def run( mc = self._init_samples(mcmc_steps=bootstrap_mcmc_steps, mcmc_batch_size=bootstrap_batch_size, ignore_rows=ignore_rows) else: - samples, likes, scale, nc = self.trainer.sample( + samples, likes, scale, nc = self.trainer.mcmc_sample( loglike=self.loglike, transform=transform, mcmc_steps=bootstrap_mcmc_steps, alpha=alpha, dynamic=False, show_progress=True) samples = transform(samples) @@ -148,7 +150,7 @@ def run( def transform(x): return x * std + mean - samples, likes, scale, nc = self.trainer.sample( + samples, likes, 
scale, nc = self.trainer.mcmc_sample( loglike=self.loglike, transform=transform, mcmc_steps=mcmc_steps, alpha=alpha, dynamic=False, show_progress=True, out_chain=os.path.join(self.logs['chains'], 'chain')) diff --git a/nnest/nested.py b/nnest/nested.py index 78b0dfd..74eff90 100644 --- a/nnest/nested.py +++ b/nnest/nested.py @@ -34,6 +34,8 @@ def __init__(self, num_layers=2, log_dir='logs/test', use_gpu=False, + base_dist=None, + scale='', num_live_points=1000 ): @@ -41,10 +43,10 @@ def __init__(self, self.sampler = 'nested' super(NestedSampler, self).__init__(x_dim, loglike, transform=transform, append_run_num=append_run_num, - run_num=run_num, hidden_dim=hidden_dim, num_slow=num_slow, + run_num=run_num, hidden_dim=hidden_dim, num_slow=num_slow, num_derived=num_derived, batch_size=batch_size, flow=flow, num_blocks=num_blocks, num_layers=num_layers, log_dir=log_dir, - use_gpu=use_gpu) + use_gpu=use_gpu, base_dist=base_dist, scale=scale) if self.log: self.logger.info('Num live points [%d]' % (self.num_live_points)) @@ -57,6 +59,7 @@ def __init__(self, def run( self, + method=None, mcmc_steps=0, mcmc_burn_in=0, mcmc_batch_size=10, @@ -64,14 +67,17 @@ def run( update_interval=None, log_interval=None, dlogz=0.5, - train_iters=50, - volume_switch=0, + train_iters=500, + volume_switch=-1.0, alpha=0.0, noise=-1.0, - num_test_samples=0, + num_test_mcmc_samples=0, test_mcmc_steps=1000, test_mcmc_burn_in=0): + if method is None: + method = 'rejection_prior' + if update_interval is None: update_interval = max(1, round(self.num_live_points)) else: @@ -89,14 +95,13 @@ def run( if mcmc_steps <= 0: mcmc_steps = 5 * self.x_dim - if volume_switch <= 0: - volume_switch = 1 / mcmc_steps - if alpha == 0.0: alpha = 2 / self.x_dim ** 0.5 if self.log: - self.logger.info('MCMC steps [%d] alpha [%5.4f] volume switch [%5.4f]' % (mcmc_steps, alpha, volume_switch)) + self.logger.info('MCMC steps [%d]' % mcmc_steps) + self.logger.info('Initial scale [%5.4f]' % alpha) + self.logger.info('Volume switch [%5.4f]' % volume_switch) if self.use_mpi: self.logger.info('Using MPI with rank [%d]' % (self.mpi_rank)) @@ -122,7 +127,7 @@ def run( recv_active_logl = self.comm.bcast(recv_active_logl, root=0) active_logl = np.concatenate(recv_active_logl, axis=0) else: - active_logl = self.loglike(active_v) + active_logl, active_derived = self.loglike(active_v) saved_v = [] # Stored points for posterior results saved_logl = [] @@ -134,6 +139,8 @@ def run( ncall = self.num_live_points # number of calls we already made first_time = True nb = self.mpi_size * mcmc_batch_size + rejection_sample = volume_switch < 1 + ncs = [] for it in range(0, max_iters): @@ -147,7 +154,11 @@ def run( logz = logz_new # Add worst object to samples. - saved_v.append(np.array(active_v[worst])) + + if self.num_derived > 0: + saved_v.append(np.concatenate((active_v[worst], active_derived[worst]))) + else: + saved_v.append(np.array(active_v[worst], copy=True)) saved_logwt.append(logwt) saved_logl.append(active_logl[worst]) @@ -156,17 +167,47 @@ def run( # The new likelihood constraint is that of the worst object. 
loglstar = active_logl[worst] - if expected_vol > volume_switch: + # Train flow + if first_time or it % update_interval == 0: + self.trainer.train(active_u, max_iters=train_iters, noise=noise) + if num_test_mcmc_samples > 0: + # Test multiple chains from worst point to check mixing + init_x = np.concatenate( + [active_u[worst:worst + 1, :] for i in range(num_test_mcmc_samples)]) + test_samples, _, _, _, scale, _ = self.trainer.mcmc_sample( + self.loglike, init_x=init_x, loglstar=loglstar, + transform=self.transform, mcmc_steps=test_mcmc_steps + test_mcmc_burn_in, + max_prior=1, alpha=alpha) + np.save(os.path.join(self.logs['chains'], 'test_samples.npy'), test_samples) + self._chain_stats(test_samples, mean=np.mean(active_u, axis=0), std=np.std(active_u, axis=0)) + first_time = False + + if method == 'rejection_prior' or method == 'rejection_flow': + + # Rejection sampling nc = 0 - # Simple rejection sampling over prior - while True: - u = 2 * (np.random.uniform(size=(1, self.x_dim)) - 0.5) - v = self.transform(u) - logl = self.loglike(v) - nc += 1 - if logl > loglstar: - break + + if method == 'rejection_prior': + + # Simple rejection sampling over prior + while True: + u = 2 * (np.random.uniform(size=(1, self.x_dim)) - 0.5) + v = self.transform(u) + logl, der = self.loglike(v) + nc += 1 + if logl > loglstar: + break + + else: + + # Rejection sampling using flow + u, logl, nc = self.trainer.rejection_sample(self.loglike, loglstar, transform=self.transform, + init_x=active_u, max_prior=1) + + ncs.append(nc) + mean_calls = np.mean(ncs[-10:]) + if self.use_mpi: recv_samples = self.comm.gather(u, root=0) recv_likes = self.comm.gather(logl, root=0) @@ -179,36 +220,29 @@ def run( ncall += sum(recv_nc) else: samples = np.array(u) + derived_samples = np.array(der) likes = np.array(logl) ncall += nc for ib in range(0, self.mpi_size): active_u[worst] = samples[ib, :] active_v[worst] = self.transform(active_u[worst]) active_logl[worst] = likes[ib] + if self.num_derived > 0: + active_derived[worst] = derived_samples[ib, :] if it % log_interval == 0 and self.log: self.logger.info( - 'Step [%d] loglstar [%5.4f] max logl [%5.4f] logz [%5.4f] vol [%6.5f] ncalls [%d]]' % - (it, loglstar, np.max(active_logl), logz, expected_vol, ncall)) + 'Step [%d] loglstar [%5.4e] max logl [%5.4e] logz [%5.4e] vol [%6.5e] ncalls [%d] mean ' + 'calls [%5.4f]' % (it, loglstar, np.max(active_logl), logz, expected_vol, ncall, mean_calls)) - else: + if expected_vol < volume_switch >= 0 or (volume_switch < 0 and mean_calls > mcmc_steps): + # Switch to MCMC if rejection sampling becomes inefficient + self.logger.info('Switching to MCMC sampling') + method = 'mcmc' - # MCMC - if first_time or it % update_interval == 0: - self.trainer.train(active_u, max_iters=train_iters, noise=noise) - if num_test_samples > 0: - # Test multiple chains from worst point to check mixing - init_x = np.concatenate( - [active_u[worst:worst + 1, :] for i in range(num_test_samples)]) - logl = np.concatenate( - [active_logl[worst:worst + 1] for i in range(num_test_samples)]) - test_samples, _, scale, _ = self.trainer.sample( - loglike=self.loglike, init_x=init_x, logl=logl, loglstar=loglstar, - transform=self.transform, mcmc_steps=test_mcmc_steps + test_mcmc_burn_in, - max_prior=1, alpha=alpha) - np.save(os.path.join(self.logs['chains'], 'test_samples.npy'), test_samples) - self._chain_stats(test_samples, mean=np.mean(active_u, axis=0), std=np.std(active_u, axis=0)) - first_time = False + elif method == 'mcmc': + + # MCMC sampling accept = False while 
not accept: @@ -218,9 +252,8 @@ def run( idx = np.random.randint( low=0, high=self.num_live_points, size=mcmc_batch_size) init_x = active_u[idx, :] - logl = active_logl[idx] - samples, likes, scale, nc = self.trainer.sample( - loglike=self.loglike, init_x=init_x, logl=logl, loglstar=loglstar, + samples, derived_samples, likes, scale, nc = self.trainer.mcmc_sample( + loglike=self.loglike, init_x=init_x, loglstar=loglstar, transform=self.transform, mcmc_steps=mcmc_steps + mcmc_burn_in, max_prior=1, alpha=alpha) if self.use_mpi: @@ -241,26 +274,25 @@ def run( active_u[worst] = samples[ib, -1, :] active_v[worst] = self.transform(active_u[worst]) active_logl[worst] = likes[ib, -1] + if self.num_derived > 0: + active_derived[worst] = derived_samples[ib, -1, :] accept = True break if it % log_interval == 0 and self.log: acceptance, ess, jump_distance = self._chain_stats(samples, mean=np.mean(active_u, axis=0), std=np.std(active_u, axis=0)) - np.save( - os.path.join( - self.logs['extra'], - 'active_%s.npy' % - it), - active_v) self.logger.info( - 'Step [%d] loglstar [%5.4f] maxlogl [%5.4f] logz [%5.4f] vol [%6.5f] ncalls [%d] scale [%5.4f]' % - (it, loglstar, np.max(active_logl), logz, expected_vol, ncall, scale)) + 'Step [%d] loglstar [%5.4e] maxlogl [%5.4e] logz [%5.4e] vol [%6.5e] ncalls [%d] ' + 'scale [%5.4f]' % (it, loglstar, np.max(active_logl), logz, expected_vol, ncall, scale)) with open(os.path.join(self.logs['results'], 'results.csv'), 'a') as f: writer = csv.writer(f) writer.writerow([it, acceptance, np.min(ess), np.max( ess), jump_distance, scale, loglstar, logz, fraction_remain, ncall]) - self._save_samples(np.array(saved_v), np.exp(np.array(saved_logwt) - logz), np.array(saved_logl)) + + if it % log_interval == 0 and self.log: + np.save(os.path.join(self.logs['extra'], 'active_%s.npy' % it), active_v) + self._save_samples(np.array(saved_v), np.exp(np.array(saved_logwt) - logz), np.array(saved_logl)) # Shrink interval logvol -= 1.0 / self.num_live_points diff --git a/nnest/networks.py b/nnest/networks.py index edca26d..4bd8aa2 100755 --- a/nnest/networks.py +++ b/nnest/networks.py @@ -10,6 +10,7 @@ import torch import torch.nn as nn +from torch import distributions class BatchNormFlow(nn.Module): @@ -34,7 +35,7 @@ def forward(self, inputs, cond_inputs=None, mode='direct'): if self.training: self.batch_mean = inputs.mean(0) self.batch_var = ( - inputs - self.batch_mean).pow(2).mean(0) + self.eps + inputs - self.batch_mean).pow(2).mean(0) + self.eps self.running_mean.mul_(self.momentum) self.running_var.mul_(self.momentum) @@ -99,18 +100,20 @@ def __init__(self, total_inputs = num_inputs + num_cond_inputs else: total_inputs = num_inputs - - scale_layers = [nn.Linear(total_inputs, num_hidden), s_act_func()] - for i in range(0, num_layers): - scale_layers += [nn.Linear(num_hidden, num_hidden), s_act_func()] - scale_layers += [nn.Linear(num_hidden, num_inputs)] - self.scale_net = nn.Sequential(*scale_layers) + + if not translate_only: + scale_layers = [nn.Linear(total_inputs, num_hidden), s_act_func()] + for i in range(0, num_layers): + scale_layers += [nn.Linear(num_hidden, num_hidden), s_act_func()] + scale_layers += [nn.Linear(num_hidden, num_inputs)] + self.scale_net = nn.Sequential(*scale_layers) + translate_layers = [nn.Linear(total_inputs, num_hidden), t_act_func()] for i in range(0, num_layers): translate_layers += [nn.Linear(num_hidden, num_hidden), t_act_func()] translate_layers += [nn.Linear(num_hidden, num_inputs)] self.translate_net = nn.Sequential(*translate_layers) - + def 
init(m): if isinstance(m, nn.Linear): m.bias.data.fill_(0) @@ -127,7 +130,7 @@ def forward(self, inputs, cond_inputs=None, mode='direct'): if self.translate_only: if mode == 'direct': - return inputs + t, 0 + return inputs + t, 0 else: return inputs - t, 0 else: @@ -140,6 +143,22 @@ def forward(self, inputs, cond_inputs=None, mode='direct'): return (inputs - t) * s, -log_s.sum(-1, keepdim=True) +class ScaleLayer(nn.Module): + + def __init__(self): + super(ScaleLayer, self).__init__() + + self.scale = nn.Parameter(torch.tensor(0.0), requires_grad=True) + + def forward(self, inputs, cond_inputs=None, mode='direct'): + if mode == 'direct': + s = torch.exp(self.scale) + return inputs * s, self.scale.sum(-1, keepdim=True) + else: + s = torch.exp(-self.scale) + return inputs * s, -self.scale.sum(-1, keepdim=True) + + class FlowSequential(nn.Sequential): """ A sequential container for flows. In addition to a forward pass it implements a backward pass and @@ -152,7 +171,6 @@ def forward(self, inputs, cond_inputs=None, mode='direct', logdets=None): inputs: a tuple of inputs and logdets mode: to run direct computation or inverse """ - self.num_inputs = inputs.size(-1) if logdets is None: logdets = torch.zeros(inputs.size(0), 1, device=inputs.device) @@ -169,27 +187,22 @@ def forward(self, inputs, cond_inputs=None, mode='direct', logdets=None): return inputs, logdets - def log_probs(self, inputs, cond_inputs=None): - u, log_jacob = self(inputs, cond_inputs) - log_probs = (-0.5 * u.pow(2) - 0.5 * math.log(2 * math.pi)).sum( - -1, keepdim=True) - return (log_probs + log_jacob).sum(-1, keepdim=True) - - def sample(self, num_samples=None, noise=None, cond_inputs=None): - if noise is None: - noise = torch.Tensor(num_samples, self.num_inputs).normal_() - device = next(self.parameters()).device - noise = noise.to(device) - if cond_inputs is not None: - cond_inputs = cond_inputs.to(device) - samples = self.forward(noise, cond_inputs, mode='inverse')[0] - return samples - class SingleSpeed(nn.Module): - def __init__(self, num_inputs, num_hidden, num_blocks, num_layers, translate_only=False, device=None): + def __init__(self, num_inputs, num_hidden, num_blocks, num_layers, scale='', + base_dist=None, device=None): super(SingleSpeed, self).__init__() + + self.num_inputs = num_inputs + + if base_dist is None: + self.base_dist = distributions.MultivariateNormal(torch.zeros(num_inputs), torch.eye(num_inputs)) + else: + self.base_dist = base_dist + + translate_only = scale == 'translate' or scale == 'constant' + mask = torch.arange(0, num_inputs) % 2 mask = mask.float() if device is not None: @@ -201,6 +214,8 @@ def __init__(self, num_inputs, num_hidden, num_blocks, num_layers, translate_onl num_inputs, num_hidden, mask, None, s_act='tanh', t_act='relu', num_layers=num_layers, translate_only=translate_only), ] + if scale == 'constant': + modules += [ScaleLayer()] mask = 1 - mask self.net = FlowSequential(*modules) if device is not None: @@ -210,16 +225,26 @@ def forward(self, inputs, cond_inputs=None, mode='direct', logdets=None): return self.net(inputs, cond_inputs=cond_inputs, mode=mode, logdets=logdets) def log_probs(self, inputs, cond_inputs=None): - return self.net.log_probs(inputs, cond_inputs=None) + u, log_jacob = self.net(inputs, cond_inputs) + log_probs = self.base_dist.log_prob(u).unsqueeze(1) + return (log_probs + log_jacob).sum(-1, keepdim=True) def sample(self, num_samples=None, noise=None, cond_inputs=None): - return self.net.sample(num_samples=num_samples, noise=noise, cond_inputs=cond_inputs) + if 
noise is None: + noise = self.base_dist.sample((num_samples,)) + device = next(self.parameters()).device + noise = noise.to(device) + if cond_inputs is not None: + cond_inputs = cond_inputs.to(device) + samples = self.forward(noise, cond_inputs, mode='inverse')[0] + return samples -class FastSlow(nn.Module): +class FastSlow(SingleSpeed): - def __init__(self, num_fast, num_slow, num_hidden, num_blocks, num_layers, translate_only=False, device=None): - super(FastSlow, self).__init__() + def __init__(self, num_fast, num_slow, num_hidden, num_blocks, num_layers, scale='', + base_dist=None, device=None): + super(FastSlow, self).__init__(num_fast + num_slow, num_slow, num_hidden, num_blocks, num_layers) self.num_fast = num_fast self.num_slow = num_slow @@ -235,7 +260,7 @@ def __init__(self, num_fast, num_slow, num_hidden, num_blocks, num_layers, trans modules_fast += [ CouplingLayer( num_fast, num_hidden, mask_fast, None, - s_act='tanh', t_act='relu', num_layers=num_layers, translate_only=translate_only) + s_act='tanh', t_act='relu', num_layers=num_layers) ] mask_fast = 1 - mask_fast self.net_fast = FlowSequential(*modules_fast) @@ -250,20 +275,19 @@ def __init__(self, num_fast, num_slow, num_hidden, num_blocks, num_layers, trans modules_slow += [ CouplingLayer( num_slow, num_hidden, mask_slow, None, - s_act='tanh', t_act='relu', num_layers=num_layers, translate_only=translate_only) + s_act='tanh', t_act='relu', num_layers=num_layers) ] mask_slow = 1 - mask_slow self.net_slow = FlowSequential(*modules_slow) # Combine fast and slow such that slow is unnchanged just by updating fast block - modules = [] mask = torch.cat((torch.ones(num_slow), torch.zeros(num_fast))) if device is not None: mask = mask.to(device) modules = [ CouplingLayer( num_slow + num_fast, num_hidden, mask, None, - s_act='tanh', t_act='relu', num_layers=num_layers, translate_only=translate_only) + s_act='tanh', t_act='relu', num_layers=num_layers) ] self.net = FlowSequential(*modules) @@ -271,31 +295,21 @@ def forward(self, inputs, cond_inputs=None, mode='direct', logdets=None): assert mode in ['direct', 'inverse'] if mode == 'direct': slow, logdets_slow = self.net_slow(inputs[:, 0:self.num_slow], mode=mode) - fast, logdets_fast = self.net_fast(inputs[:, self.num_slow:self.num_slow+self.num_fast], mode=mode) + fast, logdets_fast = self.net_fast(inputs[:, self.num_slow:self.num_slow + self.num_fast], mode=mode) inputs = torch.cat((slow, fast), dim=1) inputs, logdets = self.net(inputs, mode=mode) else: inputs, logdets = self.net(inputs, mode=mode) slow, logdets_slow = self.net_slow(inputs[:, 0:self.num_slow], mode=mode) - fast, logdets_fast = self.net_fast(inputs[:, self.num_slow:self.num_slow+self.num_fast], mode=mode) + fast, logdets_fast = self.net_fast(inputs[:, self.num_slow:self.num_slow + self.num_fast], mode=mode) inputs = torch.cat((slow, fast), dim=1) return inputs, logdets_slow + logdets_fast + logdets def log_probs(self, inputs, cond_inputs=None): slow, logdets_slow = self.net_slow(inputs[:, 0:self.num_slow]) - fast, logdets_fast = self.net_fast(inputs[:, self.num_slow:self.num_slow+self.num_fast]) + fast, logdets_fast = self.net_fast(inputs[:, self.num_slow:self.num_slow + self.num_fast]) inputs = torch.cat((slow, fast), dim=1) u, log_jacob = self.net(inputs) log_probs = (-0.5 * u.pow(2) - 0.5 * math.log(2 * math.pi)).sum( -1, keepdim=True) return (log_probs + log_jacob + logdets_slow + logdets_fast).sum(-1, keepdim=True) - - def sample(self, num_samples=None, noise=None, cond_inputs=None): - if noise is None: - noise 
= torch.Tensor(num_samples, self.num_inputs).normal_() - device = next(self.parameters()).device - noise = noise.to(device) - if cond_inputs is not None: - cond_inputs = cond_inputs.to(device) - samples = self.forward(noise, cond_inputs, mode='inverse')[0] - return samples diff --git a/nnest/sampler.py b/nnest/sampler.py index 2921164..1a1c7fd 100755 --- a/nnest/sampler.py +++ b/nnest/sampler.py @@ -34,20 +34,33 @@ def __init__(self, num_layers=2, log_dir='logs/test', use_gpu=False, + base_dist=None, + scale='' ): self.x_dim = x_dim + self.num_derived = num_derived self.num_params = x_dim + num_derived def safe_loglike(x): + if isinstance(x, list): + x = np.array(x) if len(x.shape) == 1: assert x.shape[0] == self.x_dim x = np.expand_dims(x, 0) - logl = loglike(x) + res = loglike(x) + if isinstance(res, tuple): + logl, derived = res + else: + logl = res + # Set derived shape to be (batch size, 0) + derived = np.array([[] for _ in x]) if len(logl.shape) == 0: logl = np.expand_dims(logl, 0) logl[np.logical_not(np.isfinite(logl))] = -1e100 - return logl + if len(derived.shape) == 1 or derived.shape[1] != self.num_derived: + raise ValueError('Is the number of derived parameters correct and derived has the correct shape?') + return logl, derived self.loglike = safe_loglike @@ -55,10 +68,13 @@ def safe_loglike(x): self.transform = lambda x: x else: def safe_transform(x): + if isinstance(x, list): + x = np.array(x) if len(x.shape) == 1: assert x.shape[0] == self.x_dim x = np.expand_dims(x, 0) return transform(x) + self.transform = safe_transform self.use_mpi = False @@ -79,25 +95,32 @@ def safe_transform(x): args.update(vars(self)) if self.log: - self.logs = make_run_dir(log_dir, run_num, append_run_num= append_run_num) + self.logs = make_run_dir(log_dir, run_num, append_run_num=append_run_num) log_dir = self.logs['run_dir'] self._save_params(args) else: log_dir = None - + self.logger = create_logger(__name__) self.trainer = Trainer( - x_dim, - hidden_dim, - nslow=num_slow, - batch_size=batch_size, - flow=flow, - num_blocks=num_blocks, - num_layers=num_layers, - log_dir=log_dir, - log=self.log, - use_gpu=use_gpu) + x_dim, + hidden_dim, + nslow=num_slow, + batch_size=batch_size, + flow=flow, + num_blocks=num_blocks, + num_layers=num_layers, + log_dir=log_dir, + log=self.log, + use_gpu=use_gpu, + base_dist=base_dist, + scale=scale) + + if self.log: + self.logger.info('Num base params [%d]' % (self.x_dim)) + self.logger.info('Num derived params [%d]' % (self.num_derived)) + self.logger.info('Total params [%d]' % (self.num_params)) def _save_params(self, my_dict): my_dict = {k: str(v) for k, v in my_dict.items()} @@ -113,7 +136,7 @@ def _chain_stats(self, samples, mean=None, std=None): ess = effective_sample_size(samples, mean, std) jump_distance = mean_jump_distance(samples) self.logger.info( - 'Acceptance [%5.4f] min ESS [%5.4f] max ESS [%5.4f] jump distance [%5.4f]' % + 'Acceptance [%5.4f] min ESS [%5.4f] max ESS [%5.4f] average jump distance [%5.4f]' % (acceptance, np.min(ess), np.max(ess), jump_distance)) return acceptance, ess, jump_distance @@ -127,7 +150,7 @@ def _save_samples(self, samples, weights, logl, min_weight=1e-30, outfile='chain f.write("\n") elif len(samples.shape) == 3: for ib in range(samples.shape[0]): - with open(os.path.join(self.logs['chains'], outfile + '_%s.txt' % (ib+1)), 'w') as f: + with open(os.path.join(self.logs['chains'], outfile + '_%s.txt' % (ib + 1)), 'w') as f: for i in range(samples.shape[1]): f.write("%.5E " % max(weights[ib, i], min_weight)) f.write("%.5E " % 
-logl[ib, i]) diff --git a/nnest/trainer.py b/nnest/trainer.py index 5eea795..74e5d94 100644 --- a/nnest/trainer.py +++ b/nnest/trainer.py @@ -13,8 +13,8 @@ import torch from torch.utils.data import DataLoader +from torch.utils.tensorboard import SummaryWriter import numpy as np -from tensorboardX import SummaryWriter from sklearn.model_selection import train_test_split import scipy.spatial from tqdm import tqdm @@ -36,9 +36,10 @@ def __init__(self, nslow=0, batch_size=100, flow='nvp', - translate_only=False, + scale='', num_blocks=5, num_layers=2, + base_dist=None, oversample_rate=-1, train=True, load_model='', @@ -68,10 +69,10 @@ def __init__(self, if flow.lower() == 'nvp': if nslow > 0: self.netG = FastSlow(nfast, nslow, ndim, num_blocks, num_layers, device=self.device, - translate_only=translate_only) + scale=scale, base_dist=base_dist) else: self.netG = SingleSpeed(xdim, ndim, num_blocks, num_layers, device=self.device, - translate_only=translate_only) + scale=scale, base_dist=base_dist) else: raise NotImplementedError @@ -193,15 +194,75 @@ def train( self.netG.load_state_dict(best_model.state_dict()) - def sample( + def rejection_sample( self, + loglike, + loglstar, + init_x=None, + transform=None, + max_prior=None, + enlargement_factor=1.3, + constant_efficiency_factor=None): + + self.netG.eval() + + if transform is None: + def transform(x): return x + + if init_x is not None: + z, log_det_J = self.netG(torch.from_numpy(init_x).float().to(self.device)) + # We want max det dx/dz to set envelope for rejection sampling + m = torch.max(-log_det_J) + z = z.detach().cpu().numpy() + r = np.max(np.linalg.norm(z, axis=1)) + else: + r = 1 + + if constant_efficiency_factor is not None: + enlargement_factor = (1 / constant_efficiency_factor) ** (1 / self.x_dim) + + nc = 0 + while True: + if hasattr(self.netG.base_dist, 'usample'): + z = self.netG.base_dist.usample(sample_shape=(1,)) * enlargement_factor + else: + z = np.random.randn(self.x_dim) + z = enlargement_factor * r * z * np.random.rand() ** (1. 
/ self.x_dim) / np.sqrt(np.sum(z ** 2)) + z = np.expand_dims(z, 0) + x, log_det_J = self.netG(torch.from_numpy(z).float().to(self.device), mode='inverse') + delta_log_det_J = (log_det_J - m).detach() + log_ratio_1 = delta_log_det_J.squeeze(dim=1) + x = x.detach().cpu().numpy() + + # Check not out of prior range + if np.any(np.abs(x) > max_prior): + continue + + # Check volume constraint + rnd_u = torch.rand(log_ratio_1.shape, device=self.device) + ratio = (log_ratio_1).exp().clamp(max=1) + if rnd_u > ratio: + continue + + logl = loglike(transform(x)) + idx = np.where(np.isfinite(logl) & (logl < loglstar))[0] + log_ratio_1[idx] = -np.inf + ratio = (log_ratio_1).exp().clamp(max=1) + + nc += 1 + if rnd_u < ratio: + break + + return x, logl, nc + + def mcmc_sample( + self, + loglike, mcmc_steps=20, alpha=1.0, dynamic=True, batch_size=1, - loglike=None, init_x=None, - logl=None, loglstar=None, transform=None, show_progress=False, @@ -213,6 +274,7 @@ def sample( self.netG.eval() samples = [] + derived_samples = [] likes = [] if transform is None: @@ -225,26 +287,20 @@ def transform(x): return x # Add the backward version of x rather than init_x due to numerical precision x, _ = self.netG(z, mode='inverse') x = x.detach().cpu().numpy() - if logl is None: - logl = loglike(transform(x)) + logl, derived = loglike(transform(x)) else: - if logl is None: - for i in range(max_start_tries): - z = torch.randn(batch_size, self.z_dim, device=self.device) - x, _ = self.netG(z, mode='inverse') - x = x.detach().cpu().numpy() - logl = loglike(transform(x)) - if np.all(logl > -1e30): - break - if i == max_start_tries - 1: - raise Exception('Could not find starting value') - else: + for i in range(max_start_tries): z = torch.randn(batch_size, self.z_dim, device=self.device) x, _ = self.netG(z, mode='inverse') x = x.detach().cpu().numpy() - logl = loglike(transform(x)) + logl, derived = loglike(transform(x)) + if np.all(logl > -1e30): + break + if i == max_start_tries - 1: + raise Exception('Could not find starting value') samples.append(x) + derived_samples.append(derived) likes.append(logl) iters = range(mcmc_steps) @@ -262,7 +318,7 @@ def transform(x): return x else: files = [open(out_chain + '_%s.txt' % (ib + 1), 'w') for ib in range(batch_size)] - for i in iters: + for _ in iters: dz = torch.randn_like(z) * scale if self.nslow > 0 and np.random.uniform() < self.oversample_rate: @@ -282,9 +338,7 @@ def transform(x): return x # Check not out of prior range if max_prior is not None: - prior = np.logical_or( - np.abs(x) > max_prior, - np.abs(x_prime) > max_prior) + prior = np.logical_or(np.abs(x) > max_prior, np.abs(x_prime) > max_prior) idx = np.where([np.any(p) for p in prior]) log_ratio_1[idx] = -np.inf @@ -292,6 +346,7 @@ def transform(x): return x ratio = log_ratio_1.exp().clamp(max=1) mask = (rnd_u < ratio).int() logl_prime = np.full(batch_size, logl) + derived_prime = np.copy(derived) # Only evaluate likelihood if prior and volume is accepted if loglike is not None and transform is not None: @@ -299,10 +354,11 @@ def transform(x): return x if im: if not fast: ncall += 1 - lp = loglike(transform(x_prime[idx])) + lp, der = loglike(transform(x_prime[idx])) if loglstar is not None: if np.isfinite(lp) and lp >= loglstar: logl_prime[idx] = lp + derived_prime[idx] = der else: mask[idx] = 0 else: @@ -310,6 +366,7 @@ def transform(x): return x logl_prime[idx] = lp elif rnd_u[idx].cpu().numpy() < np.clip(np.exp(lp - logl[idx]), 0, 1): logl_prime[idx] = lp + derived_prime[idx] = der else: mask[idx] = 0 @@ -326,6 
+383,7 @@ def transform(x): return x m = mask[:, None].float() z = (z_prime * m + z * (1 - m)).detach() + derived = derived_prime * m.cpu().numpy() + derived * (1 - m.cpu().numpy()) m = mask logl = logl_prime * m.cpu().numpy() + logl * (1 - m.cpu().numpy()) @@ -333,6 +391,7 @@ def transform(x): return x x, _ = self.netG(z, mode='inverse') x = x.detach().cpu().numpy() samples.append(x) + derived_samples.append(derived) likes.append(logl) if out_chain is not None: @@ -341,19 +400,20 @@ def transform(x): return x files[ib].write("%.5E " % 1) files[ib].write("%.5E " % -logl[ib]) files[ib].write(" ".join(["%.5E" % vi for vi in v[ib]])) + files[ib].write(" ".join(["%.5E" % vi for vi in derived[ib]])) files[ib].write("\n") files[ib].flush() - # Transpose so shape is (chain_num, iteration, dim) + # Transpose samples so shape is (chain_num, iteration, dim) samples = np.transpose(np.array(samples), axes=[1, 0, 2]) + derived_samples = np.transpose(np.array(derived_samples), axes=[1, 0, 2]) likes = np.transpose(np.array(likes), axes=[1, 0]) if self.path and plot: cmap = plt.cm.jet cmap.set_under('w', 1) fig, ax = plt.subplots() - ax.hist2d(samples[:, -1, 0], samples[:, -1, 1], - bins=200, cmap=cmap, vmin=1, alpha=0.2) + ax.hist2d(samples[:, -1, 0], samples[:, -1, 1], bins=200, cmap=cmap, vmin=1, alpha=0.2) if self.writer is not None: self.writer.add_figure('chain', fig, self.total_iters) @@ -361,7 +421,7 @@ def transform(x): return x for ib in range(batch_size): files[ib].close() - return samples, likes, scale, ncall + return samples, derived_samples, likes, scale, ncall def _jacobian(self, z): """ Calculate det d f^{-1} (z)/dz diff --git a/requirements.txt b/requirements.txt index c1c431c..143330b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,11 +1,10 @@ -tensorflow -tensorboardX getdist numpy scipy matplotlib pandas -torch +torch >= 1.3.1 tqdm scikit-learn -pillow \ No newline at end of file +pillow +tensorboard >= 1.14 \ No newline at end of file diff --git a/setup.py b/setup.py index 9b31449..99c3921 100644 --- a/setup.py +++ b/setup.py @@ -4,17 +4,24 @@ from setuptools import find_packages, setup setup( - name = "nnest", - version = "0.1.2", - description = "Neural network nested sampling", - author = "Adam Moss", - author_email = "adam.moss@nottingham.ac.uk", - maintainer = "Adam Moss", - maintainer_email = "adam.moss@nottingham.ac.uk", - url = "https://github.com/adammoss/nnest/", - license = "MIT", - packages = find_packages(), - provides = ["nnest"], - requires = ["torch", "tensorflow", "tensorboardX", "numpy", "scipy", "matplotlib", "pandas", - "scikitlearn", "tqdm", "pillow"], + name="nnest", + version="0.2.0", + description="Neural network nested sampling", + author="Adam Moss", + author_email="adam.moss@nottingham.ac.uk", + maintainer="Adam Moss", + maintainer_email="adam.moss@nottingham.ac.uk", + url="https://github.com/adammoss/nnest/", + license="MIT", + packages=find_packages(), + provides=["nnest"], + requires=["torch>=1.3.1", + "tensorboard>=1.14", + "numpy", + "scipy", + "matplotlib", + "pandas", + "scikitlearn", + "tqdm", + "pillow"], )
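Usage sketch (illustrative only: the toy likelihood, the single derived parameter and the numeric settings are placeholders, not part of the changeset). It shows how the new 0.2.0 pieces fit together, following examples/nested/mog4_derived.py: a loglike that also returns derived parameters, a GeneralisedNormal base distribution, a learned constant scale, and rejection sampling that falls back to MCMC.

import numpy as np
import torch
from scipy.stats import multivariate_normal

from nnest import NestedSampler
from nnest.distributions import GeneralisedNormal

x_dim = 2

def loglike(x):
    # With num_derived > 0 the likelihood returns (logl, derived);
    # derived must have shape (batch, num_derived).
    logl = multivariate_normal.logpdf(x, mean=np.zeros(x_dim), cov=np.eye(x_dim))
    derived = np.sum(x, axis=1, keepdims=True)  # one illustrative derived parameter
    return logl, derived

def transform(x):
    return 3. * x

base_dist = GeneralisedNormal(torch.zeros(x_dim), torch.ones(x_dim), torch.tensor(8.0))

sampler = NestedSampler(x_dim, loglike, transform=transform, num_live_points=1000,
                        num_derived=1, base_dist=base_dist, scale='constant',
                        log_dir='logs/sketch')
# method defaults to 'rejection_prior'; with volume_switch < 0 the sampler switches
# to MCMC once the mean number of likelihood calls per replacement exceeds the
# MCMC chain length.
sampler.run(train_iters=500, volume_switch=-1.0)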
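The log_prob of the new GeneralisedNormal can be cross-checked against scipy.stats.gennorm, which the class already uses for sampling. A minimal sketch, assuming the standardised case loc=0, scale=1 with arbitrary test points:

import numpy as np
import torch
from scipy.stats import gennorm

from nnest.distributions import GeneralisedNormal

beta = 8.0
dist = GeneralisedNormal(torch.zeros(2), torch.ones(2), torch.tensor(beta))

x = torch.tensor([[0.1, -0.3], [0.5, 0.2]])
torch_logp = dist.log_prob(x).sum(-1)                     # per-sample log density
scipy_logp = gennorm.logpdf(x.numpy(), beta).sum(axis=1)  # reference values
print(torch_logp.numpy(), scipy_logp)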