Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ENH] Implement pure-Python ICA with mixture modeling #51

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,10 @@ Repository to prepare the ICA-AROMA tutorial for Brainhack Donostia.
This repository is a clone of [maartenmennes/ICA-AROMA](https://github.com/maartenmennes/ICA-AROMA),
which has an Apache 2.0 license. We plan to modify this clone in order to restructure it into a
pure Python package, but we will make sure to fulfill the requirements of the license.

#### Source of the Gaussian-Gamma Mixture Model

The Gaussian-Gamma Mixture Model used in this package (`aroma.mixture.GGM`) was taken from
the [`nipy`](https://github.com/nipy/nipy) package. As such, the file in which the model
is defined (`aroma/mixture.py`) falls under `nipy`'s licensing. The license for that file
is stored in the file's docstring.
46 changes: 16 additions & 30 deletions aroma/aroma.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import shutil

import nibabel as nib
import numpy as np
from nilearn import masking

from . import utils, features

Expand Down Expand Up @@ -152,51 +154,35 @@ def aroma_workflow(
# Define/create mask. Either by making a copy of the specified mask, or by
# creating a new one.
new_mask = op.join(out_dir, "mask.nii.gz")
if mask:
shutil.copyfile(mask, new_mask)
elif in_feat and op.isfile(op.join(in_feat, "example_func.nii.gz")):
# If a Feat directory is specified, and an example_func is present use
# example_func to create a mask
bet_command = "{0} {1} {2} -f 0.3 -n -m -R".format(
op.join(fsl_dir, "bet"),
op.join(in_feat, "example_func.nii.gz"),
op.join(out_dir, "bet"),
)
os.system(bet_command)
os.rename(op.join(out_dir, "bet_mask.nii.gz"), new_mask)
if op.isfile(op.join(out_dir, "bet.nii.gz")):
os.remove(op.join(out_dir, "bet.nii.gz"))
else:
if in_feat:
print(
" - No example_func was found in the Feat directory. "
"A mask will be created including all voxels with varying "
"intensity over time in the fMRI data. Please check!\n"
)
math_command = "{0} {1} -Tstd -bin {2}".format(
op.join(fsl_dir, "fslmaths"), in_file, new_mask
)
os.system(math_command)
if not mask:
mask = masking.compute_epi_mask(in_file)
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

More aggressive masking seems to be necessary. I noticed that the ICA components were showing up outside the brain with the old method.

mask.to_filename(new_mask)

# Run ICA-AROMA
print("Step 1) MELODIC")
utils.runICA(fsl_dir, in_file, out_dir, mel_dir, new_mask, dim, TR)
print("Step 1) ICA")
components_img_z_thresh, mixing_ica, mixing_power_spectra = utils.run_ica(
in_file, new_mask, n_components=-1, t_r=TR
)
mel_IC = op.join(out_dir, "melodic_IC_thr.nii.gz")
os.mkdir(op.join(out_dir, "melodic.ica")) # necessary until we drop MELODIC completely
mel_mix = op.join(out_dir, "melodic.ica", "melodic_mix")
mel_FT_mix = op.join(out_dir, "melodic.ica", "melodic_FTmix")
components_img_z_thresh.to_filename(mel_IC)
np.savetxt(mel_mix, mixing_ica)
np.savetxt(mel_FT_mix, mixing_power_spectra)

print("Step 2) Automatic classification of the components")
print(" - registering the spatial maps to MNI")
mel_IC = op.join(out_dir, "melodic_IC_thr.nii.gz")
mel_IC_MNI = op.join(out_dir, "melodic_IC_thr_MNI2mm.nii.gz")
utils.register2MNI(fsl_dir, mel_IC, mel_IC_MNI, affmat, warp)

print(" - extracting the CSF & Edge fraction features")
edge_fract, csf_fract = features.feature_spatial(mel_IC_MNI)

print(" - extracting the Maximum RP correlation feature")
mel_mix = op.join(out_dir, "melodic.ica", "melodic_mix")
max_RP_corr = features.feature_time_series(mel_mix, mc)

print(" - extracting the High-frequency content feature")
mel_FT_mix = op.join(out_dir, "melodic.ica", "melodic_FTmix")
HFC = features.feature_frequency(mel_FT_mix, TR)

print(" - classification")
Expand Down
2 changes: 1 addition & 1 deletion aroma/features.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ def feature_spatial(mel_IC):
----------
mel_IC : str
Full path of the nii.gz file containing mixture-modeled thresholded
(p<0.5) Z-maps, registered to the MNI152 2mm template
(prob>0.5) Z-maps, registered to the MNI152 2mm template

Returns
-------
Expand Down
302 changes: 302 additions & 0 deletions aroma/mixture.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,302 @@
"""
This file was taken from the nipy Python library and reduced to only contain
code necessary to define and use the GGM class. Minor changes have been made
to the documentation, in order to follow project convention.

License
-------
Copyright (c) 2006-2018, NIPY Developers
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.

* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials provided
with the distribution.

* Neither the name of the NIPY Developers nor the names of any
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

Notes
-----
Taken from the nipy version at commit c7ae00435ef1134cee28eb0d31d2f1a253328f6b.
"""
import numpy as np
import scipy.special as sp


def _dichopsi_log(u, v, y, eps=0.00001):
""" Implements the dichotomic part of the solution of psi(c)-log(c)=y
"""
if u > v:
u, v = v, u
t = (u + v) / 2
if np.absolute(u - v) < eps:
return t
else:
if sp.psi(t) - np.log(t) > y:
return _dichopsi_log(u, t, y, eps)
else:
return _dichopsi_log(t, v, y, eps)


def _psi_solve(y, eps=0.00001):
""" Solve psi(c)-log(c)=y by dichotomy
"""
if y > 0:
print("y", y)
raise ValueError("y>0, the problem cannot be solved")
u = 1.
if y > sp.psi(u) - np.log(u):
while sp.psi(u) - np.log(u) < y:
u *= 2
u /= 2
else:
while sp.psi(u) - np.log(u) > y:
u /= 2
return _dichopsi_log(u, 2 * u, y, eps)


def _compute_c(x, z, eps=0.00001):
"""
this function returns the mle of the shape parameter if a 1D gamma
density
"""
eps = 1.e-7
y = np.dot(z, np.log(x)) / np.sum(z) - np.log(np.dot(z, x) / np.sum(z))
if y > - eps:
c = 10
else:
c = _psi_solve(y, eps=0.00001)
return c


def _gaus_dens(mean, var, x):
""" evaluate the gaussian density (mean,var) at points x
"""
Q = - (x - mean) ** 2 / (2 * var)
return 1. / np.sqrt(2 * np.pi * var) * np.exp(Q)


def _gam_dens(shape, scale, x):
"""evaluate the gamma density (shape,scale) at points x

Notes
-----
Returns 0 on negative subspace
"""
ng = np.zeros(np.size(x))
cst = - shape * np.log(scale) - sp.gammaln(shape)
i = np.ravel(np.nonzero(x > 0))
if np.size(i) > 0:
lz = cst + (shape - 1) * np.log(x[i]) - x[i] / scale
ng[i] = np.exp(lz)
return ng


def _gam_param(x, z):
""" Compute the parameters of a gamma density from data weighted points

Parameters
----------
x: array of shape(nbitem) the learning points
z: array of shape(nbitem), their membership within the class

Notes
-----
if no point is positive then the couple (1, 1) is returned
"""
eps = 1.e-5
i = np.ravel(np.nonzero(x > 0))
szi = np.sum(z[i])
if szi > 0:
shape = _compute_c(x[i], z[i], eps)
scale = np.dot(x[i], z[i]) / (szi * shape)
else:
shape = 1
scale = 1
return shape, scale


class GGM(object):
"""
This is the basic one dimensional Gaussian-Gamma Mixture estimation class
Note that it can work with positive or negative values,
as long as there is at least one positive value.
NB : The gamma distribution is defined only on positive values.

5 scalar members
- mean: gaussian mean
- var: gaussian variance (non-negative)
- shape: gamma shape (non-negative)
- scale: gamma scale (non-negative)
- mixt: mixture parameter (non-negative, weight of the gamma)
"""

def __init__(self, shape=1, scale=1, mean=0, var=1, mixt=0.5):
self.shape = shape
self.scale = scale
self.mean = mean
self.var = var
self.mixt = mixt

def parameters(self):
""" print the paramteres of self
"""
print("Gaussian: mean: ", self.mean, "variance: ", self.var)
print("Gamma: shape: ", self.shape, "scale: ", self.scale)
print("Mixture gamma: ", self.mixt, "Gaussian: ", 1 - self.mixt)

def Mstep(self, x, z):
"""
Mstep of the model: maximum likelihood
estimation of the parameters of the model

Parameters
----------
x : array of shape (nbitems,)
input data
z array of shape(nbitrems, 2)
the membership matrix
"""
# z[0,:] is the likelihood to be generated by the gamma
# z[1,:] is the likelihood to be generated by the gaussian

tiny = 1.e-15
sz = np.maximum(tiny, np.sum(z, 0))

self.shape, self.scale = _gam_param(x, z[:, 0])
self.mean = np.dot(x, z[:, 1]) / sz[1]
self.var = np.dot((x - self.mean) ** 2, z[:, 1]) / sz[1]
self.mixt = sz[0] / np.size(x)

def Estep(self, x):
"""
E step of the estimation:
Estimation of ata membsership

Parameters
----------
x: array of shape (nbitems,)
input data

Returns
-------
z: array of shape (nbitems, 2)
the membership matrix
"""
eps = 1.e-15
z = np.zeros((np.size(x), 2), 'd')
z[:, 0] = _gam_dens(self.shape, self.scale, x)
z[:, 1] = _gaus_dens(self.mean, self.var, x)
z = z * np.array([self.mixt, 1. - self.mixt])
sz = np.maximum(np.sum(z, 1), eps)
L = np.sum(np.log(sz)) / np.size(x)
z = (z.T / sz).T
return z, L

def estimate(self, x, niter=10, delta=0.0001, verbose=False):
""" Complete EM estimation procedure

Parameters
----------
x : array of shape (nbitems,)
the data to be processed
niter : int, optional
max nb of iterations
delta : float, optional
criterion for convergence
verbose : bool, optional
If True, print values during iterations

Returns
-------
LL, float
average final log-likelihood
"""
if x.max() < 0:
# all the values are generated by the Gaussian
self.mean = np.mean(x)
self.var = np.var(x)
self.mixt = 0.
L = 0.5 * (1 + np.log(2 * np.pi * self.var))
return L

# proceed with standard estimate
z, L = self.Estep(x)
L0 = L - 2 * delta
for i in range(niter):
self.Mstep(x, z)
z, L = self.Estep(x)
if verbose:
print(i, L)
if (L < L0 + delta):
break
L0 = L
return L

def show(self, x):
""" Visualization of the mm based on the empirical histogram of x

Parameters
----------
x : array of shape (nbitems,)
the data to be processed
"""
step = 3.5 * np.std(x) / np.exp(np.log(np.size(x)) / 3)
bins = max(10, int((x.max() - x.min()) / step))
h, c = np.histogram(x, bins)
h = h.astype(np.float) / np.size(x)
p = self.mixt

dc = c[1] - c[0]
y = (1 - p) * _gaus_dens(self.mean, self.var, c) * dc
z = np.zeros(np.size(c))
z = _gam_dens(self.shape, self.scale, c) * p * dc

import matplotlib.pylab as mp
mp.figure()
mp.plot(0.5 * (c[1:] + c[:-1]), h)
mp.plot(c, y, 'r')
mp.plot(c, z, 'g')
mp.plot(c, z + y, 'k')
mp.title('Fit of the density with a Gamma-Gaussians mixture')
mp.legend(('data', 'gaussian acomponent', 'gamma component',
'mixture distribution'))

def posterior(self, x):
"""Posterior probability of observing the data x for each component

Parameters
----------
x: array of shape (nbitems,)
the data to be processed

Returns
-------
y, pg : arrays of shape (nbitem)
the posterior probability
"""
p = self.mixt
pg = p * _gam_dens(self.shape, self.scale, x)
y = (1 - p) * _gaus_dens(self.mean, self.var, x)
return y / (y + pg), pg / (y + pg)
Loading