0 parents, commit e4d970b
Showing 26 changed files with 7,538 additions and 0 deletions.
@@ -0,0 +1,50 @@
.DS_Store
*.texe
*.pyc
.ipynb_checkpoints
__pycache__/
slurmlogs/

# python pickle
*.pkl

# theano compile dirs
.theano*

.slurmlogs
_rslurm*/

# R
*.tmp.R

# LaTeX:
tex/build/
Thesis/build/
*.synctex
*.aux
*.glo
*.idx
*.log
*.toc
*.ist
*.acn
*.acr
*.alg
*.bbl
*.blg
*.dvi
*.glg
*.gls
*.ilg
*.ind
*.lof
*.lot
*.maf
*.mtc
*.mtc1
*.out
*.synctex.gz
*.fdb_latexmk
*.fls
!Thesis/figures/
Large diffs are not rendered by default.
@@ -0,0 +1,121 @@
#' ---
#' jupyter:
#'   jupytext:
#'     text_representation:
#'       extension: .R
#'       format_name: spin
#'       format_version: '1.0'
#'     jupytext_version: 0.8.5
#'   kernelspec:
#'     display_name: R
#'     language: R
#'     name: ir
#'   language_info:
#'     codemirror_mode: r
#'     file_extension: .r
#'     mimetype: text/x-r-source
#'     name: R
#'     pygments_lexer: r
#'     version: 3.4.1
#' ---

#SBATCH --job-name="DESeq on DR"
#SBATCH --out=.slurmlogs/slurm.dr_deseq2.%j_%N.out
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=1
#SBATCH --mem=7000
#SBATCH --time=1-00:00:00
#SBATCH --mail-type=FAIL
#SBATCH [email protected]

# This script uses DESeq2 and fdrtool on dimensionality-reduced
# data to estimate the share of differentially expressed features.

if (!exists("ncomps")) {
  args = commandArgs(trailingOnly = TRUE)
  if (length(args) < 1) {
    stop(paste0("No number of components specified.\n",
                "You can specify the number by providing it after ",
                "--args when calling the script from the command line, ",
                "or by setting the 'ncomps' variable in the R environment."))
  } else {
    ncomps = as.numeric(args[1])
  }
}
ncomps = ceiling(ncomps)
if (!is.finite(ncomps) || ncomps < 1) {
  stop("The number of components must be an integer larger than 0.")
}

# 'args' only exists if 'ncomps' was read from the command line above,
# so check for it before looking up the cohort argument.
if (!exists("cohort") && exists("args") && length(args) > 1) {
  cohort = args[2]
} else if (!exists("cohort")) {
  cohort = NULL
}

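# Example invocation (values are illustrative; the file name 'dr_deseq2.R' is
# an assumption based on the SLURM log name above):
#   Rscript dr_deseq2.R 50 CohortA
# or, from an interactive R session:
#   ncomps <- 50; cohort <- "CohortA"; source("dr_deseq2.R")
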
message(paste("This is ncomps:", ncomps, "; for cohort: ", cohort))

suppressPackageStartupMessages(library(DESeq2))
suppressPackageStartupMessages(library(fdrtool))

inPath = "data/deg_counts_prior"
outPath = "data/DR_DESeq2_prior"
dir.create(outPath, showWarnings = FALSE, recursive = TRUE)

pheno = readRDS(paste0(inPath, "/pheno.RDS"))
counts = readRDS(paste0(inPath, "/counts.RDS"))

dg_path = paste0(inPath, "/", ncomps - 1, "-comp.RDS")
if (!file.exists(dg_path)) {
  message(paste('The file', dg_path, 'does not exist. Stopping...'))
  quit()
}
raw_deg_counts = readRDS(dg_path)
deg_counts = data.frame(t(raw_deg_counts))
colnames(deg_counts) = paste0(colnames(counts), "-DR")
rownames(deg_counts) = rownames(counts)

deg_pheno = pheno
rownames(deg_pheno) = paste0(rownames(deg_pheno), "-DR")
pheno$DR = TRUE
deg_pheno$DR = FALSE

a_counts = cbind(counts, deg_counts)
a_pheno = rbind(pheno, deg_pheno)

if (!is.null(cohort)) {
  ind = a_pheno$Cohort == cohort
  a_counts = a_counts[, ind]
  a_pheno = a_pheno[ind, ]
}

broken = apply(is.na(a_counts), 1, any)
if (any(broken)) {
  message(paste0("There are ", sum(broken),
                 " features with NA values that will be removed."))
  a_counts = a_counts[!broken, ]
}

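# The design '~ DR' contrasts the original samples (DR = TRUE) against their
# dimensionality-reduced reconstructions (DR = FALSE, the "-DR" columns added above).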
dds_dat = DESeqDataSetFromMatrix(countData = round(a_counts),
                                 colData = a_pheno,
                                 design = ~ DR)

dds = DESeq(dds_dat)

res = results(dds)

ind = is.finite(res$pvalue)
pdf(paste0(outPath, "/fdrtool_plot_", cohort, "_", ncomps - 1, "-comp.pdf"))
fdr_result = fdrtool(res$pvalue[ind], statistic = "pvalue", verbose = FALSE)
dev.off()

share_diff_expr = max(fdr_result$qval, na.rm = TRUE)

write(share_diff_expr, paste0(outPath, "/share_diff_expr_",
                              cohort, "_", ncomps - 1, "-comp.txt"))

data = list(dds_data = dds_dat, dds = dds, dds_result = res,
            fdr_result = fdr_result)
saveRDS(data, paste0(outPath, "/DESeq2_results_", cohort, "_",
                     ncomps - 1, "-comp.RDS"))
@@ -0,0 +1,40 @@
#!/usr/bin/env python
#SBATCH --job-name="invert digamma"
#SBATCH --out=.slurmlogs/slurm.dr_inv_digamma.%j_%N.out
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=32
#SBATCH --mem=240G
#SBATCH --time=10-00:00:00
#SBATCH --mail-type=ALL
#SBATCH [email protected]

import os
os.environ['OMP_NUM_THREADS'] = '1'  # single-threaded OpenMP and BLAS
if 'SLURM_SUBMIT_DIR' in os.environ:
    os.chdir(os.environ['SLURM_SUBMIT_DIR'])
import numpy as np
import pickle
from models import marker_model as my_model
from tqdm import tqdm
from multiprocessing import Pool
from DR_plots import *
from rpy2.robjects import r, globalenv, numpy2ri
numpy2ri.activate()

# Digamma-transform the counts; 'sub' holds the per-sample sequencing depths.
dg, sub = dgamma(counts)

def pool_f(n):
    # Project onto the first n * 10 principal components, invert the digamma
    # transform back to counts, and store the result as pickle and RDS.
    red = inv_phi(phi(dg, n * 10))
    deg_counts = inv_dg(red, sub, parallel = False)
    with open('data/deg_counts_sd/digamma_deg_' + str(n * 10) + '-comp.pkl', 'wb') as buff:
        pickle.dump(deg_counts, buff)
    rPath = 'data/deg_counts_sd/digamma_deg_' + str(n * 10) + '-comp.RDS'
    nr, nc = deg_counts.shape
    dg_r = r.matrix(deg_counts, nrow=nr, ncol=nc)
    r.saveRDS(dg_r, rPath)

tasks = set(range(np.floor(counts.shape[0] / 10).astype(int)))
tasks = tasks.union(range(10))

if __name__ == "__main__":
    with Pool() as pool:
        _ = list(tqdm(pool.imap_unordered(pool_f, tasks), total=len(tasks)))
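
# Example usage (the file name 'dr_inv_digamma.py' is assumed from the SLURM log
# pattern above). It can be submitted as a batch job or run directly, since the
# #SBATCH lines are plain comments; either way the 'degOutPath' environment
# variable must be set because DR_plots reads other_data.pkl from it on import,
# and the .slurmlogs/ directory must exist for the job log:
#   sbatch dr_inv_digamma.py
#   degOutPath=path/to/data/ python dr_inv_digamma.py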
@@ -0,0 +1,171 @@
import argparse
import matplotlib
if __name__ == "__main__":
    matplotlib.use('Agg')

    parser = argparse.ArgumentParser(description = 'Correlation plot of before and after DR to specified dimension.')
    parser.add_argument('integers', metavar = 'N', type = int, nargs = '+',
                        help = 'The number of dimensions to project to. (multiple possible)')
    parser.add_argument("--res", type = int, default = 200,
                        help = 'Correlation plot resolution.')
    args = parser.parse_args()

import os
os.environ['OMP_NUM_THREADS'] = '1'  # single-threaded OpenMP and BLAS
from matplotlib import pyplot as plt
import numpy as np
import pickle
from tqdm import tqdm_notebook, tqdm
from multiprocessing import Pool
from scipy.stats import gaussian_kde, spearmanr, pearsonr
from scipy.special import digamma
from scipy.optimize import minimize_scalar

with open(os.environ['degOutPath'] + 'other_data.pkl', 'rb') as f:
    data = pickle.load(f)

pca = data['pca']
counts = data['counts']
n = counts.shape[1]

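# beta() below maps raw counts to log posterior-mean proportions under a flat
# Dirichlet(1, ..., 1) prior: log((x_i + 1) / sum_j(x_j + 1)).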
def beta(counts):
    alpha = counts + 1
    sub = np.log(np.sum(alpha, axis = 1, keepdims = True))
    nom = np.log(alpha)
    result = nom - sub
    return result

def inv_beta(log_alpha, seq_depth = 1e8, prior = 0):
    #bound = np.percentile(log_alpha, 20, axis = 1, keepdims = True)
    n = log_alpha.shape[1]
    alpha = np.exp(log_alpha)
    result = alpha*(seq_depth + n) - 1 - prior
    result = np.clip(result, 0, None)
    return result

def re_counts(log_p, seq_depth = 1e8):
    return np.exp(log_p) * seq_depth

def phi(X, ncomp):
    ind = range(ncomp)
    if pca.mean_ is not None:
        X = X - pca.mean_
    X_transformed = np.dot(X, pca.components_[ind, :].T)
    return X_transformed

def inv_phi(X):
    ind = range(X.shape[1])
    return np.dot(X, pca.components_[ind, :]) + pca.mean_

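# dgamma() below returns the expected log-proportions under the same Dirichlet
# posterior, E[log p_i] = digamma(x_i + 1) - digamma(sum_j x_j + n), together
# with the per-sample sequencing depth that is passed along for the inverse transform.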
def dgamma(counts):
    seq_depth = np.sum(counts, axis = 1, keepdims = True)
    alpha = counts + 1
    sub = digamma(seq_depth + counts.shape[1])
    nom = digamma(alpha)
    result = nom - sub
    return result, seq_depth

def proj_O(log_counts):
    n = log_counts.shape[1]
    diff = log_counts.sum(axis = 1, keepdims = True) / n
    return log_counts - diff

def rev_O(o_counts):
    level = np.exp(o_counts).sum(axis = 1, keepdims = True)
    t = np.log(level)
    return o_counts - t

def _objective(l_count, dg, sub):
    # squared error between the digamma transform of exp(l_count) and the target value dg
    alpha = np.exp(l_count) + 1
    nom = digamma(alpha)
    result = nom - sub
    return (result - dg)**2

def _single(dat):
    # numerically invert the digamma transform for a single entry
    lalpha, sub = dat
    res = minimize_scalar(_objective, args = (lalpha, sub), tol = 1e-20)
    return np.round(np.exp(res.x))

def inv_dg(log_alpha, sub = 18, parallel = True):
    results = []
    # Broadcast 'sub' to one entry per sample; the original tuple comparison here
    # could never hold, so one value per sample is assumed to be the intent.
    N = log_alpha.shape[0]
    sub = np.ravel(sub)
    if len(sub) != N:
        sub = np.repeat(sub, N)
    iterator = range(log_alpha.shape[0])
    if parallel:
        iterator = tqdm_notebook(iterator)
    for samp in iterator:
        tasks = [(lalpha, sub[samp]) for lalpha in log_alpha[samp, :]]
        if parallel:
            with Pool() as pool:
                samp_res = pool.map(_single, tasks)
        else:
            samp_res = [_single(t) for t in tasks]
        results.append(np.array(samp_res))
    return np.stack(results, axis = 0)

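# Round-trip sketch (mirrors how the submission script uses these helpers;
# the component count 50 is illustrative):
#   dg, depth = dgamma(counts)
#   reduced = inv_phi(phi(dg, 50))
#   reconstructed = inv_dg(reduced, depth, parallel=False)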
def degenerate(X, ncomp, m=1e8):
    return inv_beta(rev_O(inv_phi(phi(beta(X), ncomp))), m)

def fast_deg(beta_counts, ncomp, m=1e8, prior=0):
    return inv_beta(rev_O(inv_phi(phi(beta_counts, ncomp))), m, prior)

def lin_deg(beta_counts, ncomp):
    return rev_O(inv_phi(phi(beta_counts, ncomp)))

def deplot(x, y, legend = True, ax = None, fig = None, res = 200):
    # fit an array of size [Ndim, Nsamples]
    data = np.vstack([x, y])
    kde = gaussian_kde(data)

    # evaluate on a regular grid
    minx = min(x)
    maxx = max(x)
    miny = min(y)
    maxy = max(y)
    xgrid = np.linspace(minx, maxx, res)
    ygrid = np.linspace(miny, maxy, res)
    Xgrid, Ygrid = np.meshgrid(xgrid, ygrid)
    Z = kde.evaluate(np.vstack([Xgrid.ravel(), Ygrid.ravel()]))

    # Plot the result as an image
    if ax is None:
        ax = plt
    if fig is None:
        fig = plt
    img = ax.imshow(Z.reshape(Xgrid.shape),
                    origin='lower', aspect='auto',
                    extent=[minx, maxx, miny, maxy],
                    cmap='Blues')
    if legend:
        cb = fig.colorbar(img)
        cb.set_label("density")

    # draw diagonal
    vals = [max(minx, miny), min(maxx, maxy)]
    ax.plot(vals, vals, ls = '--', c = '.3')

    # add text
    spearman = spearmanr(x, y)[0]
    pearson = pearsonr(x, y)[0]
    text = ('Spearman Correlation = {:f}' +
            '\nPearson Correlation = {:f}').format(spearman, pearson)
    xpos = (maxx - minx) * .05 + minx
    ypos = - (maxy - miny) * .05 + maxy
    ax.text(xpos, ypos, text,
            verticalalignment = 'top')

# NOTE: 'beta_counts' is not defined anywhere else in this file; computing it
# from the loaded counts here is an assumption about the intended input.
beta_counts = beta(counts)

def save_cor_plot(n):
    deg_counts = lin_deg(beta_counts, n + 1)
    fig = plt.figure()
    ax = fig.add_subplot(111)
    deplot(beta_counts.flatten(), deg_counts.flatten(), ax = ax, res = args.res)
    plt.savefig('data/correlations/' + str(n) + '-comp.png')

if __name__ == "__main__":
    tasks = args.integers
    if len(tasks) == 1:
        save_cor_plot(tasks[0])
    elif len(tasks) > 1:
        with Pool() as pool:
            _ = list(tqdm(pool.imap(save_cor_plot, tasks), total=len(tasks)))
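
# Example command-line use (values are illustrative; 'degOutPath' must point at
# the directory holding other_data.pkl, and data/correlations/ must already exist):
#   degOutPath=path/to/data/ python DR_plots.py 10 50 100 --res 300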