Data analyses for "13C tracer analysis reveals the landscape of metabolic checkpoints in human CD8+ T cell differentiation and exhaustion" (Kirchmair et al., Frontiers in Immunology 2023)
# source lib/make_env.sh # initial code to make conda envs
# Recreate both conda environments from their specification files.
conda env create -n cd8 -f env/cd8.yml
conda env create -n cd8_ngscm -f env/cd8_ngscm.yml
# Install additional R packages inside the cd8 environment.
conda activate cd8
Rscript -e 'devtools::install_github("AlexanderKirchmair/datamisc")'
Rscript -e 'devtools::install_github("AlexanderKirchmair/c13ms")'
Rscript -e 'install.packages("qualpalr", repos = "https://cran.wu.ac.at/")'
Rscript -e 'devtools::install_github("AlexanderKirchmair/DeLuciatoR")' # version forked from https://github.com/infotroph/DeLuciatoR
# -p: do not fail when logs/ already exists (e.g. when the setup is re-run).
mkdir -p logs
Set up NGSCheckMate-1.0.0
# Clone NGSCheckMate into lib/ and write its configuration file.
# Everything is addressed relative to the project root, so the working
# directory never changes. (Running `cd lib` first and then writing to
# lib/NGSCheckMate/ncm.conf would target the non-existent lib/lib/NGSCheckMate/.)
git clone https://github.com/parklab/NGSCheckMate.git lib/NGSCheckMate
# One grouped redirect creates ncm.conf with all three settings at once.
{
  echo 'SAMTOOLS=samtools'
  echo 'BCFTOOLS=bcftools'
  echo 'REF=/data/genomes/hg38/fasta/gencode/GRCh38.primary_assembly.genome.fa'
} > lib/NGSCheckMate/ncm.conf
conda activate cd8
Memory differentiation samples (GSE234099):
# Submit one download job per data row of tables/GSE234099.txt.
mkdir -p data/rnaseq/MEM/00_RAW
# awk skips the header row and emits column 2 followed by column 1; the two
# values are read as separate fields instead of being joined with "-" and split
# again, which would mangle values that themselves contain a hyphen.
# The table is read on fd 3 so that qsub/qstat cannot consume it via stdin.
while read -r col2 col1 <&3; do
  qsub lib/run_download.sh "$col2" "$col1" data/rnaseq/MEM/00_RAW ~/myScratch/tmp
  # Throttle: poll every 3 s while qstat lists more than 3 DOWNLOAD jobs.
  while [ "$(qstat -s pr | grep -w -c "DOWNLOAD")" -gt 3 ]; do sleep 3; done
done 3< <(awk 'NR>1 && NF>=2 {print $2, $1}' "tables/GSE234099.txt")
Exhaustion samples (GSE234100):
# Submit one download job per data row of tables/GSE234100.txt.
mkdir -p data/rnaseq/EXH/00_RAW
# awk skips the header row and emits column 2 followed by column 1; the two
# values are read as separate fields instead of being joined with "-" and split
# again, which would mangle values that themselves contain a hyphen.
# The table is read on fd 3 so that qsub/qstat cannot consume it via stdin.
while read -r col2 col1 <&3; do
  qsub lib/run_download.sh "$col2" "$col1" data/rnaseq/EXH/00_RAW ~/myScratch/tmp
  # Throttle: poll every 3 s while qstat lists more than 3 DOWNLOAD jobs.
  while [ "$(qstat -s pr | grep -w -c "DOWNLOAD")" -gt 3 ]; do sleep 3; done
done 3< <(awk 'NR>1 && NF>=2 {print $2, $1}' "tables/GSE234100.txt")
Trimming:
# Submit one trimming job per raw FASTQ file, first for MEM, then for EXH.
for dataset in MEM EXH; do
  raw_dir="data/rnaseq/${dataset}/00_RAW"
  trimmed_dir="data/rnaseq/${dataset}/01_TRIMMED"
  # -p: do not fail when the output directory already exists.
  mkdir -p "$trimmed_dir"
  for file in "$raw_dir"/*fastq.gz; do
    # An unmatched glob stays literal; do not submit a job for a missing file.
    [ -e "$file" ] || continue
    qsub lib/run_trimming.sh "$file" "$trimmed_dir"
  done
done
Read alignment and quantification using the nf-core/rnaseq-3.4 pipeline (set genome paths in lib/run_rnaseq.sh):
# Run the pipeline wrapper once per dataset, then move the .nextflow.log it
# leaves in the working directory into logs/ under a dataset-specific name.
# Each entry pairs the lowercase tag used in file names with the data directory.
for pair in mem:MEM exh:EXH; do
  tag=${pair%%:*}
  dataset=${pair##*:}
  bash -i lib/run_rnaseq.sh "tables/samplesheet_${tag}.csv" "data/rnaseq/${dataset}/02_NF_results"
  mv .nextflow.log "logs/${tag}.nextflow.log"
done
Check if the paired samples are matching with NGSCheckMate-1.0.0:
# Run NGSCheckMate on the BAM files of each dataset, then plot the results.
conda activate cd8_ngscm
for dataset in MEM EXH; do
  check_dir="data/rnaseq/${dataset}/samplecheck"
  # -p: do not fail when the output directory already exists.
  mkdir -p "$check_dir"
  # List of input BAM files, one path per line.
  ls -d "data/rnaseq/${dataset}/02_NF_results/star_salmon/"*bam > "${check_dir}/files.txt"
  # -sync y makes qsub block until the job has finished; without it qsub
  # returns right after submission and the mv below would run before the job
  # has written anything.
  # NOTE(review): assumes an SGE-compatible qsub (as `qstat -s pr` suggests) - confirm.
  qsub -sync y lib/run_NGSCheckMate.sh "${check_dir}/files.txt" "$check_dir"
  # r_script.r.Rout is expected in the current directory once the job is done.
  mv r_script.r.Rout "${check_dir}/"
done
Rscript lib/plot_NGSCheckMate.R
Gene sets were prepared by running Rscript lib/prepare_genesets.R.
13C metabolomics data: data/metabolomics
Seahorse data: data/seahorse
The main analyses can be reproduced by rendering the .Rmd files:
conda activate cd8
# Render the analysis notebooks in numerical order, each in its own Rscript call.
for notebook in \
  01-RNA-Differentiation \
  02-13C-Differentiation \
  03-RNA-Exhaustion \
  04-13C-Exhaustion \
  05-RNA-Exhaustion-Public \
  06-RNA-Mitochondria \
  07-Public-Dataset-Comparison
do
  Rscript -e "rmarkdown::render('analyses/${notebook}.Rmd')"
done
To reproduce the final figures and tables, run:
conda activate cd8
# Render the notebook that produces the final figures and tables.
results_rmd='analyses/08-Results.Rmd'
Rscript -e "rmarkdown::render('${results_rmd}')"