-
Notifications
You must be signed in to change notification settings - Fork 2
/
run_GSEA.qsub
144 lines (125 loc) · 4.8 KB
/
run_GSEA.qsub
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
#!/bin/bash -l
#$ -S /bin/bash
#$ -cwd
#$ -j y
#$ -m eas
#
# qsub script to run GSEA using Java command-line interface
# Adam Gower
# Print a banner identifying this job (start time, node, job ID/name and
# array task index) so that the log can be matched to its scheduler record.
# Reads JOB_ID, JOB_NAME and SGE_TASK_ID from the environment.
print_job_banner() {
  echo "=========================================================="
  echo "Starting on : $(date)"
  echo "Running on node : $(hostname)"
  echo "Current job ID : $JOB_ID"
  echo "Current job name : $JOB_NAME"
  # Grid Engine exports the array task index to the job as SGE_TASK_ID;
  # TASK_ID is only a pseudo-variable usable in -o/-e paths, so it is kept
  # purely as a fallback in case it was exported by hand.
  echo "Task index number : ${SGE_TASK_ID:-${TASK_ID}}"
  echo "=========================================================="
}
print_job_banner
# Load and list R module
module load R/3.6.0
module list
# ----------------------------------------------------------------------------
# Preranked GSEA driver
#
# Positional arguments:
#   $1  GSEA version (selects the .jar file or the GSEA_<version> folder)
#   $2  .rnk filename (results are written next to this file)
#   $3  GSEA parameter filename (tab-delimited; "chip" and "gmx" rows are read)
#   $4  textual description of the comparison
#   $5  optional FDR q cutoff, passed through to postprocess_GSEA.R
# Environment:
#   GSEA_PATH  folder holding the GSEA .jar / GSEA_<version> folder and
#              postprocess_GSEA.R (defaults to the current working directory)
#   TMPDIR     parent folder for the scratch directory (defaults to /tmp)
# Exit status: 0 on success, 1 on a usage error or any failed step.
# ----------------------------------------------------------------------------

# Print the usage statement to stderr.
usage() {
  {
    echo -n "Usage: run_GSEA.qsub [GSEA version] [.rnk filename] "
    echo -n "[GSEA parameter filename] [textual description of comparison] "
    echo "[optional FDR q cutoff]"
  } >&2
}

# Print the absolute path of an existing directory ($1);
# returns non-zero (printing nothing) if it cannot be entered.
absolute_dir() {
  (CDPATH= cd -- "$1" 2>/dev/null && pwd)
}

# Return 0 if GSEA version $1 predates 4.x (run from a .jar file),
# non-zero otherwise (run through gsea-cli.sh).
is_legacy_gsea() {
  local version="$1"
  # Leading run of digits = major version (e.g. "2-2.2.4" -> 2, "4.1.0" -> 4)
  local major="${version%%[!0-9]*}"
  if [[ -n "${major}" ]]; then
    # Compare numerically: a string comparison would rank "10" before "4"
    ((10#${major} < 4))
  else
    # No leading number: keep the historical string comparison
    [[ "${version}" < 4 ]]
  fi
}

# Print a report label built from the comparison text ($1) by replacing
# hyphens (illegal in CLI arguments) and other problematic characters
# with legal characters/phrases.
make_report_label() {
  local label="$1"
  label="${label//+\/+/_wildtype}"
  label="${label//+\/-/_het}"
  label="${label//-\/-/_null}"
  # Any slash that is left would be taken as a path separator in filenames
  label="${label//\//_}"
  label="${label// /_}"
  label="${label//:/_}"
  label="${label//-/_}"
  printf '%s\n' "${label}"
}

# Run preranked GSEA, postprocess the run folder, and deliver the zipped
# folder and the tab-delimited table next to the .rnk input file.
main() {
  # Check for argument arity and terminate with usage statement if incorrect
  if [[ $# -lt 4 ]]; then
    usage
    return 1
  fi

  # Otherwise, extract command-line arguments to variables
  local gsea_version="${1}"
  local rnk_file="${2}"
  local parameter_file="${3}"
  local comparison="${4}"
  local q_cutoff="${5}"

  local input_file
  for input_file in "${rnk_file}" "${parameter_file}"; do
    if [[ ! -r "${input_file}" ]]; then
      echo "Input file ${input_file} could not be read; terminating." >&2
      return 1
    fi
  done

  # Place output in the same path as the RNK input file.  The path is made
  # absolute because it is used after changing to the scratch directory.
  local output_path
  if ! output_path="$(absolute_dir "$(dirname -- "${rnk_file}")")"; then
    echo "Output path for ${rnk_file} could not be resolved; terminating." >&2
    return 1
  fi

  # Default GSEA_PATH to the current working directory if it was not set,
  # and make it absolute for the same reason as above.
  local gsea_path
  if ! gsea_path="$(absolute_dir "${GSEA_PATH:-.}")"; then
    echo "GSEA_PATH ${GSEA_PATH:-.} could not be found; terminating." >&2
    return 1
  fi
  export GSEA_PATH="${gsea_path}"

  # Define filenames
  local postprocess_R_script="${GSEA_PATH}/postprocess_GSEA.R"
  local jar_file="" gsea_cli_script=""
  if is_legacy_gsea "${gsea_version}"; then
    # Check for .jar file before proceeding (first match wins if several)
    local candidate
    for candidate in "${GSEA_PATH}"/*"${gsea_version}".jar; do
      if [[ -e "${candidate}" ]]; then
        jar_file="${candidate}"
        break
      fi
    done
    if [[ -z "${jar_file}" ]]; then
      {
        echo -n "A .jar file could not be found "
        echo "for GSEA version ${gsea_version}; terminating."
      } >&2
      return 1
    fi
  else
    # Check for gsea-cli.sh before proceeding
    gsea_cli_script="${GSEA_PATH}/GSEA_${gsea_version}/gsea-cli.sh"
    if [[ ! -e "${gsea_cli_script}" ]]; then
      {
        echo -n "File gsea-cli.sh could not be found "
        echo "for GSEA version ${gsea_version}; terminating."
      } >&2
      return 1
    fi
  fi

  # Extract .chip and .gmx filenames from parameter file;
  # the gmx entry is a list separated by commas and/or whitespace
  local chip_file gmx_list
  chip_file="$(grep "^chip" "${parameter_file}" | cut -f2)"
  gmx_list="$(grep "^gmx" "${parameter_file}" | cut -f2)"
  local -a gmx_files=()
  IFS=$' \t\n' read -r -d '' -a gmx_files <<< "${gmx_list//,/ }"

  local rpt_label
  rpt_label="$(make_report_label "${comparison}")"

  # Work in a private scratch directory so that cleanup can never touch
  # anything that this run did not create
  local work_dir
  if ! work_dir="$(mktemp -d "${TMPDIR:-/tmp}/run_GSEA.XXXXXX")"; then
    echo "A scratch directory could not be created; terminating." >&2
    return 1
  fi

  # Name the tab-delimited output file after the report label
  local output_file="${work_dir}/${rpt_label}.txt"

  echo "Performing preranked GSEA"
  echo " Using GSEA version: ${gsea_version}"
  echo " Using ranked list file: ${rnk_file}"
  echo " Using GSEA parameters in file: ${parameter_file}"
  echo " Features ranked according to: ${comparison}"
  echo

  # Copy .chip and .gmx files to scratch folder for convenience
  # (best effort: a failed copy is reported by cp but is not fatal)
  if [[ -n "${chip_file}" ]]; then
    cp -av -- "${chip_file}" "${work_dir}/"
  fi
  if [[ ${#gmx_files[@]} -gt 0 ]]; then
    cp -av -- "${gmx_files[@]}" "${work_dir}/"
  fi
  # Copy .rnk to renamed tempfile for compatibility with command-line GSEA
  if ! cp -av -- "${rnk_file}" "${work_dir}/${rpt_label}.rnk"; then
    echo "Could not copy ${rnk_file}; terminating." >&2
    return 1
  fi
  # Copy parameter file to renamed tempfile
  if ! cp -av -- "${parameter_file}" \
    "${work_dir}/${rpt_label}_parameters.txt"; then
    echo "Could not copy ${parameter_file}; terminating." >&2
    return 1
  fi

  # Change to scratch folder
  local start_dir="${PWD}"
  if ! cd -- "${work_dir}"; then
    echo "Could not change to ${work_dir}; terminating." >&2
    return 1
  fi

  if [[ -n "${jar_file}" ]]; then
    # Run preranked GSEA with 8 GB RAM
    # (originally set to 1GB RAM, but was running out as of MSigDB 6.0;
    # then set to 4GB RAM, but running out with MSigDB 7.4 including c7 sets)
    java -cp "${jar_file}" -Xmx8g xtools.gsea.GseaPreranked \
      -rnk "${rpt_label}.rnk" \
      -rpt_label "${rpt_label}" \
      -out . \
      -gui false \
      -param_file "${rpt_label}_parameters.txt"
  else
    # Note: java 11 is specifically listed as a dependency for GSEA 4.x
    module load java/11.0.4
    module list
    bash "${gsea_cli_script}" GseaPreranked \
      -rnk "${rpt_label}.rnk" \
      -rpt_label "${rpt_label}" \
      -out . \
      -param_file "${rpt_label}_parameters.txt"
  fi

  # Rename run folder; its absence means that the GSEA run did not complete
  local -a run_dirs=("${rpt_label}.GseaPreranked".*)
  if [[ ! -d "${run_dirs[0]}" ]]; then
    {
      echo -n "GSEA did not produce a run folder for ${rpt_label} "
      echo "(scratch files are in ${work_dir}); terminating."
    } >&2
    return 1
  fi
  if ! mv -v -- "${run_dirs[0]}" "${rpt_label}"; then
    echo "Could not rename the GSEA run folder; terminating." >&2
    return 1
  fi

  # Postprocess GSEA run folder to remove extraneous files, re-annotate HTML,
  # and create a file for Excel import
  R --vanilla < "${postprocess_R_script}" \
    --args "${rpt_label}" "${comparison}" "${output_file}" "${q_cutoff}"

  # Zip the GSEA folder
  zip -r "${rpt_label}.zip" "${rpt_label}"/

  # Deliver results; keep the scratch files if delivery fails
  if ! rsync -av "${rpt_label}.zip" "${output_file}" "${output_path}"/; then
    {
      echo -n "Results could not be copied to ${output_path} "
      echo "(scratch files are in ${work_dir}); terminating."
    } >&2
    return 1
  fi

  # Clean up: leave the scratch directory before removing it
  cd -- "${start_dir}" 2>/dev/null || cd /
  rm -rf -- "${work_dir:?}"

  echo "=========================================================="
  echo "Finished on : $(date)"
  echo "=========================================================="
}

# The status of main is the exit status of the script
main "$@"