-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathrun-jupyterGPU.sbatch
executable file
·87 lines (55 loc) · 2.08 KB
/
run-jupyterGPU.sbatch
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
#!/bin/bash
# Slurm batch script that starts a Jupyter notebook server on a GPU compute
# node and prints instructions for reaching it through SSH tunnels.
# Submit with: sbatch run-jupyterGPU.sbatch
#
# Resource request: one node, 2 CPUs per task, 12GB of memory, a 6 hour
# time limit and a GPU (--gres=gpu).
#SBATCH --job-name=jupyterGPU
#SBATCH --nodes=1
#SBATCH --cpus-per-task=2
#SBATCH --mem=12GB
#SBATCH --time=6:00:00
#SBATCH --gres=gpu
# stdout and stderr use the same filename pattern, so both end up in one log.
#SBATCH --output="%A_%x_%u.out"
#SBATCH --error="%A_%x_%u.out"
## Load the necessary modules (terrible hack for tensorflow -- pytorch can be
## installed using conda alone, please tell me if there is a better way).
## The steps run in this order: load the tensorflow module, then swap
## python3/intel for anaconda3/5.3.1.
## NOTE(review): presumably python3/intel is pulled in by the tensorflow
## module, which is why it is swapped out afterwards -- confirm on the cluster.
module load tensorflow/python3.6/1.5.0
module swap python3/intel anaconda3/5.3.1
## Activate the anaconda environment named "ml".
source activate ml
## Choose a random port number for the notebook server. Abort if none could
## be drawn: the tunnels and the server further down all depend on it.
port=$(shuf -i 6000-9999 -n 1) || {
  printf 'error: could not choose a random port with shuf\n' >&2
  exit 1
}

## Set up reverse tunnels between the compute machine and the login machines,
## so that connections to <login host>:$port reach localhost:$port here.
##   -N  run no remote command
##   -f  go to the background once the connection is up
##   -o ExitOnForwardFailure=yes
##       make ssh fail (instead of only warning) when the port cannot be
##       forwarded, e.g. because it is already in use on the login machine
## A failed tunnel is reported on stderr but does not stop the job, since the
## other login machine may still be reachable.
for login_host in log-0 log-1; do
  /usr/bin/ssh -N -f -o ExitOnForwardFailure=yes \
    -R "${port}:localhost:${port}" "$login_host" ||
    printf 'warning: could not open tunnel on %s for port %s\n' \
      "$login_host" "$port" >&2
done
# Print instructions telling the user how to reach the notebook server.
# Globals:  port (read) - port the notebook server and tunnels use
#           USER (read) - login name inserted into the ssh commands
# Outputs:  the instructions on stdout, including this node's hostname and
#           the current date.
# NOTE(review): the on-campus address was garbled to "[email protected]" in the
# copy of this script under review; it is restored here as
# $USER@prince.hpc.nyu.edu -- confirm the login hostname.
print_connection_instructions() {
  cat <<EOF
Jupyter server is running on: $(hostname)
Job starts at: $(date)
Step 1 :
If you are working in NYU campus, please open an iTerm window, run command
ssh -NL $port:localhost:$port $USER@prince.hpc.nyu.edu
If you are working off campus, you should already have ssh tunneling setup through HPC bastion host,
that you can directly login to prince with command
ssh $USER@prince
Please open an iTerm window, run command
ssh -NL $port:localhost:$port $USER@prince
Step 2:
Keep the iTerm windows in the previouse step open. Now open browser, find the line with
The Jupyter Notebook is running at: $(hostname)
the URL is something: http://localhost:${port}/?token=XXXXXXXX (see your token below)
you should be able to connect to jupyter notebook running remotly on prince compute node with above url
EOF
}
print_connection_instructions
## Drop any XDG_RUNTIME_DIR inherited by the job, then point it at
## SLURM_JOBTMP when that variable is set and non-empty.
## NOTE(review): presumably needed because Jupyter keeps its runtime files
## under XDG_RUNTIME_DIR and the inherited path is not usable on the compute
## node -- confirm.
unset XDG_RUNTIME_DIR
[[ -z "$SLURM_JOBTMP" ]] || export XDG_RUNTIME_DIR="$SLURM_JOBTMP"
## Print some info about the system to the job log.

# Print the path that tool $1 resolves to, or a clear notice when it is not
# on PATH. Uses the shell builtin `command -v` instead of the external
# `which`, which may be missing and reports failures inconsistently.
# Arguments: $1 - name of the tool to look up
# Outputs:   one line on stdout
report_tool_path() {
  command -v "$1" || printf '%s: not found on PATH\n' "$1"
}

printf "\n\n===================SYSTEM INFO===================\n\n"
printf "\nPython Stuff:\n\n"
report_tool_path python
report_tool_path jupyter
printf "\n\nConda Export:\n\n"
conda env export
printf "\n\nGPU Stuff:\n\n"
nvidia-smi
printf "\n\n\n===========CALLING NOTEBOOK==========\n\n\n"
## Launch the notebook server in the foreground on the port chosen earlier in
## this script. The expansion is quoted, and ${port:?} stops the script with a
## clear message if the port is unset or empty rather than letting jupyter
## fail on a missing --port argument.
jupyter notebook --no-browser --port "${port:?no port was chosen for the notebook server}"