From 207c602c6ee18834d6c16f5069cfc519c37f03e7 Mon Sep 17 00:00:00 2001 From: William Krinsman Date: Wed, 30 May 2018 10:38:13 -0700 Subject: [PATCH 01/17] implemented coroutines with asyncio --- batchspawner/batchspawner.py | 59 +++++++++++++++--------------------- 1 file changed, 25 insertions(+), 34 deletions(-) diff --git a/batchspawner/batchspawner.py b/batchspawner/batchspawner.py index f8081f45..ad8ca817 100644 --- a/batchspawner/batchspawner.py +++ b/batchspawner/batchspawner.py @@ -16,7 +16,7 @@ * job names instead of PIDs """ import pwd -import os +import os, asyncio import xml.etree.ElementTree as ET @@ -177,53 +177,46 @@ def parse_job_id(self, output): def cmd_formatted_for_batch(self): return ' '.join(self.cmd + self.get_args()) - @gen.coroutine - def run_command(self, cmd, input=None, env=None): - proc = Subprocess(cmd, shell=True, env=env, stdin=Subprocess.STREAM, stdout=Subprocess.STREAM,stderr=Subprocess.STREAM) - inbytes = None + async def run_command(self, cmd, input=None, env=None): + proc = await asyncio.create_subprocess_shell(cmd, env=env, + stdin=asyncio.subprocess.PIPE, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE) + inbytes=None + if input: - inbytes = input.encode() - try: - yield proc.stdin.write(inbytes) - except StreamClosedError as exp: - # Apparently harmless - pass - proc.stdin.close() - out = yield proc.stdout.read_until_close() - eout = yield proc.stderr.read_until_close() - proc.stdout.close() - proc.stderr.close() + inbytes=input.encode() + + out, eout = await proc.communicate(input=inbytes) + eout = eout.decode().strip() - try: - err = yield proc.wait_for_exit() - except CalledProcessError: - self.log.error("Subprocess returned exitcode %s" % proc.returncode) + + err = proc.returncode + + if err != 0: + self.log.error("Subprocess returned exitcode %s" % err) self.log.error(eout) raise RuntimeError(eout) - if err != 0: - return err # exit error? 
else: out = out.decode().strip() return out - @gen.coroutine - def _get_batch_script(self, **subvars): + async def _get_batch_script(self, **subvars): """Format batch script from vars""" # Colud be overridden by subclasses, but mainly useful for testing return format_template(self.batch_script, **subvars) - @gen.coroutine - def submit_batch_script(self): + async def submit_batch_script(self): subvars = self.get_req_subvars() cmd = self.exec_prefix + ' ' + self.batch_submit_cmd cmd = format_template(cmd, **subvars) subvars['cmd'] = self.cmd_formatted_for_batch() if hasattr(self, 'user_options'): subvars.update(self.user_options) - script = yield self._get_batch_script(**subvars) + script = await self._get_batch_script(**subvars) self.log.info('Spawner submitting job using ' + cmd) self.log.info('Spawner submitted script:\n' + script) - out = yield self.run_command(cmd, input=script, env=self.get_env()) + out = await self.run_command(cmd, input=script, env=self.get_env()) try: self.log.info('Job submitted. cmd: ' + cmd + ' output: ' + out) self.job_id = self.parse_job_id(out) @@ -238,8 +231,7 @@ def submit_batch_script(self): "and self.job_id as {job_id}." ).tag(config=True) - @gen.coroutine - def read_job_state(self): + async def read_job_state(self): if self.job_id is None or len(self.job_id) == 0: # job not running self.job_status = '' @@ -250,7 +242,7 @@ def read_job_state(self): cmd = format_template(cmd, **subvars) self.log.debug('Spawner querying job: ' + cmd) try: - out = yield self.run_command(cmd) + out = await self.run_command(cmd) self.job_status = out except Exception as e: self.log.error('Error querying job ' + self.job_id) @@ -262,14 +254,13 @@ def read_job_state(self): help="Command to stop/cancel a previously submitted job. Formatted like batch_query_cmd." 
).tag(config=True) - @gen.coroutine - def cancel_batch_job(self): + async def cancel_batch_job(self): subvars = self.get_req_subvars() subvars['job_id'] = self.job_id cmd = self.exec_prefix + ' ' + self.batch_cancel_cmd cmd = format_template(cmd, **subvars) self.log.info('Cancelling job ' + self.job_id + ': ' + cmd) - yield self.run_command(cmd) + await self.run_command(cmd) def load_state(self, state): """load job_id from state""" From 28a72070f957f26c50726aa04fdee0798ce6e4b6 Mon Sep 17 00:00:00 2001 From: William Krinsman Date: Mon, 4 Jun 2018 18:11:56 -0700 Subject: [PATCH 02/17] Made start, stop, poll asyncio coroutines too. --- batchspawner/batchspawner.py | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/batchspawner/batchspawner.py b/batchspawner/batchspawner.py index ad8ca817..81206f04 100644 --- a/batchspawner/batchspawner.py +++ b/batchspawner/batchspawner.py @@ -299,11 +299,10 @@ def state_gethost(self): "Return string, hostname or addr of running job, likely by parsing self.job_status" raise NotImplementedError("Subclass must provide implementation") - @gen.coroutine - def poll(self): + async def poll(self): """Poll the process""" if self.job_id is not None and len(self.job_id) > 0: - yield self.read_job_state() + await self.read_job_state() if self.state_isrunning() or self.state_ispending(): return None else: @@ -319,8 +318,7 @@ def poll(self): help="Polling interval (seconds) to check job state during startup" ).tag(config=True) - @gen.coroutine - def start(self): + async def start(self): """Start the process""" if self.user and self.user.server and self.user.server.port: self.port = self.user.server.port @@ -329,7 +327,7 @@ def start(self): (jupyterhub.version_info >= (0,7) and not self.port): self.port = random_port() self.db.commit() - job = yield self.submit_batch_script() + job = await self.submit_batch_script() # We are called with a timeout, and if the timeout expires this function will # be 
interrupted at the next yield, and self.stop() will be called. @@ -337,7 +335,7 @@ def start(self): # should either raise and Exception or loop forever. assert len(self.job_id) > 0 while True: - yield self.poll() + await self.poll() if self.state_isrunning(): break else: @@ -347,7 +345,7 @@ def start(self): self.log.warn('Job ' + self.job_id + ' neither pending nor running.\n' + self.job_status) assert self.state_ispending() - yield gen.sleep(self.startup_poll_interval) + await gen.sleep(self.startup_poll_interval) self.current_ip = self.state_gethost() if jupyterhub.version_info < (0,7): @@ -361,22 +359,21 @@ def start(self): return self.current_ip, self.port - @gen.coroutine - def stop(self, now=False): + async def stop(self, now=False): """Stop the singleuser server job. Returns immediately after sending job cancellation command if now=True, otherwise tries to confirm that job is no longer running.""" self.log.info("Stopping server job " + self.job_id) - yield self.cancel_batch_job() + await self.cancel_batch_job() if now: return for i in range(10): - yield self.poll() + await self.poll() if not self.state_isrunning(): return - yield gen.sleep(1.0) + await gen.sleep(1.0) if self.job_id: self.log.warn("Notebook server job {0} at {1}:{2} possibly failed to terminate".format( self.job_id, self.current_ip, self.port) From ee961fe03b2ec8cadb83d934e72ed11bdf4c2190 Mon Sep 17 00:00:00 2001 From: William Krinsman Date: Tue, 5 Jun 2018 15:24:39 -0700 Subject: [PATCH 03/17] Maybe better error handling/catching.
--- batchspawner/batchspawner.py | 37 +++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/batchspawner/batchspawner.py b/batchspawner/batchspawner.py index 81206f04..b90786ff 100644 --- a/batchspawner/batchspawner.py +++ b/batchspawner/batchspawner.py @@ -187,23 +187,34 @@ async def run_command(self, cmd, input=None, env=None): if input: inbytes=input.encode() - out, eout = await proc.communicate(input=inbytes) - - eout = eout.decode().strip() - - err = proc.returncode - - if err != 0: - self.log.error("Subprocess returned exitcode %s" % err) - self.log.error(eout) - raise RuntimeError(eout) + try: + out, eout = await proc.communicate(input=inbytes) + except: + self.log.debug("Exception raised when trying to run command: %s" % cmd) + proc.kill() + self.log.debug("Running command failed done kill") + out, eout = await proc.communicate() + out = out.decode().strip() + eout = eout.decode().strip() + self.log.debug("Running command failed done communicate") + self.log.debug("Subprocess returned exitcode %s" % proc.returncode) + self.log.debug("Subprocess returned standard output %s" % out) + self.log.debug("Subprocess returned standard error %s" % eout) + raise else: - out = out.decode().strip() - return out + eout = eout.decode().strip() + err = proc.returncode + if err != 0: + self.log.error("Subprocess returned exitcode %s" % err) + self.log.error(eout) + raise RuntimeError(eout) + + out = out.decode().strip() + return out async def _get_batch_script(self, **subvars): """Format batch script from vars""" - # Colud be overridden by subclasses, but mainly useful for testing + # Could be overridden by subclasses, but mainly useful for testing return format_template(self.batch_script, **subvars) async def submit_batch_script(self): From 673072d4857c48fab29dddf5daaf9091bf6a85a3 Mon Sep 17 00:00:00 2001 From: Richard Darst Date: Sat, 2 Feb 2019 12:18:27 +0200 Subject: [PATCH 04/17] Add documentation of #58, selecting the port
on the remote side - Closes: #126 --- README.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/README.md b/README.md index 3feb88f4..d0bae984 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,7 @@ This package formerly included WrapSpawner and ProfilesSpawner, which provide me ```python c = get_config() c.JupyterHub.spawner_class = 'batchspawner.TorqueSpawner' + import batchspawner.api ``` 3. Depending on the spawner, additional configuration will likely be needed. @@ -52,6 +53,7 @@ to run Jupyter notebooks on an academic supercomputer cluster. ```python # Select the Torque backend and increase the timeout since batch jobs may take time to start + import batchspawner.api c.JupyterHub.spawner_class = 'batchspawner.TorqueSpawner' c.Spawner.http_timeout = 120 @@ -117,6 +119,7 @@ clusters, as well as an option to run a local notebook directly on the jupyterhu ```python # Same initial setup as the previous example + import batchspawner.api c.JupyterHub.spawner_class = 'wrapspawner.ProfilesSpawner' c.Spawner.http_timeout = 120 #------------------------------------------------------------------------------ @@ -171,6 +174,10 @@ Added (developer) Changed +* PR #58 changes logic of port selection, so that it is selected *after* the singleuser server starts. This means that the port number has to be conveyed back to JupyterHub. This requires the following changes: + - `jupyterhub_config.py` *must* explicitely import `batchspawner.api` + - If you override `Spawner.cmd`, note that the default command is now `batchspawner-singleuser`, not the default `jupyterhub-singleuser`. This is to add a hook to report the port number back to the hub. + - If you have installed with `pip install -e`, you will have to re-install so that the new script `batchspawner-singleuser` is added to `$PATH`. * Update minimum requirements to JupyterHub 0.8.1 and Python 3.4. * Update Slurm batch script. Now, the single-user notebook is run in a job step, with a wrapper of `srun`. 
This may need to be removed using `req_srun=''` if you don't want environment variables limited. * Pass the environment dictionary to the queue and cancel commands as well. This is mostly user environment, but may be useful to these commands as well in some cases. #108, #111 If these envioronment variables were used for authentication as an admin, be aware that there are pre-existing security issues because they may be passed to the user via the batch submit command, see #82. From 4f793faf7a8106e50068b4881c50ce0b3b11f677 Mon Sep 17 00:00:00 2001 From: Richard Darst Date: Wed, 2 May 2018 11:13:56 +0300 Subject: [PATCH 05/17] Add progress indicators when spawning - Uses JupyterHub 0.9 feature, no effect for <0.9. - Closes: #81 --- .travis.yml | 1 + batchspawner/batchspawner.py | 20 ++++++++++++++++++++ requirements.txt | 1 + 3 files changed, 22 insertions(+) diff --git a/.travis.yml b/.travis.yml index caf9a7e2..0d8e6f6f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -30,6 +30,7 @@ install: - pip install attrs>17.4.0 - pip install --pre -r jupyterhub/dev-requirements.txt - pip install --pre -e jupyterhub + - pip install --pre -f travis-wheels/wheelhouse -r requirements.txt script: - travis_retry py.test --lf --cov batchspawner batchspawner/tests -v diff --git a/batchspawner/batchspawner.py b/batchspawner/batchspawner.py index 52789bd4..15a06387 100644 --- a/batchspawner/batchspawner.py +++ b/batchspawner/batchspawner.py @@ -15,9 +15,12 @@ * remote execution via submission of templated scripts * job names instead of PIDs """ +import asyncio +from async_generator import async_generator, yield_, yield_from_ import pwd import os import re +import sys import xml.etree.ElementTree as ET @@ -411,6 +414,23 @@ def stop(self, now=False): self.job_id, self.current_ip, self.port) ) + @async_generator + async def progress(self): + while True: + if self.state_ispending(): + await yield_({ + "message": "Pending in queue...", + }) + elif self.state_isrunning(): + await yield_({ + 
"message": "Cluster job running... waiting to connect", + }) + return + else: + await yield_({ + "message": "Unknown status...", + }) + await gen.sleep(.1) class BatchSpawnerRegexStates(BatchSpawnerBase): """Subclass of BatchSpawnerBase that uses config-supplied regular expressions diff --git a/requirements.txt b/requirements.txt index ee0a79e1..55ac7a89 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,3 @@ +async_generator>=1.8 jinja2 jupyterhub>=0.5 From 7fbaee7e73d769e5b6edf9c9c35767c94c18fba9 Mon Sep 17 00:00:00 2001 From: Richard Darst Date: Mon, 17 Jun 2019 10:13:45 +0300 Subject: [PATCH 06/17] Update comments on #141 for clarity --- batchspawner/batchspawner.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/batchspawner/batchspawner.py b/batchspawner/batchspawner.py index 9e320f8f..f6b8bf27 100644 --- a/batchspawner/batchspawner.py +++ b/batchspawner/batchspawner.py @@ -187,6 +187,7 @@ def parse_job_id(self, output): return output def cmd_formatted_for_batch(self): + """The command which is substituted inside of the batch script""" return ' '.join(['batchspawner-singleuser'] + self.cmd + self.get_args()) @gen.coroutine @@ -230,8 +231,11 @@ def _get_batch_script(self, **subvars): @gen.coroutine def submit_batch_script(self): subvars = self.get_req_subvars() + # `cmd` is submitted to the batch system cmd = ' '.join((format_template(self.exec_prefix, **subvars), format_template(self.batch_submit_cmd, **subvars))) + # `subvars['cmd']` is what is run _inside_ the batch script, + # put into the template. 
subvars['cmd'] = self.cmd_formatted_for_batch() if hasattr(self, 'user_options'): subvars.update(self.user_options) From ffd3f93ee0bfcd56dd03e35e75128cee36d251d0 Mon Sep 17 00:00:00 2001 From: Richard Darst Date: Mon, 17 Jun 2019 01:24:01 +0300 Subject: [PATCH 07/17] progress: decrease interval to one second --- batchspawner/batchspawner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/batchspawner/batchspawner.py b/batchspawner/batchspawner.py index 15a06387..5afe2e6a 100644 --- a/batchspawner/batchspawner.py +++ b/batchspawner/batchspawner.py @@ -430,7 +430,7 @@ async def progress(self): await yield_({ "message": "Unknown status...", }) - await gen.sleep(.1) + await gen.sleep(1) class BatchSpawnerRegexStates(BatchSpawnerBase): """Subclass of BatchSpawnerBase that uses config-supplied regular expressions From 581c9de7e76e312b6f46c89bf783c6ac5c34151e Mon Sep 17 00:00:00 2001 From: Richard Darst Date: Mon, 17 Jun 2019 10:18:19 +0300 Subject: [PATCH 08/17] remote port selection: separate wrapper into separate traitlet - Needed in cases where path to the wrapper may need explicit specification. --- batchspawner/batchspawner.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/batchspawner/batchspawner.py b/batchspawner/batchspawner.py index f6b8bf27..f06eaf3f 100644 --- a/batchspawner/batchspawner.py +++ b/batchspawner/batchspawner.py @@ -156,6 +156,12 @@ def _req_keepvars_default(self): "Must include {cmd} which will be replaced with the jupyterhub-singleuser command line." ).tag(config=True) + batchspawner_wrapper = Unicode('batchspawner-singleuser', + help="A wrapper which is capable of special batchspawner setup: currently sets the port on " + "the remote host. Not needed to be set under normal circumstances, unless path needs " + "specification." 
+ ).tag(config=True) + # Raw output of job submission command unless overridden job_id = Unicode() @@ -188,7 +194,7 @@ def parse_job_id(self, output): def cmd_formatted_for_batch(self): """The command which is substituted inside of the batch script""" - return ' '.join(['batchspawner-singleuser'] + self.cmd + self.get_args()) + return ' '.join([self.batchspawner_wrapper] + self.cmd + self.get_args()) @gen.coroutine def run_command(self, cmd, input=None, env=None): From 0aa766aec4e16ed3e873724d0467db5788cac991 Mon Sep 17 00:00:00 2001 From: Richard Darst Date: Thu, 20 Jun 2019 00:58:26 +0300 Subject: [PATCH 09/17] Update Changelog --- README.md | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index d0bae984..fc7e4199 100644 --- a/README.md +++ b/README.md @@ -157,7 +157,7 @@ clusters, as well as an option to run a local notebook directly on the jupyterhu ## Changelog -### dev (requires minimum JupyterHub 0.7.2 and Python 3.4) +### dev (requires minimum JupyterHub 0.9 and Python 3.5) Added (user) @@ -165,23 +165,27 @@ Added (user) * Add new option exec_prefix, which defaults to `sudo -E -u {username}`. This replaces explicit `sudo` in every batch command - changes in local commands may be needed. * New option: `req_keepvars_extra`, which allows keeping extra variables in addition to what is defined by JupyterHub itself (addition of variables to keep instead of replacement). #99 * Add `req_prologue` and `req_epilogue` options to scripts which are inserted before/after the main jupyterhub-singleuser command, which allow for generic setup/cleanup without overriding the entire script. #96 -* SlurmSpawner: add the `req_reservation` option. # +* SlurmSpawner: add the `req_reservation` option. #91 +* Add basic support for JupyterHub progress updates, but this is not used much yet. #86 Added (developer) * Add many more tests. * Add a new page `SPAWNERS.md` which information on specific spawners. 
Begin trying to collect a list of spawner-specific contacts. #97 +* Rename `current_ip` and `current_port` commands to `ip` and `port`. No user impact. #139 +* Update to Python 3.5 `async` / `await` syntax to support JupyterHub progress updates. #90 Changed -* PR #58 changes logic of port selection, so that it is selected *after* the singleuser server starts. This means that the port number has to be conveyed back to JupyterHub. This requires the following changes: +* PR #58 and #141 changes logic of port selection, so that it is selected *after* the singleuser server starts. This means that the port number has to be conveyed back to JupyterHub. This requires the following changes: - `jupyterhub_config.py` *must* explicitely import `batchspawner.api` - - If you override `Spawner.cmd`, note that the default command is now `batchspawner-singleuser`, not the default `jupyterhub-singleuser`. This is to add a hook to report the port number back to the hub. + - Add a new option `batchspawner_singleuser_cmd` which is used as a wrapper in the single-user servers, which conveys the remote port back to JupyterHub. This is now an integral part of the spawn process. - If you have installed with `pip install -e`, you will have to re-install so that the new script `batchspawner-singleuser` is added to `$PATH`. -* Update minimum requirements to JupyterHub 0.8.1 and Python 3.4. +* Update minimum requirements to JupyterHub 0.9 and Python 3.5. #143 * Update Slurm batch script. Now, the single-user notebook is run in a job step, with a wrapper of `srun`. This may need to be removed using `req_srun=''` if you don't want environment variables limited. * Pass the environment dictionary to the queue and cancel commands as well. This is mostly user environment, but may be useful to these commands as well in some cases. 
#108, #111 If these envioronment variables were used for authentication as an admin, be aware that there are pre-existing security issues because they may be passed to the user via the batch submit command, see #82. + Fixed * Improve debugging on failed submission by raising errors including error messages from the commands. #106 From a0ab3d7c8c503a85701117fa6f51411173b8f936 Mon Sep 17 00:00:00 2001 From: Richard Darst Date: Thu, 20 Jun 2019 01:31:37 +0300 Subject: [PATCH 10/17] fixup! remote port selection: separate wrapper into separate traitlet --- batchspawner/batchspawner.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/batchspawner/batchspawner.py b/batchspawner/batchspawner.py index 50b65ba0..4219764d 100644 --- a/batchspawner/batchspawner.py +++ b/batchspawner/batchspawner.py @@ -159,7 +159,7 @@ def _req_keepvars_default(self): "Must include {cmd} which will be replaced with the jupyterhub-singleuser command line." ).tag(config=True) - batchspawner_wrapper = Unicode('batchspawner-singleuser', + batchspawner_singleuser_cmd = Unicode('batchspawner-singleuser', help="A wrapper which is capable of special batchspawner setup: currently sets the port on " "the remote host. Not needed to be set under normal circumstances, unless path needs " "specification." 
@@ -191,7 +191,7 @@ def parse_job_id(self, output): def cmd_formatted_for_batch(self): """The command which is substituted inside of the batch script""" - return ' '.join([self.batchspawner_wrapper] + self.cmd + self.get_args()) + return ' '.join([self.batchspawner_singleuser_cmd] + self.cmd + self.get_args()) async def run_command(self, cmd, input=None, env=None): proc = await asyncio.create_subprocess_shell(cmd, env=env, From a4aaf8f41f72037829eee4ef6d6adcb323b0c30a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Florian=20R=2E=20H=C3=B6lzlwimmer?= Date: Sun, 7 Jul 2019 18:43:23 +0200 Subject: [PATCH 11/17] Circumvent bug in slurm `sbatch --parsable` command (see #148) --- batchspawner/batchspawner.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/batchspawner/batchspawner.py b/batchspawner/batchspawner.py index 5185d618..66722a24 100644 --- a/batchspawner/batchspawner.py +++ b/batchspawner/batchspawner.py @@ -612,6 +612,8 @@ class SlurmSpawner(UserEnvMixin,BatchSpawnerRegexStates): def parse_job_id(self, output): # make sure jobid is really a number try: + # use only last line to circumvent slurm bug + output = output.splitlines()[-1] id = output.split(';')[0] int(id) except Exception as e: From b088e2de5effee648e0b66ccfac17db5694e9262 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Florian=20R=2E=20H=C3=B6lzlwimmer?= Date: Sun, 7 Jul 2019 23:03:12 +0200 Subject: [PATCH 12/17] Add missing information how to setup batchspawner (see issues #129 and #126) --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 22ab2232..6cff47ef 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,7 @@ This package formerly included WrapSpawner and ProfilesSpawner, which provide me ```python c = get_config() + c.JupyterHub.extra_handlers = [(r"/api/batchspawner", 'batchspawner.api.BatchSpawnerAPIHandler')] c.JupyterHub.spawner_class = 'batchspawner.TorqueSpawner' ``` 3. Depending on the spawner, additional configuration will likely be needed. 
From 70f4992e49b8f5456d99a5ac6df92aa75ecf7bc8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Florian=20R=2E=20H=C3=B6lzlwimmer?= Date: Tue, 9 Jul 2019 11:14:54 +0200 Subject: [PATCH 13/17] Change recommendation how to setup batchspawner (see issues #129 and #126) --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 6cff47ef..21d74aed 100644 --- a/README.md +++ b/README.md @@ -17,8 +17,9 @@ This package formerly included WrapSpawner and ProfilesSpawner, which provide me 2. add lines in jupyterhub_config.py for the spawner you intend to use, e.g. ```python + import batchspawner + c = get_config() - c.JupyterHub.extra_handlers = [(r"/api/batchspawner", 'batchspawner.api.BatchSpawnerAPIHandler')] c.JupyterHub.spawner_class = 'batchspawner.TorqueSpawner' ``` 3. Depending on the spawner, additional configuration will likely be needed. From 7539961ad01016bf35bdcb8606461f824258794e Mon Sep 17 00:00:00 2001 From: Richard Darst Date: Wed, 17 Jul 2019 01:24:56 +0300 Subject: [PATCH 14/17] API doc: import batchspawner, not batchspawner.api - importing batchspawner.api could become deprecated in the future, but importing "batchspawner" will always work (and imports .api within it). Recommend only importing batchspawner then. - Add clarifying comment in one of the docs. --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index d0bae984..928e1c9d 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ This package formerly included WrapSpawner and ProfilesSpawner, which provide me ```python c = get_config() c.JupyterHub.spawner_class = 'batchspawner.TorqueSpawner' - import batchspawner.api + import batchspawner # Even though not used, needed to register batchspawner interface ``` 3. Depending on the spawner, additional configuration will likely be needed. @@ -53,7 +53,7 @@ to run Jupyter notebooks on an academic supercomputer cluster.
```python # Select the Torque backend and increase the timeout since batch jobs may take time to start - import batchspawner.api + import batchspawner c.JupyterHub.spawner_class = 'batchspawner.TorqueSpawner' c.Spawner.http_timeout = 120 @@ -119,7 +119,7 @@ clusters, as well as an option to run a local notebook directly on the jupyterhu ```python # Same initial setup as the previous example - import batchspawner.api + import batchspawner c.JupyterHub.spawner_class = 'wrapspawner.ProfilesSpawner' c.Spawner.http_timeout = 120 #------------------------------------------------------------------------------ @@ -175,7 +175,7 @@ Added (developer) Changed * PR #58 changes logic of port selection, so that it is selected *after* the singleuser server starts. This means that the port number has to be conveyed back to JupyterHub. This requires the following changes: - - `jupyterhub_config.py` *must* explicitely import `batchspawner.api` + - `jupyterhub_config.py` *must* explicitely import `batchspawner` - If you override `Spawner.cmd`, note that the default command is now `batchspawner-singleuser`, not the default `jupyterhub-singleuser`. This is to add a hook to report the port number back to the hub. - If you have installed with `pip install -e`, you will have to re-install so that the new script `batchspawner-singleuser` is added to `$PATH`. * Update minimum requirements to JupyterHub 0.8.1 and Python 3.4. From da150bb714a12aa7d4d604c667f8d218a205b74a Mon Sep 17 00:00:00 2001 From: Richard Darst Date: Wed, 17 Jul 2019 01:31:02 +0300 Subject: [PATCH 15/17] Remove "import batchspawner", also done in #130 --- README.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/README.md b/README.md index 21d74aed..22ab2232 100644 --- a/README.md +++ b/README.md @@ -17,8 +17,6 @@ This package formerly included WrapSpawner and ProfilesSpawner, which provide me 2. add lines in jupyterhub_config.py for the spawner you intend to use, e.g. 
```python - import batchspawner - c = get_config() c.JupyterHub.spawner_class = 'batchspawner.TorqueSpawner' ``` From 00d5b11ba3523e1c8c372dd12daabf7833dc8855 Mon Sep 17 00:00:00 2001 From: Richard Darst Date: Wed, 17 Jul 2019 13:29:35 +0300 Subject: [PATCH 16/17] README.md: Add some debugging hints --- README.md | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/README.md b/README.md index 22ab2232..c14507c0 100644 --- a/README.md +++ b/README.md @@ -152,6 +152,47 @@ clusters, as well as an option to run a local notebook directly on the jupyterhu ``` +## Debugging batchspawner + +Sometimes it can be hard to debug batchspawner, but it's not really hard +once you know how the pieces interact. Check the following places for +error messages: + +* Check the JupyterHub logs for errors. + +* Check the JupyterHub logs for the batch script that got submitted + and the command used to submit it. Are these correct? (Note that + there are submission environment variables too, which aren't + displayed.) + +* At this point, it's a matter of checking the batch system. Is the + job ever scheduled? Does it run? Does it succeed? Check the batch + system status and output of the job. The most common failure + patterns are a) job never starting due to bad scheduler options, b) + job waiting in the queue beyond the `start_timeout`, causing + JupyterHub to kill the job. + +* At this point the job starts. Does it fail immediately, or before + Jupyter starts? Check the scheduler output files (stdout/stderr of + the job), wherever it is stored. To debug the job script, you can + add debugging into the batch script, such as an `env` or `set + -x`. + +* At this point Jupyter itself starts - check its error messages. Is + it starting with the right options? Can it communicate with the + hub? At this point there usually isn't anything + batchspawner-specific, with the one exception below.
The error log + would be in the batch script output (same file as above). There may + also be clues in the JupyterHub logfile. + +Common problems: + +* Did you `import batchspawner` in the `jupyterhub_config.py` file? + This is needed in order to activate the batchspawner API in + JupyterHub. + + + ## Changelog ### dev (requires minimum JupyterHub 0.7.2 and Python 3.4) From 14dbd4bbbe74f03f90fa3000940d760d9af77b7f Mon Sep 17 00:00:00 2001 From: Richard Darst Date: Fri, 19 Jul 2019 12:40:17 +0300 Subject: [PATCH 17/17] Remove extraneous space --- batchspawner/batchspawner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/batchspawner/batchspawner.py b/batchspawner/batchspawner.py index 1ecaf475..b79dd05c 100644 --- a/batchspawner/batchspawner.py +++ b/batchspawner/batchspawner.py @@ -225,7 +225,7 @@ async def run_command(self, cmd, input=None, env=None): self.log.error("Subprocess returned exitcode %s" % err) self.log.error(eout) raise RuntimeError(eout) - + out = out.decode().strip() return out