Use ScienceWorld v1.1 #3

Open · wants to merge 2 commits into base: main
30 changes: 15 additions & 15 deletions Dockerfile
@@ -26,29 +26,29 @@ ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64
 ENV NVIDIA_VISIBLE_DEVICES=all
 ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
 
-EXPOSE 5001 8883 8888 9000
-EXPOSE 25300-25600
+#EXPOSE 5001 8883 8888 9000
+#EXPOSE 25300-25600
 USER root:root
 WORKDIR /opt
-RUN wget http://nlp.stanford.edu/software/stanford-corenlp-full-2018-10-05.zip
+#RUN wget http://nlp.stanford.edu/software/stanford-corenlp-full-2018-10-05.zip
 RUN apt-get update \
  && apt-get install -y --no-install-recommends default-jre
-RUN apt-get install -y --no-install-recommends unzip
-RUN unzip stanford-corenlp-full-2018-10-05.zip \
- && mv $(ls -d stanford-corenlp-full-*/) corenlp \
- && rm *.zip
-EXPOSE 5002-5100
-EXPOSE 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018
-EXPOSE 50022 50023 50024 50025 50026 50027 50028 50029 50030 50031 50032 50034 50035 50036 50037 50038 50039
-EXPOSE 50022 50032 50042 50052 50062 50072 50082 50092 50102 50112 50122 50132 50142 50152 50162 50172 50182
-COPY . /tdqn-scienceworld
-RUN pip install -r /tdqn-scienceworld/requirements.txt
+#RUN apt-get install -y --no-install-recommends unzip
+#RUN unzip stanford-corenlp-full-2018-10-05.zip \
+# && mv $(ls -d stanford-corenlp-full-*/) corenlp \
+# && rm *.zip
+#EXPOSE 5002-5100
+#EXPOSE 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018
+#EXPOSE 50022 50023 50024 50025 50026 50027 50028 50029 50030 50031 50032 50034 50035 50036 50037 50038 50039
+#EXPOSE 50022 50032 50042 50052 50062 50072 50082 50092 50102 50112 50122 50132 50142 50152 50162 50172 50182
+COPY . /drrn-scienceworld
+RUN pip install -r /drrn-scienceworld/requirements.txt
 
 RUN pip3 install torch==1.10.1+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html
 
 WORKDIR /
 
-ENV PYTHONPATH=/tdqn-scienceworld/drrn
+ENV PYTHONPATH=/drrn-scienceworld/drrn
 ENV HOME=""
 
-WORKDIR /tdqn-scienceworld/drrn
+WORKDIR /drrn-scienceworld/drrn
22 changes: 10 additions & 12 deletions README.md
@@ -1,6 +1,6 @@
 # DRRN Agent (Modified for ScienceWorld)
 
-This repository contains a reference implementation DRRN as mentioned in [Interactive Fiction Games: A Colossal Adventure](https://arxiv.org/abs/1909.05398), that has been modified for use with the [ScienceWorld](https://www.github.com/allenai/ScienceWorld) environment.
+This repository contains a reference implementation of DRRN, as described in [Interactive Fiction Games: A Colossal Adventure](https://arxiv.org/abs/1909.05398), modified for use with the [ScienceWorld](https://www.github.com/allenai/ScienceWorld) environment.
 
 
 # Quickstart
@@ -12,20 +12,20 @@ git clone https://github.com/cognitiveailab/drrn-scienceworld.git
 cd drrn-scienceworld
 
 # Create conda environment
-conda create --name drrn1 python=3.8
-conda activate drrn1
+conda create --name drrn-scienceworld python=3.8
+conda activate drrn-scienceworld
 pip install -r requirements.txt
 
 ```
 
-An example of training the DRRN model (using 8 threads, for 10k training steps, evaluating on dev every 1k steps):
+An example of training the DRRN model (using 8 parallel envs, for 10k training steps, evaluating on dev every 1k steps):
 ```bash
 cd drrn
-python3 train-scienceworld.py --num_envs=8 --max_steps=10000 --task_idx=13 --simplification_str=easy --priority_fraction=0.50 --memory_size=100000 --env_step_limit=100 --eval_freq=1000 --eval_set=dev --historySavePrefix=drrn-task13-results-seed0-dev
+python train-scienceworld.py --num_envs=8 --max_steps=10000 --task_idx=13 --simplification_str=easy --priority_fraction=0.50 --memory_size=100000 --env_step_limit=100 --eval_freq=1000 --eval_set=dev --historySavePrefix=drrn-task13-results-seed0-dev
 ```
 Here:
-- **max_steps:** Maximum number of steps to train for (per environment thread)
-- **num_envs:** The number of environment threads to simultaneously use during training (8 is a common number)
+- **max_steps:** Maximum number of steps to train for (per environment)
+- **num_envs:** The number of environments to use simultaneously during training (8 is a common number)
 - **task_idx:** The ScienceWorld task index (0-29). *See **task list** below*
 - **env_step_limit:** the maximum number of steps to run an environment for before it times out and resets (100 is typical)
 - **eval_freq:** the number of steps between evaluations
@@ -37,7 +37,7 @@ This configuration generally takes about 1-2 hours to run (to 10k steps).
 
 ## ScienceWorld Task List
 ```
-TASK LIST:
+TASK LIST:
 0: task-1-boil (30 variations)
 1: task-1-change-the-state-of-matter-of (30 variations)
 2: task-1-freeze (30 variations)
@@ -71,15 +71,13 @@ TASK LIST:
 ```
 
 # Hardware requirements
-This code generally runs best with at least num_threads+1 CPU cores (e.g. about 10 cores for an 8-thread environment).
+This code generally runs best with at least num_envs+1 CPU cores.
 
-The GPU memory requirements are variable, but generally stay below 8gb.
+The GPU memory requirements are variable, but generally stay below 8 GB.
 
 
 # Known issues
 
-- *Many threads*: If you are attempting to use a large number of threads (e.g. 20+), you may need to add an additional several-second delay after the threads spawn before the rest of the program runs. (The ScienceWorld API already adds a 5 second delay, which handles small numbers of threads well.)
-
 - *Model saving with many steps*: Very occasionally, on very long runs (generally 1M+ steps), the periodic pickling of the model when saving checkpoints runs into issues and freezes. The cause is unknown; as a workaround the save has been wrapped in a timeout, so that if saving takes longer than 2 minutes the checkpoint is skipped and training continues. Subsequent checkpoints usually save without issue.
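The timeout workaround described under known issues can be sketched with a signal-based context manager. This is a hypothetical minimal version (Unix-only, since it relies on SIGALRM); the repository's actual `timeout` helper may differ:

```python
import signal
from contextlib import contextmanager

@contextmanager
def timeout(seconds):
    """Raise TimeoutError if the wrapped block runs longer than `seconds`."""
    def handler(signum, frame):
        raise TimeoutError("operation exceeded %d s" % seconds)
    old_handler = signal.signal(signal.SIGALRM, handler)
    signal.alarm(seconds)
    try:
        yield
    finally:
        signal.alarm(0)  # cancel any pending alarm
        signal.signal(signal.SIGALRM, old_handler)

# Usage pattern: skip a stuck checkpoint save instead of freezing training.
save_succeeded = False
try:
    with timeout(120):
        # pickle.dump(memory, open(checkpoint_path, 'wb')) would go here
        save_succeeded = True
except TimeoutError:
    pass  # checkpoint skipped; training continues
```

The design choice is that a missed checkpoint is cheaper than a hung run: the next periodic save usually succeeds anyway.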
18 changes: 10 additions & 8 deletions beaker/batchSubmission.py
@@ -11,7 +11,7 @@
 - name: sciworld-may31-drrn-8x100k-taskTASKID-seedSEEDNUM
   image:
     beaker: peterj/sciworld-drrn2c
-  arguments: [python3, train-scienceworld.py, --num_envs=8, --max_steps=100000, --task_idx=TASKID, --simplification_str=easy, --priority_fraction=0.50, --memory_size=100000, --env_step_limit=100, --log_freq=100, --checkpoint_freq=100000, --eval_freq=2000, --seed=SEEDNUM, --maxHistoriesPerFile=1000, --historySavePrefix=/results1/drrn1/results-seedSEEDNUM]
+  arguments: [python3, train-scienceworld.py, --num_envs=8, --max_steps=100000, --task_idx=TASKID, --simplification_str=easy, --priority_fraction=0.50, --memory_size=100000, --env_step_limit=100, --log_freq=100, --checkpoint_freq=5000, --eval_freq=1000, --seed=SEEDNUM]
   result:
     path: /results1/drrn1/
   resources:
@@ -20,10 +20,11 @@
     cluster: ai2/raja_p100
     priority: normal
 """
+template_command = "python train-scienceworld.py --num_envs=8 --max_steps=100000 --task_idx=TASKID --simplification_str=easy --priority_fraction=0.50 --memory_size=100000 --env_step_limit=100 --log_freq=100 --checkpoint_freq=5000 --eval_freq=1000 --seed=SEEDNUM --output_dir logs/drrn-8x100k-taskTASKID-seedSEEDNUM"
 
 
 def populateTemplate(taskId, seedNum):
-    outStr = templateStr
+    outStr = template_command
     outStr = outStr.replace("SEEDNUM", str(seedNum))
     outStr = outStr.replace("TASKID", str(taskId))
 
@@ -47,17 +48,18 @@ def submitJob(filenameToRun):
 
 numJobs = 0
 for seed in range(0, 1):
-    for taskIdx in range(0, 30):
+    for taskIdx in range(0, 30):
         tempFilename = "submit.yml"
 
-        print("Creating job (" + str(numJobs) + "): Task: " + str(taskIdx) + " seed: " + str(seed))
+        #print("Creating job (" + str(numJobs) + "): Task: " + str(taskIdx) + " seed: " + str(seed))
         scriptStr = populateTemplate(taskIdx, seed)
-        writeTemplate(tempFilename, scriptStr)
-        submitJob(tempFilename)
+        #writeTemplate(tempFilename, scriptStr)
+        print(scriptStr)
+        #submitJob(tempFilename)
 
-        time.sleep(1)
+        #time.sleep(1)
         numJobs += 1
-        print("")
+        #print("")
         #print(populateTemplate(10, 2))
 
 print("Submitted " + str(numJobs) + " jobs.")
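The job generation above boils down to string substitution on the TASKID and SEEDNUM placeholders. A self-contained sketch with a shortened stand-in template (not the script's full command string) behaves the same way:

```python
# Shortened stand-in for the script's template_command string.
TEMPLATE = ("python train-scienceworld.py --task_idx=TASKID --seed=SEEDNUM "
            "--output_dir logs/drrn-taskTASKID-seedSEEDNUM")

def populate_template(task_id, seed_num):
    """Fill every TASKID/SEEDNUM placeholder, mirroring populateTemplate."""
    out = TEMPLATE.replace("SEEDNUM", str(seed_num))
    out = out.replace("TASKID", str(task_id))
    return out

# One command per (task, seed) pair, e.g. 30 tasks x 1 seed = 30 jobs.
for seed in range(0, 1):
    for task_idx in range(0, 30):
        print(populate_template(task_idx, seed))
```

Because `str.replace` substitutes every occurrence, a placeholder appearing both in a flag and in the output path (as in the real template) is filled consistently.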
18 changes: 0 additions & 18 deletions drrn/closeLeftoverEnvs.py

This file was deleted.

21 changes: 12 additions & 9 deletions drrn/drrn.py
@@ -73,14 +73,17 @@ def build_state(self, obs, infos):
         """ Returns a state representation built from various info sources. """
         obs_ids = [self.sp.EncodeAsIds(o) for o in obs]
         # TextWorld
-        look_ids = [self.sp.EncodeAsIds(info['look']) for info in infos]
-        inv_ids = [self.sp.EncodeAsIds(info['inv']) for info in infos]
+        #look_ids = [self.sp.EncodeAsIds(info['look']) for info in infos]
+        #inv_ids = [self.sp.EncodeAsIds(info['inv']) for info in infos]
+        look_ids = [self.sp.EncodeAsIds(look) for look in infos['look']]
+        inv_ids = [self.sp.EncodeAsIds(inv) for inv in infos['inv']]
 
         # ScienceWorld
 
         #print("obs:")
         #print(obs)
         #print("infos:")
-        #print(infos)
+        #print(infos)
+        #look_ids = [self.sp.EncodeAsIds(info['look']) for info in infos]
+        #inv_ids = [self.sp.EncodeAsIds(info['inv']) for info in infos]
 
@@ -146,11 +149,11 @@ def save(self, suffixStr=""):
         print("Saving agent to path: " + str(self.save_path))
         print("Started saving at: " + str(startTime))
         sys.stdout.flush()
 
         # First, remove any old backups
         print("Removing old backups")
         sys.stdout.flush()
-        try:
+        try:
             files = os.listdir(self.save_path + "/bak")
             for filename in files:
                 if (filename.startswith("memory")) or (filename.startswith("model") or (filename.startswith("progress") or (filename.startswith("log")))):
@@ -167,9 +170,9 @@
             os.makedirs(self.save_path + "/bak", exist_ok=True)
             files = os.listdir(self.save_path)
             for filename in files:
-                if filename.startswith("memory") or filename.startswith("model"):
+                if filename.startswith("memory") or filename.startswith("model"):
                     shutil.move(self.save_path + "/" + filename, self.save_path + "/bak/" + filename)
-                if filename.startswith("progress") or filename.startswith("log"):
+                if filename.startswith("progress") or filename.startswith("log"):
                     shutil.copy(self.save_path + "/" + filename, self.save_path + "/bak/" + filename)
 
@@ -181,7 +184,7 @@
         self.lastSaveSuccessful = False
         with timeout(120):
-            print("Pickle")
+            print("Pickle")
             print("Length: " + str(len(self.memory)))
             sys.stdout.flush()
             pickle.dump(self.memory, open(pjoin(self.save_path, "memory" + str(suffixStr) + ".pkl"), 'wb'))
@@ -195,7 +198,7 @@
         if (self.lastSaveSuccessful == False):
             print("* Model failed to save (timeout).")
             self.numSaveErrors += 1
 
         print("Total number of save timeouts since running: " + str(self.numSaveErrors))
 
         sys.stdout.flush()
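The `build_state` change above reflects how the two environments report `infos`: TextWorld returns a list with one dict per parallel environment, while ScienceWorld returns a single dict of parallel lists. A minimal sketch of the two access patterns, using a whitespace tokenizer as a stand-in for the SentencePiece `EncodeAsIds` call:

```python
def encode_as_ids(text):
    """Stand-in for self.sp.EncodeAsIds: tokenize on whitespace."""
    return text.split()

# TextWorld-style infos: one dict per parallel environment.
infos_tw = [{'look': 'a dark room', 'inv': 'a brass lamp'},
            {'look': 'a long hallway', 'inv': 'nothing'}]
look_ids_tw = [encode_as_ids(info['look']) for info in infos_tw]

# ScienceWorld-style infos: one dict holding a list per field.
infos_sw = {'look': ['a dark room', 'a long hallway'],
            'inv': ['a brass lamp', 'nothing']}
look_ids_sw = [encode_as_ids(look) for look in infos_sw['look']]

# Both layouts yield the same per-environment token lists.
assert look_ids_tw == look_ids_sw
```

Only the iteration pattern changes; the downstream state representation sees identical token id lists either way.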