add citations and correct max_len with int
siyuyuan committed Jan 25, 2025
1 parent 2da6bf8 commit 3998653
Showing 3 changed files with 10 additions and 3 deletions.
9 changes: 8 additions & 1 deletion README.md
@@ -128,6 +128,7 @@ export MODEL_NAME=YOUR_MODEL_NAME
export MODEL_DIR=YOUR_MODEL_DIR
export TASK=YOUR_TASK
export TEMP=YOUR_TEMP
export MAX_TOKEN_LENGTH=YOUR_MAX_TOKEN_LENGTH # Replace with the maximum token length for the model
export ALPHA=YOUR_ALPHA # the lower bound for high-quality trajectories
export BETA=YOUR_BETA # The distinguishable gap

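For reference, the export block above might be filled in as follows (all concrete values here are hypothetical placeholders, not values from the repository):

```shell
# Hypothetical example configuration; substitute your own model and task.
export MODEL_NAME=llama-3-8b-instruct
export MODEL_DIR=/models/llama-3-8b-instruct
export TASK=webshop
export TEMP=1.0
export MAX_TOKEN_LENGTH=7600   # maximum token length for the model
export ALPHA=0.5               # lower bound for high-quality trajectories
export BETA=0.1                # distinguishable gap
```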
@@ -203,4 +204,10 @@ Their contributions to the open-source community have been invaluable and greatl
## Citation
If you use this code in your research, please cite:
```bibtex
@article{yuan2025agent,
title={Agent-R: Training Language Model Agents to Reflect via Iterative Self-Training},
author={Yuan, Siyu and Chen, Zehui and Xi, Zhiheng and Ye, Junjie and Du, Zhengyin and Chen, Jiecao},
journal={arXiv preprint arXiv:2501.11425},
year={2025}
}
```
2 changes: 1 addition & 1 deletion mcts_collection.py
@@ -61,7 +61,7 @@ def setup_conversation(env):
def perform_mcts_search(Task, calling, env, conv, model_name, idx):

recent_actions = []
mcts_search = ExtendedMCTS(calling=calling, max_len=os.environ["MAX_TOKEN_LENGTH"], model_name=model_name, env=env, idx=idx)
mcts_search = ExtendedMCTS(calling=calling, max_len=int(os.environ["MAX_TOKEN_LENGTH"]), model_name=model_name, env=env, idx=idx)

mcts_search.search(env, conv, recent_actions)
dir_path = f"mcts_result/{Task}/{model_name}"
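The `int(...)` cast in the change above is necessary because `os.environ` always returns strings, so an uncast `MAX_TOKEN_LENGTH` would be passed to `ExtendedMCTS` as `"7600"` rather than `7600`. A minimal sketch of the difference (the value `7600` is hypothetical):

```python
import os

# Environment variable values are always strings, even when they look numeric.
os.environ["MAX_TOKEN_LENGTH"] = "7600"  # as set by the export in the README

raw = os.environ["MAX_TOKEN_LENGTH"]
assert isinstance(raw, str)  # arithmetic like raw - 60 would raise TypeError

max_len = int(raw)  # the cast applied in this commit
assert max_len - 60 == 7540  # now usable in numeric comparisons
```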
2 changes: 1 addition & 1 deletion path_collection.py
@@ -66,7 +66,7 @@ def revise_worst_path(calling, worst_path, best_path, task_description):
for node in worst_path[1:]:
# Generate the prompt for the verifier
action_obs_prompt = '\n'.join(action_obs)
max_len = 7600
max_len = int(os.environ["MAX_TOKEN_LENGTH"])
while len(action_obs_prompt.split()) > max_len - 60:
action_obs_prompt = action_obs_prompt[6:] # Truncate prompt if too long

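The loop above compares a whitespace word count against `max_len - 60` and trims 6 characters off the front of the prompt per iteration until it fits. A standalone sketch of that truncation behavior (the sample prompt and the small `MAX_TOKEN_LENGTH` value are hypothetical, chosen so the loop actually runs):

```python
import os

os.environ["MAX_TOKEN_LENGTH"] = "70"  # hypothetical; threshold becomes 70 - 60 = 10 words

max_len = int(os.environ["MAX_TOKEN_LENGTH"])
action_obs_prompt = " ".join(f"tok{i}" for i in range(20))  # 20 whitespace-separated words

# Drop 6 leading characters per pass until the word count fits the budget.
while len(action_obs_prompt.split()) > max_len - 60:
    action_obs_prompt = action_obs_prompt[6:]

assert 0 < len(action_obs_prompt.split()) <= max_len - 60
```

Note this trims the oldest (front) part of the trajectory first, keeping the most recent actions and observations.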
