Visit Individual Folders for Demo Videos!
- Project inspired by OpenAI's ["Emergent Tool Use from Multi-Agent Autocurricula"](https://arxiv.org/abs/1909.07528)
# Train an agent on the continuous-action LunarLander environment for 1000
# episodes, printing the per-episode score and a trailing 100-episode average,
# and checkpointing the agent every 25 episodes.

# Seed NumPy before the agent is constructed so that any NumPy-based
# randomness in its setup is covered too (previously the seed was set after
# the agent had already been built).
np.random.seed(0)

env = gym.make(
    "LunarLander-v2",
    continuous=True,
    gravity=-10.0,
    render_mode=None,
)

# Take the action size from the environment instead of hard-coding it. With
# continuous=True the documented action space is a 2-vector (main engine,
# lateral engines); the previous n_actions=4 made the agent emit two extra
# components that the environment does not use.
n_actions = env.action_space.shape[0]

agent = Agent(
    alpha=0.000025,
    beta=0.00025,
    input_dims=[8],
    tau=0.001,
    env=env,
    batch_size=64,
    layer1_size=400,
    layer2_size=300,
    n_actions=n_actions,
)

score_history = []
for i in range(1000):
    done = False
    score = 0
    obs, _ = env.reset()
    while not done:
        act = agent.choose_action(obs)
        new_state, reward, terminated, truncated, _ = env.step(act)
        # The episode loop stops on either signal...
        done = terminated or truncated
        # ...but only a genuine termination is stored as terminal. A
        # time-limit truncation is not an absorbing state, so flagging it as
        # terminal would wrongly cut off value bootstrapping for that
        # transition.
        agent.remember(obs, act, reward, new_state, int(terminated))
        agent.learn()
        score += reward
        obs = new_state

    score_history.append(score)
    average = np.mean(score_history[-100:])
    print(f"episode {i} score {score:.2f} 100 game average {average:.2f}")

    # Periodic checkpoint (this also fires on the very first episode, i == 0).
    if i % 25 == 0:
        agent.save_models()

# Release the environment's resources once training is finished.
env.close()
![image](https://private-user-images.githubusercontent.com/91924667/289270025-ab4ceeb9-465e-4c67-8938-f837e117de87.png?jwt=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbSIsImtleSI6ImtleTUiLCJleHAiOjE3Mzg5NjI1NTgsIm5iZiI6MTczODk2MjI1OCwicGF0aCI6Ii85MTkyNDY2Ny8yODkyNzAwMjUtYWI0Y2VlYjktNDY1ZS00YzY3LTg5MzgtZjgzN2UxMTdkZTg3LnBuZz9YLUFtei1BbGdvcml0aG09QVdTNC1ITUFDLVNIQTI1NiZYLUFtei1DcmVkZW50aWFsPUFLSUFWQ09EWUxTQTUzUFFLNFpBJTJGMjAyNTAyMDclMkZ1cy1lYXN0LTElMkZzMyUyRmF3czRfcmVxdWVzdCZYLUFtei1EYXRlPTIwMjUwMjA3VDIxMDQxOFomWC1BbXotRXhwaXJlcz0zMDAmWC1BbXotU2lnbmF0dXJlPTg1NTY0M2IxM2YwNzhjMGJhMjg3ZDY2ZTczMjUxYjdmYjIyOTcxMjE0ZTk0MTdhOWJjMDNiMmNhOTgzZTFhOWYmWC1BbXotU2lnbmVkSGVhZGVycz1ob3N0In0.77KXaytwhRHzYuJziC10KatQKAuCe7schhqegCElu1o)