diff --git a/.gitignore b/.gitignore
index 222d4b8..3188c83 100644
--- a/.gitignore
+++ b/.gitignore
@@ -89,4 +89,9 @@ target/
.mypy_cache/
rewards-api/src/__pycache__
-rewards-api/__pycache__
\ No newline at end of file
+rewards-api/__pycache__
+tmp.py
+flaskApp.py
+old_code
+saved_models
+temp.py
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
index d240184..8923de6 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -19,5 +19,4 @@ RUN mkdir rewards-api
COPY ./rewards-api rewards-api
WORKDIR "/rewards-api"
EXPOSE 8900
-ENTRYPOINT [ "uvicorn", "main:app"]
-CMD [ "--reload" ]
\ No newline at end of file
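+# NOTE: main.py currently binds 127.0.0.1:8000, while this image EXPOSEs 8900;
+# for the container to be reachable from outside, the app should bind 0.0.0.0
+# on the exposed port (e.g. via environment variables)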
+CMD ["python", "main.py"]
\ No newline at end of file
diff --git a/README.md b/README.md
index 945e8b2..ab3807c 100644
--- a/README.md
+++ b/README.md
@@ -1,22 +1,23 @@
-### **Rewards API**
+### **Rewards API**
-**rewards API** is a `REST` API where creating experiments, integrating environments and building agents are just some sets of CRUD operations and API calls. Our API internally uses [**rewards-sdk**](https://github.com/rewards-ai/rewards-SDK).
+**rewards API** is a `REST` API where creating experiments, integrating environments, and building agents are just a set of CRUD operations and API calls. Our API internally uses [**rewards-sdk**](https://github.com/rewards-ai/rewards-SDK).
-#### **How to run the project**
+#### **How to run the project**
First clone the project by running:
+
```bash
git clone https://github.com/rewards-ai/rewards-api.git
```
-`rewards-api` runs on `fastapi`. So install fastapi by running:
+`rewards-api` runs on `flask`, so install Flask by running:
```bash
-pip install fastapi
+pip install flask
```
Now for installing additional dependencies just run:
@@ -28,15 +29,14 @@ pip install -r requirements.txt
Now go to the `rewards-api` directory and run:
```
-uvicorn main:app --reload
+python main.py
```
-This will open the the link `http://127.0.0.1:8000` and then go to `http://127.0.0.1:8000/docs/`. There you will find all the endpoints with it's instructions and curl commands after running each.
-
+This will start the server at `http://127.0.0.1:8000`. Then go to `http://127.0.0.1:8000/apidocs/` (flasgger's default Swagger UI route), where you will find all the endpoints with their instructions and example curl commands.
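+
+For example, once the server is running, you can create a new session straight from the command line (the session id below is just an illustrative name):
+
+```bash
+curl -X POST "http://127.0.0.1:8000/api/v1/create_session?session_id=my-first-experiment"
+```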
TODO:
- [ ] Logging functionality
-- [ ] Support for custom exceptions
+- [ ] Support for custom exceptions
- [X] Supporting streaming the game screen
-- [X] Integration with the frontend
\ No newline at end of file
+- [X] Integration with the frontend
diff --git a/main.py b/main.py
index c11e72a..b26d7e8 100644
--- a/main.py
+++ b/main.py
@@ -1,75 +1,41 @@
-import os
-import json
-import pygame
-from fastapi.logger import logger
-from fastapi import FastAPI, Request
-from fastapi.responses import StreamingResponse
-from fastapi.middleware.cors import CORSMiddleware
-from fastapi.exceptions import RequestValidationError
-
-# configs and import from other modules
-
-from src.config import CONFIG
-from src.schemas import (
- AgentConfiguration,
- TrainingConfigurations,
- EnvironmentConfigurations,
- RewardFunction
-)
-from src.exceptions import (
- validation_exception_handler,
- python_exception_handler
-)
-
-import src.utils as utils
-from src.streamer import RewardsStreamer
-
-
-# TODO:
-# -----
-# Add a response model for returning all the configuration in structured format
-# Also add a error response model
-
-
-app = FastAPI(
- title="RewardsAI API for interacting with rewards-platform",
- version="1.0.0",
- description="""
- rewards-api is the easy to use API for interacting with agents and environments.
- It enables users to easily create experiments and manage each of the experiments
- by changing different types of parameters and reward function and also pushing the
- model to other location while competing.
- """
-)
-
-app.add_middleware(
- CORSMiddleware, allow_origins=["*"],
- allow_credentials=True, allow_methods=["*"],
- allow_headers=["*"],
-)
-app.add_exception_handler(RequestValidationError, validation_exception_handler)
-app.add_exception_handler(Exception, python_exception_handler)
-
-@app.on_event('startup')
-async def startup_event():
+from rewards import QTrainer, LinearQNet, CarGame
+from flask import Flask, Response, request
+from flask_cors import CORS
+from flasgger import Swagger
+from src.config import CONFIG
+import src.utils as utils
+import numpy as np
+import ast
+import json
+import cv2
+import os
+
+app = Flask(__name__)
+CORS(app)
+Swagger(app)
+
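+# NOTE: before_first_request is deprecated in Flask 2.2 (and removed in 2.3);
+# with the pinned Flask==2.2.3 it still runs this hook once, before the first
+# request is handled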
+@app.before_first_request
+def startup_event():
utils.create_folder_struct()
- logger.info("Folder Created")
- logger.info("Starting up")
-
+ app.logger.info("Folder Created")
+ app.logger.info("Starting up")
-@app.post('/api/v1/create_session/{session_id}')
-def create_new_session(session_id : str):
+@app.post('/api/v1/create_session')
+def create_new_session():
"""
- This create a new session in the rewards-platform where
- user can now initialize with different environment, agent configuration.
-
- Args:
-
- - `session_id (str)`:The session ID is a unique string with a format of __.
- In each of the session, a unique type of model can be made (which will be remain unchanged)
- Howevar during a session some parameters including environment, agent and some training
- can be changed.
+    Create a new session in the rewards-platform, where the user can then initialize different environment and agent configurations.
+
+ ---
+ parameters:
+ - name: session_id
+ in: query
+ type: string
+ required: true
+        description: The session ID is a unique string. Each session holds one unique model (which remains unchanged), although the environment, agent, and some training parameters can be changed during the session.
"""
+ session_id = request.args.get("session_id")
utils.create_session(session_name=session_id)
return {
'status' : 200,
@@ -77,35 +43,38 @@ def create_new_session(session_id : str):
}
-@app.post('/api/v1/delete_session/{session_id}')
-def delete_session(session_id : str):
+@app.post('/api/v1/delete_session')
+def delete_session():
"""
    Deletes an existing session, typically once it is no longer needed.
"""
+ session_id = request.args.get("session_id")
return utils.delete_session(session_id = session_id)
@app.post('/api/v1/write_env_params')
-def push_env_parameters(request : Request, body : EnvironmentConfigurations):
+def push_env_parameters():
"""
Create and save environment parameters for the given environment.
- List of environment parameters:
+    List of environment parameters:
- environment_name : The name of the environment defaults to 'car-race'
- environment_world : The environment map to choose options : 0/1/2
- mode : training/validation mode
- car_speed : the speed of the car (agent)
- Args:
-    - `request (Request)`: Incoming request headers
-    - `body (EnvironmentConfigurations)`: Request body
+    ---
+    parameters:
+      - name: body
+        in: body
+        required: true
+        schema:
+          type: object
"""
- print(body)
+ body = request.json
utils.add_inside_session(
- session_id = body.session_id, config_name="env_params",
- environment_name = body.environment_name,
- environment_world = body.environment_world,
- mode = body.mode,
- car_speed = body.car_speed
+ session_id = body["session_id"], config_name="env_params",
+ environment_name = body["environment_name"],
+ environment_world = body["environment_world"],
+ mode = body["mode"],
+ car_speed = body["car_speed"]
)
return {
@@ -115,43 +84,47 @@ def push_env_parameters(request : Request, body : EnvironmentConfigurations):
@app.post("/api/v1/write_agent_params")
-def push_agent_parameters(request : Request, body : AgentConfiguration):
+def push_agent_parameters():
"""
Create and save agent parameters for the given session
- List of the agent parameters:
+    List of the agent parameters:
- model_configuration : example: '[[5, 128], [128, 64], [64, 3]]'
- learning_rate : example : 0.01
- loss_fn : example : mse
- optimizer : example : adam
- num_episodes : The number of episodes to train the agent. example : 100
- Args:
-    - `request (Request)`: Incoming request headers
-    - `body (EnvironmentConfigurations)`: Request body
+    ---
+    parameters:
+      - name: body
+        in: body
+        required: true
+        schema:
+          type: object
"""
+ body = request.json
utils.add_inside_session(
- session_id=body.session_id, config_name="agent_params",
- model_configuration = body.model_configuration,
- learning_rate = body.learning_rate,
- loss_fn = body.loss_fn,
- optimizer = body.optimizer,
- gamma = body.gamma,
- epsilon = body.epsilon,
- num_episodes = body.num_episodes
+ session_id=body["session_id"], config_name="agent_params",
+ model_configuration = body["model_configuration"],
+ learning_rate = body["learning_rate"],
+ loss_fn = body["loss_fn"],
+ optimizer = body["optimizer"],
+ gamma = body["gamma"],
+ epsilon = body["epsilon"],
+ num_episodes = body["num_episodes"]
)
return {
'status' : 200,
        'response' : 'Agent configurations saved successfully',
- 'test': body.model_configuration
+ 'test': body["model_configuration"]
}
@app.post("/api/v1/write_training_params")
-def push_training_parameters(request : Request, body : TrainingConfigurations):
+def push_training_parameters():
"""
Create and save training parameters for the given session
- List of the training parameters:
+    List of the training parameters:
- learning_algorithm : example : 0.01
- enable_wandb : example : mse
- reward_function : example : Callable a reward function looks like this:
@@ -171,16 +144,18 @@ def reward_func(props):
return reward
```
- Args:
-    - `request (Request)`: Incoming request headers
-    - `body (EnvironmentConfigurations)`: Request body
+    ---
+    parameters:
+      - name: body
+        in: body
+        required: true
+        schema:
+          type: object
"""
+ body = request.json
utils.add_inside_session(
- session_id=body.session_id, config_name = "training_params",
- learning_algorithm = body.learning_algorithm,
- enable_wandb = body.enable_wandb == 1,
- reward_function = body.reward_function
+ session_id=body["session_id"], config_name = "training_params",
+ learning_algorithm = body["learning_algorithm"],
+ enable_wandb = body["enable_wandb"] == 1,
+ reward_function = body["reward_function"]
)
return {
@@ -190,32 +165,36 @@ def reward_func(props):
@app.post("/api/v1/write_reward_fn")
-def write_reward_function(request : Request, body : RewardFunction):
+def write_reward_function():
"""
    Rewrites the reward function during experimentation
- Args:
-    - `request (Request)`: Incoming request headers
-    - `body (EnvironmentConfigurations)`: Request body
+    ---
+    parameters:
+      - name: body
+        in: body
+        required: true
+        schema:
+          type: object
"""
+ body = request.json
utils.add_inside_session(
- session_id=body.session_id, config_name="training_params",
+ session_id=body["session_id"], config_name="training_params",
rewrite=True,
- reward_function = body.reward_function
+ reward_function = body["reward_function"]
    )
+    return {
+        'status' : 200,
+        'response' : 'Reward function saved successfully'
+    }
-@app.get('/api/v1/get_all_params/{session_id}')
-async def get_all_parameters(session_id : str):
+@app.get('/api/v1/get_all_params')
+def get_all_parameters():
"""
    Lists all the parameters (environment, agent and training)
    as one single JSON response.
- Args:
-    - `session_id (str)`: The session ID which was used in the start.
+    ---
+    parameters:
+      - name: session_id
+        in: query
+        type: string
+        required: true
+        description: The session ID that was used at the start.
"""
+ session_id = request.args.get("session_id")
file_response = utils.get_session_files(session_id)
file_response['status'] = 200
return file_response
@@ -231,45 +210,6 @@ def get_all_sessions():
"""
return utils.get_all_sessions_info()
-# make streamer as the generator
-# make this endpoint as the client
-# so it will be back and forth connections between the client and the server
-
-
-
-@app.get('/api/v1/start_training/{session_id}')
-async def start_training(session_id : str):
- """/start_training is the endpoint to start training the agent
- These are the sets of events that will happen during this session while triggering this endpoint
-
- - Validation of all the parameters (TODO)
- - Loading the model and the agent
- - Start loading the game and streaming the results
-
- Args:
- session_id (str): The session.
- Using this session id we can train any of the experiment
- """
- rewards_response = utils.get_session_files(session_id)
- streamer = RewardsStreamer(session_id = session_id, response = rewards_response)
- return StreamingResponse(streamer.stream_episode())
-
-
-
-@app.get('/api/v1/stop_training/')
-def stop_training():
- # NOTE: (TODO)
- # Stop training does not work for now.
- # One main reason is to make it into a different threading. This might introduce
- # more bugs and problems. One can stop training by just clicking the cross button.
- # Howevar it will automatically close once episode gets finished
-
- pygame.quit()
- return {
- "status" : 200,
- "message" : "Stopped training successfully"
- }
-
@app.post("/api/v1/validate_exp")
def validate_latest_expriment():
@@ -289,7 +229,7 @@ def validate_latest_expriment():
@app.post("/api/v1/push_model")
-def push_model(model_name : str):
+def push_model():
# In the frontend we will show the list of available model agents and their infos like
# How much they were trained
# their total reward
@@ -311,3 +251,129 @@ def get_all_envs():
@app.post("/api/v1/get_all_tracks")
def get_all_tracks():
return utils.get_all_tracks("car-racer")
+
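+# --- streaming helpers ---
+# stop_streaming is a module-level flag shared between endpoints:
+# /api/v1/stream starts the training loop in generate(), and /api/v1/stop
+# flips the flag to break out of it.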
+def enableStreaming():
+ global stop_streaming
+ stop_streaming = False
+
+def convert_str_func_to_exec(str_function: str, function_name: str):
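+    # compile the reward function, submitted as source text, into a callable.
+    # NOTE: exec() on user-supplied code is a code-injection risk; acceptable
+    # for local experimentation, not for a publicly exposed API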
+ globals_dict = {}
+ exec(str_function, globals_dict)
+ new_func = globals_dict[function_name]
+ return new_func
+
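+# generate() runs the training loop and doubles as a frame generator: each
+# training step yields one encoded frame for the multipart/x-mixed-replace
+# response served by /api/v1/stream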
+def generate(session_id):
+ r = utils.get_session_files(session_id)
+    app.logger.info(f"starting stream for session {session_id}")
+ enableStreaming()
+
+ # environment parameters
+ env_name = r["env_params"]["environment_name"]
+ env_world = int(r["env_params"]["environment_world"])
+ mode = r["env_params"]["mode"]
+ car_speed = r["env_params"]["car_speed"]
+
+ # agent parameters
+    # model_configuration is stored as a string literal such as
+    # "[[5, 128], [128, 64], [64, 3]]"; ast.literal_eval parses it safely,
+    # and a second pass handles double-encoded strings
+    layer_config = ast.literal_eval(r["agent_params"]["model_configuration"])
+    if isinstance(layer_config, str):
+        layer_config = ast.literal_eval(layer_config)
+
+ lr = r["agent_params"]["learning_rate"]
+ loss = r["agent_params"]["loss_fn"]
+ optimizer = r["agent_params"]["optimizer"]
+ gamma = r["agent_params"]["gamma"]
+ epsilon = r["agent_params"]['epsilon']
+ num_episodes = r["agent_params"]["num_episodes"]
+
+ reward_function = r["training_params"]["reward_function"]
+
+ checkpoint_folder_path = os.path.join(
+ utils.get_home_path(),
+ CONFIG["REWARDS_PARENT_CONFIG_DIR"],
+ f"{session_id}/{CONFIG['REWARDS_CONFIG_MODEL_FOLDER_NAME']}/"
+ )
+
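+    # build the model, Q-learning agent and game environment from the stored
+    # session configuration (this mirrors the setup of the old RewardsStreamer)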
+ model = LinearQNet(layer_config)
+
+ agent = QTrainer(
+ lr = lr,
+ gamma = gamma,
+ epsilon = epsilon,
+ model = model,
+ loss = loss,
+ optimizer = optimizer,
+ checkpoint_folder_path = checkpoint_folder_path,
+ model_name = "model.pth"
+ )
+
+ game = CarGame(
+ track_num=env_world,
+ mode = mode,
+ reward_function=convert_str_func_to_exec(
+ reward_function,
+ function_name="reward_function"
+ ),
+ display_type="surface",
+ screen_size=(800, 700)
+ )
+ game.FPS = car_speed
+
+ record = 0
+ plot_scores = []
+ plot_mean_scores = []
+ total_score = 0
+ done = False
+
+ while True:
+ global stop_streaming
+ if stop_streaming or agent.n_games == num_episodes:
+            return  # a bare return ends the generator; a returned value here would be ignored
+ reward, done, score, pix = agent.train_step(game)
+ game.timeTicking()
+
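+        # episode over: reset the game, run a replay-memory training pass,
+        # checkpoint the model on a new high score, and update the plot data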
+ if done:
+ game.initialize()
+ agent.n_games += 1
+ agent.train_long_memory()
+ if score > record:
+ record = score
+ agent.model.save(
+ checkpoint_folder_path,
+ 'model.pth',
+ device = "cpu"
+ )
+ print('Game', agent.n_games, 'Score', score, 'Record:', record)
+ plot_scores.append(score)
+ total_score += score
+ mean_score = total_score / agent.n_games
+ plot_mean_scores.append(mean_score)
+ utils.update_graphing_file(session_id, {"plot_scores": plot_scores, "plot_mean_scores": plot_mean_scores})
+ img = np.fliplr(pix)
+ img = np.rot90(img)
+ img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+ img = cv2.imencode(".png", img)[1]
+        yield (b'--frame\r\n' b'Content-Type: image/png\r\n\r\n' + bytearray(img) + b'\r\n')
+
+@app.route('/api/v1/stream', methods = ['GET'])
+def stream():
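+    # keep the HTTP connection open and push frames as they are produced;
+    # browsers render multipart/x-mixed-replace as a continuously updating image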
+ session_id = request.args.get('session_id')
+ return Response(generate(session_id), mimetype = "multipart/x-mixed-replace; boundary=frame")
+
+@app.route('/api/v1/stop')
+def stop():
+ global stop_streaming
+ stop_streaming = True
+ return {"status": 204}
+
+if __name__ == '__main__':
+    host = "127.0.0.1"
+    port = 8000
+    debug = True
+    # pass keyword arguments: positionally, the fourth parameter of app.run()
+    # is load_dotenv, not an options mapping
+    app.run(host=host, port=port, debug=debug)
diff --git a/requirements.txt b/requirements.txt
index 5c0ba61..5b4a9b4 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,8 +2,8 @@ torch
pandas
numpy
scikit-learn
-fastapi==0.88.0
+Flask==2.2.3
+Flask-Cors==3.0.10
+flasgger
+opencv-python
rewards
-wandb
matplotlib
pygame
\ No newline at end of file
diff --git a/src/exceptions.py b/src/exceptions.py
deleted file mode 100644
index b1ed5ff..0000000
--- a/src/exceptions.py
+++ /dev/null
@@ -1,67 +0,0 @@
-import json
-import traceback
-
-from fastapi.logger import logger
-from fastapi import Request, status
-from fastapi.responses import JSONResponse
-from fastapi.exceptions import RequestValidationError
-
-from src.config import CONFIG
-
-# TODO: Also put a custom error handler message and
-# also a logging function that can log all the exceptions
-
-
-def get_error_response(request, exc) -> dict:
- """Generic error handling function
-
- Args:
- request (Request): Incoming request
- exc (_type_): execution
-
- Returns:
- dict: Error response
- """
- error_ressonse = {"error": True, "message": str(exc)}
-
- if CONFIG["DEBUG"]:
- error_ressonse["traceback"] = "".join(
- traceback.format_exception(type(exc), value=exc, tb=exc.__traceback__)
- )
- return error_ressonse
-
-
-async def validation_exception_handler(request: Request, exc: RequestValidationError):
- """
- Handling error in validating requests
- """
- return JSONResponse(
- status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, content=get_error_response(request, exc)
- )
-
-
-async def python_exception_handler(request: Request, exc: Exception):
- """
- Handling any internal error
- """
-
- # Log requester infomation
- logger.error(
- "Request info:\n"
- + json.dumps(
- {
- "host": request.client.host,
- "method": request.method,
- "url": str(request.url),
- "headers": str(request.headers),
- "path_params": str(request.path_params),
- "query_params": str(request.query_params),
- "cookies": str(request.cookies),
- },
- indent=4,
- )
- )
-
- return JSONResponse(
- status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, content=get_error_response(request, exc)
- )
diff --git a/src/socket_server.py b/src/socket_server.py
deleted file mode 100644
index af711d8..0000000
--- a/src/socket_server.py
+++ /dev/null
@@ -1,16 +0,0 @@
-import asyncio
-import websockets
-
-import src.utils as utils
-from src.config import CONFIG
-from src.streamer import RewardsStreamer
-
-class WebsocketStreamingServer:
- def __init__(self, host, port, secret):
- self._secret = secret
-
- async def _start(self, websocket, endpoint):
- print(f"=> Connected to {endpoint}")
- secret = await websocket.recv()
- if secret == self._secret :
- ...
\ No newline at end of file
diff --git a/src/streamer.py b/src/streamer.py
deleted file mode 100644
index f93ea76..0000000
--- a/src/streamer.py
+++ /dev/null
@@ -1,135 +0,0 @@
-import os
-import cv2
-import ast
-import sys
-import time
-import json
-import pygame
-import asyncio
-import multiprocessing as mp
-from typing import Dict, Any
-
-import src.utils as utils
-from src.config import CONFIG
-
-from rewards import workflow, QTrainer, LinearQNet, CarGame
-
-from fastapi.responses import JSONResponse
-from fastapi.encoders import jsonable_encoder
-
-class RewardsStreamer:
- def __init__(self, session_id, response: Dict[str, Any]) -> None:
- """
- RewardsStreamer is the class which is responsible for streaming, saving
- metrics and also streaming frames over the network.
- """
- self.session_id = session_id
- self.enable_wandb = False
- self.device = "cpu"
-
- # environment parameters
- self.env_name = response["env_params"]["environment_name"]
- self.env_world = int(response["env_params"]["environment_world"])
- self.mode = response["env_params"]["mode"]
- self.car_speed = response["env_params"]["car_speed"]
-
- # agent parameters
- self.layer_config = ast.literal_eval(response["agent_params"]["model_configuration"])
- self.lr = response["agent_params"]["learning_rate"]
- self.loss = response["agent_params"]["loss_fn"]
- self.optimizer = response["agent_params"]["optimizer"]
- self.gamma = response["agent_params"]["gamma"]
- self.epsilon = response["agent_params"]['epsilon']
- self.num_episodes = response["agent_params"]["num_episodes"]
- self.checkpoint_folder_path = os.path.join(
- utils.get_home_path(),
- CONFIG["REWARDS_PARENT_CONFIG_DIR"],
- f"{session_id}/{CONFIG['REWARDS_CONFIG_MODEL_FOLDER_NAME']}/"
- )
-
-
- # reward function
- self.reward_function = response["training_params"]["reward_function"]
-
- # make the model
- self.model = LinearQNet(self.layer_config)
-
- # make the agent
- self.agent = QTrainer(
- lr = self.lr,
- gamma = self.gamma,
- epsilon = self.epsilon,
- model = self.model,
- loss = self.loss,
- optimizer = self.optimizer,
- checkpoint_folder_path = self.checkpoint_folder_path,
- model_name = "model.pth"
- )
-
- # build the screen and the game
- self.game = CarGame(
- track_num=self.env_world,
- mode = self.mode,
- reward_function=self._convert_str_func_to_exec(
- self.reward_function,
- function_name="reward_function"
- ),
- display_type="surface",
- screen_size=(800, 700) if self.mode == "training" else (1000, 700)
- )
-
- def _convert_str_func_to_exec(self, str_function: str, function_name: str):
- """Converts a string like function skeleton to a Callable
-
- Args:
- str_function (str): The string function skeleton
- function_name (str): The name of the function
-
- Returns:
- Callable: Actual callable function of type <'function'>
- """
- globals_dict = {}
- exec(str_function, globals_dict)
- new_func = globals_dict[function_name]
- return new_func
-
- def stream_episode(self, yield_response: bool = False):
- for episode in range(1, self.num_episodes + 1):
- self.game.initialize()
- self.game.FPS = self.car_speed
- total_score, record = 0, 0
- done = False
-
-
- while not done:
- _, done, score, pixel_data = self.agent.train_step(self.game)
- self.game.timeTicking()
-
- self.agent.n_games += 1
- self.agent.train_long_memory()
-
- if score > record:
- self.agent.model.save(
- self.checkpoint_folder_path,
- 'model.pth',
- device = self.device
- )
- total_score += score
-
- # stream all the metrics to a file
- utils.add_inside_session(
- self.session_id,
- config_name="metrics",
- rewrite=True, multi_config=True,
- episode_number = episode,
- episode_score = score,
- mean_score = total_score / self.agent.n_games
- )
-
- response = {
- 'episode_number' : episode,
- 'episode_score' : score,
- 'episode_mean_score' : total_score / self.agent.n_games
- }
-
- yield json.dumps(response) + "\n"
\ No newline at end of file
diff --git a/src/utils.py b/src/utils.py
index c10e96a..f485d71 100644
--- a/src/utils.py
+++ b/src/utils.py
@@ -1,17 +1,18 @@
-import os
-import json
-import shutil
-from typing import Optional, Union, Any, Dict
-
-# rewards package
+from typing import Optional, Union, Any, Dict
from src.config import CONFIG
-from rewards import workflow, Agent
+import shutil
+import json
+import os
def get_home_path():
# get the home directory using os
return os.path.expanduser("~")
+def update_graphing_file(session_id: str, data: dict, dir: Optional[str] = None, name: Optional[str] = None) -> None:
+    # FIXME: hardcoded, machine-specific path; this should be derived from the
+    # session directory (the session_id, dir and name parameters are unused)
+    with open("D:/Prototypes/rewards.ai/training-platform/src/assets/temp.json", "w") as f:
+        f.write(json.dumps(data))
+
def create_folder_struct(dir: Optional[str] = None, name: Optional[str] = None) -> None:
"""Creates a root folder to store all the session configuration