"""Run the GitHub Trends pipeline locally and dump the results as JSON files.

Fetches a user's raw data through the GitHub API and writes ``raw.json``,
``langs.json``, ``repos.json`` and ``wrapped.json`` into the output directory.
"""

import argparse
import asyncio
import json
import os
import sys
from datetime import datetime
from typing import Optional, Sequence

# Add the parent directory to the Python path so the `src` package resolves
# when this file is executed directly (python ./scripts/local.py).
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
# src.constants reads LOCAL via os.getenv at import time, so it has to be set
# before anything under `src` is imported.
os.environ["LOCAL"] = "True"

DATE_FORMAT = "%Y-%m-%d"


def parse_args(argv: Optional[Sequence[str]] = None) -> argparse.Namespace:
    """Parse the command-line options of the local script.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which is the original behavior.

    Returns:
        Namespace with ``user_id``, ``access_token``, ``start_date``,
        ``end_date``, ``timezone`` and ``output_dir``, all as strings.
    """
    parser = argparse.ArgumentParser(description="GitHub Trends Script")

    parser.add_argument("--user_id", required=True, help="GitHub user ID", type=str)
    parser.add_argument(
        "--access_token", required=True, help="GitHub access token", type=str
    )
    parser.add_argument(
        "--start_date",
        default="2023-01-01",
        help="Start date in YYYY-MM-DD format",
        type=str,
    )
    parser.add_argument(
        "--end_date",
        default="2023-01-31",
        help="End date in YYYY-MM-DD format",
        type=str,
    )
    parser.add_argument(
        "--timezone", default="America/New_York", help="Timezone", type=str
    )
    parser.add_argument(
        "--output_dir", default="./", help="Output directory path", type=str
    )

    return parser.parse_args(argv)


def _mask_token(token: str, visible: int = 4) -> str:
    """Return ``token`` with all but its last ``visible`` characters starred out."""
    if len(token) <= visible:
        return "*" * len(token)
    return "*" * (len(token) - visible) + token[-visible:]


def _write_text(output_dir: str, filename: str, text: str) -> None:
    """Write ``text`` to ``output_dir/filename`` as UTF-8."""
    with open(os.path.join(output_dir, filename), "w", encoding="utf-8") as f:
        f.write(text)


async def main() -> None:
    """Fetch the user's data and write raw, language, repo and wrapped JSON files.

    Raises:
        ValueError: If ``--start_date`` or ``--end_date`` is not YYYY-MM-DD.
    """
    # Imported here rather than at module top so the sys.path and LOCAL setup
    # above is guaranteed to have run before `src` is loaded.
    from src.aggregation.layer0 import get_user_data
    from src.processing.user import get_top_languages, get_top_repos
    from src.processing.wrapped.package import get_wrapped_data

    args = parse_args()

    start_date = datetime.strptime(args.start_date, DATE_FORMAT)
    end_date = datetime.strptime(args.end_date, DATE_FORMAT)

    print("Local script running...")
    print("User ID:", args.user_id)
    # Never echo the full secret: terminal output ends up in logs and scrollback.
    print("Access token:", _mask_token(args.access_token))
    print("Start date:", start_date)
    print("End date:", end_date)
    print("Timezone:", args.timezone)
    print("Output directory:", args.output_dir)
    print()

    # Create the target directory up front so a missing path fails (or is
    # fixed) before the GitHub requests are made, not after.
    os.makedirs(args.output_dir, exist_ok=True)

    raw_output = await get_user_data(
        args.user_id, start_date, end_date, args.timezone, args.access_token
    )
    _write_text(args.output_dir, "raw.json", raw_output.model_dump_json(indent=2))

    langs_output = get_top_languages(
        raw_output, loc_metric="changed", include_private=True
    )
    # Element 0 holds model objects; round-trip them through JSON so the whole
    # result can be serialized with the json module. Element 1 is passed through.
    langs_output = (
        [json.loads(x.model_dump_json()) for x in langs_output[0]],
        langs_output[1],
    )

    repos_output = get_top_repos(
        raw_output, loc_metric="changed", include_private=True, group="none"
    )
    repos_output = (
        [json.loads(x.model_dump_json()) for x in repos_output[0]],
        repos_output[1],
    )

    _write_text(args.output_dir, "langs.json", json.dumps(langs_output, indent=2))
    _write_text(args.output_dir, "repos.json", json.dumps(repos_output, indent=2))

    # Use the year of the requested range instead of a hard-coded 2023; with
    # the default --start_date this is still 2023.
    wrapped_user = get_wrapped_data(raw_output, start_date.year)
    _write_text(
        args.output_dir, "wrapped.json", wrapped_user.model_dump_json(indent=2)
    )

    print("Wrote output to", args.output_dir)


if __name__ == "__main__":
    # asyncio.run creates and closes the event loop; get_event_loop() without
    # a running loop is deprecated.
    asyncio.run(main())
def get_conn_str(password: str, database: str) -> str:
    """Build the MongoDB connection URI for ``database``.

    The URI authenticates as user ``root`` against the three
    ``backend-shard-00-0x`` hosts and carries a fixed set of query options.

    Args:
        password: Password for the ``root`` user; inserted into the URI as-is.
        database: Name of the database placed in the URI path.

    Returns:
        The complete ``mongodb://`` connection string.
    """
    # NOTE(review): the password is not percent-encoded here; confirm the
    # stored secret contains no URI-reserved characters or is already escaped.
    shard_hosts = (
        "backend-shard-00-00.aqlpb.mongodb.net:27017",
        "backend-shard-00-01.aqlpb.mongodb.net:27017",
        "backend-shard-00-02.aqlpb.mongodb.net:27017",
    )
    query_options = (
        "ssl=true",
        "replicaSet=atlas-25pkcv-shard-0",
        "authSource=admin",
        "retryWrites=true",
        "w=majority",
    )
    host_list = ",".join(shard_hosts)
    query = "&".join(query_options)
    return f"mongodb://root:{password}@{host_list}/{database}?{query}"
workflow which creates a token with rea ``` +**Question**: How can I see my stats without giving GitHub Trends my access token? + +**Answer**: You will need to run the code locally. Clone the repository, navigate to the `backend` folder, install the dependencies (`pip install -r requirements.txt`), and then run the following script: + +```bash +python ./scripts/local.py --user_id=USER_ID --access_token=ACCESS_TOKEN --start_date=2023-01-01 --end_date=2023-01-31 --output_dir=OUTPUT_DIR +``` + +The script writes the raw and processed data as JSON files (`raw.json`, `langs.json`, `repos.json`, and `wrapped.json`) into the specified output directory. + **Question**: What if I find a bug, or want to contribute? **Answer**: Raise an [issue](https://github.com/avgupta456/github-trends/issues/new) or [pull request](https://github.com/avgupta456/github-trends/compare) through GitHub. I would be happy to discuss and implement any suggestions or improvements.