Skip to content

Commit

Permalink
Merge pull request #300 from larinam/pytest-introduction
Browse files Browse the repository at this point in the history
Introduce tests with pytest
  • Loading branch information
dartpain authored Aug 14, 2023
2 parents 9a393b4 + 85f9ae5 commit c3c7878
Show file tree
Hide file tree
Showing 23 changed files with 103 additions and 45 deletions.
27 changes: 27 additions & 0 deletions .github/workflows/pytest.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# CI workflow: run the pytest suite for every push and pull request,
# once per Python version listed in the matrix below.
name: Run python tests with pytest

on: [push, pull_request]

jobs:
  build:

    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.9", "3.10", "3.11"]

    steps:
      - uses: actions/checkout@v3
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        # The application's requirements live in application/requirements.txt,
        # hence the `cd` before the conditional install.
        run: |
          python -m pip install --upgrade pip
          pip install pytest
          cd application
          if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
      - name: Test with pytest
        run: |
          python -m pytest
4 changes: 2 additions & 2 deletions application/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@ FROM python:3.10-slim-bullseye
COPY --from=builder /usr/local/ /usr/local/

WORKDIR /app
COPY . /app
COPY . /app/application
ENV FLASK_APP=app.py
ENV FLASK_DEBUG=true

EXPOSE 7091

CMD ["gunicorn", "-w", "2", "--timeout", "120", "--bind", "0.0.0.0:7091", "wsgi:app"]
CMD ["gunicorn", "-w", "2", "--timeout", "120", "--bind", "0.0.0.0:7091", "application.wsgi:app"]
Empty file added application/__init__.py
Empty file.
20 changes: 11 additions & 9 deletions application/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,9 @@
from pymongo import MongoClient
from werkzeug.utils import secure_filename

from core.settings import settings
from error import bad_request
from worker import ingest_worker
from application.core.settings import settings
from application.error import bad_request
from application.worker import ingest_worker
from bson.objectid import ObjectId

# os.environ["LANGCHAIN_HANDLER"] = "langchain"
Expand Down Expand Up @@ -68,19 +68,20 @@
dotenv.load_dotenv()

# load the prompts
with open("prompts/combine_prompt.txt", "r") as f:
current_dir = os.path.dirname(os.path.abspath(__file__))
with open(os.path.join(current_dir, "prompts", "combine_prompt.txt"), "r") as f:
template = f.read()

with open("prompts/combine_prompt_hist.txt", "r") as f:
with open(os.path.join(current_dir, "prompts", "combine_prompt_hist.txt"), "r") as f:
template_hist = f.read()

with open("prompts/question_prompt.txt", "r") as f:
with open(os.path.join(current_dir, "prompts", "question_prompt.txt"), "r") as f:
template_quest = f.read()

with open("prompts/chat_combine_prompt.txt", "r") as f:
with open(os.path.join(current_dir, "prompts", "chat_combine_prompt.txt"), "r") as f:
chat_combine_template = f.read()

with open("prompts/chat_reduce_prompt.txt", "r") as f:
with open(os.path.join(current_dir, "prompts", "chat_reduce_prompt.txt"), "r") as f:
chat_reduce_template = f.read()

api_key_set = settings.API_KEY is not None
Expand All @@ -92,7 +93,7 @@
app.config["CELERY_RESULT_BACKEND"] = settings.CELERY_RESULT_BACKEND
app.config["MONGO_URI"] = settings.MONGO_URI
celery = Celery()
celery.config_from_object("celeryconfig")
celery.config_from_object("application.celeryconfig")
mongo = MongoClient(app.config["MONGO_URI"])
db = mongo["docsgpt"]
vectors_collection = db["vectors"]
Expand Down Expand Up @@ -129,6 +130,7 @@ def get_vectorstore(data):
vectorstore = ""
else:
vectorstore = ""
vectorstore = os.path.join("application", vectorstore)
return vectorstore


Expand Down
1 change: 1 addition & 0 deletions application/parser/file/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

2 changes: 1 addition & 1 deletion application/parser/file/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from typing import Any, List

from langchain.docstore.document import Document as LCDocument
from parser.schema.base import Document
from application.parser.schema.base import Document


class BaseReader:
Expand Down
18 changes: 9 additions & 9 deletions application/parser/file/bulk.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,15 @@
from pathlib import Path
from typing import Callable, Dict, List, Optional, Union

from parser.file.base import BaseReader
from parser.file.base_parser import BaseParser
from parser.file.docs_parser import DocxParser, PDFParser
from parser.file.epub_parser import EpubParser
from parser.file.html_parser import HTMLParser
from parser.file.markdown_parser import MarkdownParser
from parser.file.rst_parser import RstParser
from parser.file.tabular_parser import PandasCSVParser
from parser.schema.base import Document
from application.parser.file.base import BaseReader
from application.parser.file.base_parser import BaseParser
from application.parser.file.docs_parser import DocxParser, PDFParser
from application.parser.file.epub_parser import EpubParser
from application.parser.file.html_parser import HTMLParser
from application.parser.file.markdown_parser import MarkdownParser
from application.parser.file.rst_parser import RstParser
from application.parser.file.tabular_parser import PandasCSVParser
from application.parser.schema.base import Document

DEFAULT_FILE_EXTRACTOR: Dict[str, BaseParser] = {
".pdf": PDFParser(),
Expand Down
2 changes: 1 addition & 1 deletion application/parser/file/docs_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from pathlib import Path
from typing import Dict

from parser.file.base_parser import BaseParser
from application.parser.file.base_parser import BaseParser


class PDFParser(BaseParser):
Expand Down
2 changes: 1 addition & 1 deletion application/parser/file/epub_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from pathlib import Path
from typing import Dict

from parser.file.base_parser import BaseParser
from application.parser.file.base_parser import BaseParser


class EpubParser(BaseParser):
Expand Down
2 changes: 1 addition & 1 deletion application/parser/file/html_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from pathlib import Path
from typing import Dict, Union

from parser.file.base_parser import BaseParser
from application.parser.file.base_parser import BaseParser


class HTMLParser(BaseParser):
Expand Down
2 changes: 1 addition & 1 deletion application/parser/file/markdown_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from typing import Any, Dict, List, Optional, Tuple, Union, cast

import tiktoken
from parser.file.base_parser import BaseParser
from application.parser.file.base_parser import BaseParser


class MarkdownParser(BaseParser):
Expand Down
2 changes: 1 addition & 1 deletion application/parser/file/rst_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union

from parser.file.base_parser import BaseParser
from application.parser.file.base_parser import BaseParser


class RstParser(BaseParser):
Expand Down
2 changes: 1 addition & 1 deletion application/parser/file/tabular_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from pathlib import Path
from typing import Any, Dict, List, Union

from parser.file.base_parser import BaseParser
from application.parser.file.base_parser import BaseParser


class CSVParser(BaseParser):
Expand Down
1 change: 1 addition & 0 deletions application/parser/schema/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

2 changes: 1 addition & 1 deletion application/parser/schema/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from dataclasses import dataclass

from langchain.docstore.document import Document as LCDocument
from parser.schema.schema import BaseDocument
from application.parser.schema.schema import BaseDocument


@dataclass
Expand Down
2 changes: 1 addition & 1 deletion application/parser/token_func.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from typing import List

import tiktoken
from parser.schema.base import Document
from application.parser.schema.base import Document


def separate_header_and_body(text):
Expand Down
1 change: 1 addition & 0 deletions application/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ pymongo==4.3.3
pyowm==3.3.0
PyPDF2==3.0.1
PySocks==1.7.1
pytest
python-dateutil==2.8.2
python-dotenv==1.0.0
python-jose==3.3.0
Expand Down
10 changes: 5 additions & 5 deletions application/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@
import nltk
import requests

from core.settings import settings
from parser.file.bulk import SimpleDirectoryReader
from parser.open_ai_func import call_openai_api
from parser.schema.base import Document
from parser.token_func import group_split
from application.core.settings import settings
from application.parser.file.bulk import SimpleDirectoryReader
from application.parser.open_ai_func import call_openai_api
from application.parser.schema.base import Document
from application.parser.token_func import group_split

try:
nltk.download('punkt', quiet=True)
Expand Down
2 changes: 1 addition & 1 deletion application/wsgi.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from app import app
from application.app import app

if __name__ == "__main__":
app.run(debug=True, port=7091)
8 changes: 4 additions & 4 deletions docker-compose-azure.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,16 @@ services:
ports:
- "7091:7091"
volumes:
- ./application/indexes:/app/indexes
- ./application/inputs:/app/inputs
- ./application/vectors:/app/vectors
- ./application/indexes:/app/application/indexes
- ./application/inputs:/app/application/inputs
- ./application/vectors:/app/application/vectors
depends_on:
- redis
- mongo

worker:
build: ./application
command: celery -A app.celery worker -l INFO
command: celery -A application.app.celery worker -l INFO
environment:
- API_KEY=$OPENAI_API_KEY
- EMBEDDINGS_KEY=$OPENAI_API_KEY
Expand Down
8 changes: 4 additions & 4 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,16 @@ services:
ports:
- "7091:7091"
volumes:
- ./application/indexes:/app/indexes
- ./application/inputs:/app/inputs
- ./application/vectors:/app/vectors
- ./application/indexes:/app/application/indexes
- ./application/inputs:/app/application/inputs
- ./application/vectors:/app/application/vectors
depends_on:
- redis
- mongo

worker:
build: ./application
command: celery -A app.celery worker -l INFO
command: celery -A application.app.celery worker -l INFO
environment:
- API_KEY=$OPENAI_API_KEY
- EMBEDDINGS_KEY=$OPENAI_API_KEY
Expand Down
2 changes: 0 additions & 2 deletions scripts/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -110,8 +110,6 @@ tenacity==8.2.2
threadpoolctl==3.2.0
tiktoken==0.4.0
tokenizers==0.13.3
torch==2.0.1
torchvision==0.15.2
tqdm==4.65.0
transformers==4.31.0
typer==0.9.0
Expand Down
28 changes: 28 additions & 0 deletions tests/test_app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from application.app import get_vectorstore
import os


# Test cases for get_vectorstore function
def test_no_active_docs():
    """A payload without an ``active_docs`` key yields the bare ``application`` prefix."""
    expected = os.path.join("application", "")
    assert get_vectorstore({}) == expected


def test_local_default_active_docs():
    """``local/default`` is expected to yield the bare ``application`` prefix."""
    payload = {"active_docs": "local/default"}
    expected = os.path.join("application", "")
    assert get_vectorstore(payload) == expected


def test_local_non_default_active_docs():
    """A non-default ``local/...`` entry is expected under ``application/indexes/``."""
    payload = {"active_docs": "local/something"}
    expected = os.path.join("application", "indexes/local/something")
    assert get_vectorstore(payload) == expected


def test_default_active_docs():
    """The plain ``default`` entry is expected to yield the bare ``application`` prefix."""
    payload = {"active_docs": "default"}
    expected = os.path.join("application", "")
    assert get_vectorstore(payload) == expected


def test_complex_active_docs():
    """A multi-segment ``local/...`` entry keeps its full path under ``application/indexes/``."""
    payload = {"active_docs": "local/other/path"}
    expected = os.path.join("application", "indexes/local/other/path")
    assert get_vectorstore(payload) == expected

1 comment on commit c3c7878

@vercel
Copy link

@vercel vercel bot commented on c3c7878 Aug 14, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Successfully deployed to the following URLs:

docs-gpt – ./

docs-gpt-brown.vercel.app
docs-gpt-git-main-arc53.vercel.app
docs-gpt-arc53.vercel.app

Please sign in to comment.