Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

(ci) fix integrationtest vectorsearch #981

Merged
merged 8 commits into from
Nov 22, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 65 additions & 15 deletions .github/workflows/integration-cloud.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,20 +13,58 @@ concurrency:
cancel-in-progress: false

jobs:
setup-environment:
runs-on: ubuntu-latest
outputs:
python-cache-key: ${{ steps.cache-python.outputs.cache-hit }}
steps:
- uses: actions/checkout@v4

- name: Set up Python
id: setup-python
uses: actions/setup-python@v5
with:
python-version: "3.10"

- name: Cache dependencies
id: cache-python
uses: actions/cache@v3
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
uses: actions/cache@v3
uses: actions/cache@v4

with:
path: ~/.cache/pip
key: ${{ runner.os }}-pip-${{ hashFiles('**/pyproject.toml') }}

- name: Install dependencies
if: steps.cache-python.outputs.cache-hit != 'true'
run: |
pip install -e .[dev]

- name: Upload Python environment
uses: actions/upload-artifact@v3
with:
name: python-environment
path: ~/.cache/pip
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't understand why you would use this instead of simply relying on actions/cache for restoring as well.


integration-cloud:
runs-on: ubuntu-latest
needs: setup-environment
steps:
- uses: actions/checkout@v4

- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.9"
cache: "pip"
cache-dependency-path: |
pyproject.toml
python-version: "3.10"

- name: Download Python environment
uses: actions/download-artifact@v3
with:
name: python-environment
path: ~/.cache/pip
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use actions/cache with `restore-keys` instead?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, this uses an outdated version of the action:

Suggested change
- name: Download Python environment
uses: actions/download-artifact@v3
with:
name: python-environment
path: ~/.cache/pip
- name: Download Python environment
uses: actions/download-artifact@v4
with:
name: python-environment
path: ~/.cache/pip


- name: Install dependencies
run: |
pip install -e .[dev]

- name: Run integration tests
env:
VESPA_TEAM_API_KEY: ${{ secrets.VESPA_TEAM_API_KEY }}
Expand All @@ -35,42 +73,54 @@ jobs:

integration-cloud-token:
runs-on: ubuntu-latest
needs: integration-cloud
needs: setup-environment
steps:
- uses: actions/checkout@v4

- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.9"
cache: "pip"
cache-dependency-path: |
pyproject.toml
python-version: "3.10"

- name: Download Python environment
uses: actions/download-artifact@v3
with:
name: python-environment
path: ~/.cache/pip

- name: Install dependencies
run: |
pip install -e .[dev]

- name: Run integration tests
env:
VESPA_TEAM_API_KEY: ${{ secrets.VESPA_TEAM_API_KEY }}
VESPA_CLOUD_SECRET_TOKEN: ${{ secrets.VESPA_CLOUD_SECRET_TOKEN }}
VESPA_CLIENT_TOKEN_ID: ${{ secrets.VESPA_CLIENT_TOKEN_ID}}
VESPA_CLIENT_TOKEN_ID: ${{ secrets.VESPA_CLIENT_TOKEN_ID }}
run: |
pytest tests/integration/test_integration_vespa_cloud_token.py -s -v

integration-cloud-vector-search:
runs-on: ubuntu-latest
needs: integration-cloud-token
needs: setup-environment
steps:
- uses: actions/checkout@v4

- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.9"
cache: "pip"
cache-dependency-path: |
pyproject.toml
python-version: "3.10"

- name: Download Python environment
uses: actions/download-artifact@v3
with:
name: python-environment
path: ~/.cache/pip

- name: Install dependencies
run: |
pip install -e .[dev]

- name: Run integration tests
env:
VESPA_TEAM_API_KEY: ${{ secrets.VESPA_TEAM_API_KEY }}
Expand Down
68 changes: 32 additions & 36 deletions tests/integration/test_integration_vespa_cloud_vector_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
ContainerCluster,
Nodes,
DeploymentConfiguration,
EmptyDeploymentConfiguration,
Validation,
ValidationID,
)
Expand Down Expand Up @@ -93,7 +92,7 @@ def test_vector_indexing_and_query(self):

from datasets import load_dataset

sample_size = 1000
sample_size = 100
# streaming=True pages the data from S3. This is needed to avoid memory issues when loading the dataset.
dataset = load_dataset(
"KShivendu/dbpedia-entities-openai-1M", split="train", streaming=True
Expand Down Expand Up @@ -164,9 +163,7 @@ def callback(response: VespaResponse, id: str):
ok = 0
callbacks = 0
start_time = time.time()
dataset = load_dataset(
"KShivendu/dbpedia-entities-openai-1M", split="train", streaming=True
).take(100)

feed_with_wrong_field = dataset.map(
lambda x: {
"id": x["_id"],
Expand All @@ -186,9 +183,7 @@ def callback(response: VespaResponse, id: str):
self.assertEqual(callbacks, 100)

ok = 0
dataset = load_dataset(
"KShivendu/dbpedia-entities-openai-1M", split="train", streaming=True
).take(sample_size)

# Run update - assign all docs with a meta field

updates = dataset.map(lambda x: {"id": x["_id"], "fields": {"meta": "stuff"}})
Expand Down Expand Up @@ -239,7 +234,7 @@ def tearDown(self) -> None:


class TestProdDeploymentFromDisk(unittest.TestCase):
def setUp(self) -> None:
def test_setup(self) -> None:
self.app_package = create_vector_ada_application_package()
prod_region = "aws-us-east-1c"
self.app_package.clusters = [
Expand Down Expand Up @@ -302,32 +297,33 @@ def test_application_status(self):
def test_vector_indexing_and_query(self):
super().test_vector_indexing_and_query()

@unittest.skip("Do not run when not waiting for deployment.")
def tearDown(self) -> None:
self.app.delete_all_docs(
content_cluster_name="vector_content",
schema="vector",
namespace="benchmark",
)
time.sleep(5)
with self.app.syncio() as sync_session:
response: VespaResponse = sync_session.query(
{"yql": "select id from sources * where true", "hits": 10}
)
self.assertEqual(response.get_status_code(), 200)
self.assertEqual(len(response.hits), 0)
print(response.get_json())
# Do NOT skip the tearDown method (e.g. with @unittest.skip): the test will not exit.
# @unittest.skip("Do not run when not waiting for deployment.")
# def tearDown(self) -> None:
# self.app.delete_all_docs(
# content_cluster_name="vector_content",
# schema="vector",
# namespace="benchmark",
# )
# time.sleep(5)
# with self.app.syncio() as sync_session:
# response: VespaResponse = sync_session.query(
# {"yql": "select id from sources * where true", "hits": 10}
# )
# self.assertEqual(response.get_status_code(), 200)
# self.assertEqual(len(response.hits), 0)
# print(response.get_json())

# Deployment is deleted by deploying with an empty deployment.xml file.
self.app_package.deployment_config = EmptyDeploymentConfiguration()
# # Deployment is deleted by deploying with an empty deployment.xml file.
# self.app_package.deployment_config = EmptyDeploymentConfiguration()

# Vespa won't push the deleted deployment.xml file unless we add a validation override
tomorrow = datetime.now() + timedelta(days=1)
formatted_date = tomorrow.strftime("%Y-%m-%d")
self.app_package.validations = [
Validation(ValidationID("deployment-removal"), formatted_date)
]
self.app_package.to_files(self.application_root)
# This will delete the deployment
self.vespa_cloud._start_prod_deployment(self.application_root)
shutil.rmtree(self.application_root, ignore_errors=True)
# # Vespa won't push the deleted deployment.xml file unless we add a validation override
# tomorrow = datetime.now() + timedelta(days=1)
# formatted_date = tomorrow.strftime("%Y-%m-%d")
# self.app_package.validations = [
# Validation(ValidationID("deployment-removal"), formatted_date)
# ]
# self.app_package.to_files(self.application_root)
# # This will delete the deployment
# self.vespa_cloud._start_prod_deployment(self.application_root)
# shutil.rmtree(self.application_root, ignore_errors=True)
Loading