Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Full-text search using SearchVector #8430

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 88 additions & 1 deletion tests/webapp/api/test_push_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from tests.conftest import IS_WINDOWS
from treeherder.etl.push import store_push_data
from treeherder.model.models import FailureClassification, JobNote, Push
from treeherder.model.models import Commit, FailureClassification, JobNote, Push
from treeherder.webapp.api import utils


Expand Down Expand Up @@ -365,6 +365,93 @@ def test_push_author_contains(client, test_repository):
assert results[0]["id"] == 3


def test_push_search(client, test_repository):
    """
    Test the ``search`` parameter of the push list endpoint.

    The parameter matches against the Commit fields ``revision``, ``author``
    and ``comments`` and the endpoint returns the pushes owning the matching
    commits.  An empty ``search`` value must leave the push list unfiltered.
    """
    now = datetime.datetime.now()
    push1 = Push.objects.create(
        repository=test_repository,
        revision="1234abcd",
        author="[email protected]",
        time=now,
    )
    push2 = Push.objects.create(
        repository=test_repository,
        revision="2234abcd",
        author="[email protected]",
        time=now + datetime.timedelta(seconds=1),
    )
    push3 = Push.objects.create(
        repository=test_repository,
        revision="3234abcd",
        author="[email protected]",
        time=now + datetime.timedelta(seconds=2),
    )

    # The search runs against Commit rows, so every push needs one commit.
    Commit.objects.create(
        push=push1, revision="1234abcd", author="kaz <[email protected]>", comments="Initial commit"
    )
    Commit.objects.create(
        push=push2, revision="2234abcd", author="foo <[email protected]>", comments="Bug 12345567 - fix"
    )
    Commit.objects.create(
        push=push3,
        revision="3234abcd",
        author="quxzan <qux@bar.com>",
        comments="Bug 12345567 - Feature added",
    )

    push_list_url = reverse("push-list", kwargs={"project": test_repository.name})

    def search(term):
        """Request the push list with ``?search=<term>`` and return its results."""
        resp = client.get(f"{push_list_url}?search={term}")
        assert resp.status_code == 200
        return resp.json()["results"]

    # Expected ids are taken from the created objects rather than hard-coded,
    # so the test does not depend on the primary keys the database hands out.

    # Test search by comments
    results = search("bug")
    assert len(results) == 2
    assert {result["id"] for result in results} == {push2.id, push3.id}

    # Test search by bug number
    results = search("12345567")
    assert len(results) == 2
    assert {result["id"] for result in results} == {push2.id, push3.id}

    # Test search by author
    results = search("foo")
    assert len(results) == 1
    assert results[0]["id"] == push2.id

    # Test search by revision
    results = search("3234abcd")
    assert len(results) == 1
    assert results[0]["id"] == push3.id

    # Test empty search input: no filtering, every push is returned
    results = search("")
    assert len(results) == 3
    assert {result["id"] for result in results} == {push1.id, push2.id, push3.id}


def test_push_reviewbot(client, test_repository):
"""
test the reviewbot parameter
Expand Down
1 change: 1 addition & 0 deletions treeherder/config/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@
INSTALLED_APPS = [
"django.contrib.auth",
"django.contrib.contenttypes",
"django.contrib.postgres.search",
# Disable Django's own staticfiles handling in favour of WhiteNoise, for
# greater consistency between gunicorn and `./manage.py runserver`.
"whitenoise.runserver_nostatic",
Expand Down
24 changes: 24 additions & 0 deletions treeherder/model/migrations/0036_commit_search_vector_idx.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Generated by Django 5.1.2 on 2025-01-24 07:42

import django.contrib.postgres.indexes
import django.contrib.postgres.search
from django.db import migrations


class Migration(migrations.Migration):
    """Add a GIN index over a full-text search vector to the ``commit`` model."""

    # Applied after migration 0035 of the "model" app.
    dependencies = [
        ("model", "0035_bugscache_optional_bugzilla_ref"),
    ]

    operations = [
        migrations.AddIndex(
            model_name="commit",
            # Functional index: the indexed value is the SearchVector built
            # from revision, author and comments with the "english"
            # text-search configuration, not a plain column.
            # NOTE(review): presumably this expression has to stay identical
            # to the SearchVector used when querying for the index to be
            # usable -- confirm before changing either side.
            index=django.contrib.postgres.indexes.GinIndex(
                django.contrib.postgres.search.SearchVector(
                    "revision", "author", "comments", config="english"
                ),
                name="search_vector_idx",
            ),
        ),
    ]
9 changes: 8 additions & 1 deletion treeherder/model/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@

import newrelic.agent
from django.contrib.auth.models import User
from django.contrib.postgres.search import TrigramSimilarity
from django.contrib.postgres.indexes import GinIndex
from django.contrib.postgres.search import SearchVector, TrigramSimilarity
Archaeopteryx marked this conversation as resolved.
Show resolved Hide resolved
from django.core.cache import cache
from django.core.exceptions import ObjectDoesNotExist
from django.core.validators import MinLengthValidator
Expand Down Expand Up @@ -188,6 +189,12 @@ class Commit(models.Model):
class Meta:
    # Explicit table name for this model.
    db_table = "commit"
    # A given revision may be stored at most once per push.
    unique_together = ("push", "revision")
    indexes = [
        # Functional GIN index over the full-text search vector built from
        # revision, author and comments ("english" text-search configuration).
        # NOTE(review): presumably any query meant to benefit from this index
        # must build the very same SearchVector expression -- confirm.
        GinIndex(
            SearchVector("revision", "author", "comments", config="english"),
            name="search_vector_idx",
        ),
    ]

def __str__(self):
    """Return the push's repository name and this commit's revision, separated by a space."""
    repository_name = self.push.repository.name
    revision = self.revision
    return f"{repository_name} {revision}"
Expand Down
22 changes: 19 additions & 3 deletions treeherder/webapp/api/push.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,14 @@

import newrelic.agent
from cache_memoize import cache_memoize
from django.contrib.postgres.search import SearchQuery, SearchVector
from rest_framework import viewsets
from rest_framework.decorators import action
from rest_framework.response import Response
from rest_framework.status import HTTP_400_BAD_REQUEST, HTTP_404_NOT_FOUND

from treeherder.log_parser.failureline import get_group_results
from treeherder.model.models import Job, JobType, Push, Repository
from treeherder.model.models import Commit, Job, JobType, Push, Repository
from treeherder.push_health.builds import get_build_failures
from treeherder.push_health.compare import get_commit_history
from treeherder.push_health.linting import get_lint_failures
Expand Down Expand Up @@ -42,7 +43,6 @@ def list(self, request, project):

# This will contain some meta data about the request and results
meta = {}

# support ranges for date as well as revisions(changes) like old tbpl
for param in [
"fromchange",
Expand All @@ -60,7 +60,6 @@ def list(self, request, project):
all_repos = request.query_params.get("all_repos")

pushes = Push.objects.order_by("-time")

if not all_repos:
try:
repository = Repository.objects.get(name=project)
Expand All @@ -71,6 +70,23 @@ def list(self, request, project):

pushes = pushes.filter(repository=repository)

search_param = filter_params.get("search")
if search_param:
repository = Repository.objects.get(name=project)
filtered_commits = (
Commit.objects.annotate(
Netacci marked this conversation as resolved.
Show resolved Hide resolved
search=SearchVector("revision", "author", "comments", config="english")
)
.filter(
search=SearchQuery(search_param, config="english"),
push__repository=repository,
)
.values_list("push_id", flat=True)
# Get most recent results and limit result to 200
.order_by("-push__time")
.distinct()[:200]
)
pushes = pushes.filter(id__in=filtered_commits)
for param, value in meta.items():
if param == "fromchange":
revision_field = "revision__startswith" if len(value) < 40 else "revision"
Expand Down