Commit

Merge branch 'master' into stable

jrtcppv committed May 1, 2022
2 parents 25b9d93 + ff92411 commit 3f67204

Showing 51 changed files with 2,566 additions and 1,531 deletions.
16 changes: 8 additions & 8 deletions .circleci/config.yml
@@ -84,7 +84,7 @@ jobs:
      - run:
          name: Clone source on lightsail
          command: |
-           ssh lightsail 'export CIRCLE_BRANCH='"'$CIRCLE_BRANCH'"'; git clone -b $CIRCLE_BRANCH --recurse-submodules https://github.com/cvisionai/tator';
+           ssh lightsail 'export CIRCLE_BRANCH='"'$CIRCLE_BRANCH'"'; git clone -b ${CIRCLE_BRANCH:-stable} --recurse-submodules https://github.com/cvisionai/tator';
      - persist_to_workspace:
          root: ~/
          paths:
@@ -127,7 +127,7 @@ jobs:
  front-end-tests:
    machine:
      image: ubuntu-2004:202010-01
-     resource_class: large
+     resource_class: xlarge
    steps:
      - attach_workspace:
          at: ~/
@@ -141,7 +141,7 @@
            wget https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb;
            sudo -E apt-get -yq --no-install-suggests --no-install-recommends install ./google-chrome-stable_current_amd64.deb;
            sudo -E apt-get update && sudo -E apt-get -yq --no-install-suggests --no-install-recommends install tesseract-ocr;
-           pip3 install playwright==1.21.0 pytest-playwright==0.1.2 pytesseract==0.3.9 opencv-python;
+           pip3 install playwright==1.17.2 pytest-playwright==0.1.2 pytesseract==0.3.9 opencv-python;
            export PATH=$PATH:$HOME/.local/bin:/snap/bin;
            playwright install;
            scp -r lightsail:/tmp/tator_py_whl/tator*.whl /tmp;
@@ -217,23 +217,23 @@ workflows:
        filters:
          tags:
            only: /.*/
-     - rest-tests:
+     - front-end-tests:
          requires:
-           - install-tator
+           - install-tator
          context: cvisionai
          filters:
            tags:
              only: /.*/
-     - front-end-tests:
+     - rest-tests:
          requires:
-           - install-tator
+           - front-end-tests
          context: cvisionai
          filters:
            tags:
              only: /.*/
      - tator-py-tests:
          requires:
-           - install-tator
+           - front-end-tests
          context: cvisionai
          filters:
            tags:
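In the new clone command, `${CIRCLE_BRANCH:-stable}` is POSIX parameter expansion: it substitutes `stable` whenever `CIRCLE_BRANCH` is unset or empty, which keeps the clone valid on runs where CircleCI does not populate the branch variable (e.g., tag-triggered builds). The workflow edits above also chain the suites so `rest-tests` and `tator-py-tests` wait on `front-end-tests` rather than running in parallel after `install-tator`.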
22 changes: 12 additions & 10 deletions main/backup.py
@@ -4,7 +4,7 @@
import json
import os
from uuid import uuid4
-from typing import Dict, Tuple
+from typing import Generator

from django.db import transaction

@@ -240,30 +240,32 @@ def get_store_info(self, project) -> bool:

        return success, store_info

-    def backup_resources(self, resource_qs) -> Tuple[int, Dict[int, set]]:
+    def backup_resources(self, resource_qs) -> Generator[tuple, None, None]:
        """
-        Copies the resources in the given queryset from the live store to the backup store for their
-        respective projects. Returns a tuple where the first element is the number of resources
-        that were successfully backed up and the second is a dict that maps project ids to lists of
-        media ids with at least one resource that failed to back up properly.
+        Creates a generator that copies the resources in the given queryset from the live store to
+        the backup store for their respective projects. Yields a tuple with the first element being
+        the success of the backup operation for the current resource and the second element being
+        the resource in question, so the calling function can iterate over the queryset and keep
+        track of its progress.

        If there is no backup bucket for the given project (or a site-wide default), this will
-        return `False`.
+        yield `(False, resource)`.

        :param resource_qs: The resources to back up
        :type resource_qs: Queryset
-        :rtype: Tuple[int, Dict[int, set]]
+        :rtype: Generator[tuple, None, None]
        """
-        successful_backups = set()
        for resource in resource_qs.iterator():
            project = self.project_from_resource(resource)
            path = resource.path
            success, store_info = self.get_store_info(project)
+            success = success and "backup" in store_info

-            if success and "backup" in store_info:
+            if success:
                if store_info["backup"]["store"].check_key(path):
                    logger.info(f"Resource {path} already backed up")
-                    return True
+                    continue

            # Get presigned url from the live bucket, set to expire in 1h
            download_url = store_info["live"]["store"].get_download_url(path, 3600)
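Because `backup_resources` is now a generator, callers drive the copy loop and aggregate results themselves. A minimal sketch of a consumer, assuming `backup_mgr` is the object that defines the method above; only the `(success, resource)` yield shape and `resource.path` come from the diff:

    # Hypothetical caller; backup_mgr and the logging are assumptions.
    num_backed_up = 0
    failed_paths = []
    for success, resource in backup_mgr.backup_resources(resource_qs):
        if success:
            num_backed_up += 1
        else:
            failed_paths.append(resource.path)
    logger.info(f"Backed up {num_backed_up} resources, {len(failed_paths)} failed")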
2 changes: 1 addition & 1 deletion main/models.py
@@ -1473,7 +1473,7 @@ class Leaf(Model, ModelDiffMixin):
    modified_by = ForeignKey(User, on_delete=SET_NULL, null=True, blank=True,
                             related_name='leaf_modified_by', db_column='modified_by')
    parent=ForeignKey('self', on_delete=SET_NULL, blank=True, null=True, db_column='parent')
-   path=PathField(unique=True)
+   path=PathField()
    name = CharField(max_length=255)
    deleted = BooleanField(default=False)
23 changes: 17 additions & 6 deletions main/rest/_attribute_query.py
@@ -15,6 +15,17 @@

logger = logging.getLogger(__name__)

+
+def format_query_string(query_str: str) -> str:
+    """
+    Preformatting before passing the query to ElasticSearch.
+
+    :param query_str: The raw query string
+    :type query_str: str
+    """
+    return query_str.replace("/", "\\/")
+
+
def get_attribute_es_query(query_params, query, bools, project,
                           is_media=True, annotation_bools=None, modified=None):
    """ TODO: add documentation for this """
@@ -93,10 +104,10 @@ def get_attribute_es_query(query_params, query, bools, project,
        if section_object.lucene_search:
            attr_query['media']['filter'].append({'bool': {
                'should': [
-                   {'query_string': {'query': section_object.lucene_search}},
+                   {'query_string': {'query': format_query_string(section_object.lucene_search)}},
                    {'has_child': {
                        'type': 'annotation',
-                       'query': {'query_string': {'query': section_object.lucene_search}},
+                       'query': {'query_string': {'query': format_query_string(section_object.lucene_search)}},
                        },
                    },
                ],
@@ -131,13 +142,13 @@ def get_attribute_es_query(query_params, query, bools, project,

        search = query_params.get('search')
        if search is not None:
-           search_query = {'query_string': {'query': search}}
+           search_query = {'query_string': {'query': format_query_string(search)}}
            query['query']['bool']['filter'].append(search_query)

        annotation_search = query_params.get('annotation_search')
        if annotation_search is not None:
            annotation_search_query = {'has_child': {'type': 'annotation',
-               'query': {'query_string': {'query': annotation_search}}}}
+               'query': {'query_string': {'query': format_query_string(annotation_search)}}}}
            query['query']['bool']['filter'].append(annotation_search_query)

    else:
@@ -172,13 +183,13 @@ def get_attribute_es_query(query_params, query, bools, project,

        search = query_params.get('search', None)
        if search is not None:
-           search_query = {'query_string': {'query': search}}
+           search_query = {'query_string': {'query': format_query_string(search)}}
            query['query']['bool']['filter'].append(search_query)

        media_search = query_params.get('media_search')
        if media_search is not None:
            media_search_query = {'has_parent': {'parent_type': 'media',
-               'query': {'query_string': {'query': media_search}}}}
+               'query': {'query_string': {'query': format_query_string(media_search)}}}}
            query['query']['bool']['filter'].append(media_search_query)

        if modified is not None:
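Forward slashes are reserved in Lucene query-string syntax (they delimit regular expressions), so an unescaped `/` in a section's saved search or in the `search`, `annotation_search`, or `media_search` parameters would otherwise be parsed as a regex or rejected. A quick sketch of the helper's effect, with hypothetical query values:

    format_query_string("path:videos/2022")  # returns 'path:videos\\/2022'
    format_query_string("name:*")            # no slash, returned unchanged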
148 changes: 146 additions & 2 deletions main/rest/_util.py
@@ -3,15 +3,16 @@
import logging
from urllib.parse import urlparse

+from django.contrib.contenttypes.models import ContentType
from django.utils.http import urlencode
from django.db.models.expressions import Subquery
from rest_framework.reverse import reverse
from rest_framework.exceptions import APIException
from rest_framework.exceptions import PermissionDenied

-from ..models import type_to_obj
+from ..models import type_to_obj, ChangeLog, ChangeToObject, Project

-from ._attributes import convert_attribute
+from ._attributes import bulk_patch_attributes, convert_attribute

logger = logging.getLogger(__name__)
@@ -174,3 +175,146 @@ def url_to_key(url, project_obj):
    path = '/'.join(parsed.path.split('/')[-num_tokens:])
    return path, bucket, upload
+
+
+def bulk_update_and_log_changes(queryset, project, user, update_kwargs=None, new_attributes=None):
+    """
+    Performs a bulk update and creates a single changelog referenced by all changed objects
+
+    :param queryset: The queryset to update
+    :param project: The project the request originates from
+    :param user: The user making the requests
+    :param update_kwargs: The dictionary of arguments for queryset.update(), will be used like this:
+                          `queryset.update(**update_kwargs)`
+    :param new_attributes: The validated attributes returned by `validate_attributes`, if any, will
+                           be used like this: `bulk_patch_attributes(new_attributes, queryset)`
+    """
+    if not queryset.exists():
+        logger.info("Queryset empty, not performing any updates")
+        return
+
+    if update_kwargs is None and new_attributes is None:
+        raise ValueError(
+            "Must specify at least one of the following arguments: update_kwargs, new_attributes"
+        )
+
+    if type(project) != Project:
+        project = Project.objects.get(pk=project)
+
+    # Get prior state data for ChangeLog creation
+    updated_ids = list(queryset.values_list("id", flat=True))
+    first_obj = queryset.first()
+    ref_table = ContentType.objects.get_for_model(first_obj)
+    model_dict = first_obj.model_dict
+
+    # Perform queryset update
+    if update_kwargs is not None:
+        queryset.update(**update_kwargs)
+    if new_attributes is not None:
+        bulk_patch_attributes(new_attributes, queryset)
+
+    # Create ChangeLog
+    first_obj = type(first_obj).objects.get(pk=first_obj.id)
+    cl = ChangeLog(
+        project=project,
+        user=user,
+        description_of_change=first_obj.change_dict(model_dict),
+    )
+    cl.save()
+    objs = (
+        ChangeToObject(ref_table=ref_table, ref_id=obj_id, change_id=cl) for obj_id in updated_ids
+    )
+    bulk_create_from_generator(objs, ChangeToObject)
+
+
+def bulk_delete_and_log_changes(queryset, project, user):
+    """
+    Performs a bulk delete and creates a changelog for it.
+
+    :param queryset: The queryset to mark for deletion
+    :param project: The project the request originates from
+    :param user: The user making the requests
+    """
+    delete_kwargs = {
+        "deleted": True,
+        "modified_datetime": datetime.datetime.now(datetime.timezone.utc),
+        "modified_by": user,
+    }
+    bulk_update_and_log_changes(queryset, project, user, update_kwargs=delete_kwargs)
+
+
+def log_changes(obj, model_dict, project, user):
+    """
+    Creates a changelog for a single updated object.
+
+    :param obj: The object to compare and create a change log for.
+    :param model_dict: The state retrieved from `obj.model_dict` **before updating**.
+    :param project: The project the request originates from
+    :param user: The user making the requests
+    """
+    if type(project) != Project:
+        project = Project.objects.get(pk=project)
+
+    ref_table = ContentType.objects.get_for_model(obj)
+    cl = ChangeLog(project=project, user=user, description_of_change=obj.change_dict(model_dict))
+    cl.save()
+    ChangeToObject(ref_table=ref_table, ref_id=obj.id, change_id=cl).save()
+
+
+def delete_and_log_changes(obj, project, user):
+    """
+    Deletes a single object and creates a changelog for it.
+
+    :param obj: The object to delete and create a change log for.
+    :param project: The project the request originates from
+    :param user: The user making the requests
+    """
+    model_dict = obj.model_dict
+    obj.deleted = True
+    obj.modified_datetime = datetime.datetime.now(datetime.timezone.utc)
+    obj.modified_by = user
+    obj.save()
+
+    log_changes(obj, model_dict, project, user)
+
+
+def log_creation(obj, project, user):
+    """
+    Creates changelogs for a new object.
+
+    :param obj: The new object to create a change log for.
+    :param project: The project the request originates from
+    :param user: The user making the requests
+    """
+    if type(project) != Project:
+        project = Project.objects.get(pk=project)
+
+    ref_table = ContentType.objects.get_for_model(obj)
+    cl = ChangeLog(project=project, user=user, description_of_change=obj.create_dict)
+    cl.save()
+    ChangeToObject(ref_table=ref_table, ref_id=obj.id, change_id=cl).save()
+
+
+def bulk_log_creation(objects, project, user):
+    """
+    Creates changelogs for multiple new objects.
+    :param objects: The new objects to create change logs for.
+    :param project: The project the request originates from
+    :param user: The user making the requests
+    """
+    # Create ChangeLogs
+    objs = (
+        ChangeLog(project=project, user=user, description_of_change=obj.create_dict)
+        for obj in objects
+    )
+    change_logs = bulk_create_from_generator(objs, ChangeLog)
+
+    # Associate ChangeLogs with created objects
+    ref_table = ContentType.objects.get_for_model(objects[0])
+    ids = [obj.id for obj in objects]
+    objs = (
+        ChangeToObject(ref_table=ref_table, ref_id=ref_id, change_id=cl)
+        for ref_id, cl in zip(ids, change_logs)
+    )
+    bulk_create_from_generator(objs, ChangeToObject)
+    return ids
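For orientation, a minimal sketch of how an endpoint might call these helpers; the model, queryset, and `validate_attributes` call are assumptions for illustration, and only the helper signatures come from the code above:

    # Hypothetical bulk PATCH body: update rows, then record one ChangeLog
    # referenced by every updated object.
    qs = Localization.objects.filter(project=project_id, deleted=False)
    new_attrs = validate_attributes(params, qs.first())  # assumed helper
    bulk_update_and_log_changes(
        qs,
        project_id,  # accepts a Project instance or a primary key
        request.user,
        update_kwargs={"modified_by": request.user},
        new_attributes=new_attrs,
    )

    # Hypothetical bulk DELETE body: soft-deletes and logs via the same path.
    bulk_delete_and_log_changes(qs, project_id, request.user)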
2 changes: 1 addition & 1 deletion main/rest/change_log.py
@@ -10,7 +10,7 @@

logger = logging.getLogger(__name__)

-CHANGE_LOG_PROPERTIES = ["id", "project", "user", "description_of_change"]
+CHANGE_LOG_PROPERTIES = ["id", "project", "user", "description_of_change", "modified_datetime"]


class ChangeLogListAPI(BaseListView):
3 changes: 2 additions & 1 deletion main/rest/download_info.py
@@ -44,7 +44,8 @@ def _post(self, params):
        if url is None:
            upload = key.startswith('_uploads')
            bucket = project_obj.get_bucket(upload=upload)
-           store_default = get_tator_store(bucket, upload=upload)
+           use_upload_bucket = upload and not bucket
+           store_default = get_tator_store(bucket, upload=use_upload_bucket)

            tator_store = store_lookup.get(key, store_default)
            # Make sure the key corresponds to the correct project.
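With the added guard, `upload=True` is passed to `get_tator_store` only when the key lives under `_uploads` and the project has no bucket of its own; assuming `upload=True` selects the default upload store, projects with a dedicated bucket now serve upload keys from that bucket instead.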