Skip to content

Commit

Permalink
Merge pull request #204 from broadinstitute/development
Browse files Browse the repository at this point in the history
Release 1.10.0
  • Loading branch information
jlchang authored Feb 3, 2021
2 parents 0fedf7b + 4901903 commit 5333b6e
Show file tree
Hide file tree
Showing 6 changed files with 79 additions and 13 deletions.
30 changes: 25 additions & 5 deletions ingest/cli_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def bq_dataset_exists(dataset):
bigquery_client.get_dataset(dataset_ref)
exists = True
except NotFound:
print(f'Dataset {dataset} not found')
print(f"Dataset {dataset} not found")
return exists


Expand All @@ -33,7 +33,7 @@ def bq_table_exists(dataset, table):
bigquery_client.get_table(table_ref)
exists = True
except NotFound:
print(f'Dataset {table} not found')
print(f"Dataset {table} not found")
return exists


Expand All @@ -60,19 +60,19 @@ def validate_arguments(parsed_args):
parsed_args.bq_dataset is None and parsed_args.bq_table is not None
):
raise ValueError(
'Missing argument: --bq_dataset and --bq_table are both required for BigQuery upload.'
"Missing argument: --bq_dataset and --bq_table are both required for BigQuery upload."
)
if parsed_args.bq_dataset is not None and not bq_dataset_exists(
parsed_args.bq_dataset
):
raise ValueError(
f' Invalid argument: unable to connect to a BigQuery dataset called {parsed_args.bq_dataset}.'
f" Invalid argument: unable to connect to a BigQuery dataset called {parsed_args.bq_dataset}."
)
if parsed_args.bq_table is not None and not bq_table_exists(
parsed_args.bq_dataset, parsed_args.bq_table
):
raise ValueError(
f' Invalid argument: unable to connect to a BigQuery table called {parsed_args.bq_table}.'
f" Invalid argument: unable to connect to a BigQuery table called {parsed_args.bq_table}."
)


Expand Down Expand Up @@ -106,6 +106,13 @@ def create_parser():
"--profile-memory", action="store_true", help=profile_memory_text
)

parser.add_argument(
"--user-metrics-uuid",
required=False,
type=is_valid_uuid,
help="User identifier for Bard, i.e. the user's Mixpanel distinct ID",
)

subparsers = parser.add_subparsers()

# Ingest expression files subparsers
Expand Down Expand Up @@ -242,3 +249,16 @@ def create_parser():
)

return parser


def is_valid_uuid(value):
import uuid

try:
if value:
uuid.UUID(value)
return value
else:
return None
except ValueError as e:
raise argparse.ArgumentTypeError(e)
12 changes: 9 additions & 3 deletions ingest/config.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# File is responsible for defining globals and initializing them
try:

from mongo_connection import MongoConnection
except ImportError:
from .mongo_connection import MongoConnection
Expand All @@ -8,12 +9,12 @@
MONGO_CONNECTION = MongoConnection()


def init(study_id, study_file_id):
def init(study_id, study_file_id, user_metric_uuid=None):
global __metric_properties

study = Study(study_id)
study_file = StudyFile(study_file_id)
__metric_properties = MetricProperties(study, study_file)
__metric_properties = MetricProperties(study, study_file, user_metric_uuid)


def set_parent_event_name(event_name):
Expand All @@ -34,8 +35,13 @@ def get_metric_properties():
class MetricProperties:
"""Sets default properties for Mixpanel events"""

def __init__(self, study, study_file):
# This is a generic write-only log token, not a secret
USER_ID = "2f30ec50-a04d-4d43-8fd1-b136a2045079"

def __init__(self, study, study_file, user_uuid=None):
distinct_id = user_uuid if user_uuid else MetricProperties.USER_ID
self.__properties = {
"distinct_id": distinct_id,
"studyAccession": study.accession,
"fileName": study_file.file_name,
"fileType": study_file.file_type,
Expand Down
6 changes: 5 additions & 1 deletion ingest/ingest_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -515,7 +515,11 @@ def main() -> None:
validate_arguments(parsed_args)
arguments = vars(parsed_args)
# Initialize global variables for current ingest job
config.init(arguments["study_id"], arguments["study_file_id"])
config.init(
arguments["study_id"],
arguments["study_file_id"],
arguments["user_metrics_uuid"],
)
ingest = IngestPipeline(**arguments)
status, status_cell_metadata = run_ingest(ingest, arguments, parsed_args)
# Log Mixpanel events
Expand Down
3 changes: 0 additions & 3 deletions ingest/monitoring/metrics_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,10 @@ class MetricsService:
# Logger provides more details
dev_logger = setup_logger(__name__, "log.txt", format="support_configs")
BARD_HOST_URL = bard_host_url
user_id = "2f30ec50-a04d-4d43-8fd1-b136a2045079"

# Log metrics to Mixpanel
@classmethod
def log(cls, event_name, props={}):
# Log metrics to Mixpanel via Bard web service
props.update({"distinct_id": MetricsService.user_id})
properties = {"event": event_name, "properties": props.get_properties()}

post_body = json.dumps(properties)
Expand Down
15 changes: 15 additions & 0 deletions tests/test_cli_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import sys
import unittest
from argparse import ArgumentTypeError

sys.path.append("../ingest")
from cli_parser import is_valid_uuid


class TestAnnotations(unittest.TestCase):
def test_is_valid_uuid(self):
self.assertRaises(ArgumentTypeError, is_valid_uuid, "abc")
self.assertFalse(is_valid_uuid(""))

user_metrics_uuid = "123e4567-e89b-12d3-a456-426614174000"
self.assertEqual(is_valid_uuid(user_metrics_uuid), user_metrics_uuid)
26 changes: 25 additions & 1 deletion tests/test_metrics_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,13 @@
import json
from test_ingest import IngestTestCase
import config
from config import MetricProperties
from monitoring.metrics_service import MetricsService


user_metrics_uuid = "123e4567-e89b-12d3-a456-426614174000"


# Mocked function that replaces MetricsService.post_event
def mock_post_event(props):
expected_props = {
Expand All @@ -20,9 +24,13 @@ def mock_post_event(props):
"fileSize": 400,
"appId": "single-cell-portal",
"functionName": "ingest_expression",
"distinct_id": MetricsService.user_id,
},
}

if MetricProperties.USER_ID in props:
expected_props["properties"]["distinct_id"] = MetricProperties.USER_ID
else:
expected_props["properties"]["distinct_id"] = user_metrics_uuid
props = json.loads(props)
del props["properties"]["perfTime"]
assert props == expected_props
Expand Down Expand Up @@ -53,6 +61,7 @@ class MetricsServiceTestCase(unittest.TestCase):
)
def test_log(self, mock_execute_ingest, mock_MONGO_CONNECTION, mock_post_event):
mock_MONGO_CONNECTION._client = MetricsServiceTestCase.client_mock
# Test when user uuid is not provided
args = [
"--study-id",
"5d276a50421aa9117c982845",
Expand Down Expand Up @@ -81,3 +90,18 @@ def test_log(self, mock_execute_ingest, mock_MONGO_CONNECTION, mock_post_event):
metrics_model = config.get_metric_properties()
# Log Mixpanel events
MetricsService.log(config.get_parent_event_name(), metrics_model)

# Test when user uuid is provided

args.insert(4, "--user-metrics-uuid")
args.insert(5, user_metrics_uuid)
# args.insert(3).append(user_metrics_uuid)
# Initialize global variables
config.init(
"5d276a50421aa9117c982845", "5dd5ae25421aa910a723a337", user_metrics_uuid
)
config.set_parent_event_name("ingest-pipeline:expression:ingest")
IngestTestCase.execute_ingest(args)
metrics_model = config.get_metric_properties()
# Log Mixpanel events
MetricsService.log(config.get_parent_event_name(), metrics_model)

0 comments on commit 5333b6e

Please sign in to comment.