-
Notifications
You must be signed in to change notification settings - Fork 0
/
dbgapmonitor.py
151 lines (121 loc) · 4.58 KB
/
dbgapmonitor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
#!/usr/bin/env python
"""
File: dbgapmonitor.py
Author: Adam J. Taylor
Date: 2024-05-05
Description: A Python script to monitor dbGaP Authorized Requestors and send a message to Slack.
"""
from datetime import datetime, timedelta
import io
import json
import os
import pandas as pd
import polars as pl
import requests
def get_dbgap_requestors(phs_id):
    """
    Retrieves the list of dbGaP Authorized Requestors for a given study ID.

    Args:
        phs_id (str): The study ID for which the requestors are to be retrieved.

    Raises:
        ValueError: If phs_id is empty or None.
        requests.HTTPError: If dbGaP answers with an HTTP error status.

    Returns:
        polars.DataFrame: The dbGaP Authorized Requestors, sorted by
            "Date of approval" with the most recent approval first.
    """
    # Fail early with a clear message instead of requesting "study_id=None".
    if not phs_id:
        raise ValueError("A dbGaP study ID (phs_id) is required")

    # Download the tab-separated text file. The study ID is passed via
    # `params` so it is URL-encoded, and a timeout keeps an unresponsive
    # server from blocking the script forever.
    url = "https://www.ncbi.nlm.nih.gov/projects/gap/cgi-bin/GetAuthorizedRequestDownload.cgi"
    response = requests.get(url, params={"study_id": phs_id}, timeout=30)
    # Do not try to parse an HTTP error page as if it were the TSV download.
    response.raise_for_status()

    # Polars is used here as it simplified loading a rather non-standard TSV file.
    # The first column's header arrives as one run-together label, so it is
    # renamed to "Requestor" before the approval date is parsed and sorted on.
    df = (
        pl.read_csv(
            io.StringIO(response.text),
            separator="\t",
            truncate_ragged_lines=True,
            try_parse_dates=True,
        )
        .rename({"Cloud Service AdministratorData stewardRequestor": "Requestor"})
        .with_columns(pl.col("Date of approval").str.to_date("%b%d, %Y"))
        .sort("Date of approval", descending=True)
    )

    # Strip extra whitespace from the text columns
    df = df.with_columns(
        pl.col("Requestor").str.strip_chars(),
        pl.col("Affiliation").str.strip_chars(),
        pl.col("Project").str.strip_chars(),
    )
    return df
def dataframe_to_slack_block_with_md_links(df, lookback_days=7):
    """
    Converts a pandas DataFrame to a Slack message payload of mrkdwn sections.

    The first section is a header announcing new requestors; each row of the
    DataFrame then becomes one further section.

    Args:
        df (pandas.DataFrame): The data to be converted. Each row must provide
            the columns "Requestor", "Affiliation", "Request status",
            "Date of approval" (a value with a ``strftime`` method) and
            "Project".
        lookback_days (int): The number of days quoted in the header line.
            Defaults to 7.

    Returns:
        dict: A dictionary with a single "blocks" key holding the list of
            Slack section blocks.
    """
    blocks = [
        {
            "type": "section",
            "text": {
                "type": "mrkdwn",
                "text": f"@channel New dbGaP Authorized Requestors added in the last {lookback_days} days!",
            },
        }
    ]
    # The row index is not needed, only the row contents.
    for _, row in df.iterrows():
        # One line of summary, followed by the project as a quoted line.
        line = f"{row['Requestor']} from {row['Affiliation']} {row['Request status']} on {row['Date of approval'].strftime('%a %d %B')}\n> {row['Project']}"
        blocks.append({"type": "section", "text": {"type": "mrkdwn", "text": line}})
    return {"blocks": blocks}
def send_message_to_slack_blocks(webhook_url, blocks):
    """
    Sends a message to Slack using the provided webhook URL and blocks.

    Args:
        webhook_url (str): The URL of the Slack webhook.
        blocks (dict): The JSON-serialisable message payload to post,
            e.g. ``{"blocks": [...]}``.

    Raises:
        ValueError: If webhook_url is empty or None, or if the request to
            Slack returns a status code other than 200.

    Returns:
        None
    """
    # Fail early with a clear message rather than an obscure URL error.
    if not webhook_url:
        raise ValueError("A Slack webhook URL is required")

    headers = {"Content-Type": "application/json"}
    data = json.dumps(blocks)
    # A timeout keeps an unresponsive endpoint from blocking the script forever.
    response = requests.post(webhook_url, headers=headers, data=data, timeout=30)
    if response.status_code != 200:
        raise ValueError(
            f"Request to slack returned an error {response.status_code}, the response is:\n{response.text}"
        )
def main():
    """
    Checks dbGaP for recently approved requestors and reports them to Slack.

    Reads the Slack webhook URL from the SLACK_WEBHOOK_URL environment
    variable and the study ID from DBGAP_STUDY_ID, fetches the authorized
    requestors, keeps those approved within the look-back window, and posts
    either the list or a "no new requestors" notice to Slack.

    Raises:
        ValueError: If a required environment variable is unset or empty.
    """
    # Get the webhook URL and the study ID from environment variables
    webhook_url = os.getenv("SLACK_WEBHOOK_URL")
    phs_id = os.getenv("DBGAP_STUDY_ID")

    # Report every missing setting at once, before any network request is made.
    required = (("SLACK_WEBHOOK_URL", webhook_url), ("DBGAP_STUDY_ID", phs_id))
    missing = [name for name, value in required if not value]
    if missing:
        raise ValueError(
            f"Missing required environment variable(s): {', '.join(missing)}"
        )

    # Declare the number of days to look back
    lookback_days = 7

    # Get the dbGaP Authorized Requestors for the study ID
    df = get_dbgap_requestors(phs_id)

    # Filter for those approved in the last n days
    start_date = datetime.today() - timedelta(days=lookback_days)
    df_recent = df.filter(pl.col("Date of approval") > start_date)

    # Prepare the Slack message blocks
    if df_recent.is_empty():
        # Nothing new: prepare a simple notice for Slack
        slack_message_blocks = {
            "blocks": [
                {
                    "type": "section",
                    "text": {
                        "type": "mrkdwn",
                        "text": f"No new dbGaP Authorized Requestors added in the last {lookback_days} days",
                    },
                }
            ]
        }
    else:
        # Convert to pandas only when there are rows to format
        slack_message_blocks = dataframe_to_slack_block_with_md_links(
            df_recent.to_pandas()
        )

    # Send the message to Slack
    send_message_to_slack_blocks(webhook_url, slack_message_blocks)


if __name__ == "__main__":
    main()