Skip to content

Commit

Permalink
Reports (#5)
Browse files Browse the repository at this point in the history
* feat: added reports

feat: added reports

* updated readme

* updated readme

* updated gitignore

* refactoring
  • Loading branch information
a-chumagin authored Sep 20, 2024
1 parent 786f190 commit 5f05219
Show file tree
Hide file tree
Showing 7 changed files with 125 additions and 9 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -78,4 +78,5 @@ yarn-error.log
.Pipfile

data/*data_contract.yml
.env
.env
data/results/*
37 changes: 34 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,44 @@ Why Vertica? Because Vertica creates initial data when starting the container, w
Run the Docker Compose file to start the Vertica container and the Soda Check container:

```bash
docker-compose -f ./docker/docker-compose.yml up -d
docker-compose -f ./docker/docker-compose.yml up
```

If you are using a Mac with an Apple Silicon (M-series) processor, use the following commands to run the Vertica container:

```bash
export DOCKER_DEFAULT_PLATFORM=linux/arm64
export DOCKER_BUILDKIT=0
docker-compose -f ./docker/docker-compose.yml up -d
```
docker-compose -f ./docker/docker-compose.yml up
```

## Reports
### Viewing Reports
The reports are created using Streamlit and visualize the results from Soda scans. Once the reports service is running, you can access the reports at:
```
http://0.0.0.0:8501
```
### Report Components
The reports include the following components:
- **Summary**: An overview of the scan results, including the definition name, default data source, and the time taken for the scan.
- **Checks**: Detailed information about each check, including:
- **Name**: The name of the check.
- **Table name**: The table associated with the check.
- **Outcome**: The result of the check (pass or fail).
- **Diagnostics**: Diagnostic information related to the check.
- **Description**: A description of the check.
- **Passed Checks**: A table listing all the checks that passed.
- **Failed Checks**: A table listing all the checks that failed.
- **Logs**: Logs generated during the scan.
### Select Report:
You can pick a specific report from the select box: choose a file name and the report for that file is rendered.
### Sharing Reports
Each report page shows a **share** link that points to the currently selected report. Use it to share that report with others.
The link has the following form:
```
http://localhost:8501/?file=vertica_local_results_1726854394.0941107.json
11 changes: 11 additions & 0 deletions docker/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,14 @@ services:
- vertica_schema=public
depends_on:
- vertica
reports:
build:
context: ..
dockerfile: ./docker/Dockerfile
command: streamlit run /app/reports.py
volumes:
- ../data:/app/data
ports:
- "8501:8501"
depends_on:
- checks
6 changes: 3 additions & 3 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
soda-core==3.1.0
soda-core-contracts==3.1.0
soda-core-vertica==3.1.0
soda-core-contracts==3.2.4
soda-core-vertica==3.2.4
streamlit==1.36.0
pyyaml
3 changes: 2 additions & 1 deletion scripts/data_contract_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,4 +34,5 @@ def generate_data_contract(column_info):

data_contract['columns'].append(column_data)

return yaml.dump(data_contract, default_flow_style=False)
dump = yaml.dump(data_contract, default_flow_style=False, sort_keys=False)
return dump
68 changes: 68 additions & 0 deletions scripts/reports.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
import json
import os
from urllib.parse import quote

import pandas as pd
import streamlit as st

def load_data(folder_path):
    """Load one scan result file chosen by URL parameter or select box.

    The ``file`` query parameter (used by shared links) takes precedence, but
    it is honoured only when it names a regular file that is actually listed
    in ``folder_path``.  Anything else falls back to the select box, so a
    crafted URL such as ``?file=../../secret`` cannot read files outside the
    results folder, and unrelated query parameters no longer cause a crash.

    Args:
        folder_path: Directory containing the JSON scan result files.

    Returns:
        A ``(dataframe, filename)`` tuple: the output of
        ``pd.json_normalize`` on the parsed JSON, and the bare name of the
        file that was loaded.
    """
    if not os.path.isdir(folder_path):
        st.warning(f"Results folder not found: {folder_path}")
        st.stop()

    # Keep regular files only (a sub-directory cannot be opened as a report)
    # and sort them, because os.listdir returns entries in arbitrary order.
    filenames = sorted(
        name
        for name in os.listdir(folder_path)
        if os.path.isfile(os.path.join(folder_path, name))
    )
    if not filenames:
        st.warning(f"No result files found in {folder_path}")
        st.stop()

    requested = st.query_params.get("file")
    if requested in filenames:
        selected_filename = requested
    else:
        if requested is not None:
            # Do not echo the untrusted value back into the page.
            st.warning("The requested report file is not available; choose one below.")
        selected_filename = st.selectbox('Select a file', filenames)

    file_name = os.path.join(folder_path, selected_filename)
    with open(file_name, "r", encoding="utf-8") as f:
        # strict=False tolerates raw control characters inside JSON strings.
        data = json.load(f, strict=False)
    return pd.json_normalize(data), selected_filename

def calculate_time_running(start_time, end_time):
    """Return the number of seconds elapsed between two timestamps.

    Args:
        start_time: Scan start, in any form accepted by ``pd.to_datetime``.
        end_time: Scan end, in any form accepted by ``pd.to_datetime``.

    Returns:
        ``end_time - start_time`` as a float number of seconds (negative when
        the end precedes the start).
    """
    began = pd.to_datetime(start_time)
    finished = pd.to_datetime(end_time)
    elapsed = finished - began
    return elapsed.total_seconds()

def get_outcomes(checks, outcome):
    """Build a table of the checks whose outcome equals ``outcome``.

    Args:
        checks: Iterable of check dicts; each is read through its ``outcome``,
            ``name``, ``table`` and ``diagnostics`` keys.
        outcome: Outcome value to keep (the report uses ``'pass'`` and
            ``'fail'``).

    Returns:
        A DataFrame with one row per matching check and the columns
        ``Name``, ``Table name``, ``Outcome``, ``Diagnostics`` and
        ``Description``; an empty DataFrame when nothing matches.
    """
    rows = [
        {
            'Name': item['name'],
            'Table name': item['table'],
            'Outcome': item['outcome'],
            'Diagnostics': item['diagnostics'],
            'Description': get_description_value(item),
        }
        for item in checks
        if item['outcome'] == outcome
    ]
    return pd.DataFrame(rows)

def get_description_value(check):
    """Return the value of the check's first resource attribute.

    The report shows this value in its "Description" column.

    Args:
        check: A check dict; its optional ``resourceAttributes`` entry is
            expected to be a list of dicts carrying a ``value`` key.

    Returns:
        The ``value`` of the first resource attribute, or ``'N/A'`` when the
        ``resourceAttributes`` key is missing, empty or ``None``, or when the
        first attribute has no ``value`` key.
    """
    # .get + "or []" covers a missing key as well as an explicit None/empty list.
    attributes = check.get('resourceAttributes') or []
    if not attributes:
        return 'N/A'
    return attributes[0].get('value', 'N/A')

def display_report(df, selected_filename):
    """Render one scan result as a Streamlit report.

    Writes, in order: a share link for the selected file, the summary
    (definition name, default data source, run time, check counts), the
    tables of passed and failed checks, and the scan logs.

    Args:
        df: DataFrame whose first row holds the scan result; the columns
            ``checks``, ``definitionName``, ``defaultDataSource``,
            ``scanStartTimestamp``, ``scanEndTimestamp`` and ``logs`` are
            read.  The frame is not modified.
        selected_filename: Name of the result file being shown; used as the
            ``file`` query parameter of the share link.
    """
    # Percent-encode the name so spaces or reserved characters in a file name
    # cannot break the markdown link or the query string.
    share_target = quote(selected_filename, safe='')
    st.markdown(f"[share](/?file={share_target})")

    checks = df['checks'][0]
    failed_checks = get_outcomes(checks, 'fail')
    passed_checks = get_outcomes(checks, 'pass')

    st.write(f"Definition Name: {df['definitionName'][0]}")
    st.write(f"Default data source: {df['defaultDataSource'][0]}")

    time_running = calculate_time_running(df['scanStartTimestamp'][0], df['scanEndTimestamp'][0])
    st.write(f"Time running, sec: {time_running}")

    st.write(f"Checks count: {len(checks)}")
    st.write('Number of failed checks:', len(failed_checks))
    st.write('Number of passed checks:', len(passed_checks))

    st.write('Passed checks:')
    st.table(passed_checks)

    st.write('Failed checks:')
    st.table(failed_checks)

    st.write('Logs:')
    st.write(df['logs'][0])

# Folder holding the scan result files, relative to the working directory.
FOLDER_PATH = './data/results'


def main():
    """Load the selected scan result and render its report."""
    data_f, filename = load_data(FOLDER_PATH)
    display_report(data_f, filename)


# Render only when executed as a script (``streamlit run`` executes the file
# as ``__main__``), so importing this module has no side effects.
if __name__ == "__main__":
    main()
6 changes: 5 additions & 1 deletion scripts/run_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
prepare a scan, execute the scan, and display the results.
"""

import time
from soda.contracts.data_contract_translator import DataContractTranslator
from soda.scan import Scan

Expand Down Expand Up @@ -48,10 +49,13 @@ def prepare_scan(self, sodacl_str):
Args:
sodacl_str (str): The Soda Checks Language string.
"""
data_source_name = "vertica_local"
result_file_name = f"./data/results/{data_source_name}_results_{time.time()}.json"
self.scan.set_verbose(True)
self.scan.set_data_source_name("vertica_local")
self.scan.set_data_source_name(data_source_name)
self.scan.add_configuration_yaml_file(file_path="configuration/configuration.yml")
self.scan.add_sodacl_yaml_str(sodacl_str)
self.scan.set_scan_results_file(result_file_name)

def execute_scan(self):
"""
Expand Down

0 comments on commit 5f05219

Please sign in to comment.