Skip to content

Commit

Permalink
Implement empty statistics task
Browse files Browse the repository at this point in the history
  • Loading branch information
aristizabal95 committed Sep 12, 2023
1 parent 363d976 commit 8ae2953
Showing 1 changed file with 2 additions and 42 deletions.
44 changes: 2 additions & 42 deletions mlcubes/data_preparation/project/statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,44 +4,6 @@
import pandas as pd


def get_statistics(data_df: pd.DataFrame) -> dict:
    """Compute summary statistics about the prepared data.

    These statistics are uploaded to the MedPerf platform under the data
    owner's approval. Include every statistic you consider useful for
    determining the nature of the data, but keep in mind that we want to
    keep the data as private as possible.

    Args:
        data_df (pd.DataFrame): DataFrame containing the prepared dataset.
            It must provide the ``weight``, ``volume`` and ``density``
            columns.

    Returns:
        dict: dictionary with all the computed statistics. Each of the
            ``weight``, ``volume`` and ``density`` keys maps to a dict with
            the ``mean``, ``std``, ``min`` and ``max`` of that column, and
            ``size`` holds the number of rows in ``data_df``.

    Raises:
        KeyError: if one of the expected columns is missing from ``data_df``.
    """

    def _summarize(column: pd.Series) -> dict:
        # One summary per column. Values are cast to built-in floats rather
        # than left as numpy scalars -- presumably so the result serializes
        # cleanly when the caller dumps it to YAML.
        return {
            "mean": float(column.mean()),
            "std": float(column.std()),
            "min": float(column.min()),
            "max": float(column.max()),
        }

    # Single source of truth for the summarized columns; the tuple order
    # fixes the key order of the resulting dictionary.
    summarized_columns = ("weight", "volume", "density")

    stats = {name: _summarize(data_df[name]) for name in summarized_columns}
    stats["size"] = len(data_df)

    return stats


if __name__ == "__main__":
parser = argparse.ArgumentParser("MedPerf Statistics Example")
parser.add_argument(
Expand All @@ -60,10 +22,8 @@ def get_statistics(data_df: pd.DataFrame) -> dict:

args = parser.parse_args()

namesfile = os.path.join(args.data, "data.csv")
names_df = pd.read_csv(namesfile)

stats = get_statistics(names_df)
# TODO: implement statistics
stats = {}

with open(args.out_file, "w") as f:
yaml.dump(stats, f)

0 comments on commit 8ae2953

Please sign in to comment.