From 8ae2953f7fd2a3b6312d0b66f0cd46e93a539e8d Mon Sep 17 00:00:00 2001 From: Alejandro Aristizabal Date: Tue, 12 Sep 2023 15:22:26 -0500 Subject: [PATCH] Implement empty statistics task --- .../data_preparation/project/statistics.py | 44 +------------------ 1 file changed, 2 insertions(+), 42 deletions(-) diff --git a/mlcubes/data_preparation/project/statistics.py b/mlcubes/data_preparation/project/statistics.py index 31622c9e..04c7e0f6 100644 --- a/mlcubes/data_preparation/project/statistics.py +++ b/mlcubes/data_preparation/project/statistics.py @@ -4,44 +4,6 @@ import pandas as pd -def get_statistics(data_df: pd.DataFrame) -> dict: - """Computes statistics about the data. This statistics are uploaded - to the Medperf platform under the data owner's approval. Include - every statistic you consider useful for determining the nature of the - data, but keep in mind that we want to keep the data as private as - possible. - - Args: - data_df (pd.DataFrame): DataFrame containing the prepared dataset - - Returns: - dict: dictionary with all the computed statistics - """ - stats = { - "weight": { - "mean": float(data_df["weight"].mean()), - "std": float(data_df["weight"].std()), - "min": float(data_df["weight"].min()), - "max": float(data_df["weight"].max()), - }, - "volume": { - "mean": float(data_df["volume"].mean()), - "std": float(data_df["volume"].std()), - "min": float(data_df["volume"].min()), - "max": float(data_df["volume"].max()), - }, - "density": { - "mean": float(data_df["density"].mean()), - "std": float(data_df["density"].std()), - "min": float(data_df["density"].min()), - "max": float(data_df["density"].max()), - }, - "size": len(data_df), - } - - return stats - - if __name__ == "__main__": parser = argparse.ArgumentParser("MedPerf Statistics Example") parser.add_argument( @@ -60,10 +22,8 @@ def get_statistics(data_df: pd.DataFrame) -> dict: args = parser.parse_args() - namesfile = os.path.join(args.data, "data.csv") - names_df = pd.read_csv(namesfile) - - stats = get_statistics(names_df) + # TODO: implement statistics + stats = {} with open(args.out_file, "w") as f: yaml.dump(stats, f)