Commit
Renames "ground truth" to "references" throughout design
MitchellAV committed Feb 28, 2025
1 parent 0df14b7 commit 14f4e0a
Showing 22 changed files with 101 additions and 109 deletions.
4 changes: 2 additions & 2 deletions .gitignore
@@ -148,8 +148,8 @@ ec2/evaluation_scripts/*
**/.vscode
**/.venv

-s3Emulator/pv-validation-hub-bucket/data_files/analytical/*
-s3Emulator/pv-validation-hub-bucket/data_files/ground_truth/*
+s3Emulator/pv-validation-hub-bucket/data_files/files/*
+s3Emulator/pv-validation-hub-bucket/data_files/references/*
s3Emulator/pv-validation-hub-bucket/submission_files/*
s3Emulator/pv-validation-hub-bucket/evaluation_scripts/*

4 changes: 2 additions & 2 deletions cleanup.sh
@@ -49,8 +49,8 @@ S3_DIR='./s3Emulator'
S3_BUCKET_DIR="${S3_DIR}/pv-validation-hub-bucket"

S3_DATA_DIR="${S3_BUCKET_DIR}/data_files"
-S3_FILES_DIR="${S3_DATA_DIR}/analytical"
-S3_REFERENCE_DIR="${S3_DATA_DIR}/ground_truth"
+S3_FILES_DIR="${S3_DATA_DIR}/files"
+S3_REFERENCE_DIR="${S3_DATA_DIR}/references"

if [ -d "${S3_FILES_DIR}" ]; then
rm -rf "${S3_FILES_DIR:?}/"*
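Note: the `${S3_FILES_DIR:?}` expansion aborts the script if the variable is unset or empty, so the `rm -rf` above can never expand to `/*`.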
2 changes: 1 addition & 1 deletion ec2/.gitignore
@@ -1,2 +1,2 @@
**/data/files/*.csv
-**/data/ground-truth/*.csv
+**/data/references/*.csv
2 changes: 1 addition & 1 deletion ec2/Dockerfile
@@ -5,7 +5,7 @@ FROM python:3.11-slim
WORKDIR /root/admin
COPY . .

-ENV PORT 7000
+ENV PORT=7000
EXPOSE 7000

RUN apt-get update -qq
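Note: `ENV PORT 7000` is Docker's legacy space-separated syntax, now deprecated; the `ENV PORT=7000` form introduced here is the recommended spelling.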
12 changes: 6 additions & 6 deletions ec2/README.md
@@ -27,7 +27,7 @@ A new analysis task for insertion into the PV Validation Hub needs to contain ce
- `system_metadata.csv` - contains the metadata for each system associated with the data files
- `template.py` - marimo template for the private results page for each submission
- Data files - folder containing all csv files for the analysis
-- Ground truth files - folder containing all results for each data file
+- Reference files - folder containing all results for each data file

### config.json

@@ -60,7 +60,7 @@ Example JSON:
"longitude",
"data_sampling_frequency"
],
"ground_truth_compare": [
"references_compare": [
"time_series"
],
"public_results_table": "time-shift-public-metrics.json",
@@ -82,18 +82,18 @@ Example JSON:
- "function_name" - name of function required within submission file
- "comparison_type" - type of comparison
- "display_metrics" - mapping of final metric name to the display name for the leaderboard
-- The formatting is as follows `<metric_operation>_<performance_metric>_<ground_truth_type>`
+- The formatting is as follows `<metric_operation>_<performance_metric>_<references_type>`
- e.g. `median_mean_absolute_error_time_series`
- "performance_metrics" - list of metrics to calculate for analysis task
- "metrics_operations" - contains a mapping of aggregate metric to the operation list to be performed on each metric
-- The formatting is as follows `<performance_metric>_<ground_truth_type>`
+- The formatting is as follows `<performance_metric>_<references_type>`
- e.g. `mean_absolute_error_time_series`
- "allowable_kwargs" - kwargs for the submission function that are allowed
- "ground_truth_compare" - results from submission function
- "references_compare" - results from submission function
- "public_results_table" - name of json result file that contains information about submission results
- "private_results_columns" - name of columns that will be in final dataframe that is passed to marimo template
- will need to contain final metric name to be used in marimo template
-- The formatting is as follows `<metric_operation>_<performance_metric>_<ground_truth_type>`
+- The formatting is as follows `<metric_operation>_<performance_metric>_<references_type>`

### system_metadata.csv

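As an aside, the `<metric_operation>_<performance_metric>_<references_type>` convention above is plain string assembly; a minimal illustrative sketch (the helper name is hypothetical, not from the repo):

def metric_column_name(
    metric_operation: str, performance_metric: str, references_type: str
) -> str:
    # Builds leaderboard column names following the README convention.
    return f"{metric_operation}_{performance_metric}_{references_type}"

assert (
    metric_column_name("median", "mean_absolute_error", "time_series")
    == "median_mean_absolute_error_time_series"
)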
16 changes: 13 additions & 3 deletions ec2/analysis-tasks/az-tilt-estimation/assets/config.json
@@ -2,9 +2,19 @@
"category_name": "az_tilt_estimation",
"function_name": "estimate_az_tilt",
"comparison_type": "scalar",
"performance_metrics": [ "runtime", "absolute_error" ],
"allowable_kwargs": [ "latitude", "longitude", "data_sampling_frequency"],
"ground_truth_compare": [ "azimuth", "tilt" ],
"performance_metrics": [
"runtime",
"absolute_error"
],
"allowable_kwargs": [
"latitude",
"longitude",
"data_sampling_frequency"
],
"references_compare": [
"azimuth",
"tilt"
],
"public_results_table": "az-tilt-public-metrics.json",
"private_results_columns": [
"system_id",
2 changes: 1 addition & 1 deletion ec2/analysis-tasks/az-tilt-estimation/config.json
@@ -42,7 +42,7 @@
"latitude",
"longitude"
],
"ground_truth_compare": [
"references_compare": [
"azimuth",
"tilt"
],
2 changes: 1 addition & 1 deletion ec2/analysis-tasks/time-shift-detection/config.json
@@ -24,7 +24,7 @@
"longitude",
"data_sampling_frequency"
],
"ground_truth_compare": [
"references_compare": [
"time_series"
],
"public_results_table": "time-shift-public-metrics.json",
2 changes: 1 addition & 1 deletion ec2/config.json
@@ -24,7 +24,7 @@
"longitude",
"data_sampling_frequency"
],
"ground_truth_compare": [
"references_compare": [
"time_series"
],
"public_results_table": "time-shift-public-metrics.json",
24 changes: 11 additions & 13 deletions ec2/insert_analysis.py
@@ -51,7 +51,7 @@ class TaskConfig(TypedDict):
performance_metrics: list[str]
metrics_operations: dict[str, list[str]]
allowable_kwargs: list[str]
-ground_truth_compare: list[str]
+references_compare: list[str]
public_results_table: str
private_results_columns: list[str]

@@ -120,7 +120,7 @@ def __init__(
self.markdown_files_folder_path = markdown_files_folder_path
self.front_end_assets_folder_path = front_end_assets_folder_path
self.data_files_hash = ""
-self.ground_truth_files_hash = ""
+self.references_files_hash = ""
self.combined_hash = ""
self.db_hash = ""

@@ -378,8 +378,6 @@ def createFileMetadata(self, file_metadata_df: pd.DataFrame):
s3_path: String. S3 path that we want to write the files to.
"""

-# s3_data_files = list_s3_bucket(self.is_local, self.s3_bucket_name, "data_files/analytical/")

body = file_metadata_df.to_json(orient="records") # type: ignore
metadata_json_list = json.loads(body)

@@ -404,7 +402,7 @@ def createFileMetadata(self, file_metadata_df: pd.DataFrame):
local_path = os.path.join(
self.file_data_folder_path, metadata["file_name"]
)
-upload_path = f'data_files/analytical/{metadata["file_name"]}'
+upload_path = f'data_files/files/{metadata["file_name"]}'

# upload metadata to s3
upload_to_s3_bucket(
@@ -427,7 +425,7 @@ def uploadValidationData(self):
self.validation_data_folder_path, file_name
)
upload_path = (
f"data_files/ground_truth/{str(self.analysis_id)}/{file_name}"
f"data_files/references/{str(self.analysis_id)}/{file_name}"
)
upload_to_s3_bucket(
self.s3_url,
@@ -858,21 +856,21 @@ def createHashofFiles(self):

self.data_files_hash = hash_for_data_files

-ground_truth_files = os.listdir(self.validation_data_folder_path)
+references_files = os.listdir(self.validation_data_folder_path)

-hash_for_ground_truth_files = get_hash_for_list_of_files(
+hash_for_references_files = get_hash_for_list_of_files(
[
os.path.join(self.validation_data_folder_path, file)
-for file in ground_truth_files
+for file in references_files
]
)

logger.info(f"Ground truth files hash: {hash_for_ground_truth_files}")
logger.info(f"Data files hash: {hash_for_references_files}")

-self.ground_truth_files_hash = hash_for_ground_truth_files
+self.references_files_hash = hash_for_references_files

self.combined_hash = combine_hashes(
-[self.data_files_hash, self.ground_truth_files_hash]
+[self.data_files_hash, self.references_files_hash]
)

logger.info(f"Combined hash: {self.combined_hash}")
@@ -1055,7 +1053,7 @@ def convert_int(val: str) -> int:
task_dir, "data/file_metadata.csv"
)
validation_data_folder_path = os.path.join(
task_dir, "data/ground-truth/"
task_dir, "data/references/"
)
private_report_template_file_path = os.path.join(
task_dir, "template.py"
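The hashing helpers used by `createHashofFiles` (`get_hash_for_list_of_files`, `combine_hashes`) are imported from elsewhere and not shown in this diff; a plausible sketch of their assumed behavior, using SHA-256:

import hashlib

def get_hash_for_list_of_files(paths: list[str]) -> str:
    # Assumption: files are hashed in sorted order so the result is
    # independent of os.listdir ordering.
    digest = hashlib.sha256()
    for path in sorted(paths):
        with open(path, "rb") as f:
            for chunk in iter(lambda: f.read(8192), b""):
                digest.update(chunk)
    return digest.hexdigest()

def combine_hashes(hashes: list[str]) -> str:
    # Assumption: the combined hash is the hash of the concatenated digests.
    return hashlib.sha256("".join(hashes).encode()).hexdigest()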
2 changes: 1 addition & 1 deletion ec2/test_insertion.py
@@ -43,7 +43,7 @@
task_dir, "data/file_metadata.csv"
)
validation_data_folder_path = os.path.join(
task_dir, "data/ground-truth/"
task_dir, "data/references/"
)
private_report_template_file_path = os.path.join(
task_dir, "template.py"
9 changes: 3 additions & 6 deletions s3Emulator/Dockerfile
@@ -25,16 +25,13 @@ WORKDIR /app
# Install Requirements
RUN pip install -r requirements.txt

-# Move simulated bucket to location
-# RUN mv ./pv-validation-hub-bucket /
-# RUN mv /time-shift-validation-hub/data/file_data/* /pv-validation-hub-bucket/data_files/analytical/
-# RUN mv /time-shift-validation-hub/data/validation_data/* /pv-validation-hub-bucket/data_files/ground_truth/


# Make simulated bucket in location
RUN mkdir /pv-validation-hub-bucket
RUN mkdir /pv-validation-hub-bucket/data_files
-RUN mkdir /pv-validation-hub-bucket/data_files/analytical
-RUN mkdir /pv-validation-hub-bucket/data_files/ground_truth
+RUN mkdir /pv-validation-hub-bucket/data_files/files
+RUN mkdir /pv-validation-hub-bucket/data_files/references
RUN mkdir /pv-validation-hub-bucket/submission_files

# Install the required dependencies
6 changes: 3 additions & 3 deletions s3Emulator/preload/s3FileStruct.json
@@ -1,8 +1,8 @@
{
"pv-validation-hub-bucket":{
"pv-validation-hub-bucket": {
"data_files": {
"analytical": "/",
"ground_truth": "/"
"files": "/",
"references": "/"
},
"evaluation_scripts": {
"example-1": "/",
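How the emulator consumes `s3FileStruct.json` is not shown here; one plausible sketch materializes the declared tree as directories, treating a `"/"` value as an empty leaf directory:

import json
from pathlib import Path

def make_tree(node: dict, root: Path) -> None:
    # Recurse into nested dicts; a "/" value marks an empty directory.
    for name, child in node.items():
        path = root / name
        path.mkdir(parents=True, exist_ok=True)
        if isinstance(child, dict):
            make_tree(child, path)

with open("s3Emulator/preload/s3FileStruct.json") as f:
    make_tree(json.load(f), Path("/"))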
6 changes: 3 additions & 3 deletions valhub/base/errorcodes.json
@@ -7,7 +7,7 @@
"5": "Configuration file not found in current evaluation directory",
"6": "Required function name not found within submission python file",
"7": "Failure cutoff met for submission evaluation and execution has been terminated",
"8": "Submission result length does not match ground truth length",
"8": "Submission result length does not match reference length",
"500": "Internal server error"
},
"wr": {
@@ -19,8 +19,8 @@
"6": "Required evaluation files not found in s3 bucket",
"7": "File metadata for file ID not found in API",
"8": "No file metadata found in API for analysis ID",
"9": "Not all ground truth data files found in s3 bucket for analysis",
"10": "Not all analytical data files found in s3 bucket for analysis",
"9": "Not all reference data files found in s3 bucket for analysis",
"10": "Not all data files found in s3 bucket for analysis",
"11": "Runner module does not have a 'run' function",
"12": "Error posting Error Report to API",
"13": "API did not return a valid response",
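The codes are keyed by component group ("wr" appears above; the other group key is outside this hunk) and numeric string; a minimal lookup sketch (the helper is illustrative, not part of the repo):

import json

with open("valhub/base/errorcodes.json") as f:
    ERROR_CODES = json.load(f)

def error_message(group: str, code: int) -> str:
    # e.g. error_message("wr", 9) ->
    # "Not all reference data files found in s3 bucket for analysis"
    return ERROR_CODES[group][str(code)]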
2 changes: 1 addition & 1 deletion workers/README.md
@@ -14,7 +14,7 @@ The worker will create a docker image from the user's submission given the speci

1. Worker will initialize itself and run in an infinite loop checking if there are any messages within the AWS SQS Queue
2. Once the worker finds that a message exists in the AWS SQS Queue it will remove the message from the Queue and process the submission
-3. The worker will download all the ground truth and data files, the metadata, the user submission zip, and any other files associated with the analysis task
+3. The worker will download all the reference and data files, the metadata, the user submission zip, and any other files associated with the analysis task
4. The worker will then create a docker image from the user's submission from the provided metadata
5. The worker will then create multiple docker containers depending on how much memory is available to process each data file from the analysis task
6. The result from every docker container will be saved in multiple files to the worker machine
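Steps 1–2 describe a standard SQS long-poll loop; a minimal sketch with boto3 (the queue URL and handler are placeholders, not taken from the repo):

import boto3

sqs = boto3.client("sqs")
QUEUE_URL = "https://sqs.us-east-1.amazonaws.com/123456789012/submissions"  # placeholder

def process_submission(body: str) -> None:  # hypothetical handler
    ...

while True:
    resp = sqs.receive_message(
        QueueUrl=QUEUE_URL, MaxNumberOfMessages=1, WaitTimeSeconds=20
    )
    for msg in resp.get("Messages", []):
        process_submission(msg["Body"])
        # Remove the message from the queue once handled (step 2).
        sqs.delete_message(
            QueueUrl=QUEUE_URL, ReceiptHandle=msg["ReceiptHandle"]
        )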
6 changes: 3 additions & 3 deletions workers/src/errorcodes.json
@@ -7,7 +7,7 @@
"5": "Configuration file not found in current evaluation directory",
"6": "Required function name not found within submission python file",
"7": "Failure cutoff met for submission evaluation and execution has been terminated",
"8": "Submission result length does not match ground truth length",
"8": "Submission result length does not match reference length",
"500": "Internal server error"
},
"wr": {
@@ -19,8 +19,8 @@
"6": "Required evaluation files not found in s3 bucket",
"7": "File metadata for file ID not found in API",
"8": "No file metadata found in API for analysis ID",
"9": "Not all ground truth data files found in s3 bucket for analysis",
"10": "Not all analytical data files found in s3 bucket for analysis",
"9": "Not all reference data files found in s3 bucket for analysis",
"10": "Not all data files found in s3 bucket for analysis",
"11": "Runner module does not have a 'run' function",
"12": "Error posting Error Report to API",
"13": "API did not return a valid response",
Expand Down
14 changes: 7 additions & 7 deletions workers/src/metric_operations.py
@@ -26,24 +26,24 @@ def m_median(df: pd.DataFrame, column: str):
# ----------------------------


-def p_absolute_error(output: pd.Series[float], ground_truth: pd.Series[float]):
-    difference: pd.Series[float] = output - ground_truth
+def p_absolute_error(output: pd.Series[float], references: pd.Series[float]):
+    difference: pd.Series[float] = output - references
    absolute_difference = np.abs(difference)
    return absolute_difference


def p_mean_absolute_error(
-    output: pd.Series[float], ground_truth: pd.Series[float]
+    output: pd.Series[float], references: pd.Series[float]
):
-    output.index = ground_truth.index
-    difference: pd.Series[float] = output - ground_truth
+    output.index = references.index
+    difference: pd.Series[float] = output - references
    absolute_difference = np.abs(difference)
    mean_absolute_error = np.mean(absolute_difference)
    return mean_absolute_error


-def p_error(output: pd.Series[float], ground_truth: pd.Series[float]):
-    difference: pd.Series[float] = output - ground_truth
+def p_error(output: pd.Series[float], references: pd.Series[float]):
+    difference: pd.Series[float] = output - references
    return difference
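To see how these helpers feed a leaderboard column such as `median_mean_absolute_error_time_series`, a hedged sketch of the composition (assuming `m_median` returns `df[column].median()`; the orchestration code is not part of this diff):

import pandas as pd

# Hypothetical per-file results: one mean_absolute_error per data file,
# each computed by p_mean_absolute_error(output, references).
results = pd.DataFrame({"mean_absolute_error_time_series": [0.12, 0.40, 0.25]})

median_mae = m_median(results, "mean_absolute_error_time_series")
# -> 0.25, reported as median_mean_absolute_error_time_series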