From fef6029331b895970662a664859efb54b4b6335a Mon Sep 17 00:00:00 2001
From: Benjamin Cretois <benjamin.cretois@nina.no>
Date: Fri, 10 Jan 2025 10:33:19 +0100
Subject: [PATCH] [ADD] to_annotation_sheet.sh

---
 README.md              | 18 +++++++++++++++++-
 to_annotation_sheet.sh | 38 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 55 insertions(+), 1 deletion(-)
 create mode 100755 to_annotation_sheet.sh

diff --git a/README.md b/README.md
index 861faa8..cb9959b 100644
--- a/README.md
+++ b/README.md
@@ -76,12 +76,28 @@ python3 src/global_sampler.py
 
 :star: Note that this `parquet` file will contain `$NUM_SEGMENT` random segments with the `$THRESHOLD` indicated in the `config_connection.yaml`. 
 
-3- Extract the detections!
+4- Extract the detections!
 
 ```bash
 ./extract.sh
 ```
 
+## Format for annotation
+
+To annotate the extracted segments, we create one `csv` file per species. Each `csv` lists the segments to annotate and adds a few columns for the annotator to fill in. To help create the `csv` files, the repository contains a bash script, `to_annotation_sheet.sh`.
+
+Be sure to change the input and output folders on lines 4 and 5 of the script:
+
+```bash
+BASE_DIR="./extracted_segments" # Folder where all the segments are stored
+OUTPUT_DIR="./csv" # Folder that receives the csv files
+```
+Then run the script:
+
+```bash
+./to_annotation_sheet.sh
+```
+
 
 
 
diff --git a/to_annotation_sheet.sh b/to_annotation_sheet.sh
new file mode 100755
index 0000000..93d4edd
--- /dev/null
+++ b/to_annotation_sheet.sh
@@ -0,0 +1,38 @@
+#!/bin/bash
+# Build one annotation CSV per species folder of BASE_DIR inside OUTPUT_DIR.
+set -euo pipefail # abort on the first failing command
+BASE_DIR="${BASE_DIR:-for_tom/segments}" # input: one sub-folder per species
+OUTPUT_DIR="${OUTPUT_DIR:-/home/benjamin.cretois/Code/birdnetfs/for_tom/csv}" # output: one csv per species
+
+# A missing input folder would otherwise produce no sheet yet still report success.
+[[ -d "$BASE_DIR" ]] || { printf 'Error: %s is not a directory\n' "$BASE_DIR" >&2; exit 1; }
+mkdir -p "$OUTPUT_DIR"
+
+# Seven columns; the fourth holds a comma, so it is quoted to remain a single field.
+CSV_HEADER='Filename,Species (BirdNET),BirdNET correct? (Yes/No),"If BirdNET incorrect, true species ID or sound source?",How confident are you? (High/Medium/Low),Reason for misidentification? (Related species /Anthropogenic sound/Mimic/No call/Other),Comments'
+
+# Print $1 as one CSV field, quoting it when it contains a comma, quote or newline.
+csv_field() {
+    local value=$1 quote='"'
+    if [[ "$value" == *,* || "$value" == *"$quote"* || "$value" == *$'\n'* ]]; then
+        value="$quote${value//$quote/$quote$quote}$quote"
+    fi
+    printf '%s' "$value"
+}
+
+for species_folder in "$BASE_DIR"/*; do
+    [[ -d "$species_folder" ]] || continue
+    species_name=${species_folder##*/} # the folder name is the species label
+    species_field=$(csv_field "$species_name")
+    csv_file="$OUTPUT_DIR/$species_name.csv"
+    {
+        printf '%s\n' "$CSV_HEADER"
+        for file in "$species_folder"/*; do
+            [[ -f "$file" ]] || continue
+            # Filename and species are pre-filled; the five annotator columns stay empty.
+            printf '%s,%s,,,,,\n' "$(csv_field "${file##*/}")" "$species_field"
+        done
+    } > "$csv_file" # (re)creates the sheet on every run
+done
+
+printf 'CSV generation complete. Files saved in %s.\n' "$OUTPUT_DIR"
\ No newline at end of file