diff --git a/src/analysis.py b/src/analysis.py
new file mode 100644
index 0000000..bac087f
--- /dev/null
+++ b/src/analysis.py
@@ -0,0 +1,66 @@
+import json
+import pandas as pd
+import matplotlib.pyplot as plt
+from io import StringIO
+import csv
+
+#code
+def perform_analyses(data_file):
+    """Print summary statistics and plot histograms for the data in data_file.
+
+    data_file: path of a JSON list of objects or a CSV file with a header row.
+    Each record needs `char_count` and `image_count`; a missing column raises
+    KeyError. ValueError and FileNotFoundError are printed, not propagated.
+    """
+    try:
+        # Read the path the caller passed in rather than a fixed file name.
+        with open(data_file, 'r') as f:
+            file_content = f.read()
+
+        # Try JSON first and fall back to CSV when the text is not valid JSON.
+        is_csv = False
+        try:
+            data = json.loads(file_content)
+            is_json = True
+        except json.JSONDecodeError:
+            is_json = False
+
+        if not is_json:
+            try:
+                # The first CSV row is used as the header.
+                data = list(csv.DictReader(StringIO(file_content)))
+                is_csv = True
+            except csv.Error:
+                is_csv = False
+
+        if not is_json and not is_csv:
+            raise ValueError("The file format is not supported.")
+
+        if is_json and not isinstance(data, list):
+            raise ValueError("The data should be formatted as a list of objects.")
+
+        df = pd.DataFrame(data)
+        # csv.DictReader yields str values; convert so mean() and hist() work.
+        char_counts = pd.to_numeric(df['char_count'])
+        image_counts = pd.to_numeric(df['image_count'])
+
+        # Display basic statistics
+        print("Total Websites Analyzed:", df.shape[0])
+        print("Average character count:", char_counts.mean())
+        print("Average image count:", image_counts.mean())
+
+        # One histogram per metric, each in its own figure.
+        for counts, label in ((char_counts, 'Character count'),
+                              (image_counts, 'Image count')):
+            plt.figure(figsize=(10, 5))
+            counts.hist(bins=50)
+            plt.title(label + ' histogram')
+            plt.xlabel(label)
+            plt.ylabel('Frequency')
+            plt.show()
+
+    except ValueError as e:
+        print(e)
+
+    except FileNotFoundError:
+        print("The file was not found")
\ No newline at end of file
diff --git a/src/analyze_data.py b/src/analyze_data.py
deleted file mode 100644
index 514993d..0000000
--- a/src/analyze_data.py
+++ /dev/null
@@ -1,44 +0,0 @@
-from sklearn.cluster import KMeans
-
-def analyze_data(data):
-    if data is not None and 'Data' in data.columns:
-        # Calculate average word count and image count
-        data['WordCount'] = data['Data'].apply(lambda x: len(re.findall(r'\b\w+\b', str(x))))
-        data['ImageCount'] = data['Data'].apply(count_images)
-
-        # Check if 'WordCount' and 'ImageCount' columns exist in the DataFrame
-        if 'WordCount' in data.columns and 'ImageCount' in data.columns:
-            average_word_count = data['WordCount'].mean()
-            average_image_count = data['ImageCount'].mean()
-
-            return {'average_word_count': average_word_count, 'average_image_count': average_image_count}
-        else:
-            return None
-    else:
-        return None
-
-
-def count_images(text):
-    img_tags = re.findall(r'<img[^>]+>', str(text))
-    return len(img_tags)
-
-
-def perform_cluster_analysis(data):
-    # Analyze the data
-    # TODO Why are we calling analyze_data() here?
-    analysis_result = analyze_data(data)
-
-    # Extract relevant features for clustering
-    features = data[['WordCount', 'ImageCount']]
-
-    # Check if there are any samples to cluster
-    if not features.empty:
-        # Use K-Means clustering
-        kmeans = KMeans(n_clusters=3, random_state=42)  
-        data['Cluster'] = kmeans.fit_predict(features)
-
-        return data[['Data', 'Cluster']]
-    else:
-        # Handle the case when there are no samples to cluster
-        return None
-
diff --git a/src/test_analysis.ipynb b/src/test_analysis.ipynb
new file mode 100644
index 0000000..eb281e7
--- /dev/null
+++ b/src/test_analysis.ipynb
@@ -0,0 +1,103 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import json\n",
+    "import pandas as pd\n",
+    "import matplotlib.pyplot as plt\n",
+    "from io import StringIO\n",
+    "import csv\n",
+    "import unittest"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def perform_analyses(data_file):\n",
+    "    \"\"\"Print summary statistics and plot histograms for the data in data_file.\n",
+    "\n",
+    "    data_file: path of a JSON list of objects or a CSV file with a header row.\n",
+    "    ValueError and FileNotFoundError are printed, not propagated.\n",
+    "    \"\"\"\n",
+    "    try:\n",
+    "        with open(data_file, 'r') as f:\n",
+    "            file_content = f.read()\n",
+    "\n",
+    "        # Try JSON first and fall back to CSV when the text is not valid JSON.\n",
+    "        is_csv = False\n",
+    "        try:\n",
+    "            data = json.loads(file_content)\n",
+    "            is_json = True\n",
+    "        except json.JSONDecodeError:\n",
+    "            is_json = False\n",
+    "\n",
+    "        if not is_json:\n",
+    "            try:\n",
+    "                # The first CSV row is used as the header.\n",
+    "                data = list(csv.DictReader(StringIO(file_content)))\n",
+    "                is_csv = True\n",
+    "            except csv.Error:\n",
+    "                is_csv = False\n",
+    "\n",
+    "        if not is_json and not is_csv:\n",
+    "            raise ValueError(\"The file format is not supported.\")\n",
+    "\n",
+    "        if is_json and not isinstance(data, list):\n",
+    "            raise ValueError(\"The data should be formatted as a list of objects.\")\n",
+    "\n",
+    "        df = pd.DataFrame(data)\n",
+    "        # csv.DictReader yields str values; convert so mean() and hist() work.\n",
+    "        char_counts = pd.to_numeric(df['char_count'])\n",
+    "        image_counts = pd.to_numeric(df['image_count'])\n",
+    "\n",
+    "        # Display basic statistics\n",
+    "        print(\"Total Websites Analyzed:\", df.shape[0])\n",
+    "        print(\"Average character count:\", char_counts.mean())\n",
+    "        print(\"Average image count:\", image_counts.mean())\n",
+    "\n",
+    "        # One histogram per metric, each in its own figure.\n",
+    "        for counts, label in ((char_counts, 'Character count'),\n",
+    "                              (image_counts, 'Image count')):\n",
+    "            plt.figure(figsize=(10, 5))\n",
+    "            counts.hist(bins=50)\n",
+    "            plt.title(label + ' histogram')\n",
+    "            plt.xlabel(label)\n",
+    "            plt.ylabel('Frequency')\n",
+    "            plt.show()\n",
+    "\n",
+    "    except ValueError as e:\n",
+    "        print(e)\n",
+    "    except FileNotFoundError:\n",
+    "        print(\"The file was not found\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/src/test_analysis.py b/src/test_analysis.py
new file mode 100644
index 0000000..40d52f7
--- /dev/null
+++ b/src/test_analysis.py
@@ -0,0 +1,67 @@
+import json
+import pandas as pd
+import matplotlib.pyplot as plt
+from io import StringIO
+import csv
+import unittest
+
+# code
+def perform_analyses(data_file):
+    """Print summary statistics and plot histograms for the data in data_file.
+
+    data_file: path of a JSON list of objects or a CSV file with a header row.
+    ValueError and FileNotFoundError are printed, not propagated.
+    """
+    try:
+        with open(data_file, 'r') as f:
+            file_content = f.read()
+
+        # Try JSON first and fall back to CSV when the text is not valid JSON.
+        is_csv = False
+        try:
+            data = json.loads(file_content)
+            is_json = True
+        except json.JSONDecodeError:
+            is_json = False
+
+        if not is_json:
+            try:
+                # The first CSV row is used as the header.
+                data = list(csv.DictReader(StringIO(file_content)))
+                is_csv = True
+            except csv.Error:
+                is_csv = False
+
+        if not is_json and not is_csv:
+            raise ValueError("The file format is not supported.")
+
+        if is_json and not isinstance(data, list):
+            raise ValueError("The data should be formatted as a list of objects.")
+
+        df = pd.DataFrame(data)
+        # csv.DictReader yields str values; convert so mean() and hist() work.
+        char_counts = pd.to_numeric(df['char_count'])
+        image_counts = pd.to_numeric(df['image_count'])
+
+        # Display basic statistics
+        print("Total Websites Analyzed:", df.shape[0])
+        print("Average character count:", char_counts.mean())
+        print("Average image count:", image_counts.mean())
+
+        # One histogram per metric, each in its own figure.
+        for counts, label in ((char_counts, 'Character count'),
+                              (image_counts, 'Image count')):
+            plt.figure(figsize=(10, 5))
+            counts.hist(bins=50)
+            plt.title(label + ' histogram')
+            plt.xlabel(label)
+            plt.ylabel('Frequency')
+            plt.show()
+
+    except ValueError as e:
+        print(e)
+    except FileNotFoundError:
+        print("The file was not found")
+
+if __name__ == '__main__':
+    unittest.main()  # NOTE(review): this file defines no TestCase, so no tests are collected
\ No newline at end of file
diff --git a/test/test _data_analysis.py b/test/test _data_analysis.py
new file mode 100644
index 0000000..40d52f7
--- /dev/null
+++ b/test/test _data_analysis.py	
@@ -0,0 +1,67 @@
+import json
+import pandas as pd
+import matplotlib.pyplot as plt
+from io import StringIO
+import csv
+import unittest
+
+# code
+def perform_analyses(data_file):
+    """Print summary statistics and plot histograms for the data in data_file.
+
+    data_file: path of a JSON list of objects or a CSV file with a header row.
+    ValueError and FileNotFoundError are printed, not propagated.
+    """
+    try:
+        with open(data_file, 'r') as f:
+            file_content = f.read()
+
+        # Try JSON first and fall back to CSV when the text is not valid JSON.
+        is_csv = False
+        try:
+            data = json.loads(file_content)
+            is_json = True
+        except json.JSONDecodeError:
+            is_json = False
+
+        if not is_json:
+            try:
+                # The first CSV row is used as the header.
+                data = list(csv.DictReader(StringIO(file_content)))
+                is_csv = True
+            except csv.Error:
+                is_csv = False
+
+        if not is_json and not is_csv:
+            raise ValueError("The file format is not supported.")
+
+        if is_json and not isinstance(data, list):
+            raise ValueError("The data should be formatted as a list of objects.")
+
+        df = pd.DataFrame(data)
+        # csv.DictReader yields str values; convert so mean() and hist() work.
+        char_counts = pd.to_numeric(df['char_count'])
+        image_counts = pd.to_numeric(df['image_count'])
+
+        # Display basic statistics
+        print("Total Websites Analyzed:", df.shape[0])
+        print("Average character count:", char_counts.mean())
+        print("Average image count:", image_counts.mean())
+
+        # One histogram per metric, each in its own figure.
+        for counts, label in ((char_counts, 'Character count'),
+                              (image_counts, 'Image count')):
+            plt.figure(figsize=(10, 5))
+            counts.hist(bins=50)
+            plt.title(label + ' histogram')
+            plt.xlabel(label)
+            plt.ylabel('Frequency')
+            plt.show()
+
+    except ValueError as e:
+        print(e)
+    except FileNotFoundError:
+        print("The file was not found")
+
+if __name__ == '__main__':
+    unittest.main()  # NOTE(review): this file defines no TestCase, so no tests are collected
\ No newline at end of file