ADORSYS-GIS · Motouom · Dec 2, 2023 · Dec 4, 2023 · Dec 4, 2023
diff --git a/src/analysis.py b/src/analysis.py
@@ -0,0 +1,66 @@
+import json
+import pandas as pd
+import matplotlib.pyplot as plt
+from io import StringIO
+import csv
+
+#code
+def perform_analyses(data_file):
+    """Print summary statistics and show histograms for website data.
+
+    ``data_file`` is the path of a JSON file (a list of objects) or a CSV
+    file with a header row; ``char_count`` and ``image_count`` are required.
+    Problems with the file or its contents are printed, not raised.
+    """
+    try:
+        # Honour the data_file argument instead of a fixed 'data.csv' path.
+        with open(data_file, 'r') as f:
+            file_content = f.read()
+
+        # Try JSON first, then fall back to CSV with a header row.
+        try:
+            data = json.loads(file_content)
+            is_json = True
+        except json.JSONDecodeError:
+            is_json = False
+
+        if not is_json:
+            try:
+                data = list(csv.DictReader(StringIO(file_content)))
+                is_csv = True
+            except csv.Error:
+                is_csv = False
+
+        if not is_json and not is_csv:
+            raise ValueError("The file format is not supported.")
+
+        if is_json and not isinstance(data, list):
+            raise ValueError("The data should be formatted as a list of objects.")
+
+        df = pd.DataFrame(data)
+        plots = (('char_count', 'Character count'), ('image_count', 'Image count'))
+        missing = [column for column, _ in plots if column not in df.columns]
+        if missing:
+            raise ValueError("Missing required column(s): " + ", ".join(missing))
+
+        # CSV fields arrive as strings; coerce so mean() and hist() see numbers.
+        for column, _ in plots:
+            df[column] = pd.to_numeric(df[column], errors='coerce')
+
+        # Display basic statistics
+        print("Total Websites Analyzed:", df.shape[0])
+        print("Average character count:", df['char_count'].mean())
+        print("Average image count:", df['image_count'].mean())
+
+        for column, label in plots:
+            plt.figure(figsize=(10, 5))
+            df[column].hist(bins=50)
+            plt.title(label + ' histogram')
+            plt.xlabel(label)
+            plt.ylabel('Frequency')
+            plt.show()
+
+    except ValueError as e:
+        print(e)
+    except FileNotFoundError:
+        print("The file was not found")
diff --git a/src/analyze_data.py b/src/analyze_data.py
diff --git a/src/test_analysis.ipynb b/src/test_analysis.ipynb
@@ -0,0 +1,103 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import json\n",
+    "import pandas as pd\n",
+    "import matplotlib.pyplot as plt\n",
+    "from io import StringIO\n",
+    "import csv\n",
+    "import unittest"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def perform_analyses(data_file):\n",
+    "    try:\n",
+    "        # Open file in read mode\n",
+    "        with open(data_file, 'r') as f:\n",
+    "            file_content = f.read()\n",
+    "\n",
+    "        # Try to parse as JSON\n",
+    "        try:\n",
+    "            data = json.loads(file_content)\n",
+    "            is_json = True\n",
+    "        except json.JSONDecodeError:\n",
+    "            is_json = False\n",
+    "\n",
+    "        # If JSON parsing failed, try parsing as CSV\n",
+    "        if not is_json:\n",
+    "            try:\n",
+    "                # If the file has headers\n",
+    "                data = list(csv.DictReader(StringIO(file_content)))\n",
+    "                is_csv = True\n",
+    "            except csv.Error:\n",
+    "                is_csv = False\n",
+    "\n",
+    "        if not is_json and not is_csv:\n",
+    "            raise ValueError(\"The file format is not supported.\")\n",
+    "\n",
+    "        if is_json and not isinstance(data, list):\n",
+    "            raise ValueError(\"The data should be formatted as a list of objects.\")\n",
+    "\n",
+    "        df = pd.DataFrame(data)\n",
+    "\n",
+    "        # Display basic statistics\n",
+    "        print(\"Total Websites Analyzed:\", df.shape[0])\n",
+    "        print(\"Average character count:\", str(df['char_count'].mean()))\n",
+    "        print(\"Average image count:\", str(df['image_count'].mean()))\n",
+    "\n",
+    "        # plot character count histogram\n",
+    "        plt.figure(figsize=(10, 5))\n",
+    "        df['char_count'].hist(bins=50)\n",
+    "        plt.title('Character count histogram')\n",
+    "        plt.xlabel('Character count')\n",
+    "        plt.ylabel('Frequency')\n",
+    "        plt.show()\n",
+    "\n",
+    "        # plot image count histogram\n",
+    "        plt.figure(figsize=(10, 5))\n",
+    "        df['image_count'].hist(bins=50)\n",
+    "        plt.title('Image count histogram')\n",
+    "        plt.xlabel('Image count')\n",
+    "        plt.ylabel('Frequency')\n",
+    "        plt.show()\n",
+    "\n",
+    "    except ValueError as e:\n",
+    "        print(e)\n",
+    "\n",
+    "    except FileNotFoundError:\n",
+    "        print(\"The file was not found\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/src/test_analysis.py b/src/test_analysis.py
@@ -0,0 +1,67 @@
+import json
+import pandas as pd
+import matplotlib.pyplot as plt
+from io import StringIO
+import csv
+import unittest
+
+# code
+def perform_analyses(data_file):
+    """Print summary statistics and show histograms for website data.
+
+    ``data_file`` is the path of a JSON file (a list of objects) or a CSV
+    file with a header row; ``char_count`` and ``image_count`` are required.
+    Problems with the file or its contents are printed, not raised.
+    """
+    try:
+        with open(data_file, 'r') as f:
+            file_content = f.read()
+
+        # Try JSON first, then fall back to CSV with a header row.
+        try:
+            data = json.loads(file_content)
+            is_json = True
+        except json.JSONDecodeError:
+            is_json = False
+        if not is_json:
+            try:
+                data = list(csv.DictReader(StringIO(file_content)))
+                is_csv = True
+            except csv.Error:
+                is_csv = False
+
+        if not is_json and not is_csv:
+            raise ValueError("The file format is not supported.")
+
+        if is_json and not isinstance(data, list):
+            raise ValueError("The data should be formatted as a list of objects.")
+
+        df = pd.DataFrame(data)
+        plots = (('char_count', 'Character count'), ('image_count', 'Image count'))
+        missing = [column for column, _ in plots if column not in df.columns]
+        if missing:
+            raise ValueError("Missing required column(s): " + ", ".join(missing))
+
+        # CSV fields arrive as strings; coerce so mean() and hist() see numbers.
+        for column, _ in plots:
+            df[column] = pd.to_numeric(df[column], errors='coerce')
+
+        print("Total Websites Analyzed:", df.shape[0])
+        print("Average character count:", df['char_count'].mean())
+        print("Average image count:", df['image_count'].mean())
+
+        for column, label in plots:
+            plt.figure(figsize=(10, 5))
+            df[column].hist(bins=50)
+            plt.title(label + ' histogram')
+            plt.xlabel(label)
+            plt.ylabel('Frequency')
+            plt.show()
+
+    except ValueError as e:
+        print(e)
+    except FileNotFoundError:
+        print("The file was not found")
+
+if __name__ == '__main__':  # only when executed as a script, not on import
+    unittest.main()  # NOTE(review): no TestCase is defined in this file, so nothing runs
diff --git a/test/test _data_analysis.py b/test/test _data_analysis.py
@@ -0,0 +1,67 @@
+import json
+import pandas as pd
+import matplotlib.pyplot as plt
+from io import StringIO
+import csv
+import unittest
+
+# code
+def perform_analyses(data_file):
+    """Print summary statistics and show histograms for website data.
+
+    ``data_file`` is the path of a JSON file (a list of objects) or a CSV
+    file with a header row; ``char_count`` and ``image_count`` are required.
+    Problems with the file or its contents are printed, not raised.
+    """
+    try:
+        with open(data_file, 'r') as f:
+            file_content = f.read()
+
+        # Try JSON first, then fall back to CSV with a header row.
+        try:
+            data = json.loads(file_content)
+            is_json = True
+        except json.JSONDecodeError:
+            is_json = False
+        if not is_json:
+            try:
+                data = list(csv.DictReader(StringIO(file_content)))
+                is_csv = True
+            except csv.Error:
+                is_csv = False
+
+        if not is_json and not is_csv:
+            raise ValueError("The file format is not supported.")
+
+        if is_json and not isinstance(data, list):
+            raise ValueError("The data should be formatted as a list of objects.")
+
+        df = pd.DataFrame(data)
+        plots = (('char_count', 'Character count'), ('image_count', 'Image count'))
+        missing = [column for column, _ in plots if column not in df.columns]
+        if missing:
+            raise ValueError("Missing required column(s): " + ", ".join(missing))
+
+        # CSV fields arrive as strings; coerce so mean() and hist() see numbers.
+        for column, _ in plots:
+            df[column] = pd.to_numeric(df[column], errors='coerce')
+
+        print("Total Websites Analyzed:", df.shape[0])
+        print("Average character count:", df['char_count'].mean())
+        print("Average image count:", df['image_count'].mean())
+
+        for column, label in plots:
+            plt.figure(figsize=(10, 5))
+            df[column].hist(bins=50)
+            plt.title(label + ' histogram')
+            plt.xlabel(label)
+            plt.ylabel('Frequency')
+            plt.show()
+
+    except ValueError as e:
+        print(e)
+    except FileNotFoundError:
+        print("The file was not found")
+
+if __name__ == '__main__':  # only when executed as a script, not on import
+    unittest.main()  # NOTE(review): no TestCase is defined in this file, so nothing runs