Update Soms_FraudDetection.py

jzsmoreno · Nov 8, 2024 · 5a7b3b9 · 5a7b3b9
1 parent 3f57c42
commit 5a7b3b9
Show file tree

Hide file tree

Showing 6 changed files with 91 additions and 24 deletions.
diff --git a/forecasting/app.py b/forecasting/app.py
@@ -1,13 +1,12 @@
 import numpy as np
 import pandas as pd
 import streamlit as st
-from likelihood.tools import *
-from tensorflow.keras.models import load_model
-
 from figure import *
+from likelihood.tools import *
 
 # These files are in the forecasting folder
 from series import *
+from tensorflow.keras.models import load_model
 
 np.random.seed(0)
 neural_network = load_model("forecasting/models/model_tensor.h5")

diff --git a/fraud_detection/Soms_FraudDetection.py b/fraud_detection/Soms_FraudDetection.py
@@ -4,10 +4,11 @@
 @author: J. Ivan Avalos
 """
 
+import pickle
+
 import numpy as np
 import pandas as pd
 from minisom import MiniSom
-from pylab import bone, colorbar, pcolor, plot, show
 from sklearn.preprocessing import MinMaxScaler
 
 
@@ -39,7 +40,7 @@ def transformData(features):
 """
 
 
-def somTrained(features, x=10, y=10, sigma=1.0, learning_rate=0.3, num_iteration=100):
+def somTrained(features, x=10, y=10, sigma=0.5, learning_rate=0.01, num_iteration=1000):
     num_features = features.shape[1]
     som = MiniSom(x=x, y=y, input_len=num_features, sigma=sigma, learning_rate=learning_rate)
     som.random_weights_init(features)
@@ -52,8 +53,9 @@ def getFrauds(som, features, dist_int, sc):
     mappings = som.win_map(features)
 
     # Obtengo los indices de los clusters
-    distance_map = som.distance_map().round(1)
-    bestIdx = [[i, j] for i in range(10) for j in range(10) if (distance_map[i, j] >= dist_int)]
+    distance_map = som.distance_map().round(2)
+    n = distance_map.shape[0]
+    bestIdx = [[i, j] for i in range(n) for j in range(n) if (distance_map[i, j] >= dist_int)]
 
     # Obtengo los potenciales fraudes
     fraud_list = []  # Arreglo de numpys con los posibles fraudes
@@ -71,14 +73,78 @@ def getFrauds(som, features, dist_int, sc):
     return fraud_inverse_transformed
 
 
-def getAccuracy(dataset, fraud_id):
-    right_prediction_index = []
-    wrong_prediction_index = []
-    for fraudsbySom in fraud_id:
-        for index, fraudsTrue in enumerate(dataset["CustomerID"]):
-            if fraudsbySom == fraudsTrue:
-                if dataset["Class"][index] == 0:
-                    right_prediction_index.append(index)
-            else:
-                wrong_prediction_index.append(index)
-    return (len(right_prediction_index) / len(fraud_id)) * 100
+def getMetrics(dataset, fraud_id):
+    """Print accuracy, precision, recall and F1-score and return the accuracy.
+
+    A customer counts as a predicted fraud when its ``CustomerID`` is found in
+    ``fraud_id``; the ``Class`` column supplies the true label (1 = fraud,
+    0 = non-fraud). Rows carrying any other label are left out of the four
+    confusion-matrix counts but still count towards the total.
+
+    Parameters
+    ----------
+    dataset : table with ``CustomerID`` and ``Class`` columns whose ``len`` is
+        the number of rows (presumably a pandas DataFrame -- confirm with caller).
+    fraud_id : container of the customer ids flagged as fraud; only
+        ``customer_id in fraud_id`` is evaluated on it.
+
+    Returns
+    -------
+    The accuracy as a percentage (0 for an empty dataset). Precision, recall
+    and F1-score are printed only.
+    """
+    true_positives = 0  # Correctly predicted frauds
+    true_negatives = 0  # Correctly predicted non-frauds
+    false_positives = 0  # Non-frauds predicted as frauds
+    false_negatives = 0  # Frauds predicted as non-frauds
+    total_predictions = len(dataset)
+
+    # Pair ids with labels positionally. Indexing dataset["Class"] with an
+    # enumerate counter is a label lookup and fails (or reads the wrong row)
+    # whenever the table does not have the default 0..n-1 index.
+    for customer_id, actual_class in zip(dataset["CustomerID"], dataset["Class"]):
+        is_fraud = customer_id in fraud_id
+
+        # Update confusion matrix counts
+        if actual_class == 1 and is_fraud:  # True positive
+            true_positives += 1
+        elif actual_class == 0 and not is_fraud:  # True negative
+            true_negatives += 1
+        elif actual_class == 0 and is_fraud:  # False positive
+            false_positives += 1
+        elif actual_class == 1 and not is_fraud:  # False negative
+            false_negatives += 1
+
+    # Every ratio falls back to 0 when its denominator is 0.
+    if total_predictions > 0:
+        accuracy = (true_positives + true_negatives) / total_predictions * 100
+    else:
+        accuracy = 0
+
+    if true_positives + false_positives > 0:
+        precision = true_positives / (true_positives + false_positives) * 100
+    else:
+        precision = 0
+
+    if true_positives + false_negatives > 0:
+        recall = true_positives / (true_positives + false_negatives) * 100
+    else:
+        recall = 0
+
+    if precision + recall > 0:
+        f1_score = 2 * (precision * recall) / (precision + recall)
+    else:
+        f1_score = 0
+
+    # Output the metrics
+    print("MinSom accuracy : ", accuracy)
+    print("MinSom precision : ", precision)
+    print("MinSom recall : ", recall)
+    print("MinSom F1-score : ", f1_score)
+
+    return accuracy
+
+
+def load_model(filepath):
+    """Load a pickled model from ``filepath`` and return it.
+
+    Parameters
+    ----------
+    filepath : path of the pickle file; it is opened in binary read mode.
+
+    Returns
+    -------
+    The object that was stored in the file.
+    """
+    # NOTE(security): unpickling can execute arbitrary code, so this must only
+    # be pointed at files produced by this project.
+    # The context manager closes the handle even when unpickling fails.
+    with open(filepath, "rb") as infile:
+        return pickle.load(infile)
+
+
+if __name__ == "__main__":
+    # Load the data set and transform the features.
+    dataset, features, isFraud = getData()
+    features_transformed, sc = transformData(features)
+
+    # Train a 3x3 map with sigma=1 (other settings keep their defaults).
+    som = somTrained(features_transformed, x=3, y=3, sigma=1)
+
+    # Collect the potential frauds using a distance threshold of 0.75.
+    fraud_id = getFrauds(som, features_transformed, 0.75, sc)
+
+    # Print the metrics; the returned value is the accuracy.
+    accuracy = getMetrics(dataset, fraud_id)
+
+    # Persist the trained map, then read it back from disk.
+    model_path = "./fraud_detection/som.p"
+    with open(model_path, "wb") as model_file:
+        pickle.dump(som, model_file)
+
+    som = load_model(model_path)
diff --git a/fraud_detection/app.py b/fraud_detection/app.py
@@ -1,20 +1,20 @@
 import streamlit as st
 from pylab import bone, colorbar, pcolor, plot, show
-from Soms_FraudDetection import getAccuracy, getData, getFrauds, somTrained, transformData
+from Soms_FraudDetection import getData, getFrauds, getMetrics, load_model, transformData
 
 # Importacion del conjunto de datos
 dataset, features, isFraud = getData()
 # Preprocesamiento de los datos
 features, sc = transformData(features)
 
-models = {"Self-organizing map": somTrained(features)}
+models = {"Self-Organizing Map": load_model("./fraud_detection/som.p")}
 
 # Sección de introducción
-st.title("Predicción de fraudes usando mapas autoorganizados")
+st.title("Predicción de fraudes usando mapas auto-organizados")
 st.write(
     """
     * Bienvenid@ a este sencillo ejemplo que ejecuta un modelo entrenado 
-    de IA usando mapas autoorganizados para encontrar potenciales fraudes.
+    de IA usando mapas auto-organizados para encontrar potenciales fraudes.
 
     * La base de datos utilizada proviene del siguiente link: https://archive.ics.uci.edu/ml/datasets/credit+approval
     """
@@ -53,7 +53,7 @@
 )
 
 # Obtenemos la gráfica de colores
-som = models["Self-organizing map"]
+som = models["Self-Organizing Map"]
 bone()
 pcolor(som.distance_map().T)
 colorbar()
@@ -88,7 +88,7 @@
 
 
 # Obtengo la precisión del modelo
-acc = getAccuracy(dataset, fraud_id)
+acc = getMetrics(dataset, fraud_id)
 
 st.write("* Porcentaje de predicción : ")
 st.header(str(round(acc, 2)) + "%")
diff --git a/fraud_detection/minisom.py → fraud_detection/legacy_minisom.py b/fraud_detection/minisom.py → fraud_detection/legacy_minisom.py
diff --git a/fraud_detection/requirements.txt b/fraud_detection/requirements.txt
@@ -0,0 +1,2 @@
+numpy
+MiniSom
diff --git a/fraud_detection/som.p b/fraud_detection/som.p