Skip to content

Commit

Permalink
Update Soms_FraudDetection.py
Browse files Browse the repository at this point in the history
  • Loading branch information
jzsmoreno committed Nov 8, 2024
1 parent 3f57c42 commit 5a7b3b9
Show file tree
Hide file tree
Showing 6 changed files with 91 additions and 24 deletions.
5 changes: 2 additions & 3 deletions forecasting/app.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
import numpy as np
import pandas as pd
import streamlit as st
from likelihood.tools import *
from tensorflow.keras.models import load_model

from figure import *
from likelihood.tools import *

# These files are in the forecasting folder
from series import *
from tensorflow.keras.models import load_model

np.random.seed(0)
neural_network = load_model("forecasting/models/model_tensor.h5")
Expand Down
96 changes: 81 additions & 15 deletions fraud_detection/Soms_FraudDetection.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,11 @@
@author: J. Ivan Avalos
"""

import pickle

import numpy as np
import pandas as pd
from minisom import MiniSom
from pylab import bone, colorbar, pcolor, plot, show
from sklearn.preprocessing import MinMaxScaler


Expand Down Expand Up @@ -39,7 +40,7 @@ def transformData(features):
"""


def somTrained(features, x=10, y=10, sigma=1.0, learning_rate=0.3, num_iteration=100):
def somTrained(features, x=10, y=10, sigma=0.5, learning_rate=0.01, num_iteration=1000):
num_features = features.shape[1]
som = MiniSom(x=x, y=y, input_len=num_features, sigma=sigma, learning_rate=learning_rate)
som.random_weights_init(features)
Expand All @@ -52,8 +53,9 @@ def getFrauds(som, features, dist_int, sc):
mappings = som.win_map(features)

# Obtengo los indices de los clusters
distance_map = som.distance_map().round(1)
bestIdx = [[i, j] for i in range(10) for j in range(10) if (distance_map[i, j] >= dist_int)]
distance_map = som.distance_map().round(2)
n = distance_map.shape[0]
bestIdx = [[i, j] for i in range(n) for j in range(n) if (distance_map[i, j] >= dist_int)]

# Obtengo los potenciales fraudes
fraud_list = [] # Arreglo de numpys con los posibles fraudes
Expand All @@ -71,14 +73,78 @@ def getFrauds(som, features, dist_int, sc):
return fraud_inverse_transformed


def getAccuracy(dataset, fraud_id):
    """Return the percentage of flagged IDs whose dataset row has ``Class == 0``.

    Every entry of ``fraud_id`` is compared against the ``CustomerID`` column;
    each matching row labelled ``Class == 0`` counts as one right prediction.
    The total is divided by ``len(fraud_id)`` and scaled to a percentage.

    Args:
        dataset: Table exposing ``CustomerID`` and ``Class`` columns of equal
            length (e.g. a pandas DataFrame).
        fraud_id: Sized iterable of hashable customer IDs flagged by the model.

    Returns:
        The share of right predictions as a percentage, or ``0.0`` when
        ``fraud_id`` is empty (instead of raising ``ZeroDivisionError``).
    """
    # len() rather than truthiness: fraud_id may be a NumPy array, whose
    # truth value is ambiguous.
    if len(fraud_id) == 0:
        return 0.0

    # Pair the two columns positionally. Indexing dataset["Class"] with an
    # enumerate() counter is a label lookup and breaks on a non-default index.
    class_zero_rows = {}
    for customer_id, actual_class in zip(dataset["CustomerID"], dataset["Class"]):
        if actual_class == 0:
            class_zero_rows[customer_id] = class_zero_rows.get(customer_id, 0) + 1

    # One pass over the flagged IDs; repeated IDs are counted once per
    # occurrence, exactly as a nested comparison loop would.
    right_predictions = sum(class_zero_rows.get(flagged, 0) for flagged in fraud_id)
    return (right_predictions / len(fraud_id)) * 100
def getMetrics(dataset, fraud_id):
    """Print accuracy, precision, recall and F1-score; return the accuracy.

    A customer counts as predicted-fraud when its ``CustomerID`` is contained
    in ``fraud_id``. The ground truth is the ``Class`` column, where 1 is
    treated as fraud and 0 as non-fraud. Rows with any other class value fall
    into no confusion-matrix cell but still count towards the total.

    Args:
        dataset: Table exposing ``CustomerID`` and ``Class`` columns of equal
            length (e.g. a pandas DataFrame).
        fraud_id: Container of flagged customer IDs, queried with ``in``.

    Returns:
        Accuracy as a percentage, or ``0.0`` for an empty dataset (instead of
        raising ``ZeroDivisionError``). The other metrics are only printed.
    """
    true_positives = 0  # Frauds flagged as frauds
    true_negatives = 0  # Non-frauds left unflagged
    false_positives = 0  # Non-frauds flagged as frauds
    false_negatives = 0  # Frauds left unflagged
    total_predictions = len(dataset)

    # Pair the two columns positionally. Indexing dataset["Class"] with an
    # enumerate() counter is a label lookup and breaks on a non-default index.
    for customer_id, actual_class in zip(dataset["CustomerID"], dataset["Class"]):
        is_fraud = customer_id in fraud_id

        # Update confusion matrix counts
        if actual_class == 1 and is_fraud:
            true_positives += 1
        elif actual_class == 0 and not is_fraud:
            true_negatives += 1
        elif actual_class == 0 and is_fraud:
            false_positives += 1
        elif actual_class == 1 and not is_fraud:
            false_negatives += 1

    # Every ratio below falls back to 0 when its denominator is empty.
    if total_predictions > 0:
        accuracy = (true_positives + true_negatives) / total_predictions * 100
    else:
        accuracy = 0.0

    if true_positives + false_positives > 0:
        precision = true_positives / (true_positives + false_positives) * 100
    else:
        precision = 0

    if true_positives + false_negatives > 0:
        recall = true_positives / (true_positives + false_negatives) * 100
    else:
        recall = 0

    # Harmonic mean of precision and recall (both already percentages)
    if precision + recall > 0:
        f1_score = 2 * (precision * recall) / (precision + recall)
    else:
        f1_score = 0

    # Output the metrics
    print("MinSom accuracy : ", accuracy)
    print("MinSom precision : ", precision)
    print("MinSom recall : ", recall)
    print("MinSom F1-score : ", f1_score)

    return accuracy


def load_model(filepath):
    """Load and return the pickled model stored at ``filepath``.

    Args:
        filepath: Path of the pickle file to read.

    Returns:
        The object deserialized from the file.
    """
    # NOTE(security): pickle can execute arbitrary code while loading; only
    # point this at files produced by a trusted training run.
    # The context manager closes the file handle even if unpickling fails.
    with open(filepath, "rb") as model_file:
        return pickle.load(model_file)


if __name__ == "__main__":
    # Load the data and scale the features
    dataset, features, isFraud = getData()
    features_transformed, sc = transformData(features)

    # Train a 3x3 self-organizing map on the scaled features
    som = somTrained(features_transformed, x=3, y=3, sigma=1)

    # Collect the potential frauds from nodes whose distance is >= 0.75
    fraud_id = getFrauds(som, features_transformed, 0.75, sc)

    # Print the evaluation metrics and keep the returned accuracy
    metrics = getMetrics(dataset, fraud_id)

    # Persist the trained map, then read it back from disk
    filepath = "./fraud_detection/som.p"
    with open(filepath, "wb") as model_file:
        pickle.dump(som, model_file)

    som = load_model(filepath)
12 changes: 6 additions & 6 deletions fraud_detection/app.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
import streamlit as st
from pylab import bone, colorbar, pcolor, plot, show
from Soms_FraudDetection import getAccuracy, getData, getFrauds, somTrained, transformData
from Soms_FraudDetection import getData, getFrauds, getMetrics, load_model, transformData

# Importacion del conjunto de datos
dataset, features, isFraud = getData()
# Preprocesamiento de los datos
features, sc = transformData(features)

models = {"Self-organizing map": somTrained(features)}
models = {"Self-Organizing Map": load_model("./fraud_detection/som.p")}

# Sección de introducción
st.title("Predicción de fraudes usando mapas autoorganizados")
st.title("Predicción de fraudes usando mapas auto-organizados")
st.write(
"""
* Bienvenid@ a este sencillo ejemplo que ejecuta un modelo entrenado
de IA usando mapas autoorganizados para encontrar potenciales fraudes.
de IA usando mapas auto-organizados para encontrar potenciales fraudes.
* La base de datos utilizada proviene del siguiente link: https://archive.ics.uci.edu/ml/datasets/credit+approval
"""
Expand Down Expand Up @@ -53,7 +53,7 @@
)

# Obtenemos la gráfica de colores
som = models["Self-organizing map"]
som = models["Self-Organizing Map"]
bone()
pcolor(som.distance_map().T)
colorbar()
Expand Down Expand Up @@ -88,7 +88,7 @@


# Get the model's accuracy
acc = getAccuracy(dataset, fraud_id)
acc = getMetrics(dataset, fraud_id)

st.write("* Porcentaje de predicción : ")
st.header(str(round(acc, 2)) + "%")
File renamed without changes.
2 changes: 2 additions & 0 deletions fraud_detection/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
numpy
MiniSom
Binary file added fraud_detection/som.p
Binary file not shown.

0 comments on commit 5a7b3b9

Please sign in to comment.