# -*- coding: utf-8 -*-
"""ML_Project
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1u2vJP5cQVY8IedDSPKH2vbrfZhxYR_uJ
"""
# Import all the libraries required in the code
import pandas as pd
import numpy as np
import math
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
# Define a normalisation function according to the formula given in the research paper:
# each value is scaled in place into the range [0.1, 0.9] via min-max normalisation.
def norm(values):
    if not values:
        # Guard against empty lists (e.g. when the test-set extraction below is commented out)
        return
    mini = min(values)
    maxi = max(values)
    for i in range(len(values)):
        values[i] = 0.1 + (0.8 * (values[i] - mini)) / (maxi - mini)

def normalise(list_1, list_2, list_3):
    # Normalise all three feature lists in place
    norm(list_1)
    norm(list_2)
    norm(list_3)
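# Worked example (illustrative only, not part of the original pipeline): the min-max
# formula above maps the smallest value to 0.1 and the largest to 0.9, e.g.
#   sample = [1.0, 2.0, 3.0]
#   norm(sample)  ->  [0.1, 0.5, 0.9]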
def calculate(rul, rms, amplitude, sizes, learning_bearing, number, subset):
    # Build the RUL target and the RMS / amplitude features for every bearing in the
    # given subset ("Learning_set" or "Test_set") of the PHM IEEE 2012 dataset.
    print(subset)
    for i in range(len(learning_bearing)):
        if (subset == "Test_set" and i > 1) or subset == "Learning_set":
            # The last acceleration file of the bearing gives the end-of-life timestamp
            k = pd.read_csv("https://raw.githubusercontent.com/wkzs111/phm-ieee-2012-data-challenge-dataset/master/" + subset + "/Bearing" + "{:01d}".format(number) + "_" + "{:01d}".format(i + 1) + "/acc_0" + "{:04d}".format(learning_bearing[i] - 1) + ".csv", header=None)
            finalTime = (k.iloc[-1, 0] * 3600) + (k.iloc[-1, 1] * 60) + k.iloc[-1, 2]
            for j in range(1, learning_bearing[i]):
                f = pd.read_csv("https://raw.githubusercontent.com/wkzs111/phm-ieee-2012-data-challenge-dataset/master/" + subset + "/Bearing" + "{:01d}".format(number) + "_" + "{:01d}".format(i + 1) + "/acc_0" + "{:04d}".format(j) + ".csv", names=["hr", "min", "sec", "msec", "horacc", "veracc"])
                # RUL: remaining useful life = end-of-life time minus current time (seconds)
                currentTime = (f.iloc[-1, 0] * 3600) + (f.iloc[-1, 1] * 60) + f.iloc[-1, 2]
                rul.append(finalTime - currentTime)
                # RMS of the horizontal acceleration column
                currAcceleration = f.iloc[:, 4]
                MSE = np.square(currAcceleration).mean()
                RMSE = math.sqrt(MSE)
                rms.append(RMSE)
                # Amplitude: peak of an equivalent sinusoid, sqrt(2) * RMS
                AMPL = math.sqrt(2) * RMSE
                amplitude.append(AMPL)
                # Size of the final file (kept for bookkeeping, unused downstream)
                sizes.append(k.size)
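# Feature definitions used above (per acceleration file), for reference:
#   RMS       = sqrt( mean( a_h^2 ) )      where a_h is the horizontal acceleration
#   Amplitude = sqrt(2) * RMS              (peak of an equivalent sinusoid)
#   RUL       = t_end_of_life - t_current  (seconds)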
new_rms = []
new_amplitude = []
new_sizes = []
new_rul = []
# Per-bearing file counts: acc_0{n-1}.csv is treated as the final (end-of-life) recording
# learning_bearing_1 = [2804, 872]
learning_bearing_1 = [2804]
learning_bearing_2 = [912, 798]
learning_bearing_3 = [516, 1638]
# testing_bearing_1 = [0, 0, 1803, 1140, 2303, 2303, 1503]
# testing_bearing_2 = [0, 0, 572, 172, 1203, 613, 2003, 573] #from 6 to 7
testing_bearing_3 = [0, 0, 353]
calculate(new_rul, new_rms, new_amplitude, new_sizes, learning_bearing_1, 1, "Learning_set")
#call normalise on all the lists
normalise(new_rul, new_amplitude, new_rms)
#create dataframe using obtained normalised lists
df = pd.DataFrame(list(zip(new_rms, new_amplitude, new_rul)), columns =['RMS', 'Amplitude', 'RUL'])
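# Optional sanity check (a minimal sketch, not part of the original script): visualise
# the normalised RMS feature against the normalised RUL target to confirm the expected
# degradation trend for Bearing 1 of the learning set before fitting the model.
plt.scatter(df['RUL'], df['RMS'], s=5)
plt.xlabel('Normalised RUL')
plt.ylabel('Normalised RMS')
plt.title('RMS vs RUL (Bearing 1, learning set)')
plt.show()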
testing_new_rms = []
testing_new_amplitude = []
testing_new_sizes = []
testing_new_rul = []
# print(len(testing_bearing_3))
# calculate(testing_new_rul, testing_new_rms, testing_new_amplitude, testing_new_sizes, testing_bearing_3, 3, "Test_set")
#call normalise on all the lists
normalise(testing_new_rul, testing_new_amplitude, testing_new_rms)
#create dataframe using obtained normalised lists
dfp = pd.DataFrame(list(zip(testing_new_rms, testing_new_amplitude, testing_new_rul)), columns =['RMS', 'Amplitude', 'RUL'])
# separate into input and output columns
# X_train, y_train = df.iloc[:, :-1], df.iloc[:, -1]
# X_test, y_test = dfp.iloc[:, :-1], dfp.iloc[:, -1]
X, y = df.iloc[:, :-1], df.iloc[:, -1]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=51)
# Fit the SVR on the training split and report R^2 on the held-out test split
model = SVR(kernel='rbf', epsilon=0.1, gamma=0.5)
model.fit(X_train, y_train)
print(model.score(X_test, y_test))
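# A minimal evaluation sketch (an addition, not in the original): inspect the test-split
# predictions with mean absolute error and a predicted-vs-actual plot.
from sklearn.metrics import mean_absolute_error

y_pred = model.predict(X_test)
print("Test MAE:", mean_absolute_error(y_test, y_pred))

plt.scatter(y_test, y_pred, s=5)
plt.plot([0.1, 0.9], [0.1, 0.9], color='red')  # ideal predictions lie on this line
plt.xlabel('Actual normalised RUL')
plt.ylabel('Predicted normalised RUL')
plt.title('SVR predictions on the held-out split')
plt.show()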