# classifiers_output.py

import sys
import pandas as pd
import time
from sklearn.svm import SVC
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import AdaBoostClassifier
import sklearn.metrics as metrics
import os
from pathlib import Path

def RandomForest(perm_list, cat):
    """Train a random forest for ``cat`` and print its prediction for ``perm_list``.

    The CSV registered for ``cat`` is read with ``cp1252`` encoding.  Columns
    from index 2 up to (but excluding) the last one are used as features and
    the last column is used as the target.  A 10-fold stratified
    cross-validation is run over the data, and the classifier fitted in the
    final fold is used to classify ``perm_list``.  The predicted label is
    written to stdout.

    Args:
        perm_list: Sequence of numeric feature values, one per feature column
            of the category CSV, in column order.
        cat: Category name; must be one of the categories registered below.

    Raises:
        ValueError: If ``cat`` has no registered CSV, or ``perm_list`` does
            not contain exactly one value per feature column.
    """
    csv_paths = {
        "ArtAndDesign": r"C:\Users\Admin\eclipse-workspace\MajorFinalInterface\src\csvs\Heuristic\ArtAndDesign.csv",
        "Books": r"C:\Users\Admin\eclipse-workspace\MajorFinalInterface\src\csvs\Sparse\Books.csv",
        "Comics": r"C:\Users\Admin\eclipse-workspace\MajorFinalInterface\src\csvs\Sparse\Comics.csv",
        "MapsAndNavigation": r"C:\Users\Admin\eclipse-workspace\MajorFinalInterface\src\csvs\Sparse\MapsAndNavigation.csv",
        "Personalization": r"C:\Users\Admin\eclipse-workspace\MajorFinalInterface\src\csvs\Heuristic\Personalization.csv",
        "Weather": r"C:\Users\Admin\eclipse-workspace\MajorFinalInterface\src\csvs\Sparse\Weather.csv",
        "HouseAndHome": r"C:\Users\Admin\eclipse-workspace\MajorFinalInterface\src\csvs\Heuristic\HouseAndHome.csv",
    }
    # Fail with a clear message instead of an UnboundLocalError further down.
    if cat not in csv_paths:
        raise ValueError(
            "RandomForest has no dataset for category {!r}".format(cat))
    category = pd.read_csv(csv_paths[cat], encoding='cp1252')

    perm = list(category.columns[2:-1])
    target_label = category.columns[-1]
    permissions = category[perm]
    target = category[target_label]

    # Validate before spending time on training.
    if len(perm_list) != len(perm):
        raise ValueError(
            "expected {} permission values for category {!r}, got {}".format(
                len(perm), cat, len(perm_list)))

    skf = StratifiedKFold(n_splits=10)
    predicted_y = []
    expected_y = []
    for train_index, test_index in skf.split(permissions, target):
        # split() yields positional indices, so select rows with .iloc.
        x_train, x_test = permissions.iloc[train_index], permissions.iloc[test_index]
        y_train, y_test = target.iloc[train_index], target.iloc[test_index]
        clf = RandomForestClassifier()
        clf.fit(x_train, y_train)
        predicted_y.extend(clf.predict(x_test))
        expected_y.extend(y_test)
    # Cross-validated accuracy; computed but not currently reported.
    training_accuracy = metrics.accuracy_score(expected_y, predicted_y)

    # NOTE(review): the model used below is the one fitted in the last fold
    # (i.e. on the last training split), not a model refit on all rows.
    # predict() needs 2-D input; a one-row frame also keeps the feature names
    # the classifier was fitted with.
    sample = pd.DataFrame([perm_list], columns=perm)
    l = clf.predict(sample)
    print(l[0])
    
def SVM(perm_list, cat):
    """Train an SVC for ``cat`` and print its prediction for ``perm_list``.

    The CSV registered for ``cat`` is read with ``cp1252`` encoding.  Columns
    from index 2 up to (but excluding) the last one are used as features and
    the last column is used as the target.  A 10-fold stratified
    cross-validation is run over the data, and the classifier fitted in the
    final fold is used to classify ``perm_list``.  The predicted label is
    written to stdout; the feature/target shapes are written to stderr.

    Args:
        perm_list: Sequence of numeric feature values, one per feature column
            of the category CSV, in column order.
        cat: Category name; must be one of the categories registered below.

    Raises:
        ValueError: If ``cat`` has no registered CSV, or ``perm_list`` does
            not contain exactly one value per feature column.
    """
    csv_paths = {
        "AugmentedReality": r"C:\Users\Admin\eclipse-workspace\MajorFinalInterface\src\csvs\Sparse\AugmentedReality.csv",
        "Beauty": r"C:\Users\Admin\eclipse-workspace\MajorFinalInterface\src\csvs\Heuristic\Beauty.csv",
        "VideoPlayersAndEditors": r"C:\Users\Admin\eclipse-workspace\MajorFinalInterface\src\csvs\Heuristic\VideoPlayersAndEditors.csv",
        "LibrariesAndDemo": r"C:\Users\Admin\eclipse-workspace\MajorFinalInterface\src\csvs\Heuristic\LibrariesAndDemo.csv",
    }
    # Fail with a clear message instead of an UnboundLocalError further down.
    if cat not in csv_paths:
        raise ValueError(
            "SVM has no dataset for category {!r}".format(cat))
    category = pd.read_csv(csv_paths[cat], encoding='cp1252')

    perm = list(category.columns[2:-1])
    target_label = category.columns[-1]
    permissions = category[perm]
    target = category[target_label]

    # Validate before spending time on training.
    if len(perm_list) != len(perm):
        raise ValueError(
            "expected {} permission values for category {!r}, got {}".format(
                len(perm), cat, len(perm_list)))

    # Diagnostics go to stderr so stdout carries only the predicted label,
    # matching what RandomForest emits.
    print(permissions.shape, file=sys.stderr)
    print(target.shape, file=sys.stderr)

    skf = StratifiedKFold(n_splits=10)
    predicted_y = []
    expected_y = []
    for train_index, test_index in skf.split(permissions, target):
        # split() yields positional indices, so select rows with .iloc.
        x_train, x_test = permissions.iloc[train_index], permissions.iloc[test_index]
        y_train, y_test = target.iloc[train_index], target.iloc[test_index]
        clf = SVC()
        clf.fit(x_train, y_train)
        predicted_y.extend(clf.predict(x_test))
        expected_y.extend(y_test)
    # Cross-validated accuracy; computed but not currently reported.
    training_accuracy = metrics.accuracy_score(expected_y, predicted_y)

    # NOTE(review): the model used below is the one fitted in the last fold
    # (i.e. on the last training split), not a model refit on all rows.
    # predict() needs 2-D input (one row per sample); passing the bare list
    # is rejected by scikit-learn.
    sample = pd.DataFrame([perm_list], columns=perm)
    prediction = clf.predict(sample)
    print(prediction[0])
    
def bagging(perm_list, cat):
    """Train a bagging classifier for ``cat`` and print its prediction.

    The CSV registered for ``cat`` is read with ``cp1252`` encoding.  Columns
    from index 2 up to (but excluding) the last one are used as features and
    the last column is used as the target.  A 10-fold stratified
    cross-validation is run over the data, and the classifier fitted in the
    final fold is used to classify ``perm_list``.  The predicted label is
    written to stdout; the feature/target shapes are written to stderr.

    Args:
        perm_list: Sequence of numeric feature values, one per feature column
            of the category CSV, in column order.
        cat: Category name; must be one of the categories registered below.

    Raises:
        ValueError: If ``cat`` has no registered CSV, or ``perm_list`` does
            not contain exactly one value per feature column.
    """
    csv_paths = {
        "AutoAndVehicles": r"C:\Users\Admin\eclipse-workspace\MajorFinalInterface\src\csvs\Sparse\AutoAndVehicles.csv",
        "Communication": r"C:\Users\Admin\eclipse-workspace\MajorFinalInterface\src\csvs\Heuristic\Communication.csv",
    }
    # Fail with a clear message instead of an UnboundLocalError further down.
    if cat not in csv_paths:
        raise ValueError(
            "bagging has no dataset for category {!r}".format(cat))
    category = pd.read_csv(csv_paths[cat], encoding='cp1252')

    perm = list(category.columns[2:-1])
    target_label = category.columns[-1]
    permissions = category[perm]
    target = category[target_label]

    # Validate before spending time on training.
    if len(perm_list) != len(perm):
        raise ValueError(
            "expected {} permission values for category {!r}, got {}".format(
                len(perm), cat, len(perm_list)))

    # Diagnostics go to stderr so stdout carries only the predicted label,
    # matching what RandomForest emits.
    print(permissions.shape, file=sys.stderr)
    print(target.shape, file=sys.stderr)

    skf = StratifiedKFold(n_splits=10)
    predicted_y = []
    expected_y = []
    for train_index, test_index in skf.split(permissions, target):
        # split() yields positional indices, so select rows with .iloc.
        x_train, x_test = permissions.iloc[train_index], permissions.iloc[test_index]
        y_train, y_test = target.iloc[train_index], target.iloc[test_index]
        clf = BaggingClassifier()
        clf.fit(x_train, y_train)
        predicted_y.extend(clf.predict(x_test))
        expected_y.extend(y_test)
    # Cross-validated accuracy; computed but not currently reported.
    training_accuracy = metrics.accuracy_score(expected_y, predicted_y)

    # NOTE(review): the model used below is the one fitted in the last fold
    # (i.e. on the last training split), not a model refit on all rows.
    # predict() needs 2-D input (one row per sample); passing the bare list
    # is rejected by scikit-learn.
    sample = pd.DataFrame([perm_list], columns=perm)
    prediction = clf.predict(sample)
    print(prediction[0])
    
def boosting(perm_list, cat):
    """Train an AdaBoost classifier on the Lifestyle CSV and print its prediction.

    The Lifestyle CSV is read with ``cp1252`` encoding regardless of ``cat``.
    Columns from index 2 up to (but excluding) the last one are used as
    features and the last column is used as the target.  A 10-fold stratified
    cross-validation is run over the data, and the classifier fitted in the
    final fold is used to classify ``perm_list``.  The predicted label is
    written to stdout; the feature/target shapes are written to stderr.

    Args:
        perm_list: Sequence of numeric feature values, one per feature column
            of the Lifestyle CSV, in column order.
        cat: Category name.  Not consulted here: the dataset is always the
            Lifestyle one.  Kept so the signature matches the other
            classifier functions.

    Raises:
        ValueError: If ``perm_list`` does not contain exactly one value per
            feature column.
    """
    file = r"C:\Users\Admin\eclipse-workspace\MajorFinalInterface\src\csvs\Heuristic\Lifestyle.csv"
    category = pd.read_csv(file, encoding='cp1252')

    perm = list(category.columns[2:-1])
    target_label = category.columns[-1]
    permissions = category[perm]
    target = category[target_label]

    # Validate before spending time on training.
    if len(perm_list) != len(perm):
        raise ValueError(
            "expected {} permission values, got {}".format(
                len(perm), len(perm_list)))

    # Diagnostics go to stderr so stdout carries only the predicted label,
    # matching what RandomForest emits.
    print(permissions.shape, file=sys.stderr)
    print(target.shape, file=sys.stderr)

    skf = StratifiedKFold(n_splits=10)
    predicted_y = []
    expected_y = []
    for train_index, test_index in skf.split(permissions, target):
        # split() yields positional indices, so select rows with .iloc.
        x_train, x_test = permissions.iloc[train_index], permissions.iloc[test_index]
        y_train, y_test = target.iloc[train_index], target.iloc[test_index]
        clf = AdaBoostClassifier()
        clf.fit(x_train, y_train)
        predicted_y.extend(clf.predict(x_test))
        expected_y.extend(y_test)
    # Cross-validated accuracy; computed but not currently reported.
    training_accuracy = metrics.accuracy_score(expected_y, predicted_y)

    # NOTE(review): the model used below is the one fitted in the last fold
    # (i.e. on the last training split), not a model refit on all rows.
    # predict() needs 2-D input (one row per sample); passing the bare list
    # is rejected by scikit-learn.
    sample = pd.DataFrame([perm_list], columns=perm)
    prediction = clf.predict(sample)
    print(prediction[0])

def classifier_allotment(perm_list, category):
    """Run the classifier assigned to ``category`` on ``perm_list``.

    Each supported category is handled by exactly one of ``RandomForest``,
    ``SVM``, ``bagging`` or ``boosting``; the chosen function is called with
    ``perm_list`` and ``category`` unchanged.

    Args:
        perm_list: Feature values forwarded to the selected classifier.
        category: Category name used to pick the classifier.

    Raises:
        ValueError: If ``category`` is not one of the supported names, so an
            unsupported request fails loudly instead of producing no output.
    """
    random_forest_categories = (
        'ArtAndDesign', 'Books', 'Comics', 'HouseAndHome',
        'MapsAndNavigation', 'Personalization', 'Weather',
    )
    svm_categories = (
        'AugmentedReality', 'Beauty', 'LibrariesAndDemo',
        'VideoPlayersAndEditors',
    )
    bagging_categories = ('AutoAndVehicles', 'Communication')
    boosting_categories = ('Lifestyle',)

    if category in random_forest_categories:
        RandomForest(perm_list, category)
    elif category in svm_categories:
        SVM(perm_list, category)
    elif category in bagging_categories:
        bagging(perm_list, category)
    elif category in boosting_categories:
        boosting(perm_list, category)
    else:
        raise ValueError(
            "no classifier is assigned to category {!r}".format(category))
    
def main(argv=None):
    """Classify the permission vector given on the command line.

    Expected arguments: ``<value_1> ... <value_n> <category>``.  Every
    argument except the last is converted with ``float``; the last argument
    is passed through as the category name.

    Args:
        argv: Full argument vector including the program name.  Defaults to
            ``sys.argv``.
    """
    argumentList = sys.argv if argv is None else argv
    # Without at least one real argument, argumentList[-1] would be the
    # script path itself and would be treated as the category.
    if len(argumentList) < 2:
        sys.exit("usage: classifiers_output.py <permission values...> <category>")
    perm_list_strings = argumentList[1:-1]
    perm_list = [float(i) for i in perm_list_strings]
    classifier_allotment(perm_list, argumentList[-1])


# Only run when executed as a script, so importing the module has no side
# effects and does not parse the importing process's sys.argv.
if __name__ == "__main__":
    main()