#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
ENGG1811 Lecture

Machine Learning (Support vector machine and decision tree)
"""

# %% Import
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
# %% Load the data 
# The data are stored in two files: 
# data_values.npy, data_labels.npy
# 
# These files are in NumPy format; load them using
# the numpy.load() function
#
# The file data_values.npy is a (150,2) array containing 
# 150 sets of measurements. Each measurement contains 
# 2 values. 
#
# The file data_labels.npy is a (150,) array containing the
# labels for the measurements. A label is either 'normal' or 'fault'.
# Note that the labels are of the string datatype.
# 
# np.load(path, allow_pickle=True)


# Load the measurements (xy, expected shape (150,2)) and their labels
# (labels, expected shape (150,), each 'normal' or 'fault').
# NOTE: allow_pickle=True lets numpy unpickle object arrays, and pickle is
# not safe against malicious data -- only load .npy files you trust.
xy, labels = (
    np.load(file_name, allow_pickle=True)
    for file_name in ('data_values.npy', 'data_labels.npy')
)

# Split into training and test sets. The fixed random_state makes the
# shuffle, and therefore the split, identical on every run.
features_train, features_test, target_train, target_test = train_test_split(
    xy,
    labels,
    random_state=42,
)

# %% Use Support vector machine (SVM) for classification 
# sklearn is the machine learning library in Python
from sklearn import svm  # svm is sklearn's support vector machine module

# Build a linear-kernel support vector classifier and train it on the
# training split. fit() returns the fitted estimator itself, so the
# construction and the training can be chained in one statement.
classifer_svm = svm.SVC(kernel='linear').fit(features_train, target_train)

# %% Prediction using SVM
# Classify every (x,y) measurement in the test split as either
# normal or fault.
#
# The predicted labels are stored in prediction_svm

# To classify hand-picked points instead of the test split, use e.g.
#test_data = np.array([[5,3], [7.5, 3.35] ])
prediction_svm = classifer_svm.predict(features_test)
print(prediction_svm)

#%%
# Confusion matrix of the SVM predictions against the true test labels
matrix_svm = confusion_matrix(y_true=target_test, y_pred=prediction_svm)

# %% Use decision tree for classification 
# sklearn is the machine learning library in Python
from sklearn import tree  # tree is sklearn's decision tree module

# Train a decision tree on the training split.
# random_state is fixed because the tree shuffles the features at every
# split; when several candidate splits are equally good, an unseeded tree
# can pick a different one on each run. Seeding it keeps the results
# reproducible, in line with the seeded train/test split.
classifer_tree = tree.DecisionTreeClassifier(random_state=42)
classifer_tree.fit(features_train, target_train)

# Note: If you want to get a classification tree with a
# depth of 2, shown on Slides 24 and 25, you need to use
# classifer_tree = tree.DecisionTreeClassifier(max_depth=2, random_state=42)
#

# %% Prediction using decision tree for classification
# Classify every (x,y) measurement in the test split as either
# normal or fault.
#

# To classify hand-picked points instead of the test split, use e.g.
#test_data = np.array([[5,3], [7.5, 3.35] ])
# The results are stored in prediction_tree
prediction_tree = classifer_tree.predict(features_test)
print(prediction_tree)
#
#%%
# Confusion matrix of the decision tree predictions against the true
# test labels
matrix_dt = confusion_matrix(target_test, prediction_tree)

#%%
# create pandas dataframe
# confusion_matrix() orders its rows/columns by the sorted labels that occur
# in the true or predicted arrays. Derive the axis names from those same
# arrays instead of hard-coding them, so the names always line up with the
# matrix (and its size) even if a class is missing from the test split.
class_names = np.unique(np.concatenate((target_test, prediction_tree)))
dataframe_Confusion = pd.DataFrame(matrix_dt, index=class_names, columns=class_names)

# create heatmap of the decision tree confusion matrix
sns.heatmap(dataframe_Confusion, annot=True, cmap="Blues", fmt=".0f")
plt.title("Confusion Matrix")
plt.ylabel("True Class")
plt.xlabel("Predicted Class")
# tight_layout() only makes room for text that already exists, so it must
# run after the title and axis labels are set; otherwise the labels can be
# clipped in the saved image.
plt.tight_layout()
plt.savefig('./confusion_matrix.png')
plt.show()
plt.close()