#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
ENGG1811 Lecture Machine Learning (Support vector machine)

Fits a linear support vector machine and a decision tree to the same
two-feature data set, prints each model's predictions for a held-out test
set, and saves/shows a confusion-matrix heat map for the decision tree.
"""

# %% Import
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# %% Load the data
# The data are stored in two files:
#   data_values.npy, data_labels.npy
#
# Both files are in numpy format and are loaded with the
# numpy.load() function.
#
# The file data_values.npy is a (150,2) array containing
# 150 sets of measurements. Each measurement contains
# 2 values.
#
# The file data_labels.npy is a (150,) array containing the
# labels for the measurements. A label can be normal or fault.
# Note that the labels are of the datatype string.
#
# NOTE(review): allow_pickle=True lets numpy unpickle arbitrary Python
# objects, which can execute code. Only load .npy files from a trusted
# source; presumably it is needed here because the labels were saved as
# an object array -- confirm before removing it.
#
# np.load(path, allow_pickle=True)
xy = np.load('data_values.npy', allow_pickle=True)  # (150,2) floats
labels = np.load('data_labels.npy', allow_pickle=True)  # (150,) 'normal' or 'fault'

# Class labels in the order used for the rows/columns of every confusion
# matrix below, and for the axis tick labels of the heat map.
class_names = ['fault', 'normal']

# split into training and test sets
# (random_state is fixed so that the split is reproducible)
features_train, features_test, target_train, target_test = train_test_split(
    xy, labels, random_state=42)

# %% Use Support vector machine (SVM) for classification
# sklearn is the machine learning library in Python
from sklearn import svm  # Import the svm module

# To find the classifier
classifer_svm = svm.SVC(kernel='linear')
classifer_svm.fit(features_train, target_train)

# %% Prediction using SVM
# For each pair of (x,y)
# determine whether the pair of (x,y) will produce
# a normal or fault
#
# The results are stored in prediction_svm
#test_data = np.array([[5,3], [7.5, 3.35] ])
prediction_svm = classifer_svm.predict(features_test)
print(prediction_svm)

# %%
# create confusion matrix
# Passing labels= pins the row/column order to class_names and keeps the
# matrix 2x2 even if one class happens to be absent from the test split.
# matrix_svm is computed for inspection; it is not plotted in this script.
matrix_svm = confusion_matrix(target_test, prediction_svm, labels=class_names)

# %% Use decision tree for classification
# sklearn is the machine learning library in Python
from sklearn import tree

classifer_tree = tree.DecisionTreeClassifier()
classifer_tree.fit(features_train, target_train)

# Note: If you want to get a classification tree with a
# depth of 2, shown on Slides 24 and 25, you need to use
# classifer_tree = tree.DecisionTreeClassifier(max_depth=2)
#

# %% Prediction using decision tree for classification
# For each pair of (x,y)
# determine whether the pair of (x,y) will produce
# a normal or fault
#
#test_data = np.array([[5,3], [7.5, 3.35] ])
# The results are stored in prediction_tree
prediction_tree = classifer_tree.predict(features_test)
print(prediction_tree)

# %%
# create confusion matrix
# Same label order as class_names so that the DataFrame index/columns
# created below are guaranteed to line up with the matrix entries.
matrix_dt = confusion_matrix(target_test, prediction_tree, labels=class_names)

# %%
# create pandas dataframe
# Rows are the true classes, columns are the predicted classes.
dataframe_Confusion = pd.DataFrame(matrix_dt, index=class_names, columns=class_names)

# create heatmap
sns.heatmap(dataframe_Confusion, annot=True, cmap="Blues", fmt=".0f")
plt.title("Confusion Matrix")
plt.ylabel("True Class")
plt.xlabel("Predicted Class")
# tight_layout() must run after the title and axis labels exist; otherwise
# they are not taken into account and may be clipped in the saved image.
plt.tight_layout()
# Save before show(): once the window is closed the figure may be empty.
plt.savefig('./confusion_matrix.png')
plt.show()
plt.close()