#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Aug 2 11:25:59 2020

@author: ashesh

Train a decision-tree classifier on ``./data.csv`` and plot its confusion
matrix.

Pipeline:
    1. Read the CSV and drop every row that contains a missing value.
    2. Use all columns except the first and the last as features, and the
       last column as the target.
    3. One-hot encode the categorical feature columns and the target.
    4. Split into train/test sets, fit a ``DecisionTreeClassifier`` and print
       the learned tree as text.
    5. Predict on the test set, build the confusion matrix, and save it as a
       heatmap to ``./confusion_matrix.png``.
"""

# Load libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import export_text

# Create URL
url = './data.csv'

# Load dataset
dataframe = pd.read_csv(url)
dataframe.dropna(inplace=True)

# The first column is skipped and the last column is the target.
# NOTE(review): presumably the first column is a row id / day label --
# confirm against data.csv.
featureframe = dataframe.iloc[:, 1:-1]
targetframe = dataframe.iloc[:, -1]

# For more on descriptive statistics in pandas
# see https://www.tutorialspoint.com/python_pandas/python_pandas_descriptive_statistics.htm
print("================ =============== ")
print(featureframe.describe())
print("================ =============== ")

# One-hot encode the categorical features; drop_first removes one redundant
# indicator column per feature.
dummy_cols = ['outlook', 'temp', 'humidity', 'wind']
ff = pd.get_dummies(featureframe, columns=dummy_cols, drop_first=True)
feature_names = ff.columns
features = ff.to_numpy()

# Encode the target the same way.  `targetframe` is a Series, so `columns`
# has no effect here; it is kept only to mirror the feature encoding call.
tf = pd.get_dummies(targetframe, columns=['play'], drop_first=True)
target_name = tf.columns
# get_dummies yields bool or uint8 depending on the pandas version; cast to
# int so the class labels are always the integers 0 and 1.
target = np.ravel(tf.to_numpy()).astype(int)

# split into training and test sets
features_train, features_test, target_train, target_test = train_test_split(
    features, target, random_state=12)

#%%
tree_depth = 10
min_in_leaf = 1

#classifier = DecisionTreeClassifier(random_state=0, max_depth=tree_depth)
# max_features="auto" was removed from scikit-learn (deprecated in 1.1,
# removed in 1.3).  For DecisionTreeClassifier it meant sqrt(n_features), so
# "sqrt" keeps the same behaviour and works on old and new releases.
classifier = DecisionTreeClassifier(
    max_features="sqrt",
    random_state=12,
    min_samples_leaf=min_in_leaf,
    max_depth=tree_depth,
)

#%%
# train model
decision_tree = classifier.fit(features_train, target_train)
#plot_tree(decision_tree)
r = export_text(decision_tree, show_weights=True,
                feature_names=list(feature_names))
print(r)

#%% now make predictions
target_predicted = decision_tree.predict(features_test)

#%%
# create confusion matrix
# Pin both labels explicitly: without `labels`, a test split that happens to
# contain a single class yields a 1x1 matrix and the 2x2-labelled DataFrame
# below raises ValueError.
matrix = confusion_matrix(target_test, target_predicted, labels=[0, 1])

#%%
# create pandas dataframe
# NOTE(review): assumes label 0 is "no" and label 1 is "yes", i.e. that
# drop_first removed the "no" indicator -- confirm against data.csv.
class_names = ['Play_No', 'Play_Yes']
dataframe_Confusion = pd.DataFrame(matrix, index=class_names,
                                   columns=class_names)

# create heatmap
sns.heatmap(dataframe_Confusion, annot=True, cmap="Blues", fmt=".0f")
plt.title("Confusion Matrix")
plt.ylabel("True Class")
plt.xlabel("Predicted Class")
# Run tight_layout only after every label exists so they are included in the
# layout computation and not clipped in the saved image.
plt.tight_layout()
plt.savefig('./confusion_matrix.png')
plt.show()
plt.close()