INDEX

1. Implementation of Simple Linear Regression.
2. Implementation of Multiple Linear Regression.
3. Implementation of Support Vector Regression.
4. Implementation of Naïve Bayes Classifier.
5. Implementation of Decision Tree Classifier.
6. Implementation of Adaboost for Naïve Bayes Classifier.
7. Implementation of Adaboost for Decision Tree Classifier.
8. Implementation of Artificial Neural Network.

Implementation of Simple Linear Regression.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
dataset = pd.read_csv('/content/sample_data/Salary_Data.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values
print(X)
[[ 1.1]
[ 1.3]
[ 1.5]
[ 2. ]
[ 2.2]
[ 2.9]
[ 3. ]
[ 3.2]
[ 3.2]
[ 3.7]
[ 3.9]
[ 4. ]
[ 4. ]
[ 4.1]
[ 4.5]
[ 4.9]
[ 5.1]
[ 5.3]
[ 5.9]
[ 6. ]
[ 6.8]
[ 7.1]
[ 7.9]
[ 8.2]
[ 8.7]
[ 9. ]
[ 9.5]
[ 9.6]
[10.3]
[10.5]]
print(y)
Splitting the dataset into the Training Set and Test Set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 1/3, random_state = 0)
print(X_train)
[[ 2.9]
[ 5.1]
[ 3.2]
[ 4.5]
[ 8.2]
[ 6.8]
[ 1.3]
[10.5]
[ 3. ]
[ 2.2]
[ 5.9]
[ 6. ]
[ 3.7]
[ 3.2]
[ 9. ]
[ 2. ]
[ 1.1]
[ 7.1]
[ 4.9]
[ 4. ]]
print(X_test)
[[ 1.5]
[10.3]
[ 4.1]
[ 3.9]
[ 9.5]
[ 8.7]
[ 9.6]
[ 4. ]
[ 5.3]
[ 7.9]]
print(y_train)
print(y_test)
from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
regressor.fit(X_train, y_train)
LinearRegression()
y_pred = regressor.predict(X_test)
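The fitted line can be read straight off the estimator. A quick check of the learned parameters (a small addition, reusing the regressor trained above):

# simple linear regression learns: salary ≈ intercept + slope * years of experience
print('slope:', regressor.coef_[0])
print('intercept:', regressor.intercept_)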
plt.scatter(X_train, y_train, color = 'red')
plt.plot(X_train, regressor.predict(X_train), color = 'blue')
plt.title('Salary vs Experience (Training set)')
plt.xlabel('Years of Experience')
plt.ylabel('Salary')
plt.show()
plt.scatter(X_test, y_test, color = 'red')
plt.plot(X_test, regressor.predict(X_test), color = 'blue')
plt.title('Salary vs Experience (Test set)')
plt.xlabel('Years of Experience')
plt.ylabel('Salary')
plt.show()
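The lab stops at visual inspection; the same fit can be checked numerically with metrics that ship with scikit-learn (a sketch reusing y_test and y_pred from above):

from sklearn.metrics import r2_score, mean_absolute_error
# R^2 close to 1 and a small mean absolute error indicate a good linear fit
print('R^2:', r2_score(y_test, y_pred))
print('MAE:', mean_absolute_error(y_test, y_pred))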
Implementation of Multiple Linear Regression.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
dataset = pd.read_csv('/content/sample_data/50_Startups.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values
print(X)
(output truncated in the printout; final rows of the feature matrix:)
 [542.05 51743.15 0.0 'New York']
 [0.0 116983.8 45173.06 'California']]
print(y)
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(), [3])], remainder='passthrough')
X = np.array(ct.fit_transform(X))
print(X)
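After the transform, the encoder's dummy columns for 'State' come first and the untouched numeric columns follow. On recent scikit-learn versions (1.0 onward) the resulting column layout can be confirmed directly; a quick check using the fitted ct from above:

# dummy columns produced by the encoder appear first, passthrough columns after
print(ct.get_feature_names_out())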
Splitting the dataset into the Training Set and Test Set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)
from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
regressor.fit(X_train, y_train)
LinearRegression()
y_pred = regressor.predict(X_test)
np.set_printoptions(precision=2)
print(np.concatenate((y_pred.reshape(len(y_pred),1), y_test.reshape(len(y_test),1)),1))
[[103015.2 103282.38]
[132582.28 144259.4 ]
[132447.74 146121.95]
[ 71976.1 77798.83]
[178537.48 191050.39]
[116161.24 105008.31]
[ 67851.69 81229.06]
[ 98791.73 97483.56]
[113969.44 110352.25]
[167921.07 166187.94]]
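The side-by-side array shows the predictions tracking the actual values; the fit can be summarized in a single number with R² (a sketch reusing y_test and y_pred from above):

from sklearn.metrics import r2_score
print('R^2 on the test set:', r2_score(y_test, y_pred))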
Implementation of Support Vector Regression.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
dataset = pd.read_csv('/content/sample_data/Position_Salaries.csv')
X = dataset.iloc[:, 1:2].values
y = dataset.iloc[:, 2].values
print(X)
print(y)
[[ 1]
[ 2]
[ 3]
[ 4]
[ 5]
[ 6]
[ 7]
[ 8]
[ 9]
[10]]
[ 45000 50000 60000 80000 110000 150000 200000 300000 500000
1000000]
Real-world datasets contain features that differ in magnitude, units, and range, and it is worth standardizing them whenever a feature's raw scale is irrelevant or misleading. Feature scaling brings the data into a comparable range. Many scikit-learn estimators either do not care about scale or handle it internally, but SVR does not scale its inputs for us: the RBF kernel works on distances between samples, so both X and y must be standardized explicitly before fitting.
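StandardScaler standardizes each feature to z = (x − μ) / σ. As a sanity check (an illustrative sketch, not part of the original lab), the same numbers can be reproduced by hand with NumPy:

import numpy as np
from sklearn.preprocessing import StandardScaler

x = np.array([[1.], [2.], [3.], [4.], [5.]])

# manual standardization: subtract the column mean, divide by the population std
z_manual = (x - x.mean(axis=0)) / x.std(axis=0)

# StandardScaler computes the same thing (it also uses the population std, ddof=0)
z_sklearn = StandardScaler().fit_transform(x)

print(np.allclose(z_manual, z_sklearn))   # True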
from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
sc_y = StandardScaler()
X = sc_X.fit_transform(X)
y = sc_y.fit_transform(y.reshape(-1, 1))   # reshape to a column: StandardScaler expects 2-D input
print(X)
print(y)
[[-1.57]
[-1.22]
[-0.87]
[-0.52]
[-0.17]
[ 0.17]
[ 0.52]
[ 0.87]
[ 1.22]
[ 1.57]]
[[-0.72]
[-0.7 ]
[-0.67]
[-0.6 ]
[-0.49]
[-0.35]
[-0.17]
[ 0.18]
[ 0.88]
[ 2.64]]
from sklearn.svm import SVR
regressor = SVR(kernel = 'rbf')
regressor.fit(X, y.ravel())   # SVR expects a 1-D target

# Predict for position level 6.5: scale the query with the already-fitted scaler
# (transform, not fit_transform -- refitting on a single point would zero out the
# prediction and corrupt the inverse transforms used in the plot below)
y_pred = regressor.predict(sc_X.transform([[6.5]])).reshape(-1, 1)

# Map the scaled prediction back to the original salary scale
y_pred = sc_y.inverse_transform(y_pred)
print(y_pred)
Step 6: Visualizing the SVR results (for a higher-resolution, smoother curve, see the dense-grid sketch after this plot)
# inverse the transformation to go back to the initial scale
plt.scatter(sc_X.inverse_transform(X), sc_y.inverse_transform(y), color = 'red')
plt.plot(sc_X.inverse_transform(X), sc_y.inverse_transform(regressor.predict(X).reshape(-1,1)), color = 'blue')
# add the title to the plot
plt.title('Support Vector Regression Model')
# label x axis
plt.xlabel('Position')
# label y axis
plt.ylabel('Salary Level')
# print the plot
plt.show()
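The plot above evaluates the model only at the ten training positions, so the blue curve is piecewise linear between them. For the higher-resolution, smoother curve the step title promises, one option (a sketch reusing the regressor and scalers fitted above) is to predict over a dense grid of position levels:

# dense grid over the original (unscaled) position levels, step 0.1
X_orig = sc_X.inverse_transform(X)
X_grid = np.arange(X_orig.min(), X_orig.max(), 0.1).reshape(-1, 1)
plt.scatter(X_orig, sc_y.inverse_transform(y), color = 'red')
plt.plot(X_grid,
         sc_y.inverse_transform(regressor.predict(sc_X.transform(X_grid)).reshape(-1, 1)),
         color = 'blue')
plt.title('Support Vector Regression Model (smooth curve)')
plt.xlabel('Position')
plt.ylabel('Salary Level')
plt.show()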
Implementation of Naïve Bayes Classifier.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
dataset = pd.read_csv('/content/sample_data/Social_Network_Ads.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values
Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0)
print(X_train)
(output truncated in the printout: 300 training rows of [Age, EstimatedSalary])
print(y_train)
[0 1 0 1 1 1 0 0 0 0 0 0 1 1 1 0 1 0 0 1 0 1 0 1 0 0 1 1 1 1 0 1 0 1 0 0 1
0 0 1 0 0 0 0 0 1 1 1 1 0 0 0 1 0 1 0 1 0 0 1 0 0 0 1 0 0 0 1 1 0 0 1 0 1
1 1 0 0 1 1 0 0 1 1 0 1 0 0 1 1 0 1 1 1 0 0 0 0 0 1 0 0 1 1 1 1 1 0 1 1 0
1 0 0 0 0 0 0 0 1 1 0 0 1 0 0 1 0 0 0 1 0 1 1 0 1 0 0 0 0 1 0 0 0 1 1 0 0
0 0 1 0 1 0 0 0 1 0 0 0 0 1 1 1 0 0 0 0 0 0 1 1 1 1 1 0 1 0 0 0 0 0 1 0 0
0 0 0 0 1 1 0 1 0 1 0 0 1 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 1 1 0 0 0 0 0
0 1 1 0 0 0 0 1 0 0 0 0 1 0 1 0 1 0 0 0 1 0 0 0 1 0 1 0 0 0 0 0 1 1 0 0 0
0 0 1 0 1 1 0 0 0 0 0 1 0 1 0 0 1 0 0 1 0 1 0 0 0 0 0 0 1 1 1 1 0 0 0 0 1
0 0 0 0]
print(X_test)
(output truncated in the printout: 100 test rows of [Age, EstimatedSalary])
print(y_test)
[0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 1 0 1 0 0 0 0 0 1 1 0 0 0 0
0 0 1 0 0 0 0 1 0 0 1 0 1 1 0 0 0 1 1 0 0 1 0 0 1 0 1 0 1 0 0 0 0 1 0 0 1
0 0 0 0 1 1 1 0 0 0 1 1 0 1 1 0 0 1 0 0 0 1 0 1 1 1]
Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
print(X_train)
(output truncated in the printout: 300 standardized training rows)
print(X_test)
(output truncated in the printout: 100 standardized test rows)
from sklearn.naive_bayes import GaussianNB
classifier = GaussianNB()
classifier.fit(X_train, y_train)
GaussianNB()
print(sc.transform([[30,87000]]))
print(classifier.predict(sc.transform([[40,200000]])))
[[-0.8 0.5]]
[1]
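GaussianNB also exposes the posterior probabilities behind these hard class labels; a quick look at them (reusing the classifier and scaler from above):

# posterior P(class | features) for the same kind of query points
print(classifier.predict_proba(sc.transform([[30, 87000]])))
print(classifier.predict_proba(sc.transform([[40, 200000]])))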
y_pred = classifier.predict(X_test)
print(np.concatenate((y_pred.reshape(len(y_pred),1), y_test.reshape(len(y_test),1)),1))
(output truncated in the printout: 100 rows of [y_pred y_test])
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred)
[[65 3]
[ 7 25]]
0.9
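Accuracy alone hides the per-class behaviour that the confusion matrix hints at; classification_report breaks it down (a sketch reusing y_test and y_pred from above):

from sklearn.metrics import classification_report
# precision, recall and F1 for each class, plus per-class support
print(classification_report(y_test, y_pred))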
from matplotlib.colors import ListedColormap
X_set, y_set = sc.inverse_transform(X_train), y_train
# note: a 0.25 step over the salary axis builds a very large grid; if memory is
# tight, coarser steps (e.g. 1 for age, 100 for salary) give a similar picture
X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 10, stop = X_set[:, 0].max() + 10, step = 0.25),
                     np.arange(start = X_set[:, 1].min() - 1000, stop = X_set[:, 1].max() + 1000, step = 0.25))
plt.contourf(X1, X2, classifier.predict(sc.transform(np.array([X1.ravel(), X2.ravel()]).T)).reshape(X1.shape),
             alpha = 0.75, cmap = ListedColormap(('red', 'green')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
for i, j in enumerate(np.unique(y_set)):
    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
                color = ListedColormap(('red', 'green'))(i), label = j)   # color= avoids the RGBA warning
plt.title('Naive Bayes (Training set)')
plt.xlabel('Age')
plt.ylabel('Estimated Salary')
plt.legend()
plt.show()
from matplotlib.colors import ListedColormap
X_set, y_set = sc.inverse_transform(X_test), y_test
X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 10, stop = X_set[:, 0].max() + 10, step = 0.25),
                     np.arange(start = X_set[:, 1].min() - 1000, stop = X_set[:, 1].max() + 1000, step = 0.25))
plt.contourf(X1, X2, classifier.predict(sc.transform(np.array([X1.ravel(), X2.ravel()]).T)).reshape(X1.shape),
             alpha = 0.75, cmap = ListedColormap(('red', 'green')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
for i, j in enumerate(np.unique(y_set)):
    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
                color = ListedColormap(('red', 'green'))(i), label = j)
plt.title('Naive Bayes (Test set)')
plt.xlabel('Age')
plt.ylabel('Estimated Salary')
plt.legend()
plt.show()
Implementation of Decision Tree Classifier.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
dataset = pd.read_csv('/content/sample_data/Social_Network_Ads (1).csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values
Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0)
print(X_train)
(output truncated in the printout: 300 training rows of [Age, EstimatedSalary])
print(y_train)
[0 1 0 1 1 1 0 0 0 0 0 0 1 1 1 0 1 0 0 1 0 1 0 1 0 0 1 1 1 1 0 1 0 1 0 0 1
0 0 1 0 0 0 0 0 1 1 1 1 0 0 0 1 0 1 0 1 0 0 1 0 0 0 1 0 0 0 1 1 0 0 1 0 1
1 1 0 0 1 1 0 0 1 1 0 1 0 0 1 1 0 1 1 1 0 0 0 0 0 1 0 0 1 1 1 1 1 0 1 1 0
1 0 0 0 0 0 0 0 1 1 0 0 1 0 0 1 0 0 0 1 0 1 1 0 1 0 0 0 0 1 0 0 0 1 1 0 0
0 0 1 0 1 0 0 0 1 0 0 0 0 1 1 1 0 0 0 0 0 0 1 1 1 1 1 0 1 0 0 0 0 0 1 0 0
0 0 0 0 1 1 0 1 0 1 0 0 1 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 1 1 0 0 0 0 0
0 1 1 0 0 0 0 1 0 0 0 0 1 0 1 0 1 0 0 0 1 0 0 0 1 0 1 0 0 0 0 0 1 1 0 0 0
0 0 1 0 1 1 0 0 0 0 0 1 0 1 0 0 1 0 0 1 0 1 0 0 0 0 0 0 1 1 1 1 0 0 0 0 1
0 0 0 0]
print(X_test)
(output truncated in the printout: 100 test rows of [Age, EstimatedSalary])
print(y_test)
[0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 1 0 1 0 0 0 0 0 1 1 0 0 0 0
0 0 1 0 0 0 0 1 0 0 1 0 1 1 0 0 0 1 1 0 0 1 0 0 1 0 1 0 1 0 0 0 0 1 0 0 1
0 0 0 0 1 1 1 0 0 0 1 1 0 1 1 0 0 1 0 0 0 1 0 1 1 1]
Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
print(X_train)
(output truncated in the printout: 300 standardized training rows)
print(X_test)
(output truncated in the printout: 100 standardized test rows)
from sklearn.tree import DecisionTreeClassifier
classifier = DecisionTreeClassifier()
classifier.fit(X_train, y_train)
DecisionTreeClassifier()
print(classifier.predict(sc.transform([[30,87000]])))
print(classifier.predict(sc.transform([[40,200000]])))
[0]
[1]
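Unlike Naive Bayes, a fitted decision tree can be drawn directly, which makes its axis-aligned splits easy to inspect. A sketch using sklearn.tree.plot_tree (available since scikit-learn 0.21); the feature and class names here are assumptions based on this dataset:

from sklearn import tree
plt.figure(figsize=(16, 8))
tree.plot_tree(classifier,
               feature_names=['Age (scaled)', 'EstimatedSalary (scaled)'],  # assumed labels
               class_names=['Not purchased', 'Purchased'],                  # assumed labels
               filled=True, max_depth=3)   # cap the depth so the figure stays legible
plt.show()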
y_pred = classifier.predict(X_test)
print(np.concatenate((y_pred.reshape(len(y_pred),1), y_test.reshape(len(y_test),1)),1))
(output truncated in the printout: 100 rows of [y_pred y_test])
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred)
[[62 6]
[ 4 28]]
0.9
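A fully grown tree can overfit one particular split, so the 0.9 above may be optimistic. k-fold cross-validation over the whole dataset gives a steadier estimate (a sketch; trees are invariant to feature scaling, so the raw X and y loaded above are used directly):

from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeClassifier

# 10-fold cross-validated accuracy of a fresh tree on the unscaled features
scores = cross_val_score(DecisionTreeClassifier(random_state=0), X, y, cv=10)
print('mean accuracy: %.3f (+/- %.3f)' % (scores.mean(), scores.std()))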
from matplotlib.colors import ListedColormap
X_set, y_set = sc.inverse_transform(X_train), y_train
X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 10, stop = X_set[:, 0].max() + 10, step = 0.25),
                     np.arange(start = X_set[:, 1].min() - 1000, stop = X_set[:, 1].max() + 1000, step = 0.25))
plt.contourf(X1, X2, classifier.predict(sc.transform(np.array([X1.ravel(), X2.ravel()]).T)).reshape(X1.shape),
             alpha = 0.75, cmap = ListedColormap(('red', 'green')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
for i, j in enumerate(np.unique(y_set)):
    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
                color = ListedColormap(('red', 'green'))(i), label = j)   # color= avoids the RGBA warning
plt.title('Decision Tree Classification (Training set)')
plt.xlabel('Age')
plt.ylabel('Estimated Salary')
plt.legend()
plt.show()
from matplotlib.colors import ListedColormap
X_set, y_set = sc.inverse_transform(X_test), y_test
X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 10, stop = X_set[:, 0].max() + 10, step = 0.25),
                     np.arange(start = X_set[:, 1].min() - 1000, stop = X_set[:, 1].max() + 1000, step = 0.25))
plt.contourf(X1, X2, classifier.predict(sc.transform(np.array([X1.ravel(), X2.ravel()]).T)).reshape(X1.shape),
             alpha = 0.75, cmap = ListedColormap(('red', 'green')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
for i, j in enumerate(np.unique(y_set)):
    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
                color = ListedColormap(('red', 'green'))(i), label = j)
plt.title('Decision Tree Classification (Test set)')
plt.xlabel('Age')
plt.ylabel('Estimated Salary')
plt.legend()
plt.show()
Siddhant Mandal