KNN For Classification

Download as pdf or txt
Download as pdf or txt
You are on page 1 of 5

Name: Snehal Kotkar    Div: A    Roll No.: 46

Practical No.: 2

Problem Statement: Build a machine learning model using the k-Nearest
Neighbors algorithm to predict whether the patients in the "Pima Indians Diabetes Dataset"
have diabetes or not.

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.style.use('ggplot')

# Colab-only helper module for attaching Google Drive to the runtime.
from google.colab import drive


# Mount Drive at /content/drive so the dataset can be read via a normal path.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).

# Load the diabetes CSV from the mounted Drive into a DataFrame.
# NOTE(review): the folder name 'ML ' contains a trailing space before the
# slash -- confirm this matches the actual Drive folder name.
df = pd.read_csv('/content/drive/MyDrive/ML /diabetes.csv')
# Preview the first rows to check the columns loaded as expected.
df.head()

{"summary":"{\n \"name\": \"df\",\n \"rows\": 768,\n \"fields\": [\


n {\n \"column\": \"Pregnancies\",\n \"properties\": {\n
\"dtype\": \"number\",\n \"std\": 3,\n \"min\": 0,\n
\"max\": 17,\n \"num_unique_values\": 17,\n \"samples\":
[\n 6,\n 1,\n 3\n ],\n
\"semantic_type\": \"\",\n \"description\": \"\"\n }\
n },\n {\n \"column\": \"Glucose\",\n \"properties\":
{\n \"dtype\": \"number\",\n \"std\": 31,\n
\"min\": 0,\n \"max\": 199,\n \"num_unique_values\":
136,\n \"samples\": [\n 151,\n 101,\n
112\n ],\n \"semantic_type\": \"\",\n
\"description\": \"\"\n }\n },\n {\n \"column\":
\"BloodPressure\",\n \"properties\": {\n \"dtype\":
\"number\",\n \"std\": 19,\n \"min\": 0,\n
\"max\": 122,\n \"num_unique_values\": 47,\n
\"samples\": [\n 86,\n 46,\n 85\
n ],\n \"semantic_type\": \"\",\n
\"description\": \"\"\n }\n },\n {\n \"column\":
\"SkinThickness\",\n \"properties\": {\n \"dtype\":
\"number\",\n \"std\": 15,\n \"min\": 0,\n
\"max\": 99,\n \"num_unique_values\": 51,\n \"samples\":
[\n 7,\n 12,\n 48\n ],\n
\"semantic_type\": \"\",\n \"description\": \"\"\n }\
n },\n {\n \"column\": \"Insulin\",\n \"properties\":
{\n \"dtype\": \"number\",\n \"std\": 115,\n
\"min\": 0,\n \"max\": 846,\n \"num_unique_values\":
186,\n \"samples\": [\n 52,\n 41,\n
183\n ],\n \"semantic_type\": \"\",\n
\"description\": \"\"\n }\n },\n {\n \"column\":
\"BMI\",\n \"properties\": {\n \"dtype\": \"number\",\n
\"std\": 7.884160320375446,\n \"min\": 0.0,\n \"max\":
67.1,\n \"num_unique_values\": 248,\n \"samples\": [\n
19.9,\n 31.0,\n 38.1\n ],\n
\"semantic_type\": \"\",\n \"description\": \"\"\n }\
n },\n {\n \"column\": \"DiabetesPedigreeFunction\",\n
\"properties\": {\n \"dtype\": \"number\",\n \"std\":
0.3313285950127749,\n \"min\": 0.078,\n \"max\": 2.42,\n
\"num_unique_values\": 517,\n \"samples\": [\n 1.731,\
n 0.426,\n 0.138\n ],\n
\"semantic_type\": \"\",\n \"description\": \"\"\n }\
n },\n {\n \"column\": \"Age\",\n \"properties\": {\n
\"dtype\": \"number\",\n \"std\": 11,\n \"min\": 21,\n
\"max\": 81,\n \"num_unique_values\": 52,\n \"samples\":
[\n 60,\n 47,\n 72\n ],\n
\"semantic_type\": \"\",\n \"description\": \"\"\n }\
n },\n {\n \"column\": \"Outcome\",\n \"properties\":
{\n \"dtype\": \"number\",\n \"std\": 0,\n
\"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n
\"samples\": [\n 0,\n 1\n ],\n
\"semantic_type\": \"\",\n \"description\": \"\"\n }\
n }\n ]\n}","type":"dataframe","variable_name":"df"}

# (number of rows, number of columns) of the loaded frame.
df.shape

(768, 9)

# Count missing (NaN/None) entries per column.
df.isnull().sum()

Pregnancies 0
Glucose 0
BloodPressure 0
SkinThickness 0
Insulin 0
BMI 0
DiabetesPedigreeFunction 0
Age 0
Outcome 0
dtype: int64

# Feature matrix: every column except the label, as a NumPy array.
X = df.drop(columns='Outcome').values
# Target vector: the label column, as a NumPy array.
y = df['Outcome'].values

from sklearn.model_selection import train_test_split

# Hold out 25% of the rows for testing. stratify=y keeps the class ratio the
# same in both splits; random_state=42 makes the split reproducible.
# (The call is wrapped in parentheses so it can span several lines -- a bare
# line break after '=' is a syntax error.)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42, stratify=y
)

#import KNeighborsClassifier
from sklearn.neighbors import KNeighborsClassifier

#Setup arrays to store training and test accuracies, one slot per candidate k
neighbors = np.arange(1,15)  # candidate k values 1..14 (stop is exclusive)
train_accuracy = np.empty(len(neighbors))
test_accuracy = np.empty(len(neighbors))

# Every statement below must be indented under the for-loop; without the
# indentation the loop has no body and the script does not parse.
for i,k in enumerate(neighbors):
    #Setup a knn classifier with k neighbors
    knn = KNeighborsClassifier(n_neighbors=k)

    #Fit the model
    knn.fit(X_train, y_train)

    #Compute accuracy on the training set
    train_accuracy[i] = knn.score(X_train, y_train)

    #Compute accuracy on the test set
    test_accuracy[i] = knn.score(X_test, y_test)

# Draw test and training accuracy against k on one set of axes.
plt.title('k-NN Varying number of neighbors')
# Plot order is kept (test first, then train) so line colours and the
# legend order stay the same.
for scores, curve_label in (
    (test_accuracy, 'Testing Accuracy'),
    (train_accuracy, 'Training accuracy'),
):
    plt.plot(neighbors, scores, label=curve_label)
plt.xlabel('Number of neighbors')
plt.ylabel('Accuracy')
plt.legend()
plt.show()
#Setup a knn classifier with a fixed k of 4 neighbors
knn = KNeighborsClassifier(n_neighbors=4)

#Fit the model on the training split only; the test split stays held out


knn.fit(X_train,y_train)

KNeighborsClassifier(n_neighbors=4)

#Get accuracy. Note: In case of classification algorithms score method
#represents accuracy (mean accuracy on the given data and labels).
knn.score(X_test,y_test)

0.7291666666666666

#let us get the predictions using the classifier we had fit above
# One predicted label per row of X_test.
y_pred = knn.predict(X_test)

# Bare expression: displays the predicted labels as the cell output.
y_pred

array([0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0,
0,
0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0,
0,
1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0,
0,
0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0,
0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0,
0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0,
0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1,
0,
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0,
1,
0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0])

You might also like

pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy