
Assignment-3

Q1. Binary classification (Binomial) (Titanic dataset): the dependent variable can take only two possible values, such as 0 or 1, Pass or Fail, etc.
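For reference, the script below fits a logistic regression hypothesis and minimizes the binary cross-entropy cost by batch gradient descent:

$$h_\theta(x) = \sigma(\theta^T x) = \frac{1}{1 + e^{-\theta^T x}}$$

$$J(\theta) = -\frac{1}{m} \sum_{i=1}^{m} \left[ y^{(i)} \log h_\theta(x^{(i)}) + \left(1 - y^{(i)}\right) \log\left(1 - h_\theta(x^{(i)})\right) \right]$$

with the update $\theta \leftarrow \theta - \frac{\alpha}{m} X^T (h - y)$, which is exactly what gradient_descent computes.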

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the dataset (raw string so the backslashes are not treated as escapes)
df = pd.read_csv(r"Z:\ML LAB\Titanic.csv")
print(df)

# Convert string columns to numeric codes; .loc avoids chained-assignment issues
m = len(df['Sex'])
for i in range(m):
    if df.loc[i, 'Sex'] == 'male':
        df.loc[i, 'Sex'] = 1
    else:
        df.loc[i, 'Sex'] = 0

for i in range(m):
    if df.loc[i, 'Embarked'] == 'C':
        df.loc[i, 'Embarked'] = 1
    if df.loc[i, 'Embarked'] == 'Q':
        df.loc[i, 'Embarked'] = 2
    if df.loc[i, 'Embarked'] == 'S':
        df.loc[i, 'Embarked'] = 3

# Drop the remaining string columns
df = df.drop('Name', axis=1)
df = df.drop('Cabin', axis=1)
df = df.drop('Ticket', axis=1)

# Fill missing values
df['Age'] = df['Age'].fillna(df['Age'].mean())
df['Embarked'] = df['Embarked'].fillna(df['Embarked'].mode()[0])
df['Fare'] = df['Fare'].fillna(df['Fare'].mean())

# Optional outlier removal on Age using the IQR rule (upper bound uses q3):
# q1 = df['Age'].quantile(0.25)
# q3 = df['Age'].quantile(0.75)
# IQR = q3 - q1
# lb = q1 - 1.5 * IQR
# ub = q3 + 1.5 * IQR
# df = df[(df['Age'] >= lb) & (df['Age'] <= ub)]

X = df[['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked']].astype(float)
y = df['Survived'].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def cost_function(X, y, theta):
    # Binary cross-entropy (log loss)
    m = len(y)
    h = sigmoid(X.dot(theta))
    cost = (-1 / m) * np.sum(y * np.log(h) + (1 - y) * np.log(1 - h))
    return cost

def gradient_descent(X, y, theta, alpha, epochs):
    m = len(y)
    cost_history = []
    for epoch in range(epochs):
        h = sigmoid(X.dot(theta))
        gradient = (1 / m) * X.T.dot(h - y)
        theta -= alpha * gradient
        cost = cost_function(X, y, theta)
        print(epoch, cost)
        cost_history.append(cost)
    return theta, cost_history

# Add the bias (intercept) column of ones
X_train_bias = np.c_[np.ones((X_train.shape[0], 1)), X_train]
X_test_bias = np.c_[np.ones((X_test.shape[0], 1)), X_test]
theta_initial = np.zeros(X_train_bias.shape[1])

alpha = 0.01
iterations = 1000
theta_final, cost_history = gradient_descent(X_train_bias, y_train, theta_initial, alpha, iterations)

# print('final theta:', theta_final)

plt.figure(figsize=(5, 5))
plt.plot(range(iterations), cost_history, color='blue')
plt.title('Cost Function over Epochs')
plt.xlabel('Epochs')
plt.ylabel('Cost')
plt.grid(True)
plt.show()

predict = sigmoid(X_test_bias.dot(theta_final))
print('predict and y_test values', predict, y_test)
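The script only prints the raw predicted probabilities. A minimal follow-up sketch (assuming the variables above are still in scope) thresholds them at 0.5 and reports test accuracy:

# Threshold the predicted probabilities at 0.5 to get class labels
y_pred = (predict >= 0.5).astype(int)

# Fraction of test passengers classified correctly
accuracy = np.mean(y_pred == y_test)
print('test accuracy:', accuracy)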


Q2. Multi-class classification (Multinomial) (Iris dataset): the dependent variable can take three or more possible values.
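The multinomial case replaces the sigmoid with a softmax over the K classes and minimizes the multi-class cross-entropy against one-hot labels, which is what the class below implements:

$$p_k(x) = \frac{e^{\theta_k^T x}}{\sum_{j=1}^{K} e^{\theta_j^T x}}, \qquad J(\Theta) = -\frac{1}{m} \sum_{i=1}^{m} \sum_{k=1}^{K} \mathbf{1}\{y^{(i)} = k\} \log p_k(x^{(i)})$$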

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix
import seaborn as sns

file_path = 'C:\\Users\\abhis\\Downloads\\Iris.csv'
df = pd.read_csv(file_path)

# Encode the three species as integer class labels 0, 1, 2
df['Species'] = df['Species'].map({'Iris-setosa': 0, 'Iris-versicolor': 1, 'Iris-virginica': 2})

X = df[['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']].values
y = df['Species'].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

class LogisticRegressionWithGD:
    def __init__(self, learning_rate=0.01, epochs=1000):
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.cost_history = []

    def softmax(self, z):
        # Subtract the row-wise max for numerical stability
        exp_z = np.exp(z - np.max(z, axis=1, keepdims=True))
        return exp_z / np.sum(exp_z, axis=1, keepdims=True)

    def cost_function(self, X, y, theta):
        # Multi-class cross-entropy against one-hot encoded labels
        m = len(y)
        predictions = self.softmax(np.dot(X, theta))
        one_hot_y = np.eye(theta.shape[1])[y]
        cost = -np.sum(one_hot_y * np.log(predictions)) / m
        return cost

    def fit(self, X, y):
        m, n = X.shape
        X = np.hstack([np.ones((m, 1)), X])  # bias column
        num_classes = len(np.unique(y))
        theta = np.zeros((n + 1, num_classes))
        for epoch in range(self.epochs):
            predictions = self.softmax(np.dot(X, theta))
            error = predictions - np.eye(num_classes)[y.flatten()]
            gradient = np.dot(X.T, error) / m
            theta -= self.learning_rate * gradient
            cost = self.cost_function(X, y, theta)
            self.cost_history.append(cost)
        self.theta = theta  # store the learned weights on the model
        return theta

    def predict(self, X):
        X = np.hstack([np.ones((X.shape[0], 1)), X])
        predictions = self.softmax(np.dot(X, self.theta))
        return np.argmax(predictions, axis=1)

model = LogisticRegressionWithGD(learning_rate=0.01, epochs=1000)
model.fit(X_train, y_train)

plt.plot(range(len(model.cost_history)), model.cost_history, label="Cost")
plt.xlabel('Epochs')
plt.ylabel('Cost')
plt.title('Epochs vs Cost for Multi-Class Logistic Regression (Gradient Descent)')
plt.legend()
plt.show()

y_pred = model.predict(X_test)
conf_matrix = confusion_matrix(y_test, y_pred)
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues',
            xticklabels=['Setosa', 'Versicolor', 'Virginica'],
            yticklabels=['Setosa', 'Versicolor', 'Virginica'])
plt.title('Confusion Matrix for Multi-Class Iris Classification')
plt.xlabel('Predicted')
plt.ylabel('True')
plt.show()

plt.figure(figsize=(8, 6))
plt.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=plt.cm.Paired, edgecolors='k', s=50)
plt.xlabel('Sepal Length')
plt.ylabel('Sepal Width')
plt.title('Iris Dataset: Sepal Length vs Sepal Width')
plt.show()

# Decision boundaries over the first two (standardized) features; the two
# petal features are held at 0, i.e. at their training-set mean after scaling
x_min, x_max = X_train[:, 0].min() - 1, X_train[:, 0].max() + 1
y_min, y_max = X_train[:, 1].min() - 1, X_train[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1), np.arange(y_min, y_max, 0.1))
grid_points = np.c_[xx.ravel(), yy.ravel(), np.zeros_like(xx.ravel()), np.zeros_like(yy.ravel())]
Z = model.predict(grid_points)
Z = Z.reshape(xx.shape)

plt.contourf(xx, yy, Z, alpha=0.8)
plt.scatter(X_train[:, 0], X_train[:, 1], c=y_train, edgecolors='k', s=50)
plt.xlabel('Sepal Length')
plt.ylabel('Sepal Width')
plt.title('Decision Boundaries of Multi-Class Logistic Regression (Gradient Descent)')
plt.show()
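As a sanity check (not part of the assignment), the from-scratch model can be compared against scikit-learn's built-in LogisticRegression on the same standardized split; a minimal sketch:

from sklearn.linear_model import LogisticRegression

# scikit-learn uses a multinomial softmax formulation by default in recent versions
clf = LogisticRegression(max_iter=1000)
clf.fit(X_train, y_train)
print('sklearn test accuracy:', clf.score(X_test, y_test))
print('from-scratch test accuracy:', np.mean(model.predict(X_test) == y_test))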
