
Assignment-3

Q1. Binary classification (Binomial) (Titanic dataset): the dependent variable can take only two possible values, such as 0 or 1, Pass or Fail, etc.
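For reference, the script below fits a logistic regression hypothesis and minimizes the binary cross-entropy cost by batch gradient descent:

$$h_\theta(x) = \sigma(\theta^T x) = \frac{1}{1 + e^{-\theta^T x}}$$

$$J(\theta) = -\frac{1}{m} \sum_{i=1}^{m} \left[ y^{(i)} \log h_\theta(x^{(i)}) + \left(1 - y^{(i)}\right) \log\left(1 - h_\theta(x^{(i)})\right) \right]$$

with the update $\theta \leftarrow \theta - \frac{\alpha}{m} X^T (h - y)$, which is exactly what gradient_descent computes.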

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the dataset (raw string so the backslashes are not treated as escapes)
df = pd.read_csv(r"Z:\ML LAB\Titanic.csv")
print(df)

# Convert string columns to numeric codes; .loc avoids chained-assignment issues
m = len(df['Sex'])
for i in range(m):
    if df.loc[i, 'Sex'] == 'male':
        df.loc[i, 'Sex'] = 1
    else:
        df.loc[i, 'Sex'] = 0

for i in range(m):
    if df.loc[i, 'Embarked'] == 'C':
        df.loc[i, 'Embarked'] = 1
    if df.loc[i, 'Embarked'] == 'Q':
        df.loc[i, 'Embarked'] = 2
    if df.loc[i, 'Embarked'] == 'S':
        df.loc[i, 'Embarked'] = 3

# Drop the remaining string columns
df = df.drop('Name', axis=1)
df = df.drop('Cabin', axis=1)
df = df.drop('Ticket', axis=1)

# Fill missing values
df['Age'] = df['Age'].fillna(df['Age'].mean())
df['Embarked'] = df['Embarked'].fillna(df['Embarked'].mode()[0])
df['Fare'] = df['Fare'].fillna(df['Fare'].mean())

# Optional outlier removal on Age using the IQR rule (upper bound uses q3):
# q1 = df['Age'].quantile(0.25)
# q3 = df['Age'].quantile(0.75)
# IQR = q3 - q1
# lb = q1 - 1.5 * IQR
# ub = q3 + 1.5 * IQR
# df = df[(df['Age'] >= lb) & (df['Age'] <= ub)]

X = df[['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked']].astype(float)
y = df['Survived'].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def cost_function(X, y, theta):
    # Binary cross-entropy (log loss)
    m = len(y)
    h = sigmoid(X.dot(theta))
    cost = (-1 / m) * np.sum(y * np.log(h) + (1 - y) * np.log(1 - h))
    return cost

def gradient_descent(X, y, theta, alpha, epochs):
    m = len(y)
    cost_history = []
    for epoch in range(epochs):
        h = sigmoid(X.dot(theta))
        gradient = (1 / m) * X.T.dot(h - y)
        theta -= alpha * gradient
        cost = cost_function(X, y, theta)
        print(epoch, cost)
        cost_history.append(cost)
    return theta, cost_history

# Add the bias (intercept) column of ones
X_train_bias = np.c_[np.ones((X_train.shape[0], 1)), X_train]
X_test_bias = np.c_[np.ones((X_test.shape[0], 1)), X_test]
theta_initial = np.zeros(X_train_bias.shape[1])

alpha = 0.01
iterations = 1000
theta_final, cost_history = gradient_descent(X_train_bias, y_train, theta_initial, alpha, iterations)

# print('final theta:', theta_final)

plt.figure(figsize=(5, 5))
plt.plot(range(iterations), cost_history, color='blue')
plt.title('Cost Function over Epochs')
plt.xlabel('Epochs')
plt.ylabel('Cost')
plt.grid(True)
plt.show()

predict = sigmoid(X_test_bias.dot(theta_final))
print('predict and y_test values', predict, y_test)
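The script only prints the raw predicted probabilities. A minimal follow-up sketch (assuming the variables above are still in scope) thresholds them at 0.5 and reports test accuracy:

# Threshold the predicted probabilities at 0.5 to get class labels
y_pred = (predict >= 0.5).astype(int)

# Fraction of test passengers classified correctly
accuracy = np.mean(y_pred == y_test)
print('test accuracy:', accuracy)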


Q2. Multi-class classification (Multinomial) (Iris dataset): the dependent variable can take three or more possible values.
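The multinomial case replaces the sigmoid with a softmax over the K classes and minimizes the multi-class cross-entropy against one-hot labels, which is what the class below implements:

$$p_k(x) = \frac{e^{\theta_k^T x}}{\sum_{j=1}^{K} e^{\theta_j^T x}}, \qquad J(\Theta) = -\frac{1}{m} \sum_{i=1}^{m} \sum_{k=1}^{K} \mathbf{1}\{y^{(i)} = k\} \log p_k(x^{(i)})$$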

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix
import seaborn as sns

file_path = 'C:\\Users\\abhis\\Downloads\\Iris.csv'
df = pd.read_csv(file_path)

# Encode the three species as integer class labels 0, 1, 2
df['Species'] = df['Species'].map({'Iris-setosa': 0, 'Iris-versicolor': 1, 'Iris-virginica': 2})

X = df[['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']].values
y = df['Species'].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

class LogisticRegressionWithGD:
    def __init__(self, learning_rate=0.01, epochs=1000):
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.cost_history = []

    def softmax(self, z):
        # Subtract the row-wise max for numerical stability
        exp_z = np.exp(z - np.max(z, axis=1, keepdims=True))
        return exp_z / np.sum(exp_z, axis=1, keepdims=True)

    def cost_function(self, X, y, theta):
        # Multi-class cross-entropy against one-hot encoded labels
        m = len(y)
        predictions = self.softmax(np.dot(X, theta))
        one_hot_y = np.eye(theta.shape[1])[y]
        cost = -np.sum(one_hot_y * np.log(predictions)) / m
        return cost

    def fit(self, X, y):
        m, n = X.shape
        X = np.hstack([np.ones((m, 1)), X])  # bias column
        num_classes = len(np.unique(y))
        theta = np.zeros((n + 1, num_classes))
        for epoch in range(self.epochs):
            predictions = self.softmax(np.dot(X, theta))
            error = predictions - np.eye(num_classes)[y.flatten()]
            gradient = np.dot(X.T, error) / m
            theta -= self.learning_rate * gradient
            cost = self.cost_function(X, y, theta)
            self.cost_history.append(cost)
        self.theta = theta  # store the learned weights on the model
        return theta

    def predict(self, X):
        X = np.hstack([np.ones((X.shape[0], 1)), X])
        predictions = self.softmax(np.dot(X, self.theta))
        return np.argmax(predictions, axis=1)

model = LogisticRegressionWithGD(learning_rate=0.01, epochs=1000)
model.fit(X_train, y_train)

plt.plot(range(len(model.cost_history)), model.cost_history, label="Cost")
plt.xlabel('Epochs')
plt.ylabel('Cost')
plt.title('Epochs vs Cost for Multi-Class Logistic Regression (Gradient Descent)')
plt.legend()
plt.show()

y_pred = model.predict(X_test)
conf_matrix = confusion_matrix(y_test, y_pred)
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues',
            xticklabels=['Setosa', 'Versicolor', 'Virginica'],
            yticklabels=['Setosa', 'Versicolor', 'Virginica'])
plt.title('Confusion Matrix for Multi-Class Iris Classification')
plt.xlabel('Predicted')
plt.ylabel('True')
plt.show()

plt.figure(figsize=(8, 6))
plt.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=plt.cm.Paired, edgecolors='k', s=50)
plt.xlabel('Sepal Length')
plt.ylabel('Sepal Width')
plt.title('Iris Dataset: Sepal Length vs Sepal Width')
plt.show()

# Decision boundaries over the first two (standardized) features; the two
# petal features are held at 0, i.e. at their training-set mean after scaling
x_min, x_max = X_train[:, 0].min() - 1, X_train[:, 0].max() + 1
y_min, y_max = X_train[:, 1].min() - 1, X_train[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1), np.arange(y_min, y_max, 0.1))
grid_points = np.c_[xx.ravel(), yy.ravel(), np.zeros_like(xx.ravel()), np.zeros_like(yy.ravel())]
Z = model.predict(grid_points)
Z = Z.reshape(xx.shape)

plt.contourf(xx, yy, Z, alpha=0.8)
plt.scatter(X_train[:, 0], X_train[:, 1], c=y_train, edgecolors='k', s=50)
plt.xlabel('Sepal Length')
plt.ylabel('Sepal Width')
plt.title('Decision Boundaries of Multi-Class Logistic Regression (Gradient Descent)')
plt.show()
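As a sanity check (not part of the assignment), the from-scratch model can be compared against scikit-learn's built-in LogisticRegression on the same standardized split; a minimal sketch:

from sklearn.linear_model import LogisticRegression

# scikit-learn uses a multinomial softmax formulation by default in recent versions
clf = LogisticRegression(max_iter=1000)
clf.fit(X_train, y_train)
print('sklearn test accuracy:', clf.score(X_test, y_test))
print('from-scratch test accuracy:', np.mean(model.predict(X_test) == y_test))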
