CD 601 Lab Manual


ORIENTAL COLLEGE OF TECHNOLOGY, BHOPAL

DEPARTMENT OF COMPUTER SCIENCE & ENGINEERING

LAB MANUAL (CD-601)

“DEEP LEARNING”

BACHELOR OF TECHNOLOGY (B.Tech) COURSE

SEMESTER –VI

LIST OF EXPERIMENTS

S.NO.   EXPERIMENT
01      Write a code to perform Image classification using CNN
02      Write a code on Face detection system with the OpenCV library
03      Write a code on digit Recognition system with CNN
04      Write a code on image compression and decompression using encoder and decoder in deep learning using PyTorch
05      Write a code on predicting Airline Passengers count based on LSTM and RNN
06      WAP to develop least squares Regression in Python
07      WAP to perform Exploratory Data Analysis on any given data set (breast cancer dataset from sklearn)
08

LAB ASSIGNMENT # 01

Write a code to perform Image classification using CNN

# Python program to create
# Image Classifier using CNN

# Importing the required libraries
import cv2
import os
import numpy as np
from random import shuffle
from tqdm import tqdm

'''Setting up the env'''
TRAIN_DIR = 'E:/dataset/Cats_vs_Dogs/train'
TEST_DIR = 'E:/dataset/Cats_vs_Dogs/test'   # assumed: used below but missing from the original listing
IMG_SIZE = 50                               # assumed: image side length used below but missing from the original listing
LR = 1e-3

'''Setting up the model name which helps when saving/loading the tensorflow model'''
MODEL_NAME = 'dogsvscats-{}-{}.model'.format(LR, '6conv-basic')

'''Labelling the dataset'''
def label_img(img):
    word_label = img.split('.')[-3]
    # DIY one-hot encoder
    if word_label == 'cat': return [1, 0]
    elif word_label == 'dog': return [0, 1]

'''Creating the training data'''
def create_train_data():
    # Creating an empty list where we should store the training data
    # after a little preprocessing of the data
    training_data = []

    # tqdm is only used for interactive loading
    # loading the training data
    for img in tqdm(os.listdir(TRAIN_DIR)):

        # labeling the images
        label = label_img(img)
        path = os.path.join(TRAIN_DIR, img)

        # loading the image from the path and then converting it into
        # grayscale for easier covnet processing
        img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)

        # resizing the image for processing in the covnet
        img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))

        # final step - forming the training data list with numpy arrays of the images
        training_data.append([np.array(img), np.array(label)])

    # shuffling of the training data to preserve the random state of our data
    shuffle(training_data)

    # saving our training data for further use if required
    np.save('train_data.npy', training_data)
    return training_data

'''Processing the given test data'''
# Almost the same as processing the training data, but
# we don't have to label it.
def process_test_data():
    testing_data = []
    for img in tqdm(os.listdir(TEST_DIR)):
        path = os.path.join(TEST_DIR, img)
        img_num = img.split('.')[0]
        img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
        testing_data.append([np.array(img), img_num])

    shuffle(testing_data)
    np.save('test_data.npy', testing_data)
    return testing_data

'''Running the training and the testing in the dataset for our model'''
train_data = create_train_data()
test_data = process_test_data()

# train_data = np.load('train_data.npy')
# test_data = np.load('test_data.npy')

'''Creating the neural network using tensorflow'''
# Importing the required libraries
import tflearn
from tflearn.layers.conv import conv_2d, max_pool_2d
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.estimator import regression
import tensorflow as tf

tf.compat.v1.reset_default_graph()

convnet = input_data(shape=[None, IMG_SIZE, IMG_SIZE, 1], name='input')

convnet = conv_2d(convnet, 32, 5, activation='relu')
convnet = max_pool_2d(convnet, 5)

convnet = conv_2d(convnet, 64, 5, activation='relu')
convnet = max_pool_2d(convnet, 5)

convnet = conv_2d(convnet, 128, 5, activation='relu')
convnet = max_pool_2d(convnet, 5)

convnet = conv_2d(convnet, 64, 5, activation='relu')
convnet = max_pool_2d(convnet, 5)

convnet = conv_2d(convnet, 32, 5, activation='relu')
convnet = max_pool_2d(convnet, 5)

convnet = fully_connected(convnet, 1024, activation='relu')
convnet = dropout(convnet, 0.8)

convnet = fully_connected(convnet, 2, activation='softmax')
convnet = regression(convnet, optimizer='adam', learning_rate=LR,
                     loss='categorical_crossentropy', name='targets')

model = tflearn.DNN(convnet, tensorboard_dir='log')

# Splitting the testing data and training data
train = train_data[:-500]
test = train_data[-500:]

'''Setting up the features and labels'''
# X - features & Y - labels
X = np.array([i[0] for i in train]).reshape(-1, IMG_SIZE, IMG_SIZE, 1)
Y = np.array([i[1] for i in train])
test_x = np.array([i[0] for i in test]).reshape(-1, IMG_SIZE, IMG_SIZE, 1)
test_y = np.array([i[1] for i in test])

'''Fitting the data into our model'''
# epoch = 5 taken
model.fit({'input': X}, {'targets': Y}, n_epoch=5,
          validation_set=({'input': test_x}, {'targets': test_y}),
          snapshot_step=500, show_metric=True, run_id=MODEL_NAME)
model.save(MODEL_NAME)

'''Testing the data'''
import matplotlib.pyplot as plt

# if you need to create the data:
# test_data = process_test_data()

# if you already have some saved:
test_data = np.load('test_data.npy')

fig = plt.figure()

for num, data in enumerate(test_data[:20]):
    # cat: [1, 0]
    # dog: [0, 1]
    img_num = data[1]
    img_data = data[0]

    y = fig.add_subplot(4, 5, num + 1)
    orig = img_data
    data = img_data.reshape(IMG_SIZE, IMG_SIZE, 1)

    model_out = model.predict([data])[0]

    if np.argmax(model_out) == 1: str_label = 'Dog'
    else: str_label = 'Cat'

    y.imshow(orig, cmap='gray')
    plt.title(str_label)
    y.axes.get_xaxis().set_visible(False)
    y.axes.get_yaxis().set_visible(False)

plt.show()

Output:
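As a further illustration (not part of the original listing), the trained model can also be used to classify one image at a time. The sketch below assumes the same IMG_SIZE preprocessing used during training; 'some_image.jpg' is a placeholder file name.

# Hedged sketch: classifying a single image with the trained tflearn model
img = cv2.imread('some_image.jpg', cv2.IMREAD_GRAYSCALE)   # placeholder path
img = cv2.resize(img, (IMG_SIZE, IMG_SIZE)).reshape(IMG_SIZE, IMG_SIZE, 1)
prediction = model.predict([img])[0]                        # [P(cat), P(dog)]
print('Dog' if np.argmax(prediction) == 1 else 'Cat')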

LAB ASSIGNMENT # 2

Write a code on Face detection system with the OpenCV library

# OpenCV program to detect faces in real time
# import the python OpenCV library
# where its functionality resides
import cv2

# load the required trained XML classifiers
# https://github.com/Itseez/opencv/blob/master/
# data/haarcascades/haarcascade_frontalface_default.xml
# Trained XML classifiers describe features of the object we want
# to detect. A cascade function is trained from a lot of
# positive (faces) and negative (non-faces) images.
face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')

# https://github.com/Itseez/opencv/blob/master
# /data/haarcascades/haarcascade_eye.xml
# Trained XML file for detecting eyes
eye_cascade = cv2.CascadeClassifier('haarcascade_eye.xml')

# capture frames from a camera
cap = cv2.VideoCapture(0)

# loop runs if capturing has been initialized
while 1:

    # reads frames from the camera
    ret, img = cap.read()

    # convert each frame to grayscale
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Detects faces of different sizes in the input image
    faces = face_cascade.detectMultiScale(gray, 1.3, 5)

    for (x, y, w, h) in faces:
        # To draw a rectangle around a face
        cv2.rectangle(img, (x, y), (x + w, y + h), (255, 255, 0), 2)
        roi_gray = gray[y:y + h, x:x + w]
        roi_color = img[y:y + h, x:x + w]

        # Detects eyes of different sizes in the face region
        eyes = eye_cascade.detectMultiScale(roi_gray)

        # To draw a rectangle around the eyes
        for (ex, ey, ew, eh) in eyes:
            cv2.rectangle(roi_color, (ex, ey), (ex + ew, ey + eh), (0, 127, 255), 2)

    # Display the image in a window
    cv2.imshow('img', img)

    # Wait for Esc key to stop
    k = cv2.waitKey(30) & 0xff
    if k == 27:
        break

    # Close the window
cap.release()

# De-allocate any associated memory usage
cv2.destroyAllWindows()
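If a webcam is not available, the same Haar cascade can be applied to a single image file instead of a video stream. The snippet below is a minimal illustrative sketch, not part of the original program; 'group_photo.jpg' is a placeholder file name.

# Hedged sketch: face detection on a static image instead of a webcam stream
import cv2

face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
img = cv2.imread('group_photo.jpg')            # placeholder image path
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
faces = face_cascade.detectMultiScale(gray, 1.3, 5)
for (x, y, w, h) in faces:
    cv2.rectangle(img, (x, y), (x + w, y + h), (255, 255, 0), 2)
cv2.imwrite('group_photo_faces.jpg', img)      # save the annotated result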

LAB ASSIGNMENT # 3

Write a code on digit Recognition system with CNN

1. Importing the Libraries

# Importing TensorFlow and Keras
# Keras is built into TF 2.0
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.image as img
%matplotlib inline
np.random.seed(2)
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import itertools

# Setting the theme of the data visualizer Seaborn
sns.set(style="dark", context="notebook", palette="muted")

2. Preparing the Dataset
We have added the data provided by the MNIST Handwritten Digit Recognition competition. Use the + Add data button on the top right corner to do that and select the competition; the data will be added to the kernel. We can find the dataset in the right panel under Data, and you can copy the paths of the train and test data from there.

read_csv is used to return a pandas DataFrame object from a CSV file.

Then we select the label column and store it in Y_train, which is used for training. X_train contains the pixel values of the respective labelled images.

We visualize the total number of samples of each class using countplot. Then we check for missing values in the dataset:

train = pd.read_csv("../input/digit-recognizer/train.csv")
test = pd.read_csv("../input/digit-recognizer/test.csv")
Y_train = train['label']

# Dropping the label column
X_train = train.drop(labels=['label'], axis=1)

# Free up some space
del train

graph = sns.countplot(Y_train)
Y_train.value_counts()

# Checking for any null or missing values
X_train.isnull().any().describe()
test.isnull().any().describe()

Normalisation
Normalisation is done to reduce the scale of the input values. Each pixel value ranges from 0 to 255 and specifies a shade of gray. The CNN will converge faster on values in [0, 1] than in [0, 255], so we divide every value by 255 to scale the data from [0..255] to [0..1]. This helps the model learn features better by reducing the computational burden of large input scales.

X_train = X_train/255
test = test/255

Reshape
The arrays of pixel values are reshaped into (28,28,1) matrices, since we feed the CNN model with an input_shape of 28x28x1.

X_train = X_train.values.reshape(-1,28,28,1)
test = test.values.reshape(-1,28,28,1)

Label Encoding
The CNN model outputs a vector of predictions, one per class, so the labels (digits) are one-hot encoded for prediction by the model. This lets us train the CNN against the encoded outputs and tune the parameters accordingly.

Y_train = tf.keras.utils.to_categorical(Y_train, num_classes=10)
# Encodes a label into a one-hot vector, e.g. 7 -> [0,0,0,0,0,0,0,1,0,0]
Train and Validation Data Split
We split the input data into two exclusive sets: training and validation. The training data is used to train the model, whereas the validation data is used to cross-check the model's accuracy and how well the model generalizes to data other than the training data. Validation accuracy and loss tell us the performance of the model on new data and reveal overfitting or underfitting during training.

# Splitting training and validation sets
random_seed = 2
X_train, X_val, Y_train, Y_val = train_test_split(X_train, Y_train, test_size=0.1,
                                                  random_state=random_seed)

# Show an example
g = plt.imshow(X_train[0][:,:,0])
3. Model Building
A deep convolutional neural network is a stack of artificial neural network layers, and a model architecture is the design of the network whose parameters we tune during training. Here we use the LeNet-5 architecture, proposed by Yann LeCun in 1998. It is popular for its minimal structure and easy-to-train nature, and it is well suited to recognizing and classifying different classes of objects in small-resolution images.

# CNN architecture: In -> [[Conv2D->relu]*2 -> MaxPool2D -> Dropout]*2 ->
#                   Flatten -> Dense -> Dropout -> Out
model = tf.keras.Sequential()

model.add(layers.Conv2D(filters=32, kernel_size=(5,5), padding='Same',
                        activation=tf.nn.relu, input_shape=(28,28,1)))
model.add(layers.Conv2D(filters=32, kernel_size=(5,5), padding='Same',
                        activation=tf.nn.relu))
model.add(layers.MaxPool2D(pool_size=(2,2)))
model.add(layers.Dropout(0.25))

model.add(layers.Conv2D(filters=64, kernel_size=(3,3), padding='Same',
                        activation=tf.nn.relu))
model.add(layers.Conv2D(filters=64, kernel_size=(3,3), padding='Same',
                        activation=tf.nn.relu))
model.add(layers.MaxPool2D(pool_size=(2,2), strides=(2,2)))
model.add(layers.Dropout(0.25))

model.add(layers.Flatten())
model.add(layers.Dense(256, activation=tf.nn.relu))
model.add(layers.Dropout(0.25))
model.add(layers.Dense(10, activation=tf.nn.softmax))
Optimizers and Annealers

• Optimizer: the optimizer is a crucial part of a neural network; it drives the model toward the optimum faster. The RMSProp optimizer makes the model converge more effectively and quickly and helps it settle near a good minimum, which improves accuracy.
• Learning rate: setting the learning rate is very important in a deep learning algorithm. While choosing a good initial learning rate helps, scheduling a reduction of the learning rate during training greatly aids convergence. ReduceLROnPlateau reduces the learning rate by a factor when the monitored metric stops improving for a given number of epochs, which helps the model reach its best accuracy.
# Defining the optimizer
optimizer = tf.keras.optimizers.RMSprop(lr=0.001, rho=0.9, epsilon=1e-08, decay=0.0)

# Compiling the model
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=["accuracy"])

# Setting the learning rate annealer
# ('val_accuracy' is the metric name logged by TF 2.x when metrics=["accuracy"])
learning_rate_reduction = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_accuracy',
                                                               patience=3,
                                                               verbose=1,
                                                               factor=0.5,
                                                               min_lr=0.00001)

# The number of epochs is set to 30 here; reduce it (e.g. to 10) for a quicker run.
epochs = 30
batch_size = 112
Data Augmentation
Data augmentation is the process of creating more training data by manipulating the given images. In deep learning, the availability of a large dataset is vital for training. Since we have a limited number of real-world training samples, we can use data augmentation to create more images for training. Data augmentation involves zooming, rotating, flipping, cropping and other image manipulations over the available data to generate additional training samples, which helps the model generalize better.

datagen = ImageDataGenerator(
        featurewise_center=False,             # set input mean to 0 over the dataset
        samplewise_center=False,              # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,   # divide each input by its std
        zca_whitening=False,                  # apply ZCA whitening
        rotation_range=10,                    # randomly rotate images in the range (degrees, 0 to 180)
        zoom_range=0.1,                       # randomly zoom images
        width_shift_range=0.1,                # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.1,               # randomly shift images vertically (fraction of total height)
        horizontal_flip=False,                # randomly flip images horizontally
        vertical_flip=False)                  # randomly flip images vertically

datagen.fit(X_train)
4. Model Fitting
Model fitting (training) is where we train our model and evaluate the error metrics. Training typically takes a long time on a CPU but can be sped up considerably on a CUDA-capable graphics card. Kaggle has limited built-in GPU support; be sure to turn it on when running this cell.

if tf.test.is_built_with_cuda():
    print("CUDA Available.. Just wait a few moments...")
else:
    print("CUDA not Available.. May the force be with you.")

# Fit the model
history = model.fit_generator(datagen.flow(X_train, Y_train, batch_size=batch_size),
                              epochs=epochs, validation_data=(X_val, Y_val),
                              verbose=2, steps_per_epoch=X_train.shape[0] // batch_size,
                              callbacks=[learning_rate_reduction])
5. Analyzing the model
We can analyse the model using various methods. One of them is the learning curve: we plot the training and validation losses on one figure and evaluate the trend. For an ideal model, training and validation loss should both be low and similar.

# Plot the loss and accuracy curves for training and validation
fig, ax = plt.subplots(2, 1)
ax[0].plot(history.history['loss'], color='b', label="Training loss")
ax[0].plot(history.history['val_loss'], color='r', label="Validation loss", axes=ax[0])
legend = ax[0].legend(loc='best', shadow=True)

ax[1].plot(history.history['accuracy'], color='b', label="Training accuracy")
ax[1].plot(history.history['val_accuracy'], color='r', label="Validation accuracy")
legend = ax[1].legend(loc='best', shadow=True)
Confusion Matrix Plotting
The confusion matrix is another way of evaluating the model. It gives a graphical representation of the model's performance in predicting every class. Here you can see that the model predicts the relevant classes fairly accurately.

# Look at confusion matrix
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.
    """
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, cm[i, j],
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

# Predict the values from the validation dataset
Y_pred = model.predict(X_val)
# Convert prediction probabilities to class indices
Y_pred_classes = np.argmax(Y_pred, axis=1)
# Convert one-hot validation labels back to class indices
Y_true = np.argmax(Y_val, axis=1)
# Compute the confusion matrix
confusion_mtx = confusion_matrix(Y_true, Y_pred_classes)
# Plot the confusion matrix
plot_confusion_matrix(confusion_mtx, classes=range(10))
Important Errors
Even with very high accuracy, no deep learning model correctly predicts all the images, so we inspect the most important errors made by our model.

# Display some error results

# Errors are differences between predicted labels and true labels
errors = (Y_pred_classes - Y_true != 0)

Y_pred_classes_errors = Y_pred_classes[errors]
Y_pred_errors = Y_pred[errors]
Y_true_errors = Y_true[errors]
X_val_errors = X_val[errors]

def display_errors(errors_index, img_errors, pred_errors, obs_errors):
    """ This function shows 6 images with their predicted and real labels """
    n = 0
    nrows = 2
    ncols = 3
    fig, ax = plt.subplots(nrows, ncols, sharex=True, sharey=True)
    for row in range(nrows):
        for col in range(ncols):
            error = errors_index[n]
            ax[row, col].imshow((img_errors[error]).reshape((28, 28)))
            ax[row, col].set_title("Predicted :{} True :{}".format(pred_errors[error], obs_errors[error]))
            n += 1

# Probabilities of the wrongly predicted numbers
Y_pred_errors_prob = np.max(Y_pred_errors, axis=1)

# Predicted probabilities of the true values in the error set
true_prob_errors = np.diagonal(np.take(Y_pred_errors, Y_true_errors, axis=1))

# Difference between the probability of the predicted label and the true label
delta_pred_true_errors = Y_pred_errors_prob - true_prob_errors

# Sorted list of the delta prob errors
sorted_delta_errors = np.argsort(delta_pred_true_errors)

# Top 6 errors
most_important_errors = sorted_delta_errors[-6:]

# Show the top 6 errors
display_errors(most_important_errors, X_val_errors, Y_pred_classes_errors, Y_true_errors)
6. Predicting the test data
Finally, after training and performance evaluation, we predict on the competition test dataset and store the results in a CSV file for submission.
# Predict results
results = model.predict(test)

# Select the index with the maximum probability
results = np.argmax(results, axis=1)
results = pd.Series(results, name="Label")

submission = pd.concat([pd.Series(range(1, 28001), name="ImageId"), results], axis=1)
submission.to_csv("cnn_mnist_datagen.csv", index=False)

LAB ASSIGNMENT # 4

Write a code on image compression and decompression using encoder and decoder in deep learning using PyTorch.

Autoencoders are one of the key building blocks used in recent times for such a task, thanks to their simple and intuitive architecture.
Broadly, once an autoencoder is trained, the encoder weights can be sent to the transmitter side and the decoder weights to the receiver side. This way, the transmitter can send data in an encoded format (saving time and bandwidth) while the receiver can reconstruct the data with much less overhead. This assignment explores an interesting application of autoencoders: image reconstruction on the famous MNIST digits dataset using the PyTorch framework in Python.

Autoencoders

As shown in the figure below, a very basic autoencoder consists of two main parts:
1. An Encoder, and
2. A Decoder
Through a series of layers, the encoder maps the higher-dimensional input down to a latent, low-dimensional representation of the same values. The decoder takes this latent representation and outputs the reconstructed data.

A basic 2 layer Autoencoder

Stepwise implementation:
Step 1: Loading data and printing some sample images from the training set.
• Initializing the transform: First, we initialize the transform to be applied to each entry of the dataset. Since tensors are internal to PyTorch's functioning, we convert each item to a tensor and normalize it so the pixel values lie between 0 and 1. This makes the optimization process easier and faster.
• Downloading the dataset: Then we download the dataset using the torchvision.datasets utility and store it on our local machine in the folders ./MNIST/train and ./MNIST/test for the training and test sets. We also wrap these datasets in data loaders with a batch size of 256 for faster learning. The reader is encouraged to play around with these values and can expect consistent results.
• Plotting the dataset: Lastly, we randomly print out 25 images from the dataset to get a better view of the data we're dealing with.

# Importing the necessary libraries
import numpy as np
import matplotlib.pyplot as plt
import torchvision
import torch

plt.rcParams['figure.figsize'] = 15, 10

# Initializing the transform for the dataset
transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize((0.5), (0.5))
])

# Downloading the MNIST dataset
train_dataset = torchvision.datasets.MNIST(
    root="./MNIST/train", train=True,
    transform=torchvision.transforms.ToTensor(),
    download=True)

test_dataset = torchvision.datasets.MNIST(
    root="./MNIST/test", train=False,
    transform=torchvision.transforms.ToTensor(),
    download=True)

# Creating Dataloaders from the
# training and testing dataset
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=256)
test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=256)

# Printing 25 random images from the training dataset
random_samples = np.random.randint(
    1, len(train_dataset), (25))

for idx in range(random_samples.shape[0]):
    plt.subplot(5, 5, idx + 1)
    plt.imshow(train_dataset[idx][0][0].numpy(), cmap='gray')
    plt.title(train_dataset[idx][1])
    plt.axis('off')

plt.tight_layout()
plt.show()

Step 2: Initializing the Deep Autoencoder model and other hyperparameters

In this step, we initialize our DeepAutoencoder class, a child class of torch.nn.Module. This abstracts away a lot of boilerplate code and lets us focus on the model architecture, which is as follows:

Model Architecture

As described above, the encoder layers form the first half of the network, i.e. from Linear-1 to Linear-7, and the decoder forms the other half, from Linear-10 to Sigmoid-15. We've used the torch.nn.Sequential utility to separate the encoder and the decoder from one another, which gives a better view of the model's architecture. After that, we initialize the model hyperparameters so that training runs for 100 epochs with the Mean Squared Error loss and the Adam optimizer.

# Creating a DeepAutoencoder class
class DeepAutoencoder(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.encoder = torch.nn.Sequential(
            torch.nn.Linear(28 * 28, 256),
            torch.nn.ReLU(),
            torch.nn.Linear(256, 128),
            torch.nn.ReLU(),
            torch.nn.Linear(128, 64),
            torch.nn.ReLU(),
            torch.nn.Linear(64, 10)
        )
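        # --- Assumed completion: the listing above is truncated in the manual. ---
        # The decoder is assumed to mirror the encoder and end with a Sigmoid so the
        # outputs stay in [0, 1]; the hyperparameters follow the values stated in the
        # text (100 epochs, MSE loss, Adam optimizer).
        self.decoder = torch.nn.Sequential(
            torch.nn.Linear(10, 64),
            torch.nn.ReLU(),
            torch.nn.Linear(64, 128),
            torch.nn.ReLU(),
            torch.nn.Linear(128, 256),
            torch.nn.ReLU(),
            torch.nn.Linear(256, 28 * 28),
            torch.nn.Sigmoid()
        )

    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded

# Instantiating the model and the objects referenced in the training loop below
model = DeepAutoencoder()
criterion = torch.nn.MSELoss()
num_epochs = 100
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)  # learning rate is an assumption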

Step 3: Training loop

The training loop iterates for 100 epochs and does the following:
• Iterates over each batch and calculates the loss between the output image and the original image (which also serves as the target).
• Averages the loss over each batch and stores the images and their outputs for each epoch.
After the loop ends, we plot the training loss to better understand the training process. As we can see, the loss decreases with each consecutive epoch, so the training can be deemed successful.

# List that will store the training loss
train_loss = []

# Dictionary that will store the
# different images and outputs for
# various epochs
outputs = {}

batch_size = len(train_loader)

# Training loop starts
for epoch in range(num_epochs):

    # Initializing variable for storing loss
    running_loss = 0

    # Iterating over the training dataset
    for batch in train_loader:

        # Loading image(s) and
        # reshaping into a 1-d vector
        img, _ = batch
        img = img.reshape(-1, 28*28)

        # Generating output
        out = model(img)

        # Calculating loss
        loss = criterion(out, img)

        # Updating weights according
        # to the calculated loss
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Incrementing loss
        running_loss += loss.item()

    # Averaging out loss over the entire epoch
    running_loss /= batch_size
    train_loss.append(running_loss)

    # Storing useful images and
    # reconstructed outputs for the last batch
    outputs[epoch+1] = {'img': img, 'out': out}

# Plotting the training loss
plt.plot(range(1, num_epochs+1), train_loss)
plt.xlabel("Number of epochs")
plt.ylabel("Training Loss")
plt.show()

Output:

Step 4: Visualizing the reconstruction

The best part of this project is that the reader can visualize the reconstruction at each stored epoch and follow the iterative learning of the model.
• We first plot the first 5 reconstructed (output) images for epochs = [1, 5, 10, 50, 100].
• Then we plot the corresponding original images on the bottom row for comparison.
We can see how the reconstruction improves with each epoch and gets very close to the original by the last epoch.
# Plotting is done on a 7x5 subplot
# Plotting the reconstructed images

# Initializing subplot counter
counter = 1

# Plotting reconstructions
# for epochs = [1, 5, 10, 50, 100]
epochs_list = [1, 5, 10, 50, 100]

# Iterating over specified epochs
for val in epochs_list:

    # Extracting recorded information
    temp = outputs[val]['out'].detach().numpy()
    title_text = f"Epoch = {val}"

    # Plotting first five images of the last batch
    for idx in range(5):
        plt.subplot(7, 5, counter)
        plt.title(title_text)
        plt.imshow(temp[idx].reshape(28, 28), cmap='gray')
        plt.axis('off')

        # Incrementing the subplot counter
        counter += 1

# Plotting original images

# Iterating over the first five
# images of the last batch
for idx in range(5):

    # Obtaining image from the dictionary
    val = outputs[10]['img']

    # Plotting image
    plt.subplot(7, 5, counter)
    plt.imshow(val[idx].reshape(28, 28), cmap='gray')
    plt.title("Original Image")
    plt.axis('off')

    # Incrementing subplot counter
    counter += 1

plt.tight_layout()
plt.show()

OUTPUT:

Visualizing the reconstruction from the data collected during the training process

Step 5: Checking performance on the test set

It is good practice in machine learning to check the model's performance on the test set as well. To do that, we take the following steps:
• Generate outputs for the last batch of the test set.
• Plot the first 10 outputs and the corresponding original images for comparison.
# Dictionary that will store the different
# images and outputs for various epochs
outputs = {}

# Extracting the last batch from the test dataset
img, _ = list(test_loader)[-1]

# Reshaping into a 1d vector
img = img.reshape(-1, 28 * 28)

# Generating output for the obtained batch
out = model(img)

# Storing information in dictionary
outputs['img'] = img
outputs['out'] = out

# Plotting reconstructed images

# Initializing subplot counter
counter = 1
val = outputs['out'].detach().numpy()

# Plotting first 10 images of the batch
for idx in range(10):
    plt.subplot(2, 10, counter)
    plt.title("Reconstructed \n image")
    plt.imshow(val[idx].reshape(28, 28), cmap='gray')
    plt.axis('off')

    # Incrementing subplot counter
    counter += 1

# Plotting original images

# Plotting first 10 images
for idx in range(10):
    val = outputs['img']
    plt.subplot(2, 10, counter)
    plt.imshow(val[idx].reshape(28, 28), cmap='gray')
    plt.title("Original Image")
    plt.axis('off')

    # Incrementing subplot counter
    counter += 1

plt.tight_layout()
plt.show()

OUTPUT:

Verifying performance on the test set

LAB ASSIGNMENT # 5

Write a code on predicting Airline Passengers count based on LSTM and RNN

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,LSTM
from sklearn.preprocessing import MinMaxScaler
data = pd.read_csv('../input/air-passengers/AirPassengers.csv')
data.head()

     Month   #Passengers
0  1949-01           112
1  1949-02           118
2  1949-03           132
3  1949-04           129
4  1949-05           121

data.rename(columns={'#Passengers':'passengers'},inplace=True)
data = data['passengers']
data=np.array(data).reshape(-1,1)
plt.plot(data)
[<matplotlib.lines.Line2D at 0x7f3178d033d0>]

Feature Scaling
scaler = MinMaxScaler()
data = scaler.fit_transform(data)
train_size = 100
test_size = 44
train = data[0:train_size, :]
test = data[train_size:, :]
train.shape
(100, 1)
test.shape
(44, 1)
def get_data(data, look_back):
    dataX, dataY = [], []
    for i in range(len(data) - look_back - 1):
        a = data[i:(i + look_back), 0]
        dataX.append(a)
        dataY.append(data[i + look_back, 0])
    return np.array(dataX), np.array(dataY)

look_back = 1
X_train, y_train = get_data(train, look_back)
X_train.shape
(98, 1)
X_test, y_test = get_data(test, look_back)
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

X_train.shape
(98, 1, 1)

type(y_test)
numpy.ndarray
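To make the windowing behaviour of get_data explicit: for look_back = 1 each input is simply the passenger count at time t and the target is the count at time t+1. A tiny illustrative check (the toy array below is not part of the lab data):

# Hedged illustration of what get_data produces for look_back = 1
toy = np.array([[0.1], [0.2], [0.3], [0.4]])
tx, ty = get_data(toy, look_back=1)
# tx -> [[0.1], [0.2]]   (inputs: the value at time t)
# ty -> [0.2, 0.3]       (targets: the value at time t+1)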

Building the LSTM

model = Sequential()
model.add(LSTM(5, input_shape=(1, look_back)))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
lstm (LSTM)                  (None, 5)                 140
_________________________________________________________________
dense (Dense)                (None, 1)                 6
=================================================================
Total params: 146
Trainable params: 146
Non-trainable params: 0
_________________________________________________________________
model.fit(X_train, y_train, epochs=25, batch_size=1)
Train on 98 samples
Epoch 1/25
98/98 [==============================] - 2s 17ms/sample - loss: 0.0425
Epoch 2/25
98/98 [==============================] - 0s 2ms/sample - loss: 0.0191
Epoch 3/25
98/98 [==============================] - 0s 2ms/sample - loss: 0.0142
Epoch 4/25
98/98 [==============================] - 0s 2ms/sample - loss: 0.0129
Epoch 5/25
98/98 [==============================] - 0s 2ms/sample - loss: 0.0118
Epoch 6/25
98/98 [==============================] - 0s 2ms/sample - loss: 0.0107

Epoch 7/25
98/98 [==============================] - 0s 2ms/sample - loss: 0.0096
Epoch 8/25
98/98 [==============================] - 0s 2ms/sample - loss: 0.0085
Epoch 9/25
98/98 [==============================] - 0s 2ms/sample - loss: 0.0074
Epoch 10/25
98/98 [==============================] - 0s 2ms/sample - loss: 0.0064
Epoch 11/25
98/98 [==============================] - 0s 2ms/sample - loss: 0.0056
Epoch 12/25
98/98 [==============================] - 0s 2ms/sample - loss: 0.0046
Epoch 13/25
98/98 [==============================] - 0s 2ms/sample - loss: 0.0039
Epoch 14/25
98/98 [==============================] - 0s 2ms/sample - loss: 0.0033
Epoch 15/25
98/98 [==============================] - 0s 2ms/sample - loss: 0.0029
Epoch 16/25
98/98 [==============================] - 0s 2ms/sample - loss: 0.0026
Epoch 17/25
98/98 [==============================] - 0s 2ms/sample - loss: 0.0024
Epoch 18/25
98/98 [==============================] - 0s 2ms/sample - loss: 0.0023
Epoch 19/25
98/98 [==============================] - 0s 2ms/sample - loss: 0.0022
Epoch 20/25
98/98 [==============================] - 0s 2ms/sample - loss: 0.0022
Epoch 21/25
98/98 [==============================] - 0s 2ms/sample - loss: 0.0022
Epoch 22/25
98/98 [==============================] - 0s 2ms/sample - loss: 0.0022
Epoch 23/25
98/98 [==============================] - 0s 2ms/sample - loss: 0.0022
Epoch 24/25
98/98 [==============================] - 0s 2ms/sample - loss: 0.0022
Epoch 25/25
98/98 [==============================] - 0s 2ms/sample - loss: 0.0021
<tensorflow.python.keras.callbacks.History at 0x7f3178148f10>
y_pred = model.predict(X_test)
scaler.scale_

array([0.0019305])
y_pred = scaler.inverse_transform(y_pred)
y_test = y_test.reshape(-1, 1)
y_test = scaler.inverse_transform(y_test)
# plot baseline and predictions
plt.figure(figsize=(14,5))
plt.plot(y_test, label = 'real number of passengers')
plt.plot(y_pred, label = 'predicted number of passengers')
plt.ylabel('# passengers')
plt.legend()
plt.show()

LAB ASSIGNMENT # 6

WAP to develop least squares Regression in Python.

Least Squares Regression in Python

Recall that if we enumerate the estimation of the data at each data point x_i, with the estimation function ŷ(x) = α1*x + α2, this gives us the following system of equations:

    ŷ(x_i) = α1*x_i + α2 ≈ y_i,   for i = 1, ..., n.

If the data were absolutely perfect (i.e., no noise), the estimation function would go through all the data points, resulting in the system of equations ŷ(x_i) = y_i for every i. If we take A to be as defined previously, this would result in the matrix equation

    Y = Aβ.

However, since the data is not perfect, there will not be an estimation function that can go through all the data points, and this system has no solution. Therefore, we need to use the least squares regression that we derived in the previous two sections to get a solution.
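For reference, the solution computed by the "direct inverse" code below is the standard normal-equation form of least squares (a well-known result, restated here for convenience):

    β = (AᵀA)⁻¹ Aᵀ Y,

which is the β that minimizes ‖Aβ − Y‖².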

Consider the artificial data created by x = np.linspace(0, 1, 101) and y = 1 + x + x * np.random.random(len(x)). Do a least squares regression with an estimation function defined by ŷ = α1*x + α2. Plot the data points along with the least squares regression line. Note that we expect α1 ≈ 1.5 and α2 ≈ 1.0 based on this data; due to the random noise we added, your results may be slightly different.

Use direct inverse method

import numpy as np
from scipy import optimize
import matplotlib.pyplot as plt
plt.style.use('seaborn-poster')

# generate x and y
x = np.linspace(0, 1, 101)
y = 1 + x + x * np.random.random(len(x))

# assemble matrix A
A = np.vstack([x, np.ones(len(x))]).T

# turn y into a column vector
y = y[:, np.newaxis]

# Direct least squares regression
alpha = np.dot((np.dot(np.linalg.inv(np.dot(A.T, A)), A.T)), y)
print(alpha)

[[1.459573  ]
 [1.02952189]]

# plot the results
plt.figure(figsize=(10, 8))
plt.plot(x, y, 'b.')
plt.plot(x, alpha[0]*x + alpha[1], 'r')
plt.xlabel('x')
plt.ylabel('y')
plt.show()

In Python, there are many different ways to conduct the least squares regression. For example, we can use packages such as numpy, scipy, statsmodels, sklearn and so on to get a least squares solution. Here we continue with the above example and introduce a few more ways to do it. Feel free to choose the one you like.

Use the pseudoinverse
We saw before that (AᵀA)⁻¹Aᵀ is called the pseudo-inverse, therefore we could use the pinv function in numpy to calculate it directly.

pinv = np.linalg.pinv(A)
alpha = pinv.dot(y)
print(alpha)

[[1.459573 ]
[1.02952189]]

Use numpy.linalg.lstsq
Actually, numpy has already implemented the least squares method, so we can just call the function to get a solution. The function returns more things than the solution itself; please check the documentation for details.

alpha = np.linalg.lstsq(A, y, rcond=None)[0]


print(alpha)

Use optimize.curve_fit from scipy

This scipy function is actually very powerful: it can fit not only linear functions but many other functional forms, including non-linear ones. Here we show the linear example from above. Note that, using this function, we don't need to turn y into a column vector.

# generate x and y
x = np.linspace(0, 1, 101)
y = 1 + x + x * np.random.random(len(x))

def func(x, a, b):
    y = a*x + b
    return y

alpha = optimize.curve_fit(func, xdata=x, ydata=y)[0]
print(alpha)

[1.44331612 1.0396133 ]

LAB ASSIGNMENT # 7

WAP to perform Exploratory Data Analysis on any given data set (the breast cancer dataset from sklearn).

import sklearn.datasets
import numpy as np

breast_cancer = sklearn.datasets.load_breast_cancer()
x = breast_cancer.data
y = breast_cancer.target

print(x)
print(y)

print(x.shape, y.shape)
(569, 30) (569,)

import pandas as pd

data=pd.DataFrame(breast_cancer.data,columns=breast_cancer.feature_names)

data['class'] = breast_cancer.target

data.head()

data.describe()

print(data['class'].value_counts())

1    357
0    212
Name: class, dtype: int64

print(breast_cancer.target_names)

['malignant' 'benign']

data.groupby('class').mean()

from sklearn.model_selection import train_test_split

X= data.drop('class',axis=1)
Y=data['class']

type(X)

pandas.core.frame.DataFrame

X_train, X_test, Y_train,Y_test=train_test_split(X,Y)

print(X.shape, X_train.shape, X_test.shape)

(569, 30) (426, 30) (143, 30)

print(Y.shape,Y_train.shape,Y_test.shape)

(569,) (426,) (143,)

X_train, X_test, Y_train,Y_test=train_test_split(X,Y,test_size=0.1)

print(X.shape,X_train.shape,X_test.shape)

(569, 30) (512, 30) (57, 30)

print(Y.shape,Y_train.shape,Y_test.shape)

(569,) (512,) (57,)

print(Y.mean(),Y_train.mean(),Y_test.mean())

0.6274165202108963 0.6328125 0.5789473684210527

X_train, X_test, Y_train,Y_test=train_test_split(X,Y,test_size=0.1, stratify=Y)

print(Y.mean(),Y_train.mean(),Y_test.mean())

0.6274165202108963 0.626953125 0.631578947368421

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.1, stratify=Y, random_state=1)

print(Y.mean(), Y_train.mean(), Y_test.mean())

0.6274165202108963 0.626953125 0.631578947368421

1. Introduction:

We aim to accomplish the following in this study:
Identify and visualize which factors contribute to customer churn, and build a prediction model that will:

• Classify whether a customer is going to churn or not
• Preferably, based on model performance, attach a probability to the churn so that customer service can more easily target the low-hanging fruit in their efforts to prevent churn.

2. Data set review & preparation

In this section we explore the structure of our data:

1. To understand the input space of the data set
2. To prepare the sets for the exploratory and prediction tasks described in section 1

## REQUIRED LIBRARIES

# For data wrangling
import numpy as np
import pandas as pd

# For visualization
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

pd.options.display.max_rows = None
pd.options.display.max_columns = None

# Read the data frame
df = pd.read_csv('../input/Churn_Modelling.csv', delimiter=',')
df.shape
(10000, 14)

The DataFrame has 10000 rows with 14 attributes. We review it further to identify which attributes are necessary and what data manipulation needs to be carried out before exploratory analysis and prediction modelling.

# Check columns list and missing values
df.isnull().sum()


RowNumber 0
CustomerId 0
Surname 0
CreditScore 0
Geography 0
Gender 0
Age 0
Tenure 0
Balance 0
NumOfProducts 0
HasCrCard 0
IsActiveMember 0
EstimatedSalary 0
Exited 0
dtype: int64
# Get unique count for each variable
df.nunique()

RowNumber 10000
CustomerId 10000
Surname 2932
CreditScore 460
Geography 3
Gender 2
Age 70
Tenure 11
Balance 6382
NumOfProducts 4

HasCrCard 2
IsActiveMember 2
EstimatedSalary 9999
Exited 2
dtype: int64

From the above, we will not require the first 2 attributes, as they are specific to a customer. The surname is borderline: keeping it would amount to profiling, so we exclude it as well.

# Drop the columns as explained above
df = df.drop(["RowNumber", "CustomerId", "Surname"], axis=1)

# Review the top rows of what is left of the data frame
df.head()


From the above, a couple of questions linger:

1. The data appears to be a snapshot at some point in time, e.g. the balance is for a given date, which leaves a lot of questions:
• What date is it, and of what relevance is this date?
• Would it be possible to obtain balances over a period of time as opposed to a single date?
2. There are customers who have exited but still have a balance in their account! What would this mean? Could they have exited from a product and not the bank?
3. What does being an active member mean, and are there different degrees to it? Could it be better to provide transaction counts, both credits and debits to the account, instead?
4. A breakdown of the products bought by a customer could provide more information than a simple product count.
5. For this exercise, we proceed to model without context, even though having context and a better understanding of the data extraction process would typically give better insight and possibly lead to better, more contextual modelling results.
# Check variable data types
df.dtypes

CreditScore          int64
Geography           object
Gender              object
Age                  int64
Tenure               int64
Balance            float64
NumOfProducts        int64
HasCrCard            int64
IsActiveMember       int64
EstimatedSalary    float64
Exited               int64
dtype: object

So we mostly have categorical variables and 5 continuous variables.
3. Exploratory Data Analysis

labels = 'Exited', 'Retained'
sizes = [df.Exited[df['Exited']==1].count(), df.Exited[df['Exited']==0].count()]
explode = (0, 0.1)
fig1, ax1 = plt.subplots(figsize=(10, 8))
ax1.pie(sizes, explode=explode, labels=labels, autopct='%1.1f%%',
        shadow=True, startangle=90)
ax1.axis('equal')
plt.title("Proportion of customer churned and retained", size=20)
plt.show()

So about 20% of the customers have churned. A baseline model could therefore predict that 20% of customers will churn. Given that 20% is a small number, we need to ensure that the chosen model predicts this 20% with great accuracy, as it is in the bank's interest to identify and keep this group, more so than accurately predicting the customers that are retained.

# We first review the 'Status' relation with categorical variables


fig, axarr = plt.subplots(2, 2, figsize=(20, 12))
sns.countplot(x='Geography', hue = 'Exited',data = df, ax=axarr[0][0])
sns.countplot(x='Gender', hue = 'Exited',data = df, ax=axarr[0][1])
sns.countplot(x='HasCrCard', hue = 'Exited',data = df, ax=axarr[1][0])
sns.countplot(x='IsActiveMember', hue = 'Exited',data = df, ax=axarr[1][1])
Out[9]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f1a5d3f2780>
# Relations based on the continuous data attributes
fig, axarr = plt.subplots(3, 2, figsize=(20, 12))
sns.boxplot(y='CreditScore', x='Exited', hue='Exited', data=df, ax=axarr[0][0])
sns.boxplot(y='Age', x='Exited', hue='Exited', data=df, ax=axarr[0][1])
sns.boxplot(y='Tenure', x='Exited', hue='Exited', data=df, ax=axarr[1][0])
sns.boxplot(y='Balance', x='Exited', hue='Exited', data=df, ax=axarr[1][1])
sns.boxplot(y='NumOfProducts', x='Exited', hue='Exited', data=df, ax=axarr[2][0])
sns.boxplot(y='EstimatedSalary', x='Exited', hue='Exited', data=df, ax=axarr[2][1])
Out[10]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f1a5ca8bc18>

4. Feature engineering
We seek to add features that are likely to have an impact on the probability of churning. We first split the train and test sets

# Split train and test data
df_train = df.sample(frac=0.8, random_state=200)
df_test = df.drop(df_train.index)
print(len(df_train))
print(len(df_test))
8000
2000

df_train['BalanceSalaryRatio'] = df_train.Balance / df_train.EstimatedSalary
sns.boxplot(y='BalanceSalaryRatio', x='Exited', hue='Exited', data=df_train)
plt.ylim(-1, 5)
Out[12]:
(-1, 5)

We have seen that salary has little effect on the chance of a customer churning. However, as seen above, the ratio of bank balance to estimated salary indicates that customers with a higher balance-to-salary ratio churn more, which would be worrying to the bank as this impacts their source of loan capital.

# Given that tenure is a 'function' of age, we introduce a variable aiming to standardize tenure over age:
df_train['TenureByAge'] = df_train.Tenure / (df_train.Age)
sns.boxplot(y='TenureByAge', x='Exited', hue='Exited', data=df_train)
plt.ylim(-1, 1)
plt.show()

'''Lastly we introduce a variable to capture credit score given age, to take into account credit behaviour vis-a-vis adult life :-)'''
df_train['CreditScoreGivenAge'] = df_train.CreditScore / (df_train.Age)

# Resulting data frame
df_train.head()


5. Data prep for model fitting

# Arrange columns by data type for easier manipulation
continuous_vars = ['CreditScore', 'Age', 'Tenure', 'Balance', 'NumOfProducts', 'EstimatedSalary',
                   'BalanceSalaryRatio', 'TenureByAge', 'CreditScoreGivenAge']
cat_vars = ['HasCrCard', 'IsActiveMember', 'Geography', 'Gender']
df_train = df_train[['Exited'] + continuous_vars + cat_vars]
df_train.head()

'''For the one-hot variables, we change 0 to -1 so that the models can capture a negative
relation where the attribute is inapplicable, instead of 0'''
df_train.loc[df_train.HasCrCard == 0, 'HasCrCard'] = -1
df_train.loc[df_train.IsActiveMember == 0, 'IsActiveMember'] = -1
df_train.head()

# One hot encode the categorical variables
lst = ['Geography', 'Gender']
remove = list()
for i in lst:
    if (df_train[i].dtype == np.str or df_train[i].dtype == np.object):
        for j in df_train[i].unique():
            df_train[i + '_' + j] = np.where(df_train[i] == j, 1, -1)
        remove.append(i)
df_train = df_train.drop(remove, axis=1)
df_train.head()

# MinMax scaling the continuous variables
minVec = df_train[continuous_vars].min().copy()
maxVec = df_train[continuous_vars].max().copy()
df_train[continuous_vars] = (df_train[continuous_vars] - minVec) / (maxVec - minVec)
df_train.head()
# Data prep pipeline for test data
def DfPrepPipeline(df_predict, df_train_Cols, minVec, maxVec):
    # Add new features
    df_predict['BalanceSalaryRatio'] = df_predict.Balance / df_predict.EstimatedSalary
    df_predict['TenureByAge'] = df_predict.Tenure / (df_predict.Age - 18)
    df_predict['CreditScoreGivenAge'] = df_predict.CreditScore / (df_predict.Age - 18)
    # Reorder the columns
    continuous_vars = ['CreditScore', 'Age', 'Tenure', 'Balance', 'NumOfProducts', 'EstimatedSalary',
                       'BalanceSalaryRatio', 'TenureByAge', 'CreditScoreGivenAge']
    cat_vars = ['HasCrCard', 'IsActiveMember', 'Geography', 'Gender']
    df_predict = df_predict[['Exited'] + continuous_vars + cat_vars]
    # Change the 0 in categorical variables to -1
    df_predict.loc[df_predict.HasCrCard == 0, 'HasCrCard'] = -1
    df_predict.loc[df_predict.IsActiveMember == 0, 'IsActiveMember'] = -1
    # One hot encode the categorical variables
    lst = ["Geography", "Gender"]
    remove = list()
    for i in lst:
        for j in df_predict[i].unique():
            df_predict[i + '_' + j] = np.where(df_predict[i] == j, 1, -1)
        remove.append(i)
    df_predict = df_predict.drop(remove, axis=1)
    # Ensure that all one hot encoded variables that appear in the train data also appear in the subsequent data
    L = list(set(df_train_Cols) - set(df_predict.columns))
    for l in L:
        df_predict[str(l)] = -1
    # MinMax scale continuous variables based on min and max from the train data
    df_predict[continuous_vars] = (df_predict[continuous_vars] - minVec) / (maxVec - minVec)
    # Ensure that the variables are ordered in the same way as in the train set
    df_predict = df_predict[df_train_Cols]
    return df_predict
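The function above is only defined here; a typical call on the held-out test set would look like the following. This is a hedged usage sketch: the arguments simply reuse the objects created earlier in this section (df_test, df_train.columns, minVec, maxVec).

# Hedged usage sketch of DfPrepPipeline on the held-out test data
df_test = DfPrepPipeline(df_test, df_train.columns, minVec, maxVec)
df_test.head()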
6. Model fitting and selection
For the model fitting, I will try out the following:

• Logistic regression in the primal space and with different kernels
• SVM in the primal space and with different kernels
• Ensemble models

# Support functions
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from scipy.stats import uniform

# Fit models
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier

# Scoring functions
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve

# Function to give best model score and parameters
def best_model(model):
    print(model.best_score_)
    print(model.best_params_)
    print(model.best_estimator_)

def get_auc_scores(y_actual, method, method2):
    auc_score = roc_auc_score(y_actual, method)
    fpr_df, tpr_df, _ = roc_curve(y_actual, method2)
    return (auc_score, fpr_df, tpr_df)

# Fit primal logistic regression
param_grid = {'C': [0.1, 0.5, 1, 10, 50, 100], 'max_iter': [250], 'fit_intercept': [True],
              'intercept_scaling': [1], 'penalty': ['l2'], 'tol': [0.00001, 0.0001, 0.000001]}
log_primal_Grid = GridSearchCV(LogisticRegression(solver='lbfgs'), param_grid, cv=10, refit=True, verbose=0)
log_primal_Grid.fit(df_train.loc[:, df_train.columns != 'Exited'], df_train.Exited)
best_model(log_primal_Grid)

0.815125
{'C': 100, 'fit_intercept': True, 'intercept_scaling': 1, 'max_iter': 250, 'penalty': 'l2', 'tol': 1e-05}
LogisticRegression(C=100, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, max_iter=250, multi_class='warn',
                   n_jobs=None, penalty='l2', random_state=None, solver='lbfgs',
                   tol=1e-05, verbose=0, warm_start=False)

# Fit logistic regression with degree 2 polynomial kernel
param_grid = {'C': [0.1, 10, 50], 'max_iter': [300, 500], 'fit_intercept': [True],
              'intercept_scaling': [1], 'penalty': ['l2'], 'tol': [0.0001, 0.000001]}
poly2 = PolynomialFeatures(degree=2)
df_train_pol2 = poly2.fit_transform(df_train.loc[:, df_train.columns != 'Exited'])
log_pol2_Grid = GridSearchCV(LogisticRegression(solver='liblinear'), param_grid, cv=5, refit=True, verbose=0)
log_pol2_Grid.fit(df_train_pol2, df_train.Exited)
best_model(log_pol2_Grid)

0.855625
{'C': 10, 'fit_intercept': True, 'intercept_scaling': 1, 'max_iter': 300, 'penalty': 'l2', 'tol': 0.0001}
LogisticRegression(C=10, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, max_iter=300, multi_class='warn',
                   n_jobs=None, penalty='l2', random_state=None, solver='liblinear',
                   tol=0.0001, verbose=0, warm_start=False)

# Fit SVM with RBF kernel
param_grid = {'C': [0.5, 100, 150], 'gamma': [0.1, 0.01, 0.001], 'probability': [True], 'kernel': ['rbf']}
SVM_grid = GridSearchCV(SVC(), param_grid, cv=3, refit=True, verbose=0)
SVM_grid.fit(df_train.loc[:, df_train.columns != 'Exited'], df_train.Exited)
best_model(SVM_grid)
