0% found this document useful (0 votes)
3 views

Chenhao_HW1

ISYE6501 week 1 homework

Uploaded by

stonemmc
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
3 views

Chenhao_HW1

ISYE6501 week 1 homework

Uploaded by

stonemmc
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 5

5/21/24, 12:16 AM Chenhao_HW1

Chenhao_HW1
2024-05-20
Week 1 Homework submission

First, I import the credit card data and convert it into a matrix.

library(kernlab)
library(kknn)

# Load the credit card data set. read.table() is called with its defaults, so
# the columns get the automatic names V1..V11 shown by head() below.
cc_data <- read.table("credit_card_data.txt")

# View() opens a GUI data viewer, so only call it from an interactive session;
# non-interactive runs (Rscript, knitting) then skip it instead of depending on
# a display being available.
if (interactive()) {
  View(cc_data)
}

# Print the first rows as a quick sanity check of the import.
head(cc_data)

## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11
## 1 1 30.83 0.000 1.25 1 0 1 1 202 0 1
## 2 0 58.67 4.460 3.04 1 0 6 1 43 560 1
## 3 0 24.50 0.500 1.50 1 1 0 1 280 824 1
## 4 1 27.83 1.540 3.75 1 0 5 0 100 3 1
## 5 1 20.17 5.625 1.71 1 1 0 1 120 0 1
## 6 1 32.08 4.000 2.50 1 1 0 0 360 0 1

# Convert the data frame to a matrix; the ksvm() calls below index it by column.
# NOTE(review): the variable name `matrix` is the same as base R's matrix()
# function. Later code in this script refers to the data by this name, so it is
# left unchanged here.


matrix <- data.matrix(cc_data)

For 2.2.1, I follow the sample code provided, using the vanilladot (linear) kernel with the C value set
to 100.

# Fit a C-classification SVM with the linear (vanilladot) kernel and C = 100.
# Columns 1-10 of the data matrix are the predictors and column 11 is the
# class label; scaled = TRUE asks ksvm() to scale the predictors.
model <- ksvm(
  x = matrix[, 1:10],
  y = matrix[, 11],
  type = "C-svc",
  kernel = "vanilladot",
  C = 100,
  scaled = TRUE
)

## Setting default kernel parameters

# calculate a1…am: multiply model@xmatrix[[1]] element-wise by model@coef[[1]]
# (the coefficient vector is recycled down the rows) and sum each column,
# giving one value per predictor column.
# NOTE(review): the model was fit with scaled=TRUE, so these coefficients
# presumably apply to the scaled predictors -- confirm before using on raw data.
a <- colSums(model@xmatrix[[1]] * model@coef[[1]])
a

## V1 V2 V3 V4 V5
## -0.0010065348 -0.0011729048 -0.0016261967 0.0030064203 1.0049405641
## V6 V7 V8 V9 V10
## -0.0028259432 0.0002600295 -0.0005349551 -0.0012283758 0.1063633995

file:///C:/Users/quant/iCloudDrive/Documents/OMSA/2024/ISYE6501/Week1/HW1.html 1/5
5/21/24, 12:16 AM Chenhao_HW1

# calculate a0: the negation of the value stored in the model's b slot
a0 <- -model@b
a0

## [1] 0.08158492

# Predict a class for each row of the same data the model was fit on, then
# print the full prediction vector.
pred <- predict(object = model, newdata = matrix[, 1:10])
pred

## [1] 1 1 1 1 1 1 1 1 1 1 0 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [38] 1 1 1 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0
## [75] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [112] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [149] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [186] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1
## [223] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1
## [260] 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
## [297] 0 0 0 0 0 0 1 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
## [334] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [371] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [408] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [445] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [482] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [519] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1
## [556] 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
## [593] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
## [630] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0

# Training-set accuracy: the share of rows whose predicted class equals the
# actual label in column 11.
sum(matrix[, 11] == pred) / nrow(matrix)

## [1] 0.8639144

For question 3 in 2.2, I use the kknn classifier; here I create a function that takes the value of k as its argument.

#question 2.2.3
#use kknn to predict
# Numeric vector of zeros with one element per row of cc_data.
knnPred <- rep(0,nrow(cc_data))
# Leave-one-out accuracy of a k-nearest-neighbour classifier.
#
# Each row is predicted in turn by a kknn() model fitted on all the other rows;
# the continuous fitted value is turned into a 0/1 class by adding 0.5 and
# truncating.
#
# Arguments:
#   j     number of neighbours, passed to kknn() as k.
#   data  data frame whose response column is named V11 and whose remaining
#         columns are predictors. Defaults to the global cc_data, so existing
#         calls of the form choose_K(j) behave as before.
#
# Returns the fraction of rows whose predicted class equals V11.
choose_K <- function(j, data = cc_data) {
  n_rows <- nrow(data)

  # Predictions are collected in a local vector, so the function does not
  # depend on a pre-allocated global; seq_len() is safe for zero rows.
  loo_pred <- vapply(
    seq_len(n_rows),
    function(i) {
      model_knn <- kknn(V11 ~ ., data[-i, ], data[i, ], k = j, scale = TRUE)
      as.integer(fitted(model_knn) + 0.5)
    },
    integer(1)
  )

  sum(loo_pred == data[["V11"]]) / n_rows
}

file:///C:/Users/quant/iCloudDrive/Documents/OMSA/2024/ISYE6501/Week1/HW1.html 2/5
5/21/24, 12:16 AM Chenhao_HW1

# Evaluate choose_K() for every candidate k from 1 to 20 and store the
# accuracies; knn_Result[i] holds the accuracy obtained with k_Value[i].
k_Value <- seq(1, 20, by = 1)
knn_Result <- numeric(length(k_Value))
for (i in seq_along(k_Value)) {
  # Pass the k value itself rather than the loop position, so the loop stays
  # correct if k_Value is changed to a grid that does not run 1, 2, 3, ...
  knn_Result[i] <- choose_K(k_Value[i])
  print(paste0("Accuracy with K as ", k_Value[i], " is ", knn_Result[i]))
}

## [1] "Accuracy with K as 1 is 0.814984709480122"


## [1] "Accuracy with K as 2 is 0.814984709480122"
## [1] "Accuracy with K as 3 is 0.814984709480122"
## [1] "Accuracy with K as 4 is 0.814984709480122"
## [1] "Accuracy with K as 5 is 0.851681957186544"
## [1] "Accuracy with K as 6 is 0.845565749235474"
## [1] "Accuracy with K as 7 is 0.847094801223242"
## [1] "Accuracy with K as 8 is 0.848623853211009"
## [1] "Accuracy with K as 9 is 0.847094801223242"
## [1] "Accuracy with K as 10 is 0.850152905198777"
## [1] "Accuracy with K as 11 is 0.851681957186544"
## [1] "Accuracy with K as 12 is 0.853211009174312"
## [1] "Accuracy with K as 13 is 0.851681957186544"
## [1] "Accuracy with K as 14 is 0.851681957186544"
## [1] "Accuracy with K as 15 is 0.853211009174312"
## [1] "Accuracy with K as 16 is 0.851681957186544"
## [1] "Accuracy with K as 17 is 0.851681957186544"
## [1] "Accuracy with K as 18 is 0.851681957186544"
## [1] "Accuracy with K as 19 is 0.850152905198777"
## [1] "Accuracy with K as 20 is 0.850152905198777"

# Report the k with the highest accuracy. which.max() returns a position in
# knn_Result (the first one when several values tie), so it is mapped back
# through k_Value instead of assuming that position and k coincide.
print(paste0(
  "The K value with highest accuracy is ",
  k_Value[which.max(knn_Result)]
))

## [1] "The K value with highest accuracy is 12"

For question 3.1, I choose a larger kmax and use train.kknn to perform the cross-validation.

#question 3.1.a
# Let train.kknn() choose k by leave-one-out cross-validation, considering
# k values up to k_cv.
k_cv <- 80

kcvmodel <- train.kknn(
  V11 ~ .,
  data = cc_data,
  kmax = k_cv,
  kernel = "optimal",
  scale = TRUE
)

# Turn the model's continuous predictions into 0/1 classes (add 0.5 and
# truncate) and compute the share that matches column 11.
# NOTE(review): the predictions are made for the same rows the model was
# trained on, so accuracy_cv is an in-sample figure rather than a
# cross-validated one.
kcvpred <- as.integer(predict(kcvmodel, cc_data) + 0.5)
accuracy_cv <- sum(kcvpred == cc_data[, 11]) / nrow(cc_data)

# Print the training summary (best kernel and best k).
kcvmodel

file:///C:/Users/quant/iCloudDrive/Documents/OMSA/2024/ISYE6501/Week1/HW1.html 3/5
5/21/24, 12:16 AM Chenhao_HW1

##
## Call:
## train.kknn(formula = V11 ~ ., data = cc_data, kmax = k_cv, kernel = "optimal", scale = TRUE)
##
## Type of response variable: continuous
## minimal mean absolute error: 0.1850153
## Minimal mean squared error: 0.1073792
## Best kernel: optimal
## Best k: 58

Lastly, for question 3.1.b, I use the sample function to create a 60/20/20 train/validation/test split of the credit card data, then train
the same ksvm model as in 2.2.1 on the training set and use it to predict the validation and test sets.

#question 3.1.b

# Fix the RNG seed so the random split below is reproducible.
set.seed(6)

# Give every row a label: 1 = train, 2 = validation, 3 = test. Labels are drawn
# independently with probabilities 0.6 / 0.2 / 0.2, so the three group sizes
# are only approximately 60% / 20% / 20% of the rows.
key <- sample(
  1:3,
  size = nrow(cc_data),
  replace = TRUE,
  prob = c(0.6, 0.2, 0.2)
)
train <- cc_data[key == 1, ]
validation <- cc_data[key == 2, ]
test <- cc_data[key == 3, ]

# Reset the seed, then fit the linear-kernel C-svc (C = 100, scaled
# predictors) using only the training rows: columns 1-10 as predictors and
# column 11 as the class label.
set.seed(6)
ksvm_model <- ksvm(
  x = as.matrix(train[, 1:10]),
  y = train[, 11],
  type = "C-svc", kernel = "vanilladot", C = 100, scaled = TRUE
)

## Setting default kernel parameters

# Validation accuracy: predict the validation rows with the model trained on
# the training rows and report the share of labels predicted correctly.
ksvm_prediction <- predict(ksvm_model, validation[, 1:10])
ksvm_accuracy <- sum(validation[, 11] == ksvm_prediction) / nrow(validation)
ksvm_accuracy

## [1] 0.8671875

# Test accuracy: the same model scored on the held-out test rows.
ksvm_prediction1 <- predict(ksvm_model, test[, 1:10])
ksvm_accuracy1 <- sum(test[, 11] == ksvm_prediction1) / nrow(test)
ksvm_accuracy1

file:///C:/Users/quant/iCloudDrive/Documents/OMSA/2024/ISYE6501/Week1/HW1.html 4/5
5/21/24, 12:16 AM Chenhao_HW1

## [1] 0.9206349

Including Plots
Below is the plot for 2.2.3.

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that
generated the plot.

file:///C:/Users/quant/iCloudDrive/Documents/OMSA/2024/ISYE6501/Week1/HW1.html 5/5

You might also like

pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy