# Assignment 4 solution
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
# Load the dataset.
# NOTE(review): the path is absolute from the filesystem root; adjust it if
# wine.csv actually lives next to this script.
file_path = '/wine.csv'
wine = pd.read_csv(file_path)

# Prepare the data: the 'Wine' column is the class label, every other column
# is used as a feature.
X = wine.drop('Wine', axis=1)
y = wine['Wine']

# Split BEFORE scaling so the held-out rows never influence the scaler's
# mean/variance. Fitting the scaler on the full dataset would leak test-set
# statistics into training.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize the features: learn the scaling parameters from the training
# split only, then apply that same transformation to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept for backward compatibility with any code that still reads X_scaled;
# it is the full feature matrix transformed with the train-fitted scaler.
X_scaled = scaler.transform(X)
# Initialize the models to compare, keyed by the label used in the results.
# The estimators whose fitting is randomized (decision tree, neural network)
# get a fixed seed so the cross-validation scores are reproducible from run
# to run; without it the ranking of the models can change between runs.
# max_iter is set high for the iterative solvers, presumably to avoid
# convergence warnings.
models = {
    'Logistic Regression': LogisticRegression(max_iter=10000),
    'Decision Tree': DecisionTreeClassifier(random_state=42),
    'Naive Bayes': GaussianNB(),
    'SVM': SVC(),
    'ANN': MLPClassifier(max_iter=10000, random_state=42),
}
# Score every candidate model with 10-fold cross-validation on the training
# split, keeping the array of per-fold accuracies under the model's label.
results = {}
for model_name, model in models.items():
    cv_scores = cross_val_score(
        model,
        X_train,
        y_train,
        scoring='accuracy',
        cv=10,
    )
    results[model_name] = cv_scores

# Tabulate the scores: one column per model, one row per fold.
results_df = pd.DataFrame(data=results)
# Draw one box per model showing the spread of its per-fold accuracies.
fig, ax = plt.subplots(figsize=(12, 8))
sns.boxplot(data=results_df, ax=ax)
ax.set_title('Comparison of ML Models')
ax.set_xlabel('Model')
ax.set_ylabel('Accuracy')
plt.show()

# Summary statistics (count, mean, std, quartiles) per model. As a bare
# expression this is only displayed in an interactive/notebook session.
results_df.describe()
# Conclusion: based on the mean cross-validation accuracy reported by
# results_df.describe(), Logistic Regression is the best algorithm.