Ensembling

Download as pdf or txt
Download as pdf or txt
You are on page 1of 2

10/19/24, 8:33 AM — 178_ensembling.ipynb - Colab

Bagging

import pandas as pd
from sklearn import model_selection
from sklearn.ensemble import RandomForestClassifier

# Column names for the Pima Indians Diabetes dataset: 8 numeric features
# followed by the binary 'class' target.
COLUMN_NAMES = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']


def evaluate_bagging(csv_path='pima-indians-diabetes.csv',
                     n_splits=10,
                     num_trees=100,
                     max_features=3,
                     random_state=None):
    """Evaluate a bagging ensemble (random forest) with k-fold cross-validation.

    Parameters
    ----------
    csv_path : str
        Path to the Pima Indians Diabetes CSV file (no header row).
    n_splits : int
        Number of folds for KFold cross-validation.
    num_trees : int
        Number of trees in the random forest.
    max_features : int
        Number of features considered at each split.
    random_state : int or None
        Seed for the forest. The original notebook left this unset, which
        makes the reported mean accuracy non-reproducible; pass an int to
        get repeatable results.

    Returns
    -------
    float
        Mean cross-validated accuracy.
    """
    # Load the dataset; the file has no header, so supply column names.
    dataframe = pd.read_csv(csv_path, names=COLUMN_NAMES)

    # Split into input features (X, first 8 columns) and target (Y, last column).
    array = dataframe.values
    X = array[:, 0:8]
    Y = array[:, 8]

    # KFold without shuffling, as in the original notebook.
    kfold = model_selection.KFold(n_splits=n_splits)

    model = RandomForestClassifier(n_estimators=num_trees,
                                   max_features=max_features,
                                   random_state=random_state)

    # Mean accuracy across the folds.
    results = model_selection.cross_val_score(model, X, Y, cv=kfold)
    return results.mean()


if __name__ == '__main__':
    # Reproduces the original script's behavior when run directly.
    print(evaluate_bagging())

0.7668831168831168

Boosting

import pandas as pd
from sklearn import model_selection
from sklearn.ensemble import AdaBoostClassifier

# Column names for the Pima Indians Diabetes dataset: 8 numeric features
# followed by the binary 'class' target.
COLUMN_NAMES = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']


def evaluate_boosting(csv_path='pima-indians-diabetes.csv',
                      n_splits=10,
                      num_trees=30,
                      random_state=None):
    """Evaluate an AdaBoost ensemble with k-fold cross-validation.

    Parameters
    ----------
    csv_path : str
        Path to the Pima Indians Diabetes CSV file (no header row).
    n_splits : int
        Number of folds for KFold cross-validation.
    num_trees : int
        Number of boosting stages (weak learners).
    random_state : int or None
        Seed for the booster. The original notebook left this unset; pass
        an int to make the reported mean accuracy reproducible.

    Returns
    -------
    float
        Mean cross-validated accuracy.
    """
    # Load the dataset; the file has no header, so supply column names.
    dataframe = pd.read_csv(csv_path, names=COLUMN_NAMES)

    # Split into input features (X, first 8 columns) and target (Y, last column).
    array = dataframe.values
    X = array[:, 0:8]
    Y = array[:, 8]

    # KFold without shuffling, as in the original notebook.
    kfold = model_selection.KFold(n_splits=n_splits)

    # 'SAMME' matches the original notebook. NOTE(review): the `algorithm`
    # parameter is deprecated in recent scikit-learn releases — confirm the
    # installed version before relying on it.
    model = AdaBoostClassifier(n_estimators=num_trees,
                               algorithm='SAMME',
                               random_state=random_state)

    # Mean accuracy across the folds.
    results = model_selection.cross_val_score(model, X, Y, cv=kfold)
    return results.mean()


if __name__ == '__main__':
    # Reproduces the original script's behavior when run directly.
    print(evaluate_boosting())

0.7656185919343814

Stacking

from sklearn import datasets
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import StackingClassifier
import xgboost
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score


def evaluate_stacking(cv=5, scoring='accuracy'):
    """Compare four base classifiers against a stacking ensemble of them.

    Loads the breast cancer dataset bundled with scikit-learn, cross-validates
    a decision tree, a random forest, a k-NN classifier, and an XGBoost
    classifier individually, then a StackingClassifier that combines all four
    with LogisticRegression as the final estimator. Prints each mean accuracy
    as it goes (matching the original notebook's output).

    Parameters
    ----------
    cv : int
        Number of cross-validation folds.
    scoring : str
        Scoring metric name passed to cross_val_score.

    Returns
    -------
    dict
        Mapping of model class name (and 'stacking') to mean CV score.
    """
    # Load the breast cancer dataset and put the features in a DataFrame.
    df = datasets.load_breast_cancer()
    X = pd.DataFrame(columns=df.feature_names, data=df.data)
    y = df.target

    # Per-column missing-value counts. In the original notebook this cell
    # displayed the counts; the bundled dataset contains no nulls.
    X.isnull().sum()

    # Base estimators (all with library defaults, as in the original).
    dtc = DecisionTreeClassifier()
    rfc = RandomForestClassifier()
    knn = KNeighborsClassifier()
    xgb = xgboost.XGBClassifier()

    scores = {}

    # Evaluate each base classifier on its own.
    for estimator in (dtc, rfc, knn, xgb):
        name = estimator.__class__.__name__
        mean_score = cross_val_score(estimator, X, y, cv=cv, scoring=scoring).mean()
        scores[name] = mean_score
        print("The accuracy score of {} is:".format(name), mean_score)

    # Stacking ensemble: the four base models feed a logistic regression.
    named_estimators = [('dtc', dtc), ('rfc', rfc), ('knn', knn), ('xgb', xgb)]
    stack_model = StackingClassifier(estimators=named_estimators,
                                     final_estimator=LogisticRegression())
    stack_score = cross_val_score(stack_model, X, y, cv=cv, scoring=scoring).mean()
    scores['stacking'] = stack_score
    print("The accuracy score of the stacking model is:", stack_score)

    return scores


if __name__ == '__main__':
    # Reproduces the original script's behavior when run directly.
    evaluate_stacking()

The accuracy score of DecisionTreeClassifier is: 0.9156342182890856


The accuracy score of RandomForestClassifier is: 0.95960254618848
The accuracy score of KNeighborsClassifier is: 0.9279459711224964
The accuracy score of XGBClassifier is: 0.9701288619779538
The accuracy score of the stacking model is: 0.9683744760130415

https://colab.research.google.com/drive/1JcAK1W_kTIziCFGcxTZWg1rV2gJNRipB#scrollTo=MepCSElnuFeM&printMode=true 2/2

You might also like

pFad - Phonifier reborn

pFad - The Proxy. © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy