Random Forest

Download as txt, pdf, or txt
Download as txt, pdf, or txt
You are on page 1 of 5

RANDOM FOREST ALGORITHM

ALL ALGORITHMS USED ON THE 21 DATASET (text3) document

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier

from sklearn.metrics import accuracy_score, classification_report

# Text classification with TF-IDF features and a Random Forest.
# Pipeline: load workbook -> train/test split -> TF-IDF -> fit -> evaluate.

# Read data from Excel file
file_path = '/content/ai.xlsx'  # Replace with your actual file path
df = pd.read_excel(file_path)

# The workbook is expected to have a 'text' column (documents) and a 'label'
# column (classes). Cast to str so every document handed to the vectorizer
# is a string, even if a cell was parsed as a number or is empty.
X = df['text'].astype(str)
y = df['label']

# Hold out 20% of the rows for testing; the fixed seed makes the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the TF-IDF vocabulary on the training split only, then reuse it for
# the test split so no test-set information leaks into the features.
vectorizer = TfidfVectorizer(max_features=5000)
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Train a Random Forest classifier (100 trees, seeded for reproducibility)
classifier = RandomForestClassifier(n_estimators=100, random_state=42)
classifier.fit(X_train_tfidf, y_train)

# Make predictions on the test set
y_pred = classifier.predict(X_test_tfidf)

# Evaluate the model. `accuracy` has to be computed before it is printed;
# without this assignment the f-string below raises NameError.
accuracy = accuracy_score(y_test, y_pred)

# Print the results; accuracy is shown with 4 decimal places.
print(f'Accuracy: {accuracy:.4f}')
print('\nClassification Report:')
# The report is derived from the same y_test / y_pred pair as the accuracy.
print(classification_report(y_test, y_pred))

Accuracy: 0.7500

Classification Report:
precision recall f1-score support

0 1.00 1.00 1.00 2


1 1.00 1.00 1.00 2

accuracy 1.00 4
macro avg 1.00 1.00 1.00 4
weighted avg 1.00 1.00 1.00 4

XGBOOST CLASSIFIER ALGORITHM

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report

# Text classification with TF-IDF features and an XGBoost classifier.
# Pipeline: load workbook -> train/test split -> TF-IDF -> fit -> evaluate.

# Read data from Excel file
file_path = '/content/ai.xlsx'  # Replace with your actual file path
df = pd.read_excel(file_path)

# The workbook is expected to have a 'text' column (documents) and a 'label'
# column (classes). Cast to str so every document handed to the vectorizer
# is a string, even if a cell was parsed as a number or is empty.
X = df['text'].astype(str)
y = df['label']

# Hold out 20% of the rows for testing; the fixed seed makes the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the TF-IDF vocabulary on the training split only, then reuse it for
# the test split so no test-set information leaks into the features.
vectorizer = TfidfVectorizer(max_features=5000)
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Train an XGBoost classifier with the library's default hyperparameters
classifier = XGBClassifier()
classifier.fit(X_train_tfidf, y_train)

# Make predictions on the test set
y_pred = classifier.predict(X_test_tfidf)

# Evaluate the model. `accuracy` has to be computed before it is printed;
# without this assignment the f-string below raises NameError.
accuracy = accuracy_score(y_test, y_pred)

# Print the results; accuracy is shown with 4 decimal places.
print(f'Accuracy: {accuracy:.4f}')
print('\nClassification Report:')
# The report is derived from the same y_test / y_pred pair as the accuracy.
print(classification_report(y_test, y_pred))

Accuracy: 0.7500

Classification Report:
precision recall f1-score support

0 0.50 0.50 0.50 2


1 0.50 0.50 0.50 2

accuracy 0.50 4
macro avg 0.50 0.50 0.50 4
weighted avg 0.50 0.50 0.50 4

SVM CLASSIFIER ALGORITHM

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score, classification_report

# Text classification with TF-IDF features and a linear SVM.
# Pipeline: load workbook -> train/test split -> TF-IDF -> fit -> evaluate.

# Read data from Excel file
file_path = '/content/ai.xlsx'  # Replace with your actual file path
df = pd.read_excel(file_path)

# The workbook is expected to have a 'text' column (documents) and a 'label'
# column (classes). Cast to str so every document handed to the vectorizer
# is a string, even if a cell was parsed as a number or is empty.
X = df['text'].astype(str)
y = df['label']

# Hold out 20% of the rows for testing; the fixed seed makes the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the TF-IDF vocabulary on the training split only, then reuse it for
# the test split so no test-set information leaks into the features.
vectorizer = TfidfVectorizer(max_features=5000)
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Train a linear SVM classifier with the library's default hyperparameters
classifier = LinearSVC()
classifier.fit(X_train_tfidf, y_train)

# Make predictions on the test set
y_pred = classifier.predict(X_test_tfidf)

# Evaluate the model. `accuracy` has to be computed before it is printed;
# without this assignment the f-string below raises NameError.
accuracy = accuracy_score(y_test, y_pred)

# Print the results; accuracy is shown with 4 decimal places.
print(f'Accuracy: {accuracy:.4f}')
print('\nClassification Report:')
# The report is derived from the same y_test / y_pred pair as the accuracy.
print(classification_report(y_test, y_pred))

Accuracy: 0.7500

Classification Report:
precision recall f1-score support

0 1.00 1.00 1.00 2


1 1.00 1.00 1.00 2

accuracy 1.00 4
macro avg 1.00 1.00 1.00 4
weighted avg 1.00 1.00 1.00 4

NAIVE BAYES ALGORITHM

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report

# Text classification with TF-IDF features and Multinomial Naive Bayes.
# Pipeline: load CSV -> train/test split -> TF-IDF -> fit -> evaluate.

# Load the labelled dataset from a CSV file.
file_path = '/content/text3.csv'  # Replace with your actual file path
df = pd.read_csv(file_path)

# Documents come from the 'text' column (coerced to strings) and the class
# labels from the 'label' column.
X, y = df['text'].astype(str), df['label']

# Reserve 20% of the rows for testing; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Learn the TF-IDF vocabulary from the training documents only and apply
# the same mapping to the held-out documents.
vectorizer = TfidfVectorizer(max_features=5000)
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Fit the Multinomial Naive Bayes model and predict the held-out labels.
classifier = MultinomialNB()
classifier.fit(X_train_tfidf, y_train)
y_pred = classifier.predict(X_test_tfidf)

# Score the predictions against the true test labels.
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

# Report overall accuracy followed by the per-class breakdown.
print(f'Accuracy: {accuracy}')
print('\nClassification Report:')
print(report)

Accuracy: 0.75

Classification Report:
precision recall f1-score support

0 0.67 1.00 0.80 2


1 1.00 0.50 0.67 2

accuracy 0.75 4
macro avg 0.83 0.75 0.73 4
weighted avg 0.83 0.75 0.73 4

RANDOM FOREST CLASSIFIER

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Text classification with TF-IDF features and a Random Forest, reading the
# dataset from CSV.
# Pipeline: load CSV -> train/test split -> TF-IDF -> fit -> evaluate.

# Read data from CSV file
file_path = '/content/text3.csv'  # Replace with your actual file path
df = pd.read_csv(file_path)

# The CSV is expected to have a 'text' column (documents) and a 'label'
# column (classes). Cast to str so every document handed to the vectorizer
# is a string, even if a cell was parsed as a number or is empty.
X = df['text'].astype(str)
y = df['label']

# Hold out 20% of the rows for testing; the fixed seed makes the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the TF-IDF vocabulary on the training split only, then reuse it for
# the test split so no test-set information leaks into the features.
vectorizer = TfidfVectorizer(max_features=5000)
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Train a Random Forest classifier (100 trees, seeded for reproducibility)
classifier = RandomForestClassifier(n_estimators=100, random_state=42)
classifier.fit(X_train_tfidf, y_train)

# Make predictions on the test set
y_pred = classifier.predict(X_test_tfidf)

# Evaluate the model. `accuracy` has to be computed before it is printed;
# without this assignment the f-string below raises NameError.
accuracy = accuracy_score(y_test, y_pred)

# Print the results; accuracy is shown with 1 decimal place.
print(f'Accuracy: {accuracy:.1f}')
print('\nClassification Report:')
# The report is derived from the same y_test / y_pred pair as the accuracy.
print(classification_report(y_test, y_pred))

Accuracy: 0.8

Classification Report:
precision recall f1-score support

0 1.00 1.00 1.00 2


1 1.00 1.00 1.00 2

accuracy 1.00 4
macro avg 1.00 1.00 1.00 4
weighted avg 1.00 1.00 1.00 4

You might also like

pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy