0% found this document useful (0 votes)
10K views7 pages

Coe Projects

This document contains code snippets for various predictive analytics and machine learning tasks, including sales forecasting using linear regression, sentiment analysis on social media data, disease prediction using random forest classification on healthcare data, recommendation systems for e-commerce using collaborative filtering, text classification for sentiment analysis using naive bayes, and time series analysis for stock price prediction using random forest regression. The code includes data loading, preprocessing, model training, evaluation, and visualization steps.

Uploaded by

tApIsH
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
10K views7 pages

Coe Projects

This document contains code snippets for various predictive analytics and machine learning tasks, including sales forecasting using linear regression, sentiment analysis on social media data, disease prediction using random forest classification on healthcare data, recommendation systems for e-commerce using collaborative filtering, text classification for sentiment analysis using naive bayes, and time series analysis for stock price prediction using random forest regression. The code includes data loading, preprocessing, model training, evaluation, and visualization steps.

Uploaded by

tApIsH
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 7

coe-projects

February 8, 2024

[ ]: # Predictive Analytics for Sales Forecasting

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

# Load the sales data and preview the first rows.
sales_data = pd.read_csv('data.csv')
print(sales_data.head())

# Placeholder column names: replace them (and append more) so they match the
# numeric predictor columns that actually exist in data.csv.
feature_columns = ['Feature1', 'Feature2']
features = sales_data[feature_columns]
target = sales_data['Sales']

# Split data into train and test sets; the fixed seed keeps runs reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    features, target, test_size=0.2, random_state=42
)

# Fit a linear regression model on the training split.
model = LinearRegression()
model.fit(X_train, y_train)

# Predict on the held-out split and report the mean squared error.
predictions = model.predict(X_test)
mse = mean_squared_error(y_test, predictions)
print(f'Mean Squared Error: {mse}')

# Visualise actual vs. predicted sales with matplotlib.
# plt.scatter requires x and y of equal size, so plot against one feature
# column rather than the whole multi-column X_test frame.
plot_column = feature_columns[0]
plt.scatter(X_test[plot_column], y_test, color='blue', label='Actual Sales')
plt.scatter(X_test[plot_column], predictions, color='red',
            label='Predicted Sales')
plt.xlabel(plot_column)
plt.ylabel('Sales')
plt.legend()
plt.show()

1
[ ]: #sentiment analysis on social media data

from textblob import TextBlob

# Sample social media comments/reviews
comments = [
    "I love this product! It's amazing!",
    "The service was terrible, never using it again.",
    "I'm not sure about this brand, needs improvement.",
    "Best experience ever, highly recommend!",
]

# Label each comment by the sign of its TextBlob polarity score:
# above zero -> Positive, below zero -> Negative, exactly zero -> Neutral.
for comment in comments:
    polarity = TextBlob(comment).sentiment.polarity
    if polarity > 0:
        label = 'Positive'
    elif polarity < 0:
        label = 'Negative'
    else:
        label = 'Neutral'
    print(f"'{comment}' - {label}")

'I love this product! It's amazing!' - Positive


'The service was terrible, never using it again.' - Negative
'I'm not sure about this brand, needs improvement.' - Negative
'Best experience ever, highly recommend!' - Positive

[ ]: # Healthcare analytics: disease prediction

# important libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# libraries for modeling & prediction
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.impute import SimpleImputer

# Load the healthcare data and preview the first rows.
health_data = pd.read_csv('data.csv')
print(health_data.head())

# Placeholder column names: replace them (and append more) so they match the
# numeric predictor columns that actually exist in data.csv.
feature_columns = ['Feature1', 'Feature2']

# A missing label cannot be imputed meaningfully (a mean would invent a class
# value that does not exist), so rows without a 'Disease' value are dropped.
health_data = health_data.dropna(subset=['Disease'])
features = health_data[feature_columns]
target = health_data['Disease']

# Split data into train and test sets before any fitting happens.
X_train, X_test, y_train, y_test = train_test_split(
    features, target, test_size=0.2, random_state=42
)

# Handle missing feature values with mean imputation. The imputer is fitted
# on the training split only, so test-set statistics never leak into training.
imputer = SimpleImputer(strategy='mean')
X_train = imputer.fit_transform(X_train)
X_test = imputer.transform(X_test)

# Train the random forest and predict on the held-out split.
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)
predictions = model.predict(X_test)

# Evaluate the model.
accuracy = accuracy_score(y_test, predictions)
print(f'Accuracy: {accuracy}')
print('Classification Report:\n', classification_report(y_test, predictions))

# Visualize using seaborn and matplotlib.
# Two separate countplot calls would draw over each other on the same axes,
# so stack both series into one long-form frame and let `hue` place the
# actual and predicted counts side by side.
comparison = pd.concat(
    [
        pd.DataFrame({'Disease': y_test.to_numpy(), 'Source': 'Actual'}),
        pd.DataFrame({'Disease': predictions, 'Source': 'Predicted'}),
    ],
    ignore_index=True,
)
plt.figure(figsize=(8, 6))
# seaborn adds the legend for the `hue` levels on its own.
sns.countplot(data=comparison, x='Disease', hue='Source', palette='Set2')
plt.xlabel('Disease')
plt.ylabel('Count')
plt.title('Disease Prediction: Predicted vs. Actual')
plt.show()

[ ]: # Recommendation system for e-commerce

from surprise import Dataset, Reader, KNNBasic
from surprise.model_selection import train_test_split
from surprise.accuracy import rmse

# Load data: each line of data.csv is "user,item,rating" on a 1-5 scale.
reader = Reader(line_format='user item rating', sep=',', rating_scale=(1, 5))
data = Dataset.load_from_file('data.csv', reader=reader)

# Split data into train and test sets; the fixed seed keeps runs reproducible.
trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

# Build and train a user-based KNN model with cosine similarity.
sim_options = {'name': 'cosine', 'user_based': True}
model = KNNBasic(sim_options=sim_options)
model.fit(trainset)

# Evaluate the model (rmse prints the score).
predictions = model.test(testset)
rmse(predictions)

# Make recommendations for a user.
user_id = '123'  # raw id, exactly as it appears in data.csv
top_n = 10

try:
    # Surprise indexes the trainset by inner ids, which are unrelated to the
    # numeric value of the raw id, so the raw id must be translated.
    inner_uid = trainset.to_inner_uid(user_id)
except ValueError:
    # The user has no ratings in the training split: nothing to rank from.
    recommendations = []
else:
    # Items the user already rated must not be recommended again.
    items_to_ignore = {inner_iid for inner_iid, _ in trainset.ur[inner_uid]}
    scored_items = []
    for inner_iid in trainset.all_items():
        if inner_iid in items_to_ignore:
            continue
        raw_iid = trainset.to_raw_iid(inner_iid)
        # predict() takes raw ids and returns a Prediction; .est is the
        # estimated rating.
        scored_items.append((raw_iid, model.predict(user_id, raw_iid).est))
    # Highest estimated rating first; keep the top_n (item, rating) pairs.
    scored_items.sort(key=lambda pair: pair[1], reverse=True)
    recommendations = scored_items[:top_n]

print(recommendations)

[1]: # natural language processing for text classification

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report

# Sample data - replace with your own dataset
documents = [
    ("This product is great", "positive"),
    ("The service was terrible", "negative"),
    ("The delivery was fast", "positive"),
    ("I'm never buying from them again", "negative"),
    ("Worst experience ever", "negative"),
]

# Split data into features and labels
X = [text for text, _ in documents]
y = [label for _, label in documents]

# Split data into training and testing sets.
# NOTE: with only five samples, test_size=0.2 leaves a single test document,
# so the metrics below are illustrative only.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Feature extraction using TF-IDF vectorization; the vocabulary is learned
# from the training texts only and then applied to the test texts.
vectorizer = TfidfVectorizer()
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# Build and train a Multinomial Naive Bayes classifier
classifier = MultinomialNB()
classifier.fit(X_train_vec, y_train)

# Predictions
y_pred = classifier.predict(X_test_vec)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

# Classification report. zero_division=0 reports 0.0 for classes that have no
# predicted or no true samples without emitting an UndefinedMetricWarning for
# each of them.
print("\nClassification Report:")
print(classification_report(y_test, y_pred, zero_division=0))

Accuracy: 0.00

Classification Report:
precision recall f1-score support

negative 0.00 0.00 0.00 1.0


positive 0.00 0.00 0.00 0.0

accuracy 0.00 1.0


macro avg 0.00 0.00 0.00 1.0
weighted avg 0.00 0.00 0.00 1.0

/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py:1344:
UndefinedMetricWarning: Precision and F-score are ill-defined and being set to
0.0 in labels with no predicted samples. Use `zero_division` parameter to
control this behavior.
_warn_prf(average, modifier, msg_start, len(result))
/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py:1344:
UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0
in labels with no true samples. Use `zero_division` parameter to control this
behavior.
_warn_prf(average, modifier, msg_start, len(result))
/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py:1344:
UndefinedMetricWarning: Precision and F-score are ill-defined and being set to
0.0 in labels with no predicted samples. Use `zero_division` parameter to
control this behavior.
_warn_prf(average, modifier, msg_start, len(result))
/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py:1344:
UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0
in labels with no true samples. Use `zero_division` parameter to control this
behavior.
_warn_prf(average, modifier, msg_start, len(result))

5
/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py:1344:
UndefinedMetricWarning: Precision and F-score are ill-defined and being set to
0.0 in labels with no predicted samples. Use `zero_division` parameter to
control this behavior.
_warn_prf(average, modifier, msg_start, len(result))
/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py:1344:
UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0
in labels with no true samples. Use `zero_division` parameter to control this
behavior.
_warn_prf(average, modifier, msg_start, len(result))

[ ]: # time series analysis for stock price prediction

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

# Load historical stock price data.
# Replace 'data.csv' with your dataset file path or API call to fetch data.
stock_data = pd.read_csv('data.csv')

# Calculate financial indicators (e.g., moving averages, RSI, MACD, etc.).
# Here, a simple moving average is used as an example. shift(1) makes each
# row's SMA cover only the previous `window` closes; without it the feature
# would already contain the same day's Close, which is the target.
window = 30
stock_data['SMA'] = stock_data['Close'].rolling(window=window).mean().shift(1)

# Define features and target variable
features = ['SMA']  # Add more financial indicators as needed
target = 'Close'

# Drop rows with missing values (this includes the warm-up rows that have no
# complete SMA window yet).
stock_data = stock_data.dropna()

# Split data into features and target variable
X = stock_data[features]
y = stock_data[target]

# Split data into training and testing sets.
# shuffle=False keeps the rows in file order: the first 80% train the model
# and the last 20% test it, so no later rows leak into training.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=False
)

# Train a Random Forest regressor
rf_regressor = RandomForestRegressor(n_estimators=100, random_state=42)
rf_regressor.fit(X_train, y_train)

# Make predictions
y_pred_train = rf_regressor.predict(X_train)
y_pred_test = rf_regressor.predict(X_test)

# Evaluate the model
train_rmse = np.sqrt(mean_squared_error(y_train, y_pred_train))
test_rmse = np.sqrt(mean_squared_error(y_test, y_pred_test))
print(f"Train RMSE: {train_rmse:.2f}")
print(f"Test RMSE: {test_rmse:.2f}")

# Plot actual vs. predicted prices. Because the split is not shuffled, the
# train predictions followed by the test predictions are in the same row
# order as stock_data.index.
all_predictions = np.concatenate((y_pred_train, y_pred_test))
plt.figure(figsize=(10, 6))
plt.plot(stock_data.index, stock_data['Close'], label='Actual Price')
plt.plot(stock_data.index, all_predictions, label='Predicted Price')
plt.title('Stock Price Prediction')
plt.xlabel('Date')
plt.ylabel('Price')
plt.legend()
plt.show()

You might also like

pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy