MRA Project Milestone 1 PDF
import numpy as np
import os
sns.set(color_codes=True)
%matplotlib inline
In [224]: df = pd.read_csv("Sales_Data.csv")
In [225]: df.head()
Out[225]:
ORDERNUMBER QUANTITYORDERED PRICEEACH ORDERLINENUMBER SALES ORDERDATE DAYS_SINCE_LASTORDER
In [226]: df.describe()
Out[226]:
ORDERNUMBER QUANTITYORDERED PRICEEACH ORDERLINENUMBER SALES DAYS_SINCE_LASTORDER
In [227]: df.info()
<class 'pandas.core.frame.DataFrame'>
In [228]: df.shape
In [230]: df.dtypes
QUANTITYORDERED int64
PRICEEACH float64
ORDERLINENUMBER int64
SALES float64
ORDERDATE datetime64[ns]
DAYS_SINCE_LASTORDER int64
STATUS object
PRODUCTLINE object
MSRP int64
PRODUCTCODE object
CUSTOMERNAME object
PHONE object
ADDRESSLINE1 object
CITY object
POSTALCODE object
COUNTRY object
CONTACTLASTNAME object
CONTACTFIRSTNAME object
DEALSIZE object
dtype: object
In [231]: df.isnull().sum()
Out[231]: ORDERNUMBER 0
QUANTITYORDERED 0
PRICEEACH 0
ORDERLINENUMBER 0
SALES 0
ORDERDATE 0
DAYS_SINCE_LASTORDER 0
STATUS 0
PRODUCTLINE 0
MSRP 0
PRODUCTCODE 0
CUSTOMERNAME 0
PHONE 0
ADDRESSLINE1 0
CITY 0
POSTALCODE 0
COUNTRY 0
CONTACTLASTNAME 0
CONTACTFIRSTNAME 0
DEALSIZE 0
dtype: int64
# Univariate view of each numeric column: a density-scaled histogram with a KDE curve and
# rug marks, followed by a horizontal box plot of the same column.
# `sns.distplot` is deprecated, so its three layers (histogram, KDE, rug) are drawn with
# histplot + rugplot on a shared Axes. The box plots pass the series as `x=` because newer
# seaborn releases treat the first positional argument as `data` rather than `x`.
In [232]: ax = sns.histplot(x=df['QUANTITYORDERED'], color='indigo', kde=True, stat='density')
sns.rugplot(x=df['QUANTITYORDERED'], color='indigo', ax=ax);
In [233]: sns.boxplot(x=df['QUANTITYORDERED'], color='indigo')
In [234]: ax = sns.histplot(x=df['PRICEEACH'], color='blue', kde=True, stat='density')
sns.rugplot(x=df['PRICEEACH'], color='blue', ax=ax);
In [235]: sns.boxplot(x=df['PRICEEACH'], color='blue')
In [236]: ax = sns.histplot(x=df['ORDERLINENUMBER'], color='green', kde=True, stat='density')
sns.rugplot(x=df['ORDERLINENUMBER'], color='green', ax=ax);
In [237]: sns.boxplot(x=df['ORDERLINENUMBER'], color='GREEN')
In [238]: ax = sns.histplot(x=df['SALES'], color='orange', kde=True, stat='density')
sns.rugplot(x=df['SALES'], color='orange', ax=ax);
In [239]: sns.boxplot(x=df['SALES'], color='ORANGE')
In [240]: ax = sns.histplot(x=df['DAYS_SINCE_LASTORDER'], color='pink', kde=True, stat='density')
sns.rugplot(x=df['DAYS_SINCE_LASTORDER'], color='pink', ax=ax);
In [241]: sns.boxplot(x=df['DAYS_SINCE_LASTORDER'], color='PINK')
In [242]: ax = sns.histplot(x=df['MSRP'], color='red', kde=True, stat='density')
sns.rugplot(x=df['MSRP'], color='red', ax=ax);
In [243]: sns.boxplot(x=df['MSRP'], color='RED')
In [244]: sns.pairplot(df)
plt.show()
# NOTE(review): no drawing call is visible between creating the 10x8 figure and showing it;
# presumably the plotting statement was lost from this export. Confirm against the original notebook.
In [245]: plt.figure(figsize=(10,8))
plt.show()
# Capture the current wall-clock time.
# NOTE(review): this value changes on every run, so anything derived from `now` is not
# reproducible. TODO confirm how it is used and consider a fixed reference date instead.
now = datetime.now()
In [256]: df_rfm
Out[256]:
ORDERNUMBER frequency PRICEEACH ORDERLINENUMBER monetary ORDERDATE DAYS_SINCE_LASTORDER STA
# Keep only ORDERNUMBER and CUSTOMERNAME plus the recency, frequency and monetary columns,
# then preview the first rows of the narrowed frame.
In [260]: df_1=df_rfm[['ORDERNUMBER','CUSTOMERNAME','recency','frequency','monetary']]
In [261]: df_1.head()
Out[261]:
ORDERNUMBER CUSTOMERNAME recency frequency monetary
# Restrict the RFM frame to the recency, frequency and monetary columns before fitting.
In [268]: df_2=df_rfm[['recency','frequency','monetary']]
# NOTE(review): `model` is not created in any visible cell; presumably it was built in a
# cell missing from this export. Confirm which class it is before relying on fit()/summary_statistics().
In [269]: model.fit(df_2)
In [270]: model.summary_statistics()
Out[270]:
recency_scores frequency_scores monetary_scores
In [271]: pd.DataFrame(model.cutoffs)
Out[271]:
recency frequency monetary
In [272]: df_fitted=model.fitted_data
df_fitted.head()
Out[272]:
recency frequency monetary recency_scores frequency_scores monetary_scores recency_weighted frequency_weighted
# First ten rows after sorting descending by recency score, then frequency score, then monetary score.
In [279]: df_fitted.sort_values(['recency_scores','frequency_scores','monetary_scores'],ascending=[False,False,False]).head(10)
Out[279]:
recency frequency monetary recency_scores frequency_scores monetary_scores recency_weighted frequency_weighte
# Order rows ascending on the three score columns (recency, then frequency, then monetary)
# and show positions 5 through 10 of that ordering, i.e. the six rows right after the first five.
In [291]: a = df_fitted.sort_values(by=['recency_scores','frequency_scores','monetary_scores'], ascending=True)
display(a.iloc[5:11])
LOST CUSTOMERS
# First five rows after sorting ascending by recency score, then frequency score, then monetary score.
In [285]: df_fitted.sort_values(['recency_scores','frequency_scores','monetary_scores'],ascending=[True,True,True]).head(5)
Out[285]:
recency frequency monetary recency_scores frequency_scores monetary_scores recency_weighted frequency_weighte
LOYAL CUSTOMERS
# First five rows whose monetary score equals 5.
In [286]: df_fitted.loc[df_fitted['monetary_scores'] == 5].head(5)
Out[286]:
recency frequency monetary recency_scores frequency_scores monetary_scores recency_weighted frequency_weighted
In [ ]:
In [ ]: