# Import Plotting Libraries: in (1) : Import Pandas As PD
# Import Plotting Libraries: in (1) : Import Pandas As PD
# Import Plotting Libraries: in (1) : Import Pandas As PD
file:///D:/KOMAL/SIMPLILEARN/MY%20COURSES/IN%20PROGRESS/My%20Codes_ML_DS/pdf%20conversion/htmls/komal_RF_breastCancer.html 1/13
9/21/2018 ML-RANDOM-FOREST-12-breast-cancer
# Note that the results summarized above in Past Usage refer to a dataset
# of size 369, while Group 1 has only 367 instances. This is because it
# originally contained 369 instances; 2 were removed. The following
# statements summarize the changes made to the original Group 1 data set:
file:///D:/KOMAL/SIMPLILEARN/MY%20COURSES/IN%20PROGRESS/My%20Codes_ML_DS/pdf%20conversion/htmls/komal_RF_breastCancer.html 2/13
9/21/2018 ML-RANDOM-FOREST-12-breast-cancer
dx = ['Benign', 'Malignant']
In [6]: # load the training data from breast cancer data set
df_training = pd.read_csv(location, names=names)
Out[7]:
id_number diagnosis radius_mean texture_mean perimeter_mean area_mean smo
5 rows × 32 columns
file:///D:/KOMAL/SIMPLILEARN/MY%20COURSES/IN%20PROGRESS/My%20Codes_ML_DS/pdf%20conversion/htmls/komal_RF_breastCancer.html 3/13
9/21/2018 ML-RANDOM-FOREST-12-breast-cancer
Out[8]: id_number 0
diagnosis 0
radius_mean 0
texture_mean 0
perimeter_mean 0
area_mean 0
smoothness_mean 0
compactness_mean 0
concavity_mean 0
concave_points_mean 0
symmetry_mean 0
fractal_dimension_mean 0
radius_se 0
texture_se 0
perimeter_se 0
area_se 0
smoothness_se 0
compactness_se 0
concavity_se 0
concave_points_se 0
symmetry_se 0
fractal_dimension_se 0
radius_worst 0
texture_worst 0
perimeter_worst 0
area_worst 0
smoothness_worst 0
compactness_worst 0
concavity_worst 0
concave_points_worst 0
symmetry_worst 0
fractal_dimension_worst 0
dtype: int64
In [9]: # Cleaning
# We do some minor cleaning, such as setting the id_number to be the
# data frame index and converting the diagnosis to the standard
# binary 1, 0 representation using the map() function.
file:///D:/KOMAL/SIMPLILEARN/MY%20COURSES/IN%20PROGRESS/My%20Codes_ML_DS/pdf%20conversion/htmls/komal_RF_breastCancer.html 4/13
9/21/2018 ML-RANDOM-FOREST-12-breast-cancer
Out[11]: ['radius_mean',
'texture_mean',
'perimeter_mean',
'area_mean',
'smoothness_mean',
'compactness_mean',
'concavity_mean',
'concave_points_mean',
'symmetry_mean',
'fractal_dimension_mean',
'radius_se',
'texture_se',
'perimeter_se',
'area_se',
'smoothness_se',
'compactness_se',
'concavity_se',
'concave_points_se',
'symmetry_se',
'fractal_dimension_se',
'radius_worst',
'texture_worst',
'perimeter_worst',
'area_worst',
'smoothness_worst',
'compactness_worst',
'concavity_worst',
'concave_points_worst',
'symmetry_worst',
'fractal_dimension_worst']
In [12]: df_training.head()
Out[12]:
diagnosis radius_mean texture_mean perimeter_mean area_mean smooth
id_number
5 rows × 31 columns
file:///D:/KOMAL/SIMPLILEARN/MY%20COURSES/IN%20PROGRESS/My%20Codes_ML_DS/pdf%20conversion/htmls/komal_RF_breastCancer.html 5/13
9/21/2018 ML-RANDOM-FOREST-12-breast-cancer
In [14]: df_training.shape
[1 0]
0 357
1 212
Name: diagnosis, dtype: int64
In [16]: print(df_training.diagnosis.value_counts(normalize=True))
0 0.627417
1 0.372583
Name: diagnosis, dtype: float64
file:///D:/KOMAL/SIMPLILEARN/MY%20COURSES/IN%20PROGRESS/My%20Codes_ML_DS/pdf%20conversion/htmls/komal_RF_breastCancer.html 6/13
9/21/2018 ML-RANDOM-FOREST-12-breast-cancer
In [17]: df_training.describe()
Out[17]:
diagnosis radius_mean texture_mean perimeter_mean area_mean smoothne
8 rows × 31 columns
# pre-processing
# Random Forest does not require any pre-processing
In [20]: print(type(X_df))
print(type(y_df))
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.series.Series'>
file:///D:/KOMAL/SIMPLILEARN/MY%20COURSES/IN%20PROGRESS/My%20Codes_ML_DS/pdf%20conversion/htmls/komal_RF_breastCancer.html 7/13
9/21/2018 ML-RANDOM-FOREST-12-breast-cancer
In [24]: # train the algorithm utilizing the training and target class
clf.fit(X_train, y_train)
Out[25]: [0.07070875852652238,
0.006575694819293099,
0.05866098072621878,
0.01399705469211196,
0.01012210099909593,
0.0019237134838078498,
0.0021572692743547835,
0.10995070971888617,
0.008025453949977576,
0.003333374263403106,
0.06202480367181799,
0.003832344234070842,
0.009663008594770445,
0.06283545902452922,
0.002782000712192183,
0.004408529077759847,
0.01433132209795599,
0.007234021783473726,
0.002804912141556622,
0.008586381993643893,
0.083409041645263,
0.015484722256977418,
0.03184810894288064,
0.09713728084231674,
0.00405296245964799,
0.03585605229950856,
0.0203828559132144,
0.2373544725706544,
0.00322830207046923,
0.007288307213625225]
file:///D:/KOMAL/SIMPLILEARN/MY%20COURSES/IN%20PROGRESS/My%20Codes_ML_DS/pdf%20conversion/htmls/komal_RF_breastCancer.html 8/13
9/21/2018 ML-RANDOM-FOREST-12-breast-cancer
file:///D:/KOMAL/SIMPLILEARN/MY%20COURSES/IN%20PROGRESS/My%20Codes_ML_DS/pdf%20conversion/htmls/komal_RF_breastCancer.html 9/13
9/21/2018 ML-RANDOM-FOREST-12-breast-cancer
In [28]: np.arange(len(names_index))
file:///D:/KOMAL/SIMPLILEARN/MY%20COURSES/IN%20PROGRESS/My%20Codes_ML_DS/pdf%20conversion/htmls/komal_RF_breastCancer.… 10/13
9/21/2018 ML-RANDOM-FOREST-12-breast-cancer
In [30]: ax = df_feature_importance.plot(kind='bar',
x='feature',
y='importance',
figsize=(10,8),
title= 'Feature importances for Random Forest Model',
grid=True,
legend=True,
fontsize = 12,
color='orange',
);
# Set the x-axis label
ax.set_xlabel("Feature")
file:///D:/KOMAL/SIMPLILEARN/MY%20COURSES/IN%20PROGRESS/My%20Codes_ML_DS/pdf%20conversion/htmls/komal_RF_breastCancer.h… 11/13
9/21/2018 ML-RANDOM-FOREST-12-breast-cancer
In [34]: # comparing actual response values (y_test) with predicted response values (y_pred)
print("model accuracy:", metrics.accuracy_score(y_test, y_pred)* 100)
predictions_prob = clf.predict_proba(X_test)[:, 1]
predictions_prob
file:///D:/KOMAL/SIMPLILEARN/MY%20COURSES/IN%20PROGRESS/My%20Codes_ML_DS/pdf%20conversion/htmls/komal_RF_breastCancer.… 12/13
9/21/2018 ML-RANDOM-FOREST-12-breast-cancer
Out[37]: 0.9909924664264658
file:///D:/KOMAL/SIMPLILEARN/MY%20COURSES/IN%20PROGRESS/My%20Codes_ML_DS/pdf%20conversion/htmls/komal_RF_breastCancer.… 13/13