ML lab manual 1-10
ML lab manual 1-10
PRACTICAL: 01
AIM: Dealing With Data Using Stats Numpy and Pandas Library.
Mean of age: 30
a3 = np.array([[10,20,30],[40,50,60],[60,70,80]])
[[10 20 30]
[40 50 60]
[60 70 80]]
2 int64 8
(3, 3)
9
a4 = np.zeros((5,5))
print(a4)
[[0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0.]]
a5 = np.zeros((0,3))
print(a5)
[]
a6 = np.arange(2,101,2)
a6
array([ 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34,
36, 38, 40, 42, 44, 46, 48, 50, 52,
54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 78,
80, 82, 84, 86, 88, 90, 92, 94, 96, 98, 100])
a7 = np.reshape(a6,(10,5))
a7
a8 = np.reshape(a6,(2,5,5))
a8
[ 2 4 6 8 10 12 14 16 18 20 22 24 26 28 30 32 34 36
38 40 42 44 46 48 50 52 54 56 58 60 62 64 66 68 70 72
74 76 78 80 82 84 86 88 90 92 94 96 98 100]
[ 2 4 6 8 10]
[ 2 12 22 32 42 52 62 72 82 92]
[ 6 8 10]
[ 2 4 6 8 10 12 14 16 18 20 22 24 26 28 30 32 34 36 38 40 42 44 46 48
50 52 54 56 58 60 62 64 66 68 70 72 74 76 78 80 82 84 86 88 90 92 94 96
98]
my_dictionary = {
"id":[101,103],
"name":["Nandani","Rathod"],
}
print(my_dictionary)
dtfr = pd.DataFrame(my_dictionary)
dtfr
0 101 Nandani
1 103 Rathod
PRACTICAL: 02
df.shape
(11251, 15)
df.head()
User_ID Cust_name Product_ID Gender Age Group Age Marital_Status State
0 1002903 Sanskriti P00125942 F 26-35 28 0 Maharashtra
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11251 entries, 0 to 11250
Data columns (total 15 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 User_ID 11251 non-null int64
1 Cust_name 11251 non-null object
2 Product_ID 11251 non-null object
3 Gender 11251 non-null object
4 Age Group 11251 non-null object
5 Age 11251 non-null int64
6 Marital_Status 11251 non-null int64
7 State 11251 non-null object
8 Zone 11251 non-null object
9 Occupation 11251 non-null object
10 Product_Category 11251 non-null object
11 Orders 11251 non-null int64
12 Amount 11239 non-null float64
13 Status 0 non-null float64
14 unnamed1 0 non-null float64
dtypes: float64(3), int64(4), object(8)
memory usage: 1.3+ MB
pd.isnull(df).sum()
User_ID 0
Cust_name 0
Product_ID 0
Gender 0
Age Group 0
Age 0
Marital_Status 0
State 0
Zone 0
Occupation 0
Product_Category 0
Orders 0
Amount 12
dtype: int64
Drop the rows with null values in Amount, as 12 is very few compared to 11251 records.
df.dropna(inplace=True)
df['Amount'] = df['Amount'].astype('int')
df['Amount'].dtypes
dtype('int64')
df.columns
df.rename(columns={'Marital_Status':'Shaadi'})
A A Shaa
User_ Cust_naProduct_Gend Gro Stat Zo OccupatiProduct_Categ
Orde A
0 1002903 Sanskriti P00125942F 26-35 28 0 Maharashtra Western Healthcare Auto
1 2
Food
Processing
... ... ... ... ... ... ... ... ... ... ...
...
Madhya
Pradesh
df.describe()
df[['Age','Orders','Amount']].describe()
Age Orders Amount
count 11239.000000 11239.000000 11239.000000
mean 35.410357 2.489634 9453.610553
Uttar
Clothing &
top NaN Vishakha P00265242 F 26-35 NaN NaN Pradesh Central IT Sector
Appar
freq NaN 42 53 7832 4541 NaN NaN 1944 4289 1583 265
mean 1.003004e+06 NaN NaN NaN NaN 35.410357 0.420055 NaN NaN NaN Na
std 1.716039e+03 NaN NaN NaN NaN 12.753866 0.493589 NaN NaN NaN Na
min 1.000001e+06 NaN NaN NaN NaN 12.000000 0.000000 NaN NaN NaN Na
25% 1.001492e+06 NaN NaN NaN NaN 27.000000 0.000000 NaN NaN NaN Na
50% 1.003064e+06 NaN NaN NaN NaN 33.000000 0.000000 NaN NaN NaN Na
75% 1 004426e+06 NaN NaN NaN NaN 43 000000 1 000000 NaN NaN NaN Na
for bars in ax.containers:
    ax.bar_label(bars)
Age
y='Amount', data=sales_age)
State
(15,5)})
sns.barplot data=sales_state
sns.set(rc={'figure.figsize':(15,5)})
sns.barplot(data=sales_state, x='State', y='Amount')
<Axes: xlabel='State', ylabel='Amount'>
Marital Status
ax = sns.countplot(data=df, x='Marital_Status')
sns.set(rc={'figure.figsize':(7,5)})
for bars in ax.containers:
    ax.bar_label(bars)
Occupation
sns.set(rc={'figure.figsize':(20,5)})
ax = sns.countplot(data=df,x='Occupation')
for bars in ax.containers:
    ax.bar_label(bars)
Product Category
sns.set(rc={'figure.figsize':(20,5)})
ax = sns.countplot(data=df,x='Product_Category')
PRACTICAL: 03
cgpa package
0 6.89 3.26
1 5.12 1.98
2 7.82 3.25
3 7.42 3.67
4 6.94 3.57
plt.scatter(df['cgpa'],df['package'])
plt.xlabel('CGPA')
plt.ylabel('Package(in lpa)')
x=df.iloc[:,0:1]
y = df.iloc[:,-1]
0 3.26
1 1.98
2 3.25
3 3.67
4 3.57
195 2.46
196 2.57
197 3.24
198 3.96
199 2.33
Name: package, Length: 200, dtype: float64
lr = LinearRegression()
lr.fit(x_train,y_train)
#training a model fit method pass training variables
▾LinearRegression
LinearRegression()
x_test
115 8.35
35 6.87
12 8.94
92 7.90
13 6.93
126 5.91
174 7.32
2 7.82
44 5.09
3 7.42
113 6.94
14 7.73
23 6.19
25 7.28
6 6.73
134 7.20
165 8.21
173 6.75
45 7.87
65 7.60
48 8.63
122 5.12
178 8.15
64 7 36
64 7.36
9 8.31
57 6.60
78 6.59
71 7.47
128 7.93
176 6.29
131 6.37
53 6.47
lr.predict(x_test.iloc[0].values.reshape(1,1))
#method used for testing : predict
/usr/local/lib/python3.10/dist-packages/sklearn/base.py:439:
UserWarning: X does not have valid feature names, but LinearRegression was fitted with feature names
  warnings.warn(
array([3.89111601])
plt.scatter(df['cgpa'],df['package'])
plt.plot(x_train,lr.predict(x_train),color='red')
plt.xlabel('CGPA')
plt.ylabel ( 'Package(in lpa)' )
m=lr.coef_
b=lr.intercept_
m*8.58+b
array([3.89111601])
m*9.5+b
array([4.40443183])
m*100+b
array([54.89908542])
Accuracy score:0.780730147510384
y_pred
cgpa
112 8.58
29 7.15
182 5.88
199 6.22
193 4.57
85 4.79
10 5.32
54 6.86
115 8.35
35 6.87
12 8.94
92 7.90
13 6.93
126 5.91
174 7.32
2 7.82
44 5.09
3 7.42
113 6.94
14 7.73
df = pd.read_csv('/content/50_Startups.csv')
df.head()
df.isnull().sum()
R&D Spend 0
Administration 0
Marketing Spend 0
State 0
Profit 0
dtype: int64
dtype=object)
plt.show()
df.head()
R&D Spend Administration Marketing Spend Profit New York California Florida
0 165349.20 136897.80 471784.10 192261.83 1 0 0
df.head()
y='Profit'
x=df.columns.tolist()
x.remove(y)
x
['R&D Spend',
'Administration',
'Marketing Spend',
'New York',
'California', 'Florida']
x=df[x].values
y=df[y].values
x
x_train
PRACTICAL: 04
df = pd.read_csv('50_Startups.csv')
df.head()
df.tail()
df.isnull().sum()
R&D Spend 0
Administration 0
Marketing Spend 0
State 0
Profit 0
dtype: int64
df["State"].unique()
x1 = df.iloc[:,0].values
y1 = df.iloc[:,-1].values
plt.scatter(x1,y1,color="Green",s=50)
plt.xlabel("R&D")
plt.ylabel("Profit")
plt.title("R&D vs Profit")
plt.show()
df.State.value_counts()
State
New York 17
California 17
Florida 16
Name: count, dtype: int64
df.head()
R&D Spend Administration Marketing Spend Profit New York California Florida
0 165349.20 136897.80 471784.10 192261.83 1 0 0
y='Profit'
PRACTICAL: 05
df=sns.load_dataset('iris')
df.head()
sepal_length sepal_width petal_length petal_width species
df.tail()
df.isnull().sum()
sepal_length 0
sepal_width 0
petal_length 0
petal_width 0
species 0
dtype: int64
df=df[df['species']!='setosa']
df.head()
sepal_length sepal_width petal_length petal_width species
df
Next steps: Generate code with View recommended plots
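df['species']=df['species'].map({'varsicolor':0,'virginica':1})
# NOTE: 'varsicolor' is a misspelling of 'versicolor'. The unmatched label maps to NaN, which is why rows 50-54 of y show NaN below and why fit() later raises ValueError. The intended mapping is {'versicolor':0,'virginica':1}.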
df.head()
sepal_length sepal_width petal_length petal_width species
df
Next steps: Generate code with View recommended plots
x=df.iloc[:,:-1]
y=df.iloc[:,-1]
x
Next steps: Generate code with View recommended plots
50 NaN
51 NaN
52 NaN
53 NaN
54 NaN
...
145 1.0
146 1.0
147 1.0
148 1.0
149 1.0
Name: species, Length: 100, dtype: float64
classifier_regressor=GridSearchCV(classifier,param_grid=parameter,scoring='accuracy',cv=5)
classifier_regressor.fit(x_train,y_train)
/usr/local/lib/python3.10/dist-packages/sklearn/utils/multiclass.py:380: RuntimeWarning: invalid value encountered in cast
  if xp.any(data != data.astype(int)):
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-28-613a9f6f8019> in <cell line: 1>()
----> 1 classifier_regressor.fit(x_train,y_train)
3 frames
/usr/local/lib/python3.10/dist-packages/sklearn/utils/validation.py
in _assert_all_finite(X, allow_nan, msg_dtype, estimator_name,
input_name)
159 "#estimators-that-handle-nan-values"
160 )
--> 161 raise ValueError(msg_err)
162
163
print(classifier_regressor.best_params_)
print(classifier_regressor.best_score_)
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-30-e6dbc56d2a3b> in <cell line: 1>()
----> 1 print(classifier_regressor.best_params_)
y_pred=classifier_regressor.predict(x_test)
---------------------------------------------------------------------------
NotFittedError                            Traceback (most recent call last)
<ipython-input-24-fbc5171df548> in <cell line: 1>()
----> 1 y_pred=classifier_regressor.predict(x_test)
1 frames
/usr/local/lib/python3.10/dist-packages/sklearn/utils/validation.py in check_is_fitted(estimator,
attributes, msg, all_or_any)
1388
1389 if not fitted:
-> 1390 raise NotFittedError(msg % {"name": type(estimator).__name__})
1391
1392
NotFittedError: This GridSearchCV instance is not fitted yet. Call 'fit' with appropriate arguments
before using this estimator.
PRACTICAL: 06
import warnings
warnings.filterwarnings('ignore')
cancer_ds = datasets.load_breast_cancer()
x = cancer_ds.data
y = cancer_ds.target
x.shape
(569, 30)
y.shape
(569,)
knn_model = KNeighborsClassifier(n_neighbors=5)
knn_model.fit(x_train, y_train)
y_pred = knn_model.predict(x_test)
cm = confusion_matrix(y_test, y_pred)
cls_rpt = classification_report(y_test, y_pred)
print(cm)
print(cls_rpt)
[[ 59   4]
 [  5 103]]
              precision    recall  f1-score   support

           0       0.92      0.94      0.93        63
           1       0.96      0.95      0.96       108

    accuracy                           0.95       171
   macro avg       0.94      0.95      0.94       171
weighted avg       0.95      0.95      0.95       171
x_train.shape
gscv.best_score_
0.9346518987341772
gscv.best_params_
{'n_neighbors': 11}
cm = confusion_matrix(y_test, y_pred)
cls_rpt = classification_report(y_test, y_pred)
print(cm)
print(cls_rpt)
[[ 59   4]
 [  2 106]]
              precision    recall  f1-score   support

           0       0.97      0.94      0.95        63
           1       0.96      0.98      0.97       108

    accuracy                           0.96       171
   macro avg       0.97      0.96      0.96       171
weighted avg       0.96      0.96      0.96       171
from sklearn.tree import DecisionTreeClassifier
cancer_ds = datasets.load_breast_cancer()
x=cancer_ds.data
y=cancer_ds.target
94
    accuracy                           0.93       171
   macro avg       0.92      0.93      0.93       171
weighted avg       0.93      0.93      0.93       171
from sklearn.tree import plot_tree
import matplotlib.pyplot as plt
plot_tree(dt_model)
plt.show()
8200484687
PRACTICAL: 07
PRACTICAL: 08
from sklearn import datasets
from sklearn.cluster import KMeans
from matplotlib import pyplot as plt
plt.plot(k_values,wcss_values)
plt.show()
PRACTICAL: 09
PRACTICAL: 10