PDA_Week_1-8 & 10-12.Ipynb - Colab
PDA_Week_1-8 & 10-12.Ipynb - Colab
In [ ]: # Importing a package
# Import package_name as alias name
import numpy as np
# Create an array
# arrayname = alias.name.array([v1,v2,v3,..vn])
# 1-D array
a= np.array([1,2,3,4])
a
array([1, 2, 3, 4])
Out[ ]:
In [ ]: # 2-D array
# arrayname=aliasname.array([[d1][d2]])
b= np.array([[1,2,3,4,5,6,],[1,2,3,4,5,6,]])
b
array([[1, 2, 3, 4, 5, 6],
Out[ ]:
[1, 2, 3, 4, 5, 6]])
In [ ]: # 2-D array with four rows (c.ndim is 2; a 3-D array would need one more level of nesting)
# arrayname=aliasname.array([[d1],[d2],[d3],[d4]])
c= np.array([[1,2,3],[1,2,3,],[4,5,6],[8,9,10]])
c
array([[ 1, 2, 3],
Out[ ]:
[ 1, 2, 3],
[ 4, 5, 6],
[ 8, 9, 10]])
1
Out[ ]:
In [ ]: b.ndim
2
Out[ ]:
In [ ]: c.ndim
2
Out[ ]:
4
Out[ ]:
In [ ]: b.size
12
Out[ ]:
In [ ]: c.size
12
Out[ ]:
1
Out[ ]:
In [ ]: np.min(a)
1
Out[ ]:
In [ ]: np.min(b)
1
Out[ ]:
In [ ]: np.min(c)
1
Out[ ]:
4
Out[ ]:
In [ ]: np.max(a)
4
Out[ ]:
In [ ]: np.max(b)
6
Out[ ]:
In [ ]: np.max(c)
10
Out[ ]:
0
Out[ ]:
In [ ]: np.argmax(a)
3
Out[ ]:
In [ ]: # Sum of array
# Syntax --> sum(arrayname) or np.sum(arrayname)
sum(a)
10
Out[ ]:
In [ ]: np.sum(a)
10
Out[ ]:
In [ ]: # Product of array
# Syntax --> np.prod(arrayname)
np.prod(a)
24
Out[ ]:
In [ ]: # Mean of array
# Syntax --> np.mean(arrayname)
np.mean(a)
2.5
Out[ ]:
In [ ]: # Median of array
# Syntax --> np.median(arrayname)
np.median(a)
2.5
Out[ ]:
In [ ]: # Variance of array
# Syntax --> np.var(arrayname)
np.var(a)
1.25
Out[ ]:
Out[ ]:
[74.42443878 65.53754283 63.62919774 ... 63.66416353 71.9258358
68.36848621]
Minimum Height: 57.5032186105382
Sum: 79762.86328320228
Min Index: 428
Max Index: 576
Variance: 14.8406074828533
Average Height: 66.91515376107574
Maximum Height: 77.0512818135321
25th Percentile: 64.0097456309595
Median: 66.4512652109843
75th Percentile: 69.84810005291368
In [ ]: # Importing a package
# Import package_name as alias name
import numpy as np
# Create an array
# arrayname = alias.name.array([v1,v2,v3,..vn])
# 1-D array
a = np.array([1, 2, 3, 4, 5])
print(a)

# Element-wise comparisons --> <, <=, >, >=, ==, !=
# Each one can be written two ways, and both print the same boolean array:
#   aliasname.comparison(arrayname, value)   or   arrayname <operator> value

# less than
print(np.less(a, 3))
print(a < 3)

# less than or equal to
print(np.less_equal(a, 3))
print(a <= 3)

# greater than
print(np.greater(a, 4))
print(a > 4)

# greater than or equal to
print(np.greater_equal(a, 4))
print(a >= 4)

# equal to
print(np.equal(a, 3))
print(a == 3)

# not equal to
print(np.not_equal(a, 4))
print(a != 4)
[1 2 3 4 5]
[ True True False False False]
[ True True False False False]
[ True True True False False]
[ True True True False False]
[False False False False True]
[False False False False True]
[False False False True True]
[False False False True True]
[False False True False False]
[False False True False False]
[ True True True False True]
[ True True True False True]
[1 2 3 4]
[4 5 6 7 8]
[ 3 6 9 12 15]
In [ ]: import numpy as np
import pandas as pd
data = pd.read_csv(r"/content/Student_Marks.csv")
# Extract the 'Marks' column of the DataFrame as a NumPy array.
# The array is named `marks` (lowercase) because every statement below
# refers to it by that name.
marks = np.array(data['Marks'])
print(marks)
# Comparisons: each call compares every element with the given value and
# returns a boolean array
print("\nMarks less than 50:", np.less(marks, 50))
print("Marks less than or equal to 50:", np.less_equal(marks, 50))
print("Marks greater than 70:", np.greater(marks, 70))
print("Marks greater than or equal to 75:", np.greater_equal(marks, 75))
print("Marks equal to 72:", np.equal(marks, 72))
print("Marks not equal to 51:", np.not_equal(marks, 51))
# Masking: indexing with a boolean array keeps only the elements where the
# condition is True
print("\nMarks less than 50 (masked):", marks[marks < 50])
print("Marks greater than 70 (masked):", marks[marks > 70])
Marks less than 50: [ True False False False True False False True False False]
Marks less than or equal to 50: [ True False False False True False False True F
alse False]
Marks greater than 70: [False True True False False False True False True Tru
e]
Marks greater than or equal to 75: [False False True False False False True Fals
e True True]
Marks equal to 72: [False True False False False False False False False False]
Marks not equal to 51: [ True True True False True True True True True Tru
e]
Logical AND (Marks and pass marks): [ True True True True True True True Tr
ue True True]
Logical OR (Marks or pass marks): [ True True True True True True True True
True True]
Logical NOT (Marks): [False False False False False False False False False False]
[51 92 14 71 60 20 82 86 74 74]
In [9]: # Entering the index numbers to print the elements of that particular index
# Syntax --> ind = [i1,i2,i3,..in]
ind = [3, 7, 2]
# Printing the elements of the entered index numbers
# Syntax 1 --> arrayname[ind]
x[ind]
array([[71, 86],
Out[13]:
[60, 20]])
In [19]: '''Creating an array of the n consecutive integers from 0 to n-1, and then
reshaping it into a matrix with r rows and c columns where n = r*c'''
# Syntax --> x = np.arange(n).reshape((r, c))
# Bind the 3x4 matrix to `X` (uppercase): the indexing examples that follow
# (X[row, col], X[2, [2, 0, 1]], X[1:, [2, 0, 1]]) read it under that name.
X = np.arange(12).reshape((3, 4))
X
array([[ 0, 1, 2, 3],
Out[19]:
[ 4, 5, 6, 7],
[ 8, 9, 10, 11]])
In [21]: ''' Creating a array using fancy indexing i.e first create a row array and a
column array, then use them to index into another array such that each
element corresponds to the matching indices in the row and column array'''
row = np.array([0, 1, 2])
col = np.array([2, 1, 3])
X[row, col]
array([ 2, 5, 11])
Out[21]:
In [22]: '''Creating an array using advanced indexing i.e reshapes the row indices into a
column and pairs them with the column indices to access specific elements
from the array, creating a grid-like retrieval pattern'''
# Syntax --> arrayname[row_arrayname[:, np.newaxis], column_arrayname]
X[row[:, np.newaxis], col]
In [23]: '''Creating an array by indexing one specific row number with entered
coloumn numbers'''
# Syntax --> arrayname[row_number, [c1, c2, ...cn]]
X[2, [2, 0, 1]]
array([10, 8, 9])
Out[23]:
In [26]: '''Creating an array by indexing from specific row number till the last row
number with entered coloumn numbers'''
# Syntax --> arrayname[row_number:, [c1, c2, ...cn]]
X[1:, [2, 0, 1]]
array([[ 6, 4, 5],
Out[26]:
[10, 8, 9]])
RandomState(MT19937)
(100, 2)
In [35]: '''Randomly selecting n number of unique indices from the range of X's first
dimension without repetition'''
# Syntax --> indices = np.random.choice(X.shape[0], n, replace=False)
indices = np.random.choice(X.shape[0], 20, replace=False)
print(indices)
'''Creating a array using fancy indexing i.e selecting the rows of X
corresponding to the chosen indices'''
selection = X[indices]
# Printing the shape of the resulting selection array
print(selection.shape)
[39 34 4 81 77 5 69 41 93 15 71 96 36 49 98 3 87 31 67 59]
(20, 2)
In [45]: '''From the multivariate normal distribution array scatter plotting where all
data points are plotted with semi-transparency'''
# Syntax --> plt.scatter(x, y, alpha=0.5)
'''Where:
x: Data for the x-axis. X[:, 0] means "all rows, first column" of array X
y: Data for the y-axis. X[:, 1] means "all rows, second column" of array X
alpha=0.5: Sets the transparency of the points to 50%'''
plt.scatter(X[:, 0], X[:, 1], alpha=0.5)
'''From the multivariate normal distribution array scatter plotting the selected
In [ ]: # Importing a package
# Import package_name as alias name
import numpy as np
# arrayname = aliasname.eye(value)
# eye() --> Creating an Identity Matrix
# Creating an Identity Matrix of Order 3
a= np.eye(3)
a
array([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
Out[ ]:
[0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
[0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
[0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]])
In [ ]: # Importing a package
# Import package_name as alias name
import numpy as np
# arrayname = aliasname.linspace(start, stop, num=value)
a = np.linspace(1,10,num=5)
a
array([ 5., 10., 15., 20., 25., 30., 35., 40., 45., 50.])
Out[ ]:
Food
Name
Peter Fish
Paul Beans
Mary Bread
Drink
Name
Mary Wine
Joseph Beer
In [18]: # Joining two Data Frames considering Data Frame 1 as Main Data Frame
# Syntax --> print(df_name_1.join(df_name_2))
print(df1.join(df2))
# Alternative Method:
# Syntax --> df_name_1.join(df_name_2,how="left")
df1.join(df2,how="left")
Food Drink
Name
Peter Fish NaN
Paul Beans NaN
Mary Bread Wine
Out[18]: Food Drink
Name
In [19]: # Joining two Data Frames considering Data Frame 2 as Main Data Frame
# Syntax --> df_name_1.join(df_name_2,how="right")
df1.join(df2,how="right")
Name
In [21]: # Joining two Data Frames considering common elements in both the Data Frames
# Syntax --> df_name_1.join(df_name_2,how="inner")
df1.join(df2,how="inner")
Name
Rank
Name
Bob 1
Jake 2
Lisa 3
Sue 4
Rank
Name
Bob 3
Jake 1
Lisa 4
Sue 2
Out[22]: Rank_L Rank_R
Name
Bob 1 3
Jake 2 1
Lisa 3 4
Sue 4 2
Out[5]: survived pclass sex age sibsp parch fare embarked class who
... ... ... ... ... ... ... ... ... ... ...
In [17]: '''Calculating the average survival rate for each combination of passenger `sex`
and `class` on the Titanic and displays it in a table format'''
# Syntax --> titanic.groupby(['C1', 'C2'])['C3'].aggregate('mean').unstack()
titanic.groupby(['sex', 'class'])['survived'].aggregate('mean').unstack()
sex
sex
sex age
In [22]: '''Splitting the Titanic dataset's `fare` column into two equal-sized fare
categories: low and high'''
# Syntax --> V2 = pd.qcut(titanic['C4'], 2)
fare1 = pd.qcut(titanic['fare'], 2)
'''Creating a pivot table showing the average survival rate on the Titanic,
grouped by passenger `sex` and age category (`age1`) as rows, and by fare
category (`fare1`) and passenger `class` as columns'''
# Syntax --> titanic.pivot_table('C3', ['C1', 'V1'], ['V2', 'C2'])
titanic.pivot_table('survived', ['sex', age1], [fare1, 'class'])
sex age
In [23]: '''Creating a pivot table that shows the total number of survivors (`survived`)
and the average fare (`fare`), grouped by `sex` as rows and `class` as
columns'''
''' Syntax --> titanic.pivot_table(index='C1', columns='C2',
aggfunc={'C3':'sum', 'C4':'mean'})'''
# Aggregators are given by name: pandas 2.1+ emits a FutureWarning when the
# builtin `sum` is passed as a callable, and the string 'sum' keeps the same
# result.
titanic.pivot_table(index='sex', columns='class',
                    aggfunc={'survived': 'sum', 'fare': 'mean'})
sex
In [24]: '''Creating a pivot table displaying the average survival rate on the Titanic,
grouped by `sex` as rows and `class` as columns, with an extra row and column
labeled "All" (from `margins=True`) to show overall averages for each row and
column'''
# Syntax --> titanic.pivot_table('C3', index='C1', columns='C2', margins=True)
titanic.pivot_table('survived', index='sex', columns='class', margins=True)
sex
In [ ]: # Importing a package
# Import package_name as alias name
import pandas as pd
# Creating a Series Data Frame
# Syntax --> V1 = pd.Series(['R1', 'R2', 'R3', ..., 'Rn'])
# One full name per element; pandas assigns the default integer index 0..5
monte = pd.Series([
    'Graham Chapman',
    'John Cleese',
    'Terry Gilliam',
    'Eric Idle',
    'Terry Jones',
    'Michael Palin',
])
print(monte)
0 Graham Chapman
1 John Cleese
2 Terry Gilliam
3 Eric Idle
4 Terry Jones
5 Michael Palin
dtype: object
0 graham chapman
1 john cleese
2 terry gilliam
3 eric idle
4 terry jones
5 michael palin
dtype: object
In [ ]: # Checking if any string of the Series Data Frame starts with the letter 'T'
# Syntax --> V1.str.startswith('T')
print(monte.str.startswith('T'))
0 False
1 False
2 True
3 False
4 True
5 False
dtype: bool
0 [Graham, Chapman]
1 [John, Cleese]
2 [Terry, Gilliam]
3 [Eric, Idle]
4 [Terry, Jones]
5 [Michael, Palin]
dtype: object
0 Graham
1 John
2 Terry
3 Eric
4 Terry
5 Michael
dtype: object
In [ ]: '''Finding all names that start and end with a consonant, making use of the
start-of-string (^) and end-of-string ($) regular expression characters'''
# Syntax --> V1.str.findall(r'^[^AEIOU].*[^aeiou]$')
print(monte.str.findall(r'^[^AEIOU].*[^aeiou]$'))
0 [Graham Chapman]
1 []
2 [Terry Gilliam]
3 []
4 [Terry Jones]
5 [Michael Palin]
dtype: object
0 Gra
1 Joh
2 Ter
3 Eri
4 Ter
5 Mic
dtype: object
0 Chapman
1 Cleese
2 Gilliam
3 Idle
4 Jones
5 Palin
dtype: object
name info
0 Graham Chapman B|C|D
1 John Cleese B|D
2 Terry Gilliam A|C
3 Eric Idle B|D
4 Terry Jones B|C
5 Michael Palin B|C|D
In [ ]: '''Splitting the values in the `info` column by the `|` symbol and creating new
columns for each unique value, marking `1` if the value is present and `0` if
it's not. The get_dummies() routine lets you quickly split out these indicator
variables into a DataFrame'''
# Syntax --> V2['info'].str.get_dummies('|')
full_monte['info'].str.get_dummies('|')
Out[ ]: A B C D
0 0 1 1 1
1 0 1 0 1
2 1 0 1 0
3 0 1 0 1
4 0 1 1 0
5 0 1 1 1
In [ ]: # Importing a package
# Import package_name as alias name
import numpy as np
# Create an 2-D array
# arrayname=aliasname.array([[d1][d2]])
a=np.array([[1,2,3],[4,5,6],[6,7,8]])
a
array([[1, 2, 3],
Out[ ]:
[4, 5, 6],
[6, 7, 8]])
array([[6, 7, 8],
Out[ ]:
[4, 5, 6],
[1, 2, 3]])
array([[3, 2, 1],
Out[ ]:
[6, 5, 4],
[8, 7, 6]])
array([[8, 7, 6],
Out[ ]:
[6, 5, 4],
[3, 2, 1]])
In [ ]: # Importing a package
# Import package_name as alias name
import numpy as np
# Create an 2-D array
# arrayname=aliasname.array([[d1],[d2]])
# Sample matrix with 3 rows and 4 columns
a = np.array([
    [1, 2, 3, 4],
    [5, 6, 7, 8],
    [9, 10, 11, 12],
])
print(a)

# Statistics along the second axis (axis=1): one value per row
# Syntax --> np.mean(arrayname, axis=1)
# Syntax --> np.median(arrayname, axis=1)
# Syntax --> np.std(arrayname, axis=1)
# Syntax --> np.var(arrayname, axis=1)
mean, median = np.mean(a, axis=1), np.median(a, axis=1)
std_dev, variance = np.std(a, axis=1), np.var(a, axis=1)

# Report each statistic computed along the second axis
print("Mean along the second axis:", mean)
print("Median along the second axis:", median)
print("Standard deviation along the second axis:", std_dev)
print("Variance along the second axis:", variance)
In [9]: '''Computing the mean, median, standard deviation, and variance of a given array
along the first axis'''
# Syntax --> np.mean(arrayname, axis=0)
# Syntax --> np.median(arrayname, axis=0)
# Syntax --> np.std(arrayname, axis=0)
# Syntax --> np.var(arrayname, axis=0)
# Statistics along the first axis (axis=0) of the array `a`
mean, median = np.mean(a, axis=0), np.median(a, axis=0)
std_dev, variance = np.std(a, axis=0), np.var(a, axis=0)

# Report each statistic computed along the first axis
print("Mean along the first axis:", mean)
print("Median along the first axis:", median)
print("Standard deviation along the first axis:", std_dev)
print("Variance along the first axis:", variance)
In [17]: '''Write a NumPy program to sort the Student ID with increasing Height of the
Students from given Students ID and Height. Print the integer indices that
describes the sort order by multiple columns and the sorted data.'''
# Importing a package
# Import package_name as alias name
import numpy as np
# Creating two arrays which contain Student IDs and their respective Heights
# arrayname = alias.name.array([v1,v2,v3,..vn])
# 1-D array
# Parallel 1-D arrays: the same position in both refers to the same student
student_ID = np.array([101, 102, 103, 104, 105])
heights = np.array([5.5, 6.1, 5.8, 5.7, 6.0])

# Show the two input arrays
print("Student IDs:", student_ID)
print("Student's Heights", heights)

# Place the two 1-D arrays side by side as the columns of one 2-D array
# Syntax --> data = np.column_stack((array_1, array_2))
columns = (student_ID, heights)
data = np.column_stack(columns)
print("Combined Data:\n", data)
In [18]: '''Write a NumPy program to sort the Employee ID with increasing Wages of the
Employees from given Employee ID and Wages. Print the integer indices that
describes the sort order by multiple columns and the sorted data.'''
# Importing a package
# Import package_name as alias name
import numpy as np
# Creating two arrays which contain Employee IDs and their respective Wages
# arrayname = aliasname.array([v1, v2, v3, ..., vn])
# NOTE(review): no assignment to `employee_ID` or `wages` appears in this cell
# as shown; both must already be defined before the prints below run — confirm
# that the array definitions were not lost.
# Printing the arrays which contain Employee IDs and their respective Wages
print("Employee IDs:", employee_ID)
print("Employee Wages:", wages)
In [19]: '''Write a NumPy program to sort the Book Names with increasing Prices of the
Books from given Book Names and Prices. Print the integer indices that
describes the sort order by multiple columns and the sorted data.'''
# Importing a package
# Import package_name as alias name
import numpy as np
# Creating two array which contains Book names and there respective Prices
# arrayname = alias.name.array([v1,v2,v3,..vn])
# 1-D array
# Parallel 1-D arrays: the same position in both refers to the same book
book_names = np.array([
    "Book A",
    "Book B",
    "Book C",
    "Book D",
    "Book E",
])
prices = np.array([250, 150, 300, 200, 180])

# Show the book names and their respective prices
print("Book Names:", book_names)
print("Book Prices:", prices)
Book Names: ['Book A' 'Book B' 'Book C' 'Book D' 'Book E']
Book Prices: [250 150 300 200 180]
Combined Data:
[['Book A' '250']
['Book B' '150']
['Book C' '300']
['Book D' '200']
['Book E' '180']]
Sorted Indices (Based on Prices): [1 4 3 0 2]
Sorted Book Names: ['Book B' 'Book E' 'Book D' 'Book A' 'Book C']
Sorted Book Prices: ['150' '180' '200' '250' '300']
In [15]: '''Write a NumPy program to sort the Product Names with increasing Prices of the
Products from given Product Names and Prices. Print the integer indices that
describes the sort order by multiple columns and the sorted data.'''
# Importing a package
# Import package_name as alias name
import numpy as np
# Creating two array which contains Product names and there respective Prices
# arrayname = alias.name.array([v1,v2,v3,..vn])
# 1-D array
# Parallel 1-D arrays: the same position in both refers to the same product
product_names = np.array([
    "Product A",
    "Product B",
    "Product C",
    "Product D",
])
prices = np.array([250, 150, 300, 200])

# Show the product names and their respective prices
print("Product Names:", product_names)
print("Product Prices:", prices)