# Expt. No. 2 - RUSHYA (.ipynb - Colab)
# Div: A
# Roll No: 59
# Output: Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
import pandas as pd
import numpy as np
import seaborn as sns
from scipy import stats
import matplotlib.pyplot as plt
#from empiricaldist import Pmf , Cdf
from matplotlib.ticker import PercentFormatter
# Load the four datasets used in this notebook. Every path is absolute under
# /content/drive/MyDrive, so Google Drive must already be mounted at
# /content/drive before these lines run.
df_titanic = pd.read_csv('/content/drive/MyDrive/train.csv')
df_house = pd.read_csv('/content/drive/MyDrive/train (3)advanced.csv')
# The next two paths end in .zip and are passed to read_csv directly; pandas
# infers the compression from the file extension by default (the archive must
# contain exactly one data file for this to work).
df_police = pd.read_csv('/content/drive/MyDrive/police_project.csv.zip')
df_olympic = pd.read_csv('/content/drive/MyDrive/athlete_events.csv.zip')
def label_graph(ticksfont, xlabel, ylabel, title, fontsize):
    """Label the current matplotlib axes and set the tick-label font size.

    NOTE(review): the ``def`` line and the label/title calls were missing
    from this source; only the two tick calls survived, which left
    ``ticksfont`` undefined at module level. The signature is reconstructed
    from the call ``label_graph(10, 'Sale Price', 'CDF', " ", 10)`` later in
    the file -- confirm the parameter order against the original notebook.

    Parameters
    ----------
    ticksfont : int or float
        Font size applied to the x- and y-axis tick labels.
    xlabel : str
        Text for the x-axis label.
    ylabel : str
        Text for the y-axis label.
    title : str
        Text for the axes title.
    fontsize : int or float
        Font size applied to both axis labels and the title.

    Returns
    -------
    None
        Works by side effect on pyplot's current axes.
    """
    plt.xlabel(xlabel, fontsize=fontsize)
    plt.ylabel(ylabel, fontsize=fontsize)
    plt.title(title, fontsize=fontsize)
    plt.xticks(fontsize = ticksfont)
    plt.yticks(fontsize = ticksfont)
#fig, ax = plt.subplots(figsize=(12,8))
#sns.set_style("whitegrid")
#cdf = Cdf.from_seq(df_house['SalePrice'])
#cdf.plot()
#ax.annotate("25% of houses <= 129900$ ", xy=(140000, 0.24), xytext=(150000, 0.06) , fontsize = 18 ,
#arrowprops={'arrowstyle': '-|>', 'lw': 2 , 'color' : 'b'})
def cdf(data):
    """Compute CDF for a one-dimensional array of measurements.

    This is the empirical CDF: the observations are sorted and the i-th
    smallest one (1-based) is paired with the cumulative fraction ``i / n``.

    Parameters
    ----------
    data : array-like
        One-dimensional collection of numeric observations (list, NumPy
        array, pandas Series, ...). It is not modified.

    Returns
    -------
    x : numpy.ndarray
        The observations sorted in ascending order.
    y : numpy.ndarray
        Cumulative fractions ``1/n, 2/n, ..., 1`` aligned with ``x``.
        Both arrays are empty when ``data`` is empty.
    """
    # Number of data points: n
    n = len(data)
    if n == 0:
        # Nothing to rank; also avoids dividing by n == 0 below.
        return np.empty(0, dtype=float), np.empty(0, dtype=float)
    # x-data for the CDF: the observations in ascending order (sorted copy).
    x = np.sort(data)
    # y-data for the CDF: fraction of observations at or below each x value.
    y = np.arange(1, n + 1) / n
    return x, y
# https://colab.research.google.com/drive/1UEuYHKnd6InNNgYRTYinubPGgEcKMQQx#printMode=true (page 1/3)
# 4/30/24, 11:40 AM - Expt. No. 2: Plotting of probability distributions using different datasets (.ipynb - Colab)
# New 10x6-inch figure for the sale-price CDF plot.
fig, ax = plt.subplots(figsize=(10,6))
# cdf() returns an (x, y) pair; here it is applied to the SalePrice column.
x_price , y_price = cdf(df_house['SalePrice'])
plt.plot(x_price , y_price)
# Arguments are presumably (tick font size, x label, y label, title,
# label font size) -- confirm against label_graph's definition.
label_graph(10 ,'Sale Price' , 'CDF' , " " , 10 )
#fig, ax = plt.subplots(figsize=(12,8))
#pmf = Pmf.from_seq(df_house['BedroomAbvGr'])
#pmf.bar()
# Start a fresh 10x6-inch figure (rebinds fig and ax).
# NOTE(review): the plotting calls that belong to this figure are not present in this source.
fig, ax = plt.subplots(figsize=(10,6))
# https://colab.research.google.com/drive/1UEuYHKnd6InNNgYRTYinubPGgEcKMQQx#printMode=true (page 2/3)
# 4/30/24, 11:40 AM - Expt. No. 2: Plotting of probability distributions using different datasets (.ipynb - Colab)
# https://colab.research.google.com/drive/1UEuYHKnd6InNNgYRTYinubPGgEcKMQQx#printMode=true (page 3/3)