0% found this document useful (0 votes)
5 views

04_boxplot

The document provides a comprehensive guide on using Seaborn to create boxplots, particularly focusing on the 'mpg' dataset of cars. It includes data preprocessing, visualizations, and various styling options for boxplots, illustrating how to analyze and present data based on different categorical variables such as origin and cylinder count. Additionally, it demonstrates how to customize boxplots with color, width, and order of categories.

Uploaded by

kart238
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
5 views

04_boxplot

The document provides a comprehensive guide on using Seaborn to create boxplots, particularly focusing on the 'mpg' dataset of cars. It includes data preprocessing, visualizations, and various styling options for boxplots, illustrating how to analyze and present data based on different categorical variables such as origin and cylinder count. Additionally, it demonstrates how to customize boxplots with color, width, and order of categories.

Uploaded by

kart238
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 22

Seaborn: boxplot

from matplotlib import pyplot as plt


import seaborn as sns

# Load the 'mpg' example dataset through seaborn and drop every row that
# contains a missing value.
cars = sns.load_dataset('mpg').dropna()

# (rows, columns) of the cleaned frame.
cars.shape

# Captured notebook output of `cars.shape`.
(392, 9)

# Preview the first rows of the frame.
cars.head()

mpg cylinders displacement horsepower weight acceleration \


0 18.0 8 307.0 130.0 3504 12.0
1 15.0 8 350.0 165.0 3693 11.5
2 18.0 8 318.0 150.0 3436 11.0
3 16.0 8 304.0 150.0 3433 12.0
4 17.0 8 302.0 140.0 3449 10.5

model_year origin name


0 70 usa chevrolet chevelle malibu
1 70 usa buick skylark 320
2 70 usa plymouth satellite
3 70 usa amc rebel sst
4 70 usa ford torino

Intro Visuals
import pandas as pd
import numpy as np

# Plain white seaborn style for the hand-built intro figures.
sns.set_style('white')

# Tick-label size for both axes, set in a single rcParams update.
plt.rcParams.update({'xtick.labelsize': 14, 'ytick.labelsize': 14})

# Name the first four colors of the active seaborn palette.
blue, orange, green, red = sns.color_palette()[:4]

# Ten sample values used to assemble a boxplot by hand.
pts = [1, 1.5, 7 / 3, 3, 3.75, 4.25, 5, 19 / 3, 8, 12]

# Raw points drawn on a bare number line (no frame, no y ticks).
fig, ax = plt.subplots(figsize=(8, 4))
ax.scatter(pts, [0] * len(pts), s=100, zorder=1, color=green)
ax.axhline(0, color='gray', zorder=0)
ax.set_ylim(-0.02, 0.55)
ax.set_xlim(-2, 14)
ax.set_frame_on(False)
ax.set_xticks(range(13))
ax.set_yticks([])
fig.tight_layout()

# Summary statistics (count, mean, std, min, quartiles, max) of the points.
pd.Series(pts).describe()

count 10.000000
mean 4.716667
std 3.341010
min 1.000000
25% 2.500000
50% 4.000000
75% 6.000000
max 12.000000
dtype: float64

# Number line plus a vertical mark at the median.
fig, ax = plt.subplots(figsize=(8, 4))
ax.scatter(pts, [0] * len(pts), s=100, zorder=1, color=green)
ax.axvline(4, ymin=0.2, ymax=0.5, color='#3E3E3E')  # median at 4
ax.axhline(0, color='gray', zorder=0)
ax.set_ylim(-0.02, 0.55)
ax.set_xlim(-2, 14)
ax.set_frame_on(False)
ax.set_xticks(range(13))
ax.set_yticks([])
fig.tight_layout()
# Number line with marks at the median (4) and the 25th percentile (2.5).
fig, ax = plt.subplots(figsize=(8, 4))
ax.scatter(pts, [0] * len(pts), s=100, zorder=1, color=green)
for mark in (4, 2.5):
    ax.axvline(mark, ymin=0.2, ymax=0.5, color='#3E3E3E')
ax.axhline(0, color='gray', zorder=0)
ax.set_ylim(-0.02, 0.55)
ax.set_xlim(-2, 14)
ax.set_frame_on(False)
ax.set_xticks(range(13))
ax.set_yticks([])
fig.tight_layout()
# Number line with marks at the median (4), the 25th percentile (2.5)
# and the 75th percentile (6).
fig, ax = plt.subplots(figsize=(8, 4))
ax.scatter(pts, [0] * len(pts), s=100, zorder=1, color=green)
for mark in (4, 2.5, 6):
    ax.axvline(mark, ymin=0.2, ymax=0.5, color='#3E3E3E')
ax.axhline(0, color='gray', zorder=0)
ax.set_ylim(-0.02, 0.55)
ax.set_xlim(-2, 14)
ax.set_frame_on(False)
ax.set_xticks(range(13))
ax.set_yticks([])
fig.tight_layout()

# Quartile marks joined along the top and bottom to form the box outline.
plt.figure(figsize=(8, 4))
plt.scatter(pts, [0]*len(pts), s=100, zorder=1, color=green)
plt.axvline(4, ymin=0.2, ymax=0.5, color='#3E3E3E')
plt.axvline(2.5, ymin=0.2, ymax=0.5, color='#3E3E3E')
plt.axvline(6, ymin=0.2, ymax=0.5, color='#3E3E3E')
# xmin/xmax are axes fractions; with xlim(-2, 14) below, 4.5/16 is x=2.5
# and 0.5 is x=6, so each line spans the two outer quartile marks.
plt.axhline(0.092, xmin=(4.5/16), xmax=0.5, color='#3E3E3E')  # connect box
plt.axhline(0.268, xmin=(4.5/16), xmax=0.5, color='#3E3E3E')  # connect box
plt.axhline(0, color='gray', zorder=0)
plt.ylim(-0.02, 0.55)
plt.xlim(-2, 14)
plt.box(False)
plt.xticks(range(13))
plt.yticks([])
plt.tight_layout();
# x positions spanning the box (25th to 75th percentile), used for the fill.
xvals = np.linspace(2.5, 6, 100)

# Box outline as before, now filled with color.
fig, ax = plt.subplots(figsize=(8, 4))
ax.scatter(pts, [0] * len(pts), s=100, zorder=1, color=green)
for mark in (4, 2.5, 6):
    ax.axvline(mark, ymin=0.2, ymax=0.5, color='#3E3E3E')
for level in (0.092, 0.268):
    ax.axhline(level, xmin=(4.5 / 16), xmax=0.5, color='#3E3E3E')
ax.fill_between(xvals, 0.092, 0.268, color=green)  # add color
ax.axhline(0, color='gray', zorder=0)
ax.set_ylim(-0.02, 0.55)
ax.set_xlim(-2, 14)
ax.set_frame_on(False)
ax.set_xticks(range(13))
ax.set_yticks([])
fig.tight_layout()
# Filled box plus the lower whisker reaching down to the minimum point (x=1).
plt.figure(figsize=(8, 4))
plt.scatter(pts, [0]*len(pts), s=100, zorder=1, color=green)
plt.axvline(4, ymin=0.2, ymax=0.5, color='#3E3E3E')
plt.axvline(2.5, ymin=0.2, ymax=0.5, color='#3E3E3E')
plt.axvline(6, ymin=0.2, ymax=0.5, color='#3E3E3E')
plt.axhline(0.092, xmin=(4.5/16), xmax=0.5, color='#3E3E3E')
plt.axhline(0.268, xmin=(4.5/16), xmax=0.5, color='#3E3E3E')
# Whisker stem: axes fractions 3/16 -> x=1 and 4.5/16 -> x=2.5 under xlim(-2, 14).
plt.axhline(0.18, xmin=(3/16), xmax=(4.5/16), color='#3E3E3E')  # add lower whisker
# Whisker cap at x=1.
plt.axvline(1, ymin=0.3, ymax=0.4, color='#3E3E3E')  # add lower whisker
plt.gca().fill_between(xvals, 0.092, 0.268, color=green)
plt.axhline(0, color='gray', zorder=0)
plt.ylim(-0.02, 0.55)
plt.xlim(-2, 14)
plt.box(False)
plt.xticks(range(13))
plt.yticks([])
plt.tight_layout();
# Box and lower whisker plus a dashed, hypothetical upper whisker out to x=11.25.
# NOTE(review): 11.25 presumably is Q3 + 1.5 * IQR = 6 + 1.5 * 3.5 -- confirm.
plt.figure(figsize=(8, 4))
plt.scatter(pts, [0]*len(pts), s=100, zorder=1, color=green)
plt.axvline(4, ymin=0.2, ymax=0.5, color='#3E3E3E')
plt.axvline(2.5, ymin=0.2, ymax=0.5, color='#3E3E3E')
plt.axvline(6, ymin=0.2, ymax=0.5, color='#3E3E3E')
plt.axhline(0.092, xmin=(4.5/16), xmax=0.5, color='#3E3E3E')
plt.axhline(0.268, xmin=(4.5/16), xmax=0.5, color='#3E3E3E')
plt.axhline(0.18, xmin=(3/16), xmax=(4.5/16), color='#3E3E3E')
plt.axvline(1, ymin=0.3, ymax=0.4, color='#3E3E3E')
# Dashed stem: axes fractions 8/16 -> x=6 and 13.25/16 -> x=11.25 under xlim(-2, 14).
plt.axhline(0.18, xmin=(8/16), xmax=(13.25/16), color='#3E3E3E',
            linestyle='--')  # add hypothetical upper whisker
plt.axvline(11.25, ymin=0.3, ymax=0.4, color='#3E3E3E')  # add hypothetical upper whisker
plt.gca().fill_between(xvals, 0.092, 0.268, color=green)
plt.axhline(0, color='gray', zorder=0)
plt.ylim(-0.02, 0.55)
plt.xlim(-2, 14)
plt.box(False)
plt.xticks(range(13))
plt.yticks([])
plt.tight_layout();
# Box with both whiskers: the upper whisker is drawn from the box edge to x=8.
plt.figure(figsize=(8, 4))
plt.scatter(pts, [0]*len(pts), s=100, zorder=1, color=green)
plt.axvline(4, ymin=0.2, ymax=0.5, color='#3E3E3E')
plt.axvline(2.5, ymin=0.2, ymax=0.5, color='#3E3E3E')
plt.axvline(6, ymin=0.2, ymax=0.5, color='#3E3E3E')
plt.axhline(0.092, xmin=(4.5/16), xmax=0.5, color='#3E3E3E')
plt.axhline(0.268, xmin=(4.5/16), xmax=0.5, color='#3E3E3E')
plt.axhline(0.18, xmin=(3/16), xmax=(4.5/16), color='#3E3E3E')
plt.axvline(1, ymin=0.3, ymax=0.4, color='#3E3E3E')
# Whisker stem: axes fractions 8/16 -> x=6 and 10/16 -> x=8 under xlim(-2, 14).
plt.axhline(0.18, xmin=(8/16), xmax=(10/16), color='#3E3E3E')  # add upper whisker
# Whisker cap at x=8.
plt.axvline(8, ymin=0.3, ymax=0.4, color='#3E3E3E')  # add upper whisker
plt.gca().fill_between(xvals, 0.092, 0.268, color=green)
plt.axhline(0, color='gray', zorder=0)
plt.ylim(-0.02, 0.55)
plt.xlim(-2, 14)
plt.box(False)
plt.xticks(range(13))
plt.yticks([])
plt.tight_layout();
# Complete hand-built boxplot: box, both whiskers, and a diamond flier at x=12.
fig, ax = plt.subplots(figsize=(8, 4))
ax.scatter(pts, [0] * len(pts), s=100, zorder=1, color=green)
for mark in (4, 2.5, 6):
    ax.axvline(mark, ymin=0.2, ymax=0.5, color='#3E3E3E')
for level in (0.092, 0.268):
    ax.axhline(level, xmin=(4.5 / 16), xmax=0.5, color='#3E3E3E')
ax.axhline(0.18, xmin=(3 / 16), xmax=(4.5 / 16), color='#3E3E3E')
ax.axvline(1, ymin=0.3, ymax=0.4, color='#3E3E3E')
ax.axhline(0.18, xmin=(8 / 16), xmax=(10 / 16), color='#3E3E3E')
ax.axvline(8, ymin=0.3, ymax=0.4, color='#3E3E3E')
ax.scatter(12, 0.18, marker="d", s=30, color='#3E3E3E')  # add flier
ax.fill_between(xvals, 0.092, 0.268, color=green)
ax.axhline(0, color='gray', zorder=0)
ax.set_ylim(-0.02, 0.55)
ax.set_xlim(-2, 14)
ax.set_frame_on(False)
ax.set_xticks(range(13))
ax.set_yticks([])
fig.tight_layout()
# Check the hand-built figure against seaborn's own boxplot.

plt.figure(figsize=(8,4))
# Pass the values as x= explicitly. A positional vector is deprecated in
# seaborn 0.11, and from 0.12 the only positional argument is `data`, so the
# box would no longer be guaranteed to lie along the x axis that xlim() sets.
sns.boxplot(x=pts, color=green, width=0.3)
sns.despine()
plt.xlim(-1, 13)
plt.tight_layout();

# Smaller tick labels for the plots that follow.
plt.rc('xtick', labelsize=10)
plt.rc('ytick', labelsize=10)
Basics
# Switch to the grid-backed seaborn style for the dataset plots.
sns.set_style('whitegrid')

# Number of cars per cylinder count.
cars['cylinders'].value_counts()

4 199
8 103
6 83
3 4
5 3
Name: cylinders, dtype: int64

Filter down to only cars with an even number of cylinders (4, 6, or 8).

# Keep only 4-, 6- and 8-cylinder cars. The explicit .copy() makes the result
# an independent frame, so adding columns to `cars` later does not raise
# pandas' SettingWithCopyWarning.
cars = cars[cars.cylinders.isin([4, 6, 8])].copy()

# Univariate boxplot of mpg. The vector is passed as x= because seaborn only
# accepts `data` positionally from version 0.12 on.
sns.boxplot(x=cars.mpg);

# Summary statistics of mpg for the filtered cars.
cars.mpg.describe()

count 385.000000
mean 23.445455
std 7.836911
min 9.000000
25% 17.000000
50% 23.000000
75% 29.000000
max 46.600000
Name: mpg, dtype: float64

# mpg split by origin, passing the two columns directly as vectors.
sns.boxplot(x=cars['origin'], y=cars['mpg']);

# The same plot, naming the columns and passing the frame through data=.
sns.boxplot(data=cars, x='origin', y='mpg');


Hue
By Cylinders
# mpg by origin, with a separate box per cylinder count inside each origin.
sns.boxplot(data=cars, x='origin', y='mpg', hue='cylinders');

By Model Year
# Summary statistics of model_year.
cars['model_year'].describe()

count 385.000000
mean 75.961039
std 3.692058
min 70.000000
25% 73.000000
50% 76.000000
75% 79.000000
max 82.000000
Name: model_year, dtype: float64

# Boolean flag: True for cars whose model_year is greater than 76.
cars['newer_model'] = cars['model_year'].gt(76)

# mpg by origin, split into older and newer models within each origin.
sns.boxplot(data=cars, x='origin', y='mpg', hue='newer_model');


Styling Options
order
# Horizontal boxplot with the category order left to seaborn.
sns.boxplot(data=cars, x='mpg', y='origin');

# Same plot with an explicit order for the origin categories.
sns.boxplot(
    data=cars,
    x='mpg',
    y='origin',
    order=['japan', 'europe', 'usa'],
);

# Explicit order for both the origin categories and the hue levels.
sns.boxplot(
    data=cars,
    x='mpg',
    y='origin',
    hue='newer_model',
    order=['japan', 'europe', 'usa'],
    hue_order=[True, False],
);

color
# A single color applied to every box.
sns.boxplot(data=cars, x='mpg', y='origin', color='g');

# The same color argument combined with a hue variable.
sns.boxplot(data=cars, x='mpg', y='origin', hue='newer_model', color='g');
width
# Demonstrates the `width` parameter.
sns.boxplot(data=cars, x='mpg', y='origin', width=0.5);

linewidth
# Demonstrates the `linewidth` parameter.
sns.boxplot(data=cars, x='mpg', y='origin', linewidth=2.5);
whis
# Demonstrates the `whis` parameter with two settings, 1 and 2.
sns.boxplot(data=cars, x='mpg', y='origin', whis=1);

sns.boxplot(data=cars, x='mpg', y='origin', whis=2);

fliersize
# Demonstrates the `fliersize` parameter with a small and a large value.
sns.boxplot(data=cars, x='mpg', y='origin', fliersize=2);

sns.boxplot(data=cars, x='mpg', y='origin', fliersize=10);
showcaps
Check the matplotlib documentation for even more styling options

# `showcaps` is not a seaborn-specific option; per the note above it is one of
# the extra styling keywords documented by matplotlib.
sns.boxplot(data=cars, x='mpg', y='origin', showcaps=False);

You might also like

pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy