An Introduction to Data Analysis & Visualization Using R
An Introduction to Data Analysis & Visualization Using R
An Introduction to
Data Analysis &
Visualization Using R
1
04/22/2025
2
04/22/2025
Assignment Operator
# uses '<-' symbol to assign values to objects
x <- 5
Comments
# anything following '#' on a line is ignored by R
# allows you to include explanatory notes in your code
Vectors
vector <- c(1, 2, 3)
3
04/22/2025
Data frame
df <- data.frame(Name=c("Jhoana", "Pablo"), Score=c(90, 85))
Package Installation
# install the 'ggplot2' package
install.packages("ggplot2")
install.packages("agricolae")
Package Loading
# load the 'ggplot2' package
library(ggplot2)
library(agricolae)
4
04/22/2025
Built-in Datasets
library(agricolae) # load the package containing the built-in dataset
data(yacon) # load the built-in dataset
head(yacon, 3) # displays first 3 rows of the dataset
## locality site dose entry replication height stalks wfr wff wfk roots FOS
## 1 CAJ 1 F0 P1385 1 57.9 3.2 6700 3600 5490 21.9 60.2
## 2 CAJ 1 F0 P1385 2 62.1 3.0 6450 2500 3800 21.3 60.4
## 3 CAJ 1 F0 P1385 3 53.7 2.6 7550 2450 5360 21.8 54.5
## glucose fructose sucrose brix foliage dry IH
## 1 1.74 5.0 26.66 14.8 25.6 15.6 0.3469
## 2 1.50 4.5 28.84 15.7 27.2 17.0 0.3257
## 3 2.14 9.3 27.06 14.3 30.0 15.9 0.3221
Base R
# imports a CSV file using a built-in function from base R
data_csv <- read.csv("path/to/your/data.csv")
head(data_csv)
R Package
# imports an Excel file using 'readxl' package
install.packages("readxl")
library(readxl)
10
5
04/22/2025
ggplot2
11
ggplot2
• data visualization package built on the grammar of graphics
• creates complex and multi-layered graphics easily
General Syntax
ggplot(data, aes(x, y)) +
geom_<type>() +
theme_<style>() +
labs(title, x, y)
12
6
04/22/2025
ggplot2
13
ggplot2 iris
14
7
04/22/2025
ggplot2 iris
15
ggplot2 iris
ggplot(iris, aes(x = Species,
                 y = Sepal.Length,
                 fill = Species)) + # boxplot colors according to Species
  geom_boxplot() +
  labs(title = "Sepal Length by Species",
       x = "Species",
       y = "Sepal Length (cm)") # add labels
16
8
04/22/2025
ggplot2 iris
ggplot(iris, aes(x = Species,
                 y = Sepal.Length,
                 fill = Species)) + # boxplot colors according to Species
  geom_boxplot() +
  labs(title = "Sepal Length by Species",
       x = "Species",
       y = "Sepal Length (cm)") # add labels
17
ggplot2 iris
ggplot(iris, aes(x = Species,
                 y = Sepal.Length,
                 fill = Species)) + # boxplot colors according to Species
  geom_boxplot(width = 0.5) + # modify box width
  labs(title = "Sepal Length by Species",
       x = "Species",
       y = "Sepal Length (cm)") # add labels
18
9
04/22/2025
ggplot2 iris
ggplot(iris, aes(x = Species,
                 y = Sepal.Length,
                 fill = Species)) + # boxplot colors according to Species
  geom_boxplot(width = 0.5) + # modify box width
  labs(title = "Sepal Length by Species",
       x = "Species",
       y = "Sepal Length (cm)") + # add labels
  geom_jitter() # add data points
19
ggplot2 iris
ggplot(iris, aes(x = Species,
                 y = Sepal.Length,
                 fill = Species)) + # boxplot colors according to Species
  geom_boxplot(width = 0.5) + # modify box width
  labs(title = "Sepal Length by Species",
       x = "Species",
       y = "Sepal Length (cm)") + # add labels
  geom_jitter(alpha = 0.25) # add data points, modify transparency
20
10
04/22/2025
ggplot2
21
ggplot2 Soybean
22
11
04/22/2025
ggplot2 Soybean
23
ggplot2 Soybean
ggplot(soybean, aes(x = Time,
                    y = weight,
                    color = Variety)) +
  # add lines with different colors for each variety
  geom_line() +
  labs(title = "Average Leaf Weight Over Time",
       x = "Days after planting (DAP)",
       y = "Average Leaf Weight (g)") # add labels
24
12
04/22/2025
ggplot2 Soybean
ggplot(soybean, aes(x = Time,
                    y = weight,
                    color = Variety)) +
  # add lines with different colors for each variety
  geom_point() + # add data points
  geom_line() +
  labs(title = "Average Leaf Weight Over Time",
       x = "Days after planting (DAP)",
       y = "Average Leaf Weight (g)") # add labels
25
ggplot2 Soybean
ggplot(soybean, aes(x = Time,
                    y = weight,
                    color = Variety)) +
  # add lines with different colors for each variety
  geom_point(shape=18, size=3) + # add data points, modify points
  geom_line() +
  labs(title = "Average Leaf Weight Over Time",
       x = "Days after planting (DAP)",
       y = "Average Leaf Weight (g)") # add labels
26
13
04/22/2025
ggplot2 Soybean
ggplot(soybean, aes(x = Time,
                    y = weight,
                    color = Variety)) +
  # add lines with different colors for each variety
  geom_point(shape=18, size=3) + # add data points, modify points
  geom_line(linewidth=1.2) + # make lines thicker
  labs(title = "Average Leaf Weight Over Time",
       x = "Days after planting (DAP)",
       y = "Average Leaf Weight (g)") # add labels
27
ggplot2 Soybean
ggplot(soybean, aes(x = Time,
                    y = weight,
                    linetype = Variety)) +
  # add lines with different linetypes for each variety
  geom_point(shape=18, size=3) + # add data points, modify points
  geom_line(linewidth=1.2) + # make lines thicker
  labs(title = "Average Leaf Weight Over Time",
       x = "Days after planting (DAP)",
       y = "Average Leaf Weight (g)") # add labels
28
14
04/22/2025
ggplot2 Soybean
ggplot(soybean, aes(x = Time,
                    y = weight,
                    linetype = Variety)) +
  # add lines with different linetypes for each variety
  geom_point(shape=18, size=3) + # add data points, modify points
  geom_line(linewidth=1.2) + # make lines thicker
  scale_linetype_manual(values = c("dotdash", "solid")) + # specify line types
  labs(title = "Average Leaf Weight Over Time",
       x = "Days after planting (DAP)",
       y = "Average Leaf Weight (g)") # add labels
29
ggplot2
30
15
04/22/2025
ggplot2 yacon
31
ggplot2 yacon
32
16
04/22/2025
ggplot2 yacon
ggplot(yacon, aes(x = stalks,
y = FOS)) +
geom_point() +
labs(title = "FOS vs Stalk Count",
x = "Number of Stalks",
y = "FOS (%)") # add labels
33
ggplot2 yacon
ggplot(yacon, aes(x = stalks,
                  y = FOS)) +
  geom_point(color = "darkgreen") + # change color of points
  labs(title = "FOS vs Stalk Count",
       x = "Number of Stalks",
       y = "FOS (%)") # add labels
34
17
04/22/2025
ggplot2 yacon
ggplot(yacon, aes(x = stalks,
                  y = FOS,
                  color = entry)) + # color by grouping variable (entry)
  geom_point() +
  labs(title = "FOS vs Stalk Count",
       x = "Number of Stalks",
       y = "FOS (%)") # add labels
35
ggplot2 yacon
ggplot(yacon, aes(x = stalks,
                  y = FOS,
                  color = entry)) + # color by grouping variable (entry)
  geom_point() +
  labs(title = "FOS vs Stalk Count",
       x = "Number of Stalks",
       y = "FOS (%)") + # add labels
  scale_color_manual(values = c("maroon", "forestgreen", "sienna",
                                "tomato", "steelblue", "mediumorchid",
                                "rosybrown", "orange"))
36
18
04/22/2025
ggplot2 yacon
# load required package
library(RColorBrewer)
37
ggplot2
38
19
04/22/2025
ggplot2
library(RColorBrewer)
ggplot(yacon, aes(x = stalks,
                  y = FOS,
                  color = entry)) + # color by grouping variable (entry)
  geom_point() +
  labs(title = "FOS vs Stalk Count",
       x = "Number of Stalks",
       y = "FOS (%)") + # add labels
  scale_color_brewer(palette = "Dark2") +
  theme_classic() # apply classic theme
39
ggplot2
library(RColorBrewer)
ggplot(yacon, aes(x = stalks,
                  y = FOS,
                  color = entry)) + # color by grouping variable (entry)
  geom_point() +
  labs(title = "FOS vs Stalk Count",
       x = "Number of Stalks",
       y = "FOS (%)") + # add labels
  scale_color_brewer(palette = "Dark2") +
  theme_light() # apply light theme
40
20
04/22/2025
ggplot2
library(RColorBrewer)
ggplot(yacon, aes(x = stalks,
                  y = FOS,
                  color = entry)) + # color by grouping variable (entry)
  geom_point() +
  labs(title = "FOS vs Stalk Count",
       x = "Number of Stalks",
       y = "FOS (%)") + # add labels
  scale_color_brewer(palette = "Dark2") +
  theme_minimal() # apply minimal theme
41
ggplot2
library(RColorBrewer)
ggplot(yacon, aes(x = stalks,
                  y = FOS,
                  color = entry)) + # color by grouping variable (entry)
  geom_point() +
  labs(title = "FOS vs Stalk Count",
       x = "Number of Stalks",
       y = "FOS (%)") + # add labels
  scale_color_brewer(palette = "Dark2") +
  theme_bw() # apply black-and-white theme
42
21
04/22/2025
ggplot2
• useful for creating multi-panel plots by grouping data
• uses the functions:
facet_wrap()
facet_grid()
43
ggplot2 facet_wrap()
ggplot(yacon, aes(x = stalks,
                  y = FOS,
                  color = entry)) +
  geom_point() +
  labs(title = "FOS vs Stalk Count",
       x = "Number of Stalks",
       y = "FOS (%)") +
  scale_color_brewer(palette = "Dark2") +
  theme_bw() +
  facet_wrap(~locality) # creates a panel for each location
44
22
04/22/2025
ggplot2 facet_wrap()
ggplot(yacon, aes(x = stalks,
                  y = FOS,
                  color = entry)) +
  geom_point() +
  labs(title = "FOS vs Stalk Count",
       x = "Number of Stalks",
       y = "FOS (%)") +
  scale_color_brewer(palette = "Dark2") +
  theme_bw() +
  facet_wrap(~locality, ncol=1) # stacks panels in a column
45
ggplot2 facet_grid
ggplot(yacon, aes(x = stalks,
                  y = FOS,
                  color = entry)) +
  geom_point() +
  labs(title = "FOS vs Stalk Count",
       x = "Number of Stalks",
       y = "FOS (%)") +
  scale_color_brewer(palette = "Dark2") +
  theme_bw() +
  facet_grid(dose~locality) # creates a panel for each dose x location
46
23
04/22/2025
agricolae
47
agricolae
• provides functions for experimental design and analysis of agricultural experiments
• planning of field experiments
General Syntax
design.<design_type>(trt, r, serie, seed)
48
24
04/22/2025
agricolae
# create a vector defining your treatments
treatment <- c("A", "B", "C")
# create CRD layout with 10 replicates per treatment level
design_crd <- design.crd(treatment, r=10, seed=123, serie=2)
# extract the design book
book_crd <- design_crd$book
# view first 3 rows of the design book
head(book_crd, 3)
## plots r treatment
## 1 101 1 C
## 2 102 1 A
## 3 103 2 A
49
agricolae
# create a vector defining your treatments
treatment <- c("A", "B", "C", "D", "E")
50
25
04/22/2025
agricolae
# display RCBD layout
print(design_rcbd$sketch)
## [,1] [,2] [,3] [,4] [,5]
## [1,] "A" "C" "D" "E" "B"
## [2,] "B" "A" "E" "C" "D"
## [3,] "A" "D" "B" "E" "C"
## [4,] "E" "C" "D" "B" "A"
# display plot numbers
print(matrix(book_rcbd[,1],byrow = TRUE, ncol = 5))
## [,1] [,2] [,3] [,4] [,5]
## [1,] 101 102 103 104 105
## [2,] 205 204 203 202 201
## [3,] 301 302 303 304 305
## [4,] 405 404 403 402 401
# save the design to a csv file
write.csv(book_rcbd, "book_rcbd.csv", row.names=FALSE)
51
agricolae
# create a vector for your treatments
treatment <- c("A", "B", "C", "D")
52
26
04/22/2025
agricolae
# apply zigzag plot numbering to the design layout
book_lsd <- zigzag(design_lsd)
53
agricolae
• design.ab(): factorial experiments
• design.split(): split-plot experiments
• design.strip(): strip-plot experiments
• design.alpha(): alpha-lattice experiments
54
27
04/22/2025
agricolae
data(yacon) # load the built-in dataset
head(yacon, 3) # display first 3 rows
## entry replication roots
## 1 P1385 1 21.9
## 2 P1385 2 21.3
## 3 P1385 3 21.8
55
agricolae
# display anova table
summary(model)
56
28
04/22/2025
agricolae
# perform Tukey HSD test
hsd_result <- HSD.test(model, "entry", group = TRUE)
# view groupings
print(hsd_result$groups)
## roots groups
## AMM5150 23.03333 a
## AKW5075 22.43333 a
## AMM5163 22.16667 ab
## P1385 21.66667 ab
## AMM5136 20.73333 bc
## ARB5125 19.63333 c
## SAL136 19.43333 c
## CLLUNC118 19.06667 c
57
58
29
04/22/2025
59
An Introduction to
Data Analysis &
Visualization Using R
60
30