This document contains all of the code shown in the slides for the ggplot section of the tidyverse course. A separate document has the answers to the exercises used in the course.
library("tidyverse")
## Warning: package 'tidyverse' was built under R version 3.5.3
## -- Attaching packages --------------------------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.1.0 v purrr 0.3.2
## v tibble 2.0.1 v dplyr 0.8.0.1
## v tidyr 0.8.3 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.4.0
## Warning: package 'ggplot2' was built under R version 3.5.3
## Warning: package 'tibble' was built under R version 3.5.3
## Warning: package 'tidyr' was built under R version 3.5.3
## Warning: package 'readr' was built under R version 3.5.3
## Warning: package 'purrr' was built under R version 3.5.3
## Warning: package 'dplyr' was built under R version 3.5.3
## Warning: package 'forcats' was built under R version 3.5.3
## -- Conflicts ------------------------------------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library("ggplot2")
# Print the msleep dataset.
msleep
# Check which classes the object carries.
class(msleep)
## [1] "tbl_df" "tbl" "data.frame"
Remove NA values from vore
# Keep only the rows whose vore value is not missing.
msleep.clean <- msleep %>%
  filter(!is.na(vore))
msleep.clean
# Basic scatterplot of total sleep against body weight, stored for reuse.
scatterplot <- ggplot(data = msleep.clean, mapping = aes(x = bodywt, y = sleep_total)) +
  geom_point()
scatterplot
If we don’t want to save the filtered version we can pipe straight from the filter into ggplot without saving.
# Filter and plot in a single pipeline; nothing is assigned.
msleep %>%
  filter(!is.na(vore)) %>%
  ggplot(mapping = aes(x = bodywt, y = sleep_total)) +
  geom_point()
# Colour mapped in ggplot(): the mapping is shared by the layers added to it.
ggplot(msleep.clean, aes(x = bodywt, y = sleep_total, colour = vore)) +
  geom_point()
# Colour mapped inside geom_point(): the mapping applies to that layer only.
ggplot(msleep.clean, aes(x = bodywt, y = sleep_total)) +
  geom_point(mapping = aes(colour = vore))
scatterplot <- ggplot(msleep.clean, aes(x = bodywt, y = sleep_total, colour = vore)) +
  geom_point()
# Option 1: keep the raw values and put the x axis on a log10 scale.
scatterplot + scale_x_log10()
# Option 2: log-transform the values themselves inside aes().
ggplot(msleep.clean, aes(x = log(bodywt), y = sleep_total, colour = vore)) +
  geom_point()
# Bigger points plus axis labels and a title.
scatterplot <- ggplot(msleep.clean, aes(x = log(bodywt), y = sleep_total, colour = vore)) +
  geom_point(size = 4) +
  xlab("Log Body Weight") +
  ylab("Total Hours Sleep") +
  ggtitle("Some Sleep Data")
scatterplot
# Make theme_bw() with a base font size of 18 the default theme.
theme_set(theme_bw(base_size = 18))
# Centre the title.
scatterplot <- scatterplot + theme(plot.title = element_text(hjust = 0.5))
scatterplot
This adds to the previous plot rather than recreating it.
# Brewer colour palette with a legend title and readable labels, plus
# explicit axis breaks.
scatterplot <- scatterplot +
  scale_colour_brewer(
    name = "Trophic levels",
    palette = "Set1",
    labels = c("Carnivore", "Herbivore", "Insectivore", "Omnivore")
  ) +
  scale_x_continuous(breaks = -5:10) +
  scale_y_continuous(breaks = seq(0, 20, by = 2))
scatterplot
# Swap the Brewer palette for hand-picked colours.
scatterplot <- scatterplot +
  scale_color_manual(
    name = "Trophic levels",
    values = c("chocolate3", "chartreuse3", "darkorchid2", "cyan3"),
    labels = c("Carnivore", "Herbivore", "Insectivore", "Omnivore")
  )
## Scale for 'colour' is already present. Adding another scale for
## 'colour', which will replace the existing scale.
scatterplot
Tadah! So beautiful :-)
The file up_down_expression.txt contains an expression comparison dataset with an extra column which classifies the rows into one of 3 groups (up, down or unchanging).
Load: “up_down_expression.txt” Check the structure of the file Plot a scatterplot geom_point() with: up in red, down in blue unchanging in grey Main title: “Expression data” Colour legend: “Down, Unchanging and Up” Axis labels: “Condition 1” and “Condition 2”
# Load the tab-separated expression data.
expression <- read_tsv("up_down_expression.txt")
## Parsed with column specification:
## cols(
## Gene = col_character(),
## Condition1 = col_double(),
## Condition2 = col_double(),
## State = col_character()
## )
expression
# Scatterplot of the two conditions, coloured by State.
expression.scatter <- ggplot(
  expression,
  aes(x = Condition1, y = Condition2, colour = State)
) +
  geom_point() +
  scale_colour_manual(
    name = "State",
    values = c("blue", "grey", "red"),
    labels = c("Down", "Unchanging", "Up")
  ) +
  labs(x = "Condition 1", y = "Condition 2") +
  ggtitle("Expression data") +
  theme(plot.title = element_text(hjust = 0.5))
expression.scatter
Now, let’s try another type of graph: a stripchart. It is similar to a scatterplot, but the x variable is qualitative (categorical).
# Plain stripchart: one column of points per vore category.
ggplot(msleep.clean, aes(x = vore, y = sleep_total)) +
  geom_point()
# Same plot with larger, jittered points coloured by category.
ggplot(msleep.clean, aes(x = vore, y = sleep_total, colour = vore)) +
  geom_point(size = 4, position = "jitter")
# geom_jitter() lets us set the amount of horizontal jitter directly.
stripchart <- ggplot(msleep.clean, aes(x = vore, y = sleep_total, colour = vore)) +
  geom_jitter(size = 4, width = 0.2)
stripchart
# Mark the mean of each group: an error bar whose ymin and ymax are both the
# summarised value.
stripchart <- stripchart +
  stat_summary(
    mapping = aes(ymin = ..y.., ymax = ..y..),
    geom = "errorbar",
    fun.y = "mean",
    colour = "grey25",
    width = 0.6,
    size = 1.5
  )
stripchart
A little piece of code to calculate the mean and SEM the tidyverse way.
# Mean and standard error of the mean of sleep_total for each vore group.
msleep.clean %>%
  group_by(vore) %>%
  summarise(
    sleep = mean(sleep_total),
    sem = sd(sleep_total) / sqrt(n())
  )
# Titles, axis breaks, readable category names; the legend is hidden.
stripchart <- stripchart +
  labs(x = "Trophic Levels", y = "Total Hours Sleep") +
  ggtitle("Some Sleep Data") +
  scale_y_continuous(breaks = seq(0, 20, by = 2)) +
  scale_x_discrete(
    labels = c("Carnivore", "Herbivore", "Insectivore", "Omnivore")
  ) +
  theme(legend.position = "none")
stripchart
# Switch to the Dark2 palette, reorder the categories on the x axis and
# centre the title. The labels are listed in the same order as the limits so
# that every category is shown under its own name.
stripchart <- stripchart +
  scale_colour_brewer(palette = "Dark2") +
  scale_x_discrete(
    limits = c("insecti", "omni", "carni", "herbi"),
    labels = c("Insectivore", "Omnivore", "Carnivore", "Herbivore")
  ) +
  theme(plot.title = element_text(hjust = 0.5))
## Scale for 'x' is already present. Adding another scale for 'x', which
## will replace the existing scale.
stripchart
library("ggthemes")
## Warning: package 'ggthemes' was built under R version 3.5.3
# Apply the ggthemes WSJ theme and colour scale; re-apply the legend and
# title settings after the theme call.
stripchart +
  theme_wsj() +
  scale_colour_wsj(palette = "colors6") +
  theme(
    legend.position = "none",
    plot.title = element_text(hjust = 0.5)
  )
## Scale for 'colour' is already present. Adding another scale for
## 'colour', which will replace the existing scale.
Now, let’s try some other data. The DownloadFestival data contains the hygiene scores (0-5) of 810 concert goers over three days of a music festival.
# Load the festival hygiene scores.
festival.data <- read_csv("DownloadFestival.csv")
## Parsed with column specification:
## cols(
## ticknumb = col_double(),
## gender = col_character(),
## day1 = col_double(),
## day2 = col_double(),
## day3 = col_double()
## )
festival.data
# Largest day 1 score.
max(festival.data$day1)
## [1] 3.69
# Histogram of the day 1 scores with the default binning.
ggplot(festival.data, aes(x = day1)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Let’s make it prettier.
# Same histogram with an explicit bin width.
ggplot(festival.data, aes(x = day1)) +
  geom_histogram(binwidth = 0.3)
Now change colour and add titles to axes.
# Yellow bars with a black outline, axis labels and a centred title.
Day1Histogram <- ggplot(festival.data, aes(x = day1)) +
  geom_histogram(binwidth = 0.3, colour = "black", fill = "yellow") +
  labs(x = "Score", y = "Counts") +
  theme(plot.title = element_text(hjust = 0.5)) +
  ggtitle("Hygiene at Day 1")
Day1Histogram
Now we want to plot all 3 days per gender. So we need to reshape the file. We are also going to remove the NAs.
# Reshape to long format (one row per ticket and day) and drop the missing
# scores in the same pipeline.
festival.data.stack <- festival.data %>%
  gather(key = day, value = score, -ticknumb, -gender) %>%
  filter(!is.na(score))
festival.data.stack
# One histogram panel per gender (rows) and day (columns).
histogram.3days <- ggplot(festival.data.stack, aes(x = score)) +
  geom_histogram(binwidth = 0.3, colour = "black", fill = "yellow") +
  labs(x = "Hygiene score", y = "Counts") +
  facet_grid(gender ~ day)
histogram.3days
It is possible to modify the labels of the facets. Here are some examples.
# Same faceted histogram, with the facet strip text restyled through theme().
histogram.3days <- ggplot(festival.data.stack, aes(x = score)) +
  geom_histogram(binwidth = 0.3, colour = "black", fill = "yellow") +
  labs(x = "Hygiene score", y = "Counts") +
  facet_grid(gender ~ day) +
  theme(
    strip.text.x = element_text(size = 16, colour = "purple", face = "bold"),
    strip.text.y = element_text(size = 12, face = "bold")
  )
histogram.3days
Density plots as below.
# Semi-transparent density curves, one per day, in one panel per gender.
density.3days <- ggplot(festival.data.stack, aes(x = score)) +
  geom_density(mapping = aes(group = day, fill = day), alpha = 0.5) +
  facet_grid(~gender)
density.3days
Plot a stripchart representing all 3 days and each gender
# Jittered scores per gender, one panel per day, with a black bar at the mean.
stripchart <- ggplot(
  festival.data.stack,
  aes(x = gender, y = score, colour = gender)
) +
  facet_grid(~day) +
  geom_point(position = "jitter") +
  scale_colour_manual(values = c("darkorange", "darkorchid4")) +
  stat_summary(
    mapping = aes(ymin = ..y.., ymax = ..y..),
    fun.y = mean,
    geom = "errorbar",
    colour = "black",
    width = 0.8,
    size = 1.5
  ) +
  labs(x = "Gender", y = "Score") +
  theme(legend.position = "none")
stripchart
From a stripchart, we can add a line for the mean or any other descriptive geom as a stat summary.
# The same stripchart without any summary layer, used as a base for the
# plots that follow.
stripchart <- ggplot(
  festival.data.stack,
  aes(x = gender, y = score, colour = gender)
) +
  facet_grid(~day) +
  geom_point(position = "jitter") +
  scale_colour_manual(values = c("darkorange", "darkorchid4")) +
  labs(x = "Gender", y = "Score") +
  theme(legend.position = "none")
stripchart
We saw how to add a mean:
stripchart +
  stat_summary(
    mapping = aes(ymin = ..y.., ymax = ..y..),
    fun.y = "mean",
    geom = "errorbar",
    colour = "black",
    width = 0.8,
    size = 1.3
  )
Now let’s add a boxplot.
# alpha = 0 makes the box fill fully transparent so the points stay visible.
stripchart +
  geom_boxplot(colour = "black", alpha = 0)
We can make it prettier.
# Half-transparent boxes filled with the same colours as the points.
stripchart +
  geom_boxplot(
    mapping = aes(x = gender, y = score, fill = gender),
    colour = "black",
    alpha = 0.5
  ) +
  scale_fill_manual(values = c("darkorange", "darkorchid4"))
Speaking of making graphs prettier, we can improve on the boxplot.
We can change the order on the x-axis if we want (run this once the boxplot object below has been created):
boxplot + scale_x_discrete(limits = c("Male", "Female"))
# Basic boxplot of score by gender, one panel per day.
boxplot <- ggplot(festival.data.stack, aes(x = gender, y = score)) +
  geom_boxplot() +
  facet_grid(~day)
boxplot
# Prettier version: an errorbar layer from stat_boxplot() drawn under filled
# boxes, star-shaped outliers, custom fill colours and no legend.
boxplot <- ggplot(festival.data.stack, aes(x = gender, y = score, fill = gender)) +
  facet_grid(~day) +
  stat_boxplot(geom = "errorbar", width = 0.5) +
  geom_boxplot(outlier.shape = 8) +
  theme(legend.position = "none") +
  scale_fill_manual(values = c("sienna1", "darkorchid3 ")) +
  labs(x = "Gender", y = "Score")
boxplot
# Overlay an unfilled violin outline on the stripchart.
stripchart +
  geom_violin(colour = "black", alpha = 0)
Basic command
violinplot <- ggplot(festival.data.stack, aes(x = gender, y = score)) +
  geom_violin() +
  facet_grid(~day)
violinplot
Prettier:
# Untrimmed, filled violins with a black point at the median of each group.
violinplot <- ggplot(festival.data.stack, aes(x = gender, y = score, fill = gender)) +
  facet_grid(~day) +
  geom_violin(trim = FALSE) +
  scale_fill_manual(values = c("goldenrod2", "darkgrey")) +
  theme(legend.position = "none") +
  stat_summary(fun.y = median, geom = "point", colour = "black", size = 2) +
  labs(x = "Gender", y = "Hygiene scores")
violinplot
# Variations: add a narrow boxplot, or the individual points, on top.
violinplot + geom_boxplot(width = 0.3)
violinplot + geom_jitter(width = 0.1, size = 1, shape = 1)
First we want to calculate the means and SEMs and store the values in a new object.
# Mean score and standard error of the mean for each gender/day combination.
score.sem <- festival.data.stack %>%
  group_by(gender, day) %>%
  summarise(
    mean = mean(score),
    sem = sd(score) / sqrt(n())
  )
score.sem
# Default position: the genders are stacked within each day.
barchart <- ggplot(score.sem, aes(x = day, y = mean, fill = gender)) +
  geom_bar(stat = "identity")
barchart
# Side-by-side bars with mean +/- SEM error bars.
barchart <- ggplot(score.sem, aes(x = day, y = mean, fill = gender)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_errorbar(
    mapping = aes(ymin = mean - sem, ymax = mean + sem),
    position = "dodge"
  )
barchart
barchart <- ggplot(score.sem, aes(x = day, y = mean, fill = gender)) +
  geom_bar(position = "dodge", stat = "identity") +
  geom_errorbar(
    mapping = aes(ymin = mean - sem, ymax = mean + sem),
    position = "dodge"
  )
barchart
# Polished bar chart: outlined bars, narrower error bars dodged by 0.9,
# a title, custom fill colours and no x-axis title.
barchart <- ggplot(score.sem, aes(x = day, y = mean, fill = gender)) +
  geom_bar(stat = "identity", position = "dodge", colour = "black", size = 1) +
  geom_errorbar(
    mapping = aes(ymin = mean - sem, ymax = mean + sem),
    position = position_dodge(0.9),
    width = 0.5,
    size = 1
  ) +
  ylab("Mean scores") +
  ggtitle("Levels of hygiene over 3 days of concert") +
  theme(
    plot.title = element_text(hjust = 0.5, size = 19),
    axis.title.x = element_blank()
  ) +
  scale_fill_manual(values = c("darkorange3", "darkorchid4"), name = "Gender")
barchart
So beautiful!
# One line per gender across the days, with mean +/- SEM error bars.
linegraph <- ggplot(score.sem, aes(x = day, y = mean, group = gender)) +
  geom_line() +
  geom_point() +
  geom_errorbar(mapping = aes(ymin = mean - sem, ymax = mean + sem))
linegraph
# Same graph coloured by gender, with thicker lines and bigger points.
linegraph <- ggplot(score.sem, aes(x = day, y = mean, colour = gender, group = gender)) +
  geom_line(size = 1.5) +
  geom_point(size = 4) +
  geom_errorbar(
    mapping = aes(ymin = mean - sem, ymax = mean + sem),
    width = 0.2,
    size = 1.5
  )
linegraph
Change the position of the legend.
# Final line graph: labels, y-axis breaks, custom colours, and the legend
# placed at c(0.85, 0.9) with a transparent background.
linegraph <- ggplot(score.sem, aes(x = day, y = mean, colour = gender, group = gender)) +
  geom_line(size = 1.5) +
  geom_point(size = 5) +
  geom_errorbar(
    mapping = aes(ymin = mean - sem, ymax = mean + sem),
    width = 0.2,
    size = 1.5
  ) +
  labs(x = "", y = "Mean scores") +
  scale_y_continuous(breaks = seq(0, 2, by = 0.2)) +
  ggtitle("Levels of hygiene over 3 days of concert") +
  scale_colour_manual(values = c("purple", "darkorange3"), name = "") +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.position = c(0.85, 0.9),
    legend.text = element_text(size = 14),
    legend.background = element_rect(fill = "transparent")
  )
linegraph
The file contains positional count data for 3 different datasets (a WT and two mutants).
Plot a graph showing all 3 datasets on the same plot
Load: “chromosome_position_data.txt” Check the structure of the file Restructure the file from wide to long format gather() Rename the column: “Genotype” and “Value” Plot a basic line graph
# Load the positional count data.
chromosome <- read_tsv("chromosome_position_data.txt")
## Parsed with column specification:
## cols(
## Position = col_double(),
## Mut1 = col_double(),
## Mut2 = col_double(),
## WT = col_double()
## )
chromosome
# Wide to long: every column except Position becomes Genotype/Value pairs.
chromosome.long <- chromosome %>%
  gather(key = Genotype, value = Value, -Position)
chromosome.long
# One thick line per genotype.
chromosome.linegraph <- ggplot(
  chromosome.long,
  aes(x = Position, y = Value, group = Genotype, colour = Genotype)
) +
  geom_line(size = 2)
chromosome.linegraph
Plot a graph showing the relationship between age and weight for a typical baby over the first 9 months.
Load: weight_chart.txt. Check the structure of the file. Plot a basic line graph. Plot a prettier version: change the size and the colour of the points; change the thickness and the colour of the line; change the y-axis: scale from 2 to 10 kg; change the x-axis: scale from 0 to 10 months; change the labels on both axes; add a title to the graph.
# Load the tab-separated weight chart and print it.
weight <- read_tsv(file = "weight_chart.txt")
## Parsed with column specification:
## cols(
## Age = col_double(),
## Weight = col_double()
## )
weight
Basic graph:
weight.linegraph <- ggplot(weight, aes(x = Age, y = Weight)) +
  geom_line() +
  geom_point()
weight.linegraph
Pretty graph:
# Styled line and points, fixed axis ranges with integer breaks, labels and
# a centred title.
weight.linegraph <- ggplot(weight, aes(x = Age, y = Weight)) +
  geom_line(colour = "lightblue2", size = 1) +
  geom_point(colour = "darkorchid1", size = 3, shape = 16) +
  scale_y_continuous(limits = c(2, 10), breaks = 2:10) +
  scale_x_continuous(limits = c(0, 10), breaks = 0:10) +
  labs(x = "Age (months)", y = "Weight (kg)") +
  ggtitle("Relation between age and weight") +
  theme(plot.title = element_text(hjust = 0.5))
weight.linegraph
The file brain_bodyweight.txt contains data for the log10 brain and bodyweight for a range of species, along with an SEM measure for each point.
Plot these data on a scatterplot with error bars showing the mean +/- SEM and the names of the datasets under each point.
Load: brain_bodyweight.txt. Check the structure of the file. Plot a basic graph. You will need: geom_errorbarh() for the horizontal error bars and geom_text() for the labels.
Plot a prettier version.
# Load the brain/body weight data and print it.
brain.bodyweight <- read_tsv(file = "brain_bodyweight.txt")
## Parsed with column specification:
## cols(
## Species = col_character(),
## Bodyweight = col_double(),
## Brainweight = col_double(),
## Bodyweight.SEM = col_double(),
## Brainweight.SEM = col_double()
## )
brain.bodyweight
Basic
# Points with vertical and horizontal error bars of +/- one SEM, labelled
# with the species name.
brain.bodyweight.graph <- ggplot(brain.bodyweight, aes(x = Bodyweight, y = Brainweight)) +
  geom_point() +
  geom_errorbar(
    mapping = aes(
      ymin = Brainweight - Brainweight.SEM,
      ymax = Brainweight + Brainweight.SEM
    )
  ) +
  geom_errorbarh(
    mapping = aes(
      xmin = Bodyweight - Bodyweight.SEM,
      xmax = Bodyweight + Bodyweight.SEM
    )
  ) +
  geom_text(mapping = aes(label = Species), hjust = 1.05, vjust = -0.6, size = 2.7)
brain.bodyweight.graph
# Same graph with coloured, thicker error bars and larger points on top.
brain.bodyweight.graph <- ggplot(brain.bodyweight, aes(x = Bodyweight, y = Brainweight)) +
  geom_point() +
  geom_errorbar(
    mapping = aes(
      ymin = Brainweight - Brainweight.SEM,
      ymax = Brainweight + Brainweight.SEM
    ),
    colour = "tomato3",
    width = 0.1,
    size = 1
  ) +
  geom_errorbarh(
    mapping = aes(
      xmin = Bodyweight - Bodyweight.SEM,
      xmax = Bodyweight + Bodyweight.SEM
    ),
    colour = "tomato3",
    height = 0.1,
    size = 1
  ) +
  geom_point(size = 2) +
  geom_text(mapping = aes(label = Species), hjust = 1.1, vjust = -0.6, size = 2.7)
brain.bodyweight.graph
Prettier with ggrepel
library("ggrepel")
## Warning: package 'ggrepel' was built under R version 3.5.3
# Error bars first, filled points on top, and ggrepel labels for the species.
brain.bodyweight.graph <- ggplot(brain.bodyweight, aes(x = Bodyweight, y = Brainweight)) +
  geom_errorbar(
    mapping = aes(
      ymin = Brainweight - Brainweight.SEM,
      ymax = Brainweight + Brainweight.SEM
    ),
    colour = "grey28",
    width = 0.1,
    size = 0.5
  ) +
  geom_errorbarh(
    mapping = aes(
      xmin = Bodyweight - Bodyweight.SEM,
      xmax = Bodyweight + Bodyweight.SEM
    ),
    colour = "grey28",
    height = 0.1,
    size = 0.5
  ) +
  geom_point(shape = 21, size = 3, colour = "black", fill = "maroon3") +
  geom_label_repel(
    mapping = aes(label = Species),
    box.padding = 0.6,
    point.padding = 0.5,
    fill = "mintcream",
    segment.colour = "grey",
    size = 3
  )
brain.bodyweight.graph
# Load the stages-of-change data and print it.
Changing <- read_csv(file = "Changing.csv")
## Parsed with column specification:
## cols(
## Type.of.Behaviour = col_character(),
## Sample.Size = col_double(),
## Stage.of.Change = col_character()
## )
Changing
Basic
# Bars of Sample.Size per behaviour, filled by stage of change.
stackedBar <- ggplot(
  Changing,
  aes(x = Type.of.Behaviour, y = Sample.Size, fill = Stage.of.Change)
) +
  geom_bar(stat = "identity")
stackedBar
Changing the order of the levels: factor(variable name, levels = c(“”, “” .))
Rotate the graph to read the x-axis labels: coord_flip()
# Turn the stage into a factor with an explicit level order.
Changing$Stage.of.Change <- factor(
  Changing$Stage.of.Change,
  levels = c("Maintenance", "Action", "Preparation", "Contemplation", "Precontemplation")
)
# Outlined bars, with the coordinates flipped.
stackedBar <- ggplot(
  Changing,
  aes(x = Type.of.Behaviour, y = Sample.Size, fill = Stage.of.Change)
) +
  geom_bar(stat = "identity", colour = "black") +
  coord_flip()
stackedBar
Prettier:
# Title, axis and legend labels, a Brewer fill palette, and text sizes set in
# a single theme() call.
stackedBar <- stackedBar +
  labs(
    title = "Stages for Each of the 12 Problem Behaviours",
    y = "Sample Size",
    fill = "Stages of Change"
  ) +
  scale_fill_brewer(palette = 4) +
  theme(
    plot.title = element_text(hjust = 0.5, size = 12, face = "bold"),
    axis.title.y = element_blank(),
    axis.title.x = element_text(size = 10),
    axis.text.x = element_text(size = 10),
    axis.text.y = element_text(size = 9),
    legend.text = element_text(size = 8),
    legend.title = element_text(size = 10, face = "bold")
  )
stackedBar
With a divergent palette
stackedBar +
  scale_fill_brewer(palette = "RdYlGn", direction = -1)
## Scale for 'fill' is already present. Adding another scale for 'fill',
## which will replace the existing scale.
Let’s plot the same data but as percentages
Plot the Changing data as percentages.
Change the format of the file into contingency xtabs() Calculate the percentages prop.table() Change the format into a dataframe as.data.frame() Check your file head() Plot the data as before using a suitable divergent palette
# Cross-tabulate sample size by behaviour (rows) and stage (columns), then
# express each row as percentages of its total.
contingency.table100 <- 100 * prop.table(
  xtabs(Sample.Size ~ Type.of.Behaviour + Stage.of.Change, data = Changing),
  margin = 1
)
contingency.table100
## Stage.of.Change
## Type.of.Behaviour Maintenance Action Preparation Contemplation
## Adolescent delinquency 25.786164 27.044025 0.000000 28.930818
## Condom use 35.294118 6.191950 0.000000 17.956656
## Exercise acquisition 19.386332 14.086471 25.383543 33.751743
## High fat diet 56.666667 2.777778 0.000000 17.777778
## Mammography screening 42.553191 18.439716 0.000000 17.021277
## Physicians'practices 49.629630 1.481481 2.222222 14.814815
## Quitting cocaine 39.743590 45.512821 0.000000 9.615385
## Radon gas exposure 0.000000 8.166189 0.000000 17.335244
## Safer sex 0.000000 47.887324 0.000000 7.981221
## Smoking cessation 36.538462 19.822485 0.000000 27.662722
## Sunscreen use 35.242291 4.405286 0.000000 7.929515
## Weight control 14.634146 17.886179 0.000000 52.845528
## Stage.of.Change
## Type.of.Behaviour Precontemplation
## Adolescent delinquency 18.238994
## Condom use 40.557276
## Exercise acquisition 7.391911
## High fat diet 22.777778
## Mammography screening 21.985816
## Physicians'practices 31.851852
## Quitting cocaine 5.128205
## Radon gas exposure 74.498567
## Safer sex 44.131455
## Smoking cessation 15.976331
## Sunscreen use 52.422907
## Weight control 14.634146
# Convert the table to a data frame for plotting.
Changing.percent <- as.data.frame(x = contingency.table100)
Changing.percent
Plot the data as percentages.
# Flipped stacked bars of the percentages with a reversed Spectral palette.
stackedBar.percent <- ggplot(
  Changing.percent,
  aes(x = Type.of.Behaviour, y = Freq, fill = Stage.of.Change)
) +
  geom_bar(stat = "identity", colour = "black") +
  coord_flip() +
  scale_fill_brewer(palette = "Spectral", direction = -1) +
  labs(title = "Stages for Each of the 12 Problem Behaviours", y = "Frequencies") +
  theme(
    plot.title = element_text(hjust = 0.5, size = 12, face = "bold"),
    axis.title.y = element_blank(),
    axis.title.x = element_text(size = 10),
    axis.text.x = element_text(size = 10),
    axis.text.y = element_text(size = 9),
    legend.text = element_text(size = 8),
    legend.title = element_text(size = 10, face = "bold")
  )
stackedBar.percent
# Save the line graph created earlier (the object is called linegraph, in
# lower case) as a PNG file, naming both arguments explicitly.
Linegraph.saved <- ggsave(filename = "Line.png", plot = linegraph)