GitHub - eclarke/ggbeeswarm: Column scatter / beeswarm-style plots in ggplot2 (original) (raw)

Beeswarm-style plots with ggplot2

Build Status CRAN status

Introduction

Beeswarm plots (aka column scatter plots or violin scatter plots) are a way of plotting points that would ordinarily overlap so that they fall next to each other instead. In addition to reducing overplotting, this helps visualize the density of the data at each point (similar to a violin plot), while still showing each data point individually.

ggbeeswarm provides two different methods to create beeswarm-style plots using ggplot2. It does this by adding two new ggplot geom objects: geom_quasirandom() and geom_beeswarm().

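Features:

- Can handle categorical variables on the y-axis
- Automatically dodges points when a categorical grouping variable is mapped and dodge.width is specified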

See the examples below.

Installation

This package is on CRAN, so installation should be as simple as:

install.packages('ggbeeswarm')

If you want the development version from GitHub, you can do:

devtools::install_github("eclarke/ggbeeswarm")

Examples

Here is a comparison between geom_jitter and geom_quasirandom on the iris dataset:

set.seed(12345)
library(ggplot2)
library(ggbeeswarm)
#compare to jitter
ggplot(iris,aes(Species, Sepal.Length)) + geom_jitter()

ggplot(iris,aes(Species, Sepal.Length)) + geom_quasirandom()

geom_quasirandom()

Using geom_quasirandom:

#default geom_quasirandom
ggplot(mpg,aes(class, hwy)) + geom_quasirandom()

With categorical y-axis

ggplot(mpg,aes(hwy, class)) + geom_quasirandom(groupOnX=FALSE)

Some groups may have only a few points. Use varwidth=TRUE to adjust width dynamically.

ggplot(mpg,aes(class, hwy)) + geom_quasirandom(varwidth = TRUE)

Automatic dodging

sub_mpg <- mpg[mpg$class %in% c("midsize", "pickup", "suv"),]
ggplot(sub_mpg, aes(class, displ, color=factor(cyl))) + geom_quasirandom(dodge.width=1)

Alternative methods

geom_quasirandom can also use several other methods to distribute points. For example:

ggplot(iris, aes(Species, Sepal.Length)) + geom_quasirandom(method = "tukey") + ggtitle("Tukey texture")

ggplot(iris, aes(Species, Sepal.Length)) + geom_quasirandom(method = "tukeyDense") + ggtitle("Tukey + density")

ggplot(iris, aes(Species, Sepal.Length)) + geom_quasirandom(method = "frowney") + ggtitle("Banded frowns")

ggplot(iris, aes(Species, Sepal.Length)) + geom_quasirandom(method = "smiley") + ggtitle("Banded smiles")

ggplot(iris, aes(Species, Sepal.Length)) + geom_quasirandom(method = "pseudorandom") + ggtitle("Jittered density")

ggplot(iris, aes(Species, Sepal.Length)) + geom_beeswarm() + ggtitle("Beeswarm")

geom_beeswarm()

Using geom_beeswarm:

ggplot(iris,aes(Species, Sepal.Length)) + geom_beeswarm()

ggplot(iris,aes(Species, Sepal.Length)) + geom_beeswarm(side = 1L)

ggplot(mpg,aes(class, hwy)) + geom_beeswarm(size=.5)

With categorical y-axis

ggplot(mpg,aes(hwy, class)) + geom_beeswarm(size=.5)

Also watch out for points escaping from the plot with geom_beeswarm; expanding the discrete axis can give them more room:

ggplot(mpg,aes(hwy, class)) + geom_beeswarm(size=.5) + scale_y_discrete(expand=expansion(add=c(0.5,1)))

ggplot(mpg,aes(class, hwy)) + geom_beeswarm(size=1.1)

With automatic dodging

ggplot(sub_mpg, aes(class, displ, color=factor(cyl))) + geom_beeswarm(dodge.width=0.5)

Alternative methods

df <- data.frame(
  x = "A",
  y = sample(1:100, 200, replace = TRUE)
)
ggplot(df, aes(x = x, y = y)) + geom_beeswarm(cex = 2.5, method = "swarm") + ggtitle('method = "swarm" (default)')

ggplot(df, aes(x = x, y = y)) + geom_beeswarm(cex = 2.5, method = "compactswarm") + ggtitle('method = "compactswarm"')

ggplot(df, aes(x = x, y = y)) + geom_beeswarm(cex = 2.5, method = "hex") + ggtitle('method = "hex"')

ggplot(df, aes(x = x, y = y)) + geom_beeswarm(cex = 2.5, method = "square") + ggtitle('method = "square"')

ggplot(df, aes(x = x, y = y)) + geom_beeswarm(cex = 2.5, method = "center") + ggtitle('method = "center"')

Different point distribution priority

#With different beeswarm point distribution priority
dat<-data.frame(x=rep(1:3,c(20,40,80)))
dat$y<-rnorm(nrow(dat),dat$x)
ggplot(dat,aes(x,y)) + geom_beeswarm(cex=2) + ggtitle('Default (ascending)') + scale_x_continuous(expand=expansion(add=c(0.5,.5)))

ggplot(dat,aes(x,y)) + geom_beeswarm(cex=2,priority='descending') + ggtitle('Descending') + scale_x_continuous(expand=expansion(add=c(0.5,.5)))

ggplot(dat,aes(x,y)) + geom_beeswarm(cex=2,priority='density') + ggtitle('Density') + scale_x_continuous(expand=expansion(add=c(0.5,.5)))

ggplot(dat,aes(x,y)) + geom_beeswarm(cex=2,priority='random') + ggtitle('Random') + scale_x_continuous(expand=expansion(add=c(0.5,.5)))

Corral runaway points

set.seed(1995)
df2 <- data.frame(
  y = rnorm(1000),
  id = sample(c("G1", "G2", "G3"), size = 1000, replace = TRUE)
)
p <- ggplot(df2, aes(x = id, y = y, colour = id))

Use corral.width to control the width of the corral:

p + geom_beeswarm(cex = 2.5, corral = "none", corral.width = 0.9) + ggtitle('corral = "none" (default)')

p + geom_beeswarm(cex = 2.5, corral = "gutter", corral.width = 0.9) + ggtitle('corral = "gutter"')

p + geom_beeswarm(cex = 2.5, corral = "wrap", corral.width = 0.9) + ggtitle('corral = "wrap"')

p + geom_beeswarm(cex = 2.5, corral = "random", corral.width = 0.9) + ggtitle('corral = "random"')

p + geom_beeswarm(cex = 2.5, corral = "omit", corral.width = 0.9) + ggtitle('corral = "omit"')

## Warning: Removed 303 rows containing missing values (geom_point).


Authors: Erik Clarke, Scott Sherrill-Mix, and Charlotte Dawson