3.1 Introduction
3.3 Looking for features
3.4 Comparing distributions by subgroups
Introduction | 28
# Load the btw2009 data and add Linke2 = 100 * LINKE2 / valid2, the
# percentage plotted below.
data(btw2009, package = "flexclust")
btw2009 <- within(btw2009, Linke2 <- 100 * LINKE2 / valid2)

# Histogram with bins one percentage point wide.
# geom_histogram() is used because geom_bar() no longer accepts `binwidth`:
# since ggplot2 2.0.0 geom_bar() counts values and binning is done by
# geom_histogram().
ggplot(btw2009, aes(Linke2)) +
  geom_histogram(binwidth = 1, fill = "mediumpurple") +
  ylab("") +
  xlab("Percentage voter support for Die Linke in 2009")
Looking for features | 30
# Load the galton data and draw base-graphics histograms of its `child` and
# `parent` columns next to each other.
data(galton, package = "UsingR")
ht <- "height (in)"

# One row, two panels; horizontal axis labels (las = 1); tightened margins.
par(mfrow = c(1, 2), las = 1, mar = c(3.1, 4.1, 1.1, 2.1))

# Title and x label are given explicitly, so the default labels derived from
# the argument expression are never shown.
hist(galton$child, xlab = ht, main = "Children", col = "green")
hist(galton$parent, xlab = ht, main = "Parents", col = "blue")
31
# Redraw both galton variables side by side with a very narrow bin width
# (h = 0.1).
par(mfrow = c(1, 2), mar = c(3.1, 4.1, 1.1, 2.1))

# with() is kept on purpose: truehist() builds its default x-axis label from
# the expression passed as data, so the bare column name must be passed.
with(galton, {
  MASS::truehist(child, h = 0.1)
  MASS::truehist(parent, h = 0.1)
})
33
# Histograms of the galton `child` and `parent` columns on identical axis
# limits, each with a red vertical line at that variable's median, stacked
# one above the other.
# geom_histogram() is used because geom_bar() no longer accepts `binwidth`:
# since ggplot2 2.0.0 geom_bar() counts values and binning is done by
# geom_histogram().
c1 <- ggplot(galton, aes(child)) +
  geom_histogram(binwidth = 1) +
  xlim(60, 75) +
  ylim(0, 225) +
  ylab("") +
  geom_vline(xintercept = median(galton$child), col = "red")

p1 <- ggplot(galton, aes(parent)) +
  geom_histogram(binwidth = 1) +
  xlim(60, 75) +
  ylim(0, 225) +
  ylab("") +
  geom_vline(xintercept = median(galton$parent), col = "red")

# grid.arrange() is not defined in this chunk.
# NOTE(review): presumably gridExtra is attached earlier in the file - confirm.
grid.arrange(c1, p1)
34
# Load the father.son data and draw density-scaled histograms of `sheight`
# and `fheight`, each overlaid with a kernel density estimate, on identical
# axis limits.
# NOTE(review): `..density..` is the older ggplot2 notation for a computed
# variable; kept as-is to match the ggplot2 version this file targets.
data(father.son, package = "UsingR")

c2 <- ggplot(father.son, aes(sheight)) +
  geom_histogram(aes(y = ..density..), binwidth = 1) +
  geom_density() +
  xlim(58, 80) +
  ylim(0, 0.16) +
  labs(x = "ht (inches)", y = "", title = "Sons")

p2 <- ggplot(father.son, aes(fheight)) +
  geom_histogram(aes(y = ..density..), binwidth = 1) +
  geom_density() +
  xlim(58, 80) +
  ylim(0, 0.16) +
  labs(x = "ht (inches)", y = "", title = "Fathers")

# Place the two panels in a single row.
grid.arrange(c2, p2, nrow = 1)
35
# Normal Q-Q plots with reference lines for `sheight` and `fheight`, using
# the same y-range for both. No par() call is made here, so the plots go
# into whatever panel layout is currently active.
qqnorm(father.son$sheight, main = "Sons", xlab = "", ylab = "",
       pch = 16, ylim = c(55, 80))
qqline(father.son$sheight)

qqnorm(father.son$fheight, main = "Fathers", xlab = "", ylab = "",
       pch = 16, ylim = c(55, 80))
qqline(father.son$fheight)
36
# Reset to a single panel and draw a horizontal boxplot of the `time` column
# of MASS::hills, with filled points (pch = 16) for the outliers.
par(mfrow = c(1, 1), mar = c(3.1, 4.1, 1.1, 2.1))
boxplot(MASS::hills$time, horizontal = TRUE, pch = 16, ylim = c(0, 220))
37
# Histogram of `medv` from MASS::Boston with the default binning.
# geom_histogram() is used because `medv` is continuous: since ggplot2 2.0.0
# geom_bar() draws one bar per distinct value instead of binning.
ggplot(MASS::Boston, aes(medv)) +
  geom_histogram() +
  ylab("") +
  xlab("Median housing value (thousands of dollars)")
38
# Stack the Boston columns crim through medv into long form, with the
# variable name in BosVars and its value in BosValues, then draw one
# histogram per variable with independent axis scales.
library(tidyr)

B2 <- gather(MASS::Boston, key = BosVars, value = BosValues, crim:medv)

ggplot(B2, aes(BosValues)) +
  geom_histogram() +
  labs(x = "", y = "") +
  facet_wrap(~ BosVars, scales = "free")
41
# Density-scaled histogram of `thickness` from Hidalgo1872 in bins 0.001
# wide, overlaid with two kernel density estimates.
library(KernSmooth)
data(Hidalgo1872, package = "MMST")

par(las = 1, mar = c(3.1, 4.1, 1.1, 2.1))

# with() is kept so hist() sees the bare name `thickness`, from which it
# derives the default x-axis label.
with(Hidalgo1872, {
  hist(thickness, breaks = seq(0.055, 0.135, 0.001), freq = FALSE,
       main = "", col = "bisque2", ylab = "")
  # Solid black curve: density() with its default bandwidth.
  lines(density(thickness), lwd = 2)
  # Dashed red curve: binned estimate from bkde() with bandwidth chosen by dpik().
  lines(bkde(thickness, bandwidth = dpik(thickness)),
        col = "red", lty = 5, lwd = 2)
})
42
# Histogram of `length` from movies with the default binning.
# geom_histogram() is used because `length` is numeric: since ggplot2 2.0.0
# geom_bar() draws one bar per distinct value instead of binning.
# NOTE(review): `movies` is not loaded in this chunk - confirm it is
# available from an earlier part of the file.
ggplot(movies, aes(length)) +
  geom_histogram() +
  ylab("")
43
# Single horizontal boxplot of `length` from movies. The constant "var"
# gives the discrete axis one dummy category; its label and tick breaks are
# suppressed, and coord_flip() turns the box on its side.
ggplot(movies, aes(x = "var", y = length)) +
  geom_boxplot() +
  xlab("") +
  scale_x_discrete(breaks = NULL) +
  coord_flip()
44
# Histogram of `length` from movies in bins one unit wide, with the x-axis
# limited to 0-180.
ggplot(movies, aes(length)) +
  geom_histogram(binwidth = 1) +
  scale_x_continuous(limits = c(0, 180)) +
  xlab("Movie lengths in minutes") +
  ylab("")
Comparing distributions by subgroups | 45
# Copy `state` into a new column `Bundesland` and replace its levels with
# two-letter codes so the axis labels stay short.
# NOTE(review): the codes are assigned by position, so this assumes the
# existing level order of `state` matches the order below - confirm.
btw2009$Bundesland <- btw2009$state
levels(btw2009$Bundesland) <- c(
  "BW", "BY", "BE", "BB", "HB", "HH", "HE", "MV",
  "NI", "NW", "RP", "SL", "SN", "ST", "SH", "TH"
)

# One boxplot of Linke2 per Bundesland; varwidth = TRUE makes box widths
# reflect group sizes.
ggplot(btw2009, aes(Bundesland, Linke2)) +
  geom_boxplot(varwidth = TRUE) +
  ylab("")