6.2 What is a parallel coordinate plot (pcp)?
6.3 Features you can see with parallel coordinate plots
6.4 Interpreting clustering results
6.5 Parallel coordinate plots and time series
6.6 Parallel coordinate plots for indices
6.7 Options for parallel coordinate plots
What is a parallel coordinate plot (pcp)? | 100
# Load the food data and give its seven columns short, readable names.
data(food, package = "MMST")
names(food) <- c(
  "Fat", "Food.energy", "Carbohyd", "Protein",
  "Cholest", "Wt", "Satur.Fat"
)

# Parallel coordinate plot of all seven variables, no axis titles.
ggparcoord(data = food, columns = 1:7, scale = "uniminmax") +
  labs(x = "", y = "")
101
# Divide every column of each row by that row's Wt value.
food1 <- food / food$Wt

# Column 6 is left out of the plot; lines are drawn semi-transparent.
ggparcoord(
  data = food1,
  columns = c(1:5, 7),
  scale = "uniminmax",
  alphaLines = 0.2
) +
  labs(x = "", y = "")
Features you can see with parallel coordinate plots | 103
# fatX is a two-level factor: 1 where Fat exceeds 0.75, 0 otherwise.
food1$fatX <- factor(ifelse(food1$Fat > 0.75, 1, 0))

# Rows are passed sorted by fatX and coloured by it; the legend is hidden
# and the axes are flipped.
ggparcoord(
  data = food1[order(food1$fatX), ],
  columns = c(1:5, 7),
  groupColumn = "fatX",
  scale = "uniminmax"
) +
  labs(x = "", y = "") +
  theme(legend.position = "none") +
  coord_flip()
104
# Scatterplot of Carbohyd (y) against Protein (x) for the food1 data.
ggplot(data = food1, mapping = aes(x = Protein, y = Carbohyd)) +
  geom_point()
105
# Parallel coordinate plot of the first four iris columns, coloured by
# Species.
ggparcoord(
  data = iris,
  columns = 1:4,
  groupColumn = "Species"
)
Interpreting clustering results | 106
# Hierarchical clustering of the raw USArrests data ("ave" is matched to the
# "average" method by hclust), cut into three clusters.
hcav <- hclust(dist(USArrests), method = "average")
clu3 <- cutree(hcav, k = 3)
clus <- factor(clu3)

# Attach the cluster labels as column "clus" and colour the lines by them.
usa1 <- cbind(USArrests, clus)
ggparcoord(
  data = usa1,
  columns = 1:4,
  groupColumn = "clus",
  scale = "uniminmax",
  mapping = aes(size = 1)
) +
  labs(x = "", y = "") +
  theme(legend.position = "none")
107
# Same clustering as for the raw data, but the distances are computed on the
# output of scale(USArrests); "ave" is matched to "average" by hclust.
hcav2 <- hclust(dist(scale(USArrests)), method = "average")
clu32 <- cutree(hcav2, k = 3)
clus2 <- factor(clu32)

# The cluster labels are bound to the original (unscaled) USArrests columns.
usa2 <- cbind(USArrests, clus2)
ggparcoord(
  data = usa2,
  columns = 1:4,
  groupColumn = "clus2",
  scale = "uniminmax",
  mapping = aes(size = 1)
) +
  labs(x = "", y = "") +
  theme(legend.position = "none")
Parallel coordinate plots and time series | 109
library(reshape2)
data(nass.corn, package = "agridat")

# Long format with year and state as identifiers, then one series per
# state/variable combination (StateV).
c1 <- melt(nass.corn, id.vars = c("year", "state"))
c1$StateV <- interaction(c1$state, c1$variable)

# Back to wide format: one row per StateV, one column per year.
c2 <- dcast(c1, StateV ~ year)

# Of the first 48 rows, keep those whose value in column 147 exceeds 250000.
# NOTE(review): presumably the first 48 rows are the acres series -- confirm.
ggparcoord(
  data = subset(c2[1:48, ], c2[1:48, 147] > 250000),
  columns = 2:147,
  groupColumn = "StateV",
  scale = "globalminmax"
) +
  xlab("Year") +
  ylab("Acres") +
  scale_x_discrete(breaks = seq(from = 1865, to = 2015, by = 10)) +
  theme(legend.position = "none")
111
# Same series selection as the plain time-series plot (first 48 rows of c2
# with column 147 above 250000), now with a boxplot per axis and
# semi-transparent lines.
ggparcoord(
  data = subset(c2[1:48, ], c2[1:48, 147] > 250000),
  columns = 2:147,
  groupColumn = "StateV",
  scale = "globalminmax",
  boxplot = TRUE,
  alphaLines = 0.5
) +
  xlab("Year") +
  ylab("Acres") +
  scale_x_discrete(breaks = seq(from = 1865, to = 2015, by = 10)) +
  theme(legend.position = "none")
Parallel coordinate plots for indices | 113
data(uniranks, package = "GDAdata")

# Shorten six of the column names (positions 5, 6, 8, 10, 11 and 13).
names(uniranks)[c(5, 6, 8, 10, 11, 13)] <- c(
  "AvTeach", "NSSTeach", "SpendperSt",
  "Careers", "VAddScore", "NSSFeedb"
)

# StaffStu is the reciprocal of StudentStaffRatio, appended as a new column.
uniranks1 <- uniranks
uniranks1$StaffStu <- 1 / uniranks1$StudentStaffRatio

# Plot columns 5-8 and 10-14 without y-axis ticks or tick labels.
ggparcoord(
  data = uniranks1,
  columns = c(5:8, 10:14),
  scale = "uniminmax",
  alphaLines = 1 / 3
) +
  labs(x = "", y = "") +
  theme(
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank()
  )
114
# Rus is "Russell" for rows whose UniGroup is "Russell" and "not" otherwise.
uniranks2 <- uniranks1
uniranks2$Rus <- ifelse(uniranks2$UniGroup == "Russell", "Russell", "not")

# Rows are passed sorted by Rus in decreasing order, the axes follow the
# explicit column order given in `order`, and the two groups get the manual
# colours red and grey.
# NOTE(review): which group is red and which is drawn last depends on how
# "Russell" and "not" sort in the current locale -- confirm.
ggparcoord(
  data = uniranks2[order(uniranks2$Rus, decreasing = TRUE), ],
  columns = c(5:8, 10:14),
  order = c(5, 12, 8, 9, 14, 6, 13, 7, 11, 10),
  groupColumn = "Rus",
  scale = "uniminmax"
) +
  labs(x = "", y = "") +
  theme(
    legend.position = "none",
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank()
  ) +
  scale_colour_manual(values = c("red", "grey"))
Options for parallel coordinate plots | 117
data(body, package = "gclus")

# Work on a copy with names abbreviated to a minimum length of two, then
# override eight of the abbreviations by hand.
body1 <- body
names(body1) <- abbreviate(names(body), 2)
names(body1)[c(4:5, 11:13, 19:21)] <- c(
  "CDp", "CD", "Ch", "Ws", "Ab", "Cl", "An", "Wr"
)

# The same 24 columns drawn with four different scaling options.
# a1: ggparcoord's default scaling
a1 <- ggparcoord(data = body1, columns = 1:24, alphaLines = 0.1) +
  labs(x = "", y = "")
# a2: "uniminmax"
a2 <- ggparcoord(
  data = body1, columns = 1:24, scale = "uniminmax", alphaLines = 0.1
) +
  labs(x = "", y = "")
# a3: "globalminmax"
a3 <- ggparcoord(
  data = body1, columns = 1:24, scale = "globalminmax", alphaLines = 0.1
) +
  labs(x = "", y = "")
# a4: "center" with the median as the scale summary
a4 <- ggparcoord(
  data = body1, columns = 1:24, scale = "center",
  scaleSummary = "median", alphaLines = 0.1
) +
  labs(x = "", y = "")

# Show the four plots together.
grid.arrange(a1, a2, a3, a4)
118
# Replace boxplot outliers in a vector by NA.
#
# xv: numeric vector.
# Returns xv with every value above the upper whisker end or below the lower
# whisker end (elements 5 and 1 of boxplot()$stats) replaced by NA.
fc <- function(xv) {
  # Compute the whisker ends once instead of calling boxplot() twice.
  whiskers <- boxplot(xv, plot = FALSE)$stats[c(1, 5)]
  cxv <- ifelse(xv > whiskers[2], NA, xv)
  # Last expression is returned visibly (not as the value of an assignment).
  ifelse(cxv < whiskers[1], NA, cxv)
}

# Reload the food data and blank out the outliers column by column.
data(food, package = "MMST")
rxfood <- as.data.frame(apply(food, 2, fc))

# Plot the cleaned data; missing values are handled with missing = "exclude".
ggparcoord(
  data = rxfood,
  columns = c(1:7),
  scale = "uniminmax",
  missing = "exclude",
  alphaLines = 0.3
) +
  xlab("") +
  ylab("")
120
# Turn Gn into a factor so it can be used as the grouping column.
body1[["Gn"]] <- factor(body1[["Gn"]])

# Lines coloured by Gn, axes ordered with order = "allClass"; legend and
# y-axis ticks/labels are suppressed.
ggparcoord(
  data = body1,
  columns = 1:24,
  scale = "uniminmax",
  alphaLines = 0.4,
  groupColumn = "Gn",
  order = "allClass"
) +
  labs(x = "", y = "") +
  theme(
    legend.position = "none",
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank()
  )
121
# Two versions of the same plot, each with one Gn group in grey.
# a: rows sorted by Gn ascending; colours grey, then #00BFC4.
a <- ggparcoord(
  data = body1[order(body1$Gn), ],
  columns = 1:24,
  groupColumn = "Gn",
  order = "allClass",
  scale = "uniminmax"
) +
  labs(x = "", y = "") +
  theme(
    legend.position = "none",
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank()
  ) +
  scale_colour_manual(values = c("grey", "#00BFC4"))

# b: rows sorted by Gn descending; colours #F8766D, then grey.
b <- ggparcoord(
  data = body1[order(body1$Gn, decreasing = TRUE), ],
  columns = 1:24,
  groupColumn = "Gn",
  order = "allClass",
  scale = "uniminmax"
) +
  labs(x = "", y = "") +
  theme(
    legend.position = "none",
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank()
  ) +
  scale_colour_manual(values = c("#F8766D", "grey"))

# Show both plots together.
grid.arrange(a, b)
122
# Median of each of the first 24 columns of body, and the column order that
# sorts those medians increasingly.
m2 <- apply(body[, 1:24], 2, median, na.rm = TRUE)
m2a <- order(m2)

# Plot everything except Gender on a common scale, with the axes arranged
# by m2a and the plot flipped.
ggparcoord(
  data = select(body, -Gender),
  alphaLines = 0.3,
  scale = "globalminmax",
  order = m2a
) +
  coord_flip()
123
# Build the plot once only to get at the scaled data ggparcoord stores in it.
B1 <- ggparcoord(data = body1, columns = 1:24, scale = "std")

# Cast columns 1, 3 and 4 of that data into an .ID-by-variable matrix.
# NOTE(review): presumably these columns are .ID, variable and value --
# confirm against the installed GGally version.
B2 <- acast(B1$data[, c(1, 3, 4)], .ID ~ variable)

# Order the variables by their maximum scaled value.
m4 <- apply(B2, 2, max, na.rm = TRUE)
m4r <- order(m4)

# Redraw with the axes in that order.
ggparcoord(
  data = body1,
  alphaLines = 0.3,
  columns = 1:24,
  scale = "std",
  order = m4r
)
125
data(Boston, package = "MASS")

# Three helper columns appended to a copy of Boston:
#   hmedv  - "Top" where medv equals 50, "Rest" otherwise
#   mlevel - alpha level per row: 1 where medv equals 50, 0.1 otherwise
#   medv1  - a copy of medv, used as grouping column in plot b
Boston1 <- Boston
Boston1$hmedv <- factor(ifelse(Boston1$medv == 50, "Top", "Rest"))
Boston1$mlevel <- ifelse(Boston1$medv == 50, 1, 0.1)
Boston1$medv1 <- Boston1$medv

# a: rows sorted by hmedv, coloured by hmedv, transparency from mlevel.
a <- ggparcoord(
  data = Boston1[order(Boston1$hmedv), ],
  columns = 1:14,
  groupColumn = "hmedv",
  scale = "uniminmax",
  alphaLines = "mlevel",
  mapping = aes(size = 1)
) +
  labs(x = "", y = "") +
  theme(
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank()
  )

# b: lines coloured by medv1.
b <- ggparcoord(
  data = Boston1,
  columns = 1:14,
  groupColumn = "medv1",
  scale = "uniminmax"
) +
  labs(x = "", y = "") +
  theme(
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank()
  )

# Show both plots together.
grid.arrange(a, b)
126
# arad is 1 for rows whose rad equals the maximum rad and 0 for all others;
# aLevel holds the matching alpha level (1 versus 0.1).
Boston1 <- mutate(
  Boston1,
  arad = factor(ifelse(rad < max(rad), 0, 1)),
  aLevel = ifelse(rad < max(rad), 0.1, 1)
)

# Colour by arad, take the transparency from aLevel, and order the axes with
# order = "allClass".
ggparcoord(
  data = Boston1,
  columns = 1:14,
  scale = "uniminmax",
  groupColumn = "arad",
  alphaLines = "aLevel",
  order = "allClass"
) +
  labs(x = "", y = "") +
  theme(
    legend.position = "none",
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank()
  )