Ch06 Investigating Multivariate Continuous Data

6.2 What is a parallel coordinate plot (pcp)?
6.3 Features you can see with parallel coordinate plots
6.4 Interpreting clustering results
6.5 Parallel coordinate plots and time series
6.6 Parallel coordinate plots for indices
6.7 Options for parallel coordinate plots

 

What is a parallel coordinate plot (pcp)? | 100

# Load the food data set from the MMST package and give its seven columns
# short labels.
data(food, package = "MMST")
names(food) <- c(
  "Fat", "Food.energy", "Carbohyd", "Protein",
  "Cholest", "Wt", "Satur.Fat"
)

# Parallel coordinate plot of all seven columns; with scale = "uniminmax"
# each axis is rescaled separately to run from its minimum to its maximum.
ggparcoord(data = food, columns = 1:7, scale = "uniminmax") +
  xlab("") +
  ylab("")

 

101

# Divide every column of food by its Wt column, then plot every column
# except the sixth (Wt itself after the renaming above) with faint lines.
food1 <- food / food$Wt
ggparcoord(
  data = food1,
  columns = c(1:5, 7),
  scale = "uniminmax",
  alphaLines = 0.2
) +
  xlab("") +
  ylab("")

 

Features you can see with parallel coordinate plots | 103

# Add a two-level factor flagging the rows whose Fat value exceeds 0.75.
food1$fatX <- factor(ifelse(food1$Fat > 0.75, 1, 0))

# Colour the lines by that flag and flip the plot so the axes run
# horizontally. Rows are sorted by the flag first (presumably so the flagged
# cases are drawn last).
ggparcoord(
  data = food1[order(food1$fatX), ],
  columns = c(1:5, 7),
  groupColumn = "fatX",
  scale = "uniminmax"
) +
  xlab("") +
  ylab("") +
  theme(legend.position = "none") +
  coord_flip()

 

104

# Scatterplot of Carbohyd (y) against Protein (x) for the rescaled food data.
ggplot(data = food1, mapping = aes(x = Protein, y = Carbohyd)) +
  geom_point()

 

105

# Parallel coordinate plot of the first four iris columns, coloured by Species.
ggparcoord(data = iris, columns = 1:4, groupColumn = "Species")

 

Interpreting clustering results | 106

# Hierarchical clustering of the unscaled USArrests data, cut into three
# clusters; the cluster label is appended as the factor column `clus`.
hcav <- hclust(dist(USArrests), method = "ave")
clu3 <- cutree(hcav, k = 3)
clus <- factor(clu3)
usa1 <- cbind(USArrests, clus)

# Plot the four original variables, coloured by cluster membership.
ggparcoord(
  data = usa1,
  columns = 1:4,
  groupColumn = "clus",
  scale = "uniminmax",
  mapping = aes(size = 1)
) +
  xlab("") +
  ylab("") +
  theme(legend.position = "none")

 

107

# Same clustering as for the raw data, but on the columns standardised with
# scale() before the distances are computed; again cut into three clusters.
hcav2 <- hclust(dist(scale(USArrests)), method = "ave")
clu32 <- cutree(hcav2, k = 3)
clus2 <- factor(clu32)
usa2 <- cbind(USArrests, clus2)

# Plot the four original (unstandardised) variables, coloured by cluster.
ggparcoord(
  data = usa2,
  columns = 1:4,
  groupColumn = "clus2",
  scale = "uniminmax",
  mapping = aes(size = 1)
) +
  xlab("") +
  ylab("") +
  theme(legend.position = "none")

 

Parallel coordinate plots and time series | 109

library(reshape2)
data(nass.corn, package = "agridat")

# Reshape to long form with year and state as identifiers. The argument is
# spelled out as id.vars rather than relying on partial matching of `id`.
c1 <- melt(nass.corn, id.vars = c("year", "state"))

# One series per state/variable combination.
c1 <- within(c1, StateV <- interaction(state, variable))

# Back to wide form: one row per StateV series, one column per year.
# value.var is given explicitly instead of being guessed by dcast().
c2 <- dcast(c1, StateV ~ year, value.var = "value")

# Keep, among the first 48 rows, the series whose value in column 147
# exceeds 250000, and draw them on a common scale ("globalminmax").
# NOTE(review): rows 1:48 and column 147 are hard-coded positions --
# presumably the acres series per state and the final year; confirm against
# the installed agridat version.
ggparcoord(
  data = subset(c2[1:48, ], c2[1:48, 147] > 250000),
  columns = 2:147,
  groupColumn = "StateV",
  scale = "globalminmax"
) +
  xlab("Year") +
  ylab("Acres") +
  scale_x_discrete(breaks = seq(1865, 2015, 10)) +
  theme(legend.position = "none")

 

111

# Same selection of series as in the previous plot (first 48 rows of c2 with
# a value above 250000 in column 147), now with a boxplot drawn on every
# axis and half-transparent lines.
ggparcoord(
  data = subset(c2[1:48, ], c2[1:48, 147] > 250000),
  columns = 2:147,
  groupColumn = "StateV",
  scale = "globalminmax",
  boxplot = TRUE,
  alphaLines = 0.5
) +
  xlab("Year") +
  ylab("Acres") +
  scale_x_discrete(breaks = seq(1865, 2015, 10)) +
  theme(legend.position = "none")

 

Parallel coordinate plots for indices | 113

# Load the university rankings data and shorten six of the column names.
data(uniranks, package = "GDAdata")
names(uniranks)[c(5, 6, 8, 10, 11, 13)] <- c(
  "AvTeach", "NSSTeach", "SpendperSt",
  "Careers", "VAddScore", "NSSFeedb"
)

# Add the reciprocal of the student/staff ratio as a new column, StaffStu.
uniranks1 <- within(uniranks, {
  StaffStu <- 1 / (StudentStaffRatio)
})

# Plot columns 5-8 and 10-14, each on its own min-max scale, without any
# tick marks or labels on the value axis.
ggparcoord(
  data = uniranks1,
  columns = c(5:8, 10:14),
  scale = "uniminmax",
  alphaLines = 1 / 3
) +
  xlab("") +
  ylab("") +
  theme(
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank()
  )

 

114

# Label each university as "Russell" or "not" according to its UniGroup.
uniranks2 <- within(uniranks1, {
  Rus <- ifelse(UniGroup == "Russell", "Russell", "not")
})

# Sort with decreasing = TRUE so the "not" rows follow the "Russell" rows,
# use an explicit axis order, and colour the two groups red and grey via a
# manual colour scale.
ggparcoord(
  data = uniranks2[order(uniranks2$Rus, decreasing = TRUE), ],
  columns = c(5:8, 10:14),
  order = c(5, 12, 8, 9, 14, 6, 13, 7, 11, 10),
  groupColumn = "Rus",
  scale = "uniminmax"
) +
  xlab("") +
  ylab("") +
  theme(
    legend.position = "none",
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank()
  ) +
  scale_colour_manual(values = c("red", "grey"))

 

Options for parallel coordinate plots | 117

# Load the body measurements and work on a copy whose column names are
# abbreviated; eight of the abbreviations are then set by hand.
data(body, package = "gclus")
body1 <- body
names(body1) <- abbreviate(names(body), 2)
names(body1)[c(4:5, 11:13, 19:21)] <- c(
  "CDp", "CD", "Ch", "Ws", "Ab", "Cl", "An", "Wr"
)

# Four versions of the same plot of columns 1-24, differing only in how the
# axes are scaled.
# a1: ggparcoord's default scaling.
a1 <- ggparcoord(data = body1, columns = 1:24, alphaLines = 0.1) +
  xlab("") +
  ylab("")

# a2: each variable rescaled to its own minimum and maximum.
a2 <- ggparcoord(
  data = body1, columns = 1:24,
  scale = "uniminmax", alphaLines = 0.1
) +
  xlab("") +
  ylab("")

# a3: one common scale across all variables.
a3 <- ggparcoord(
  data = body1, columns = 1:24,
  scale = "globalminmax", alphaLines = 0.1
) +
  xlab("") +
  ylab("")

# a4: scale = "center" with the median as the centring summary.
a4 <- ggparcoord(
  data = body1, columns = 1:24,
  scale = "center", scaleSummary = "median", alphaLines = 0.1
) +
  xlab("") +
  ylab("")

# Show the four plots together.
grid.arrange(a1, a2, a3, a4)

 

118

# Replace boxplot outliers in a vector with NA.
#
# xv: a numeric vector (used below on one column of a data set at a time).
#
# Returns xv with every value lying beyond the boxplot whisker ends set to
# NA. Values that are already NA stay NA. The result is returned visibly.
fc <- function(xv) {
    # Per the boxplot() documentation, rows 1 and 5 of $stats hold the lower
    # and upper whisker ends. Compute the statistics once and reuse them.
    whiskers <- boxplot(xv, plot = FALSE)$stats[c(1, 5)]
    # which() drops NA comparisons, so missing values are left untouched.
    outlying <- which(xv < whiskers[1] | xv > whiskers[2])
    xv[outlying] <- NA
    xv
}
# Reload the food data and apply fc() to every column, which blanks out the
# boxplot outliers as NA.
data(food, package = "MMST")
rxfood <- as.data.frame(apply(food, 2, fc))

# Plot all seven columns with missing = "exclude" for the rows that now
# contain NA values.
ggparcoord(
  data = rxfood,
  columns = 1:7,
  scale = "uniminmax",
  missing = "exclude",
  alphaLines = 0.3
) +
  xlab("") +
  ylab("")

 

120

# Turn Gn into a factor so it can serve as the grouping column.
body1$Gn <- factor(body1$Gn)

# Colour the lines by Gn and let ggparcoord choose the axis order with its
# "allClass" method; the legend and value-axis ticks and labels are hidden.
ggparcoord(
  data = body1,
  columns = 1:24,
  scale = "uniminmax",
  alphaLines = 0.4,
  groupColumn = "Gn",
  order = "allClass"
) +
  xlab("") +
  ylab("") +
  theme(
    legend.position = "none",
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank()
  )

 

121

# Two panels built from the same data, each colouring one Gn group and
# greying out the other. The rows are sorted by Gn in opposite directions
# for the two panels.

# Panel a: rows in increasing Gn order; colours grey, then #00BFC4.
a <- ggparcoord(
  data = body1[order(body1$Gn), ],
  columns = 1:24,
  groupColumn = "Gn",
  order = "allClass",
  scale = "uniminmax"
) +
  xlab("") +
  ylab("") +
  theme(
    legend.position = "none",
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank()
  ) +
  scale_colour_manual(values = c("grey", "#00BFC4"))

# Panel b: rows in decreasing Gn order; colours #F8766D, then grey.
b <- ggparcoord(
  data = body1[order(body1$Gn, decreasing = TRUE), ],
  columns = 1:24,
  groupColumn = "Gn",
  order = "allClass",
  scale = "uniminmax"
) +
  xlab("") +
  ylab("") +
  theme(
    legend.position = "none",
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank()
  ) +
  scale_colour_manual(values = c("#F8766D", "grey"))

# Stack the two panels.
grid.arrange(a, b)

 

122

# Median of each of the first 24 body columns, and the column order that
# sorts those medians increasingly.
m2 <- apply(body[, 1:24], 2, median, na.rm = TRUE)
m2a <- order(m2)

# Plot everything except Gender on one common scale with the axes in median
# order, then flip the plot so the axes run horizontally.
ggparcoord(
  data = select(body, -Gender),
  alphaLines = 0.3,
  scale = "globalminmax",
  order = m2a
) +
  coord_flip()

 

123

# Build a standardised plot once just to get at the scaled values that
# ggparcoord stores in its $data component.
B1 <- ggparcoord(data = body1, columns = 1:24, scale = "std")

# Cast columns 1, 3 and 4 of that data back to an .ID-by-variable array and
# take the maximum scaled value of every variable.
B2 <- acast(B1$data[, c(1, 3, 4)], .ID ~ variable)
m4 <- apply(B2, 2, max, na.rm = TRUE)
m4r <- order(m4)

# Redraw the standardised plot with the axes ordered by those maxima.
ggparcoord(
  data = body1,
  alphaLines = 0.3,
  columns = 1:24,
  scale = "std",
  order = m4r
)

 

125

# Load the Boston data and derive three helper columns from medv:
#   hmedv  - factor, "Top" where medv equals 50, otherwise "Rest"
#   mlevel - per-row line alpha: 1 where medv equals 50, otherwise 0.1
#   medv1  - a plain copy of medv, used as a grouping column
data(Boston, package = "MASS")
Boston1 <- Boston
Boston1$hmedv <- factor(ifelse(Boston1$medv == 50, "Top", "Rest"))
Boston1$mlevel <- ifelse(Boston1$medv == 50, 1, 0.1)
Boston1$medv1 <- Boston1$medv

# Panel a: rows sorted by hmedv, coloured by it, with the per-row alpha
# taken from the mlevel column.
a <- ggparcoord(
  data = Boston1[order(Boston1$hmedv), ],
  columns = 1:14,
  groupColumn = "hmedv",
  scale = "uniminmax",
  alphaLines = "mlevel",
  mapping = aes(size = 1)
) +
  xlab("") +
  ylab("") +
  theme(
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank()
  )

# Panel b: all rows in their original order, coloured by medv1.
b <- ggparcoord(
  data = Boston1,
  columns = 1:14,
  groupColumn = "medv1",
  scale = "uniminmax"
) +
  xlab("") +
  ylab("") +
  theme(
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank()
  )

grid.arrange(a, b)

 

126

# Flag the rows whose rad equals the maximum rad value:
#   arad   - factor, 1 for those rows and 0 for the rest
#   aLevel - per-row line alpha, 1 for those rows and 0.1 for the rest
Boston1 <- mutate(
  Boston1,
  arad = factor(ifelse(rad < max(rad), 0, 1)),
  aLevel = ifelse(rad < max(rad), 0.1, 1)
)

# Colour by the flag, take the line alpha from aLevel, and let ggparcoord
# order the axes with its "allClass" method.
ggparcoord(
  data = Boston1,
  columns = 1:14,
  scale = "uniminmax",
  groupColumn = "arad",
  alphaLines = "aLevel",
  order = "allClass"
) +
  xlab("") +
  ylab("") +
  theme(
    legend.position = "none",
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank()
  )