Question

I have a factor on the x-axis, and I have ordered its levels so that the plot is intuitive to read in ggplot. That works fine. However, when I use the `subset` argument inside a ggplot layer, the original order of the factor levels is changed. Is it possible to subset within ggplot and still preserve the order of the factor levels?

Here is the data and code:

library(ggplot2)
library(plyr)
# Example data: a 40-row data frame with three columns.
#   SubjectID - factor with 15 levels ("s001" ... "s015")
#   Parameter - factor with 14 levels: "(Intercept)", "PrevCorr1", "PrevFail1"
#               and eleven "c0.xxx" levels, stored here in a non-intuitive order
#   Weight    - numeric value for that subject/parameter pair
# NOTE(review): the non-contiguous row.names suggest this is a sample drawn
# from a larger data frame -- confirm against the original data.
dat <- structure(list(SubjectID = structure(c(12L, 4L, 6L, 7L, 12L, 
7L, 5L, 8L, 14L, 1L, 15L, 1L, 7L, 1L, 7L, 5L, 4L, 2L, 9L, 6L, 
7L, 13L, 12L, 2L, 15L, 3L, 5L, 13L, 13L, 10L, 7L, 8L, 10L, 10L, 
1L, 10L, 12L, 7L, 6L, 10L), .Label = c("s001", "s002", "s003", 
"s004", "s005", "s006", "s007", "s008", "s009", "s010", "s011", 
"s012", "s013", "s014", "s015"), class = "factor"), Parameter = structure(c(7L, 
3L, 5L, 3L, 6L, 4L, 6L, 7L, 7L, 4L, 7L, 12L, 8L, 11L, 1L, 4L, 
3L, 4L, 6L, 4L, 6L, 6L, 12L, 5L, 12L, 1L, 7L, 13L, 11L, 1L, 4L, 
1L, 6L, 13L, 10L, 10L, 10L, 13L, 5L, 8L), .Label = c("(Intercept)", 
"c0.008", "c0.01", "c0.015", "c0.02", "c0.03", "PrevCorr1", "PrevFail1", 
"c0.025", "c0.004", "c0.006", "c0.009", "c0.012", "c0.005"), class = "factor"), 
    Weight = c(0.0352725634087837, 1.45546697427904, 2.29457594510248, 
    0.479548914792514, 6.39680995359234, 1.48829600339586, 2.69253113220079, 
    -0.171219812386926, -0.453625394224277, 1.43732884325816, 
    0.742416863226952, 0.256935761466245, -0.29401087047524, 
    0.34653127811481, 0.33120592543102, 2.79213318878505, 2.47047299128637, 
    1.022450287681, 6.92891513416868, 0.648982326396105, 6.58336282626389, 
    6.40600461501379, 1.80062359655524, 3.86658202530889, 1.23833324887194, 
    -0.026560261876089, 0.121670468861011, 0.9290824087063, 0.349104382483186, 
    0.24722583823016, 1.82473621255801, -0.712668411699556, 6.51789901685784, 
    0.74682257127003, 0.0755807984938072, 0.131705709322157, 
    0.246465073382095, 0.876279316248929, 1.83442709571662, -0.579086982613267
    )), .Names = c("SubjectID", "Parameter", "Weight"), row.names = c(2924L, 
784L, 1537L, 1663L, 3138L, 1744L, 1266L, 1996L, 3548L, 86L, 3692L, 
230L, 1613L, 213L, 1627L, 1024L, 832L, 384L, 2418L, 1568L, 1714L, 
3362L, 3200L, 497L, 3632L, 683L, 1020L, 3281L, 3263L, 2779L, 
1632L, 1995L, 2674L, 2753L, 312L, 2638L, 3198L, 1809L, 1569L, 
2589L), class = "data.frame")



## Re-level Parameter so the x-axis reads naturally: the non-contrast ("bias")
## terms come first in their existing order -- "(Intercept)", "PrevCorr1",
## "PrevFail1" -- followed by the contrast terms sorted by name
## ("c0.004", "c0.005", "c0.006", ...).
paramNames <- levels(dat$Parameter)
contrastNames <- sort(grep("c0", paramNames, value = TRUE))
biasNames <- setdiff(paramNames, contrastNames)
dat$Parameter <- factor(dat$Parameter, levels = c(biasNames, contrastNames))

## Label every row with the group used for point colour. Rows default to
## "Contrast"; the three bias terms are then overwritten one by one.
dat$plotColor <- "Contrast"
dat$plotColor[dat$Parameter %in% "(Intercept)"] <- "Intercept"
dat$plotColor[grepl("PrevCorr", dat$Parameter)] <- "PrevSuccess"
dat$plotColor[grepl("PrevFail", dat$Parameter)] <- "PrevFail"

# Plot the mean Weight per Parameter. Each mean is drawn as a large dot
# coloured by plotColor with a smaller white dot on top of it.
p <- ggplot(dat, aes(x = Parameter, y = Weight)) +
  # The geom_line layer below connects the means of the "Contrast" rows only.
  # It is commented out because enabling it (as the first layer) changes the
  # order of the factor levels on the x-axis.
  #geom_line(subset=.(plotColor=="Contrast"), aes(group=1), stat="summary", fun.y="mean", color="grey50", size=1) +
  geom_point(
    aes(group = Parameter, color = plotColor),
    stat = "summary",
    fun.y = "mean",
    size = 5
  ) +
  geom_point(
    aes(group = Parameter),
    stat = "summary",
    fun.y = "mean",
    size = 2.5,
    color = "white"
  ) +
  # Slant the x-axis labels so the parameter names do not overlap.
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))
print(p)

Here is the plot when the geom_line layer is commented out:

And here is what happens when the geom_line layer is uncommented:

Was it helpful?

Solution

If you switch the order in which you plot the objects, the problem disappears:

# Same plot as in the question, but the layer built from the full data set
# comes first and the subsetted geom_line layer is added after it. With this
# layer order the factor levels on the x-axis keep their intended order.
p <- ggplot(dat, aes(x = Parameter, y = Weight)) +
  # Large coloured dot at the mean Weight of every Parameter (all rows).
  geom_point(
    aes(group = Parameter, color = plotColor),
    stat = "summary",
    fun.y = "mean",
    size = 5
  ) +
  # Grey line through the means of the "Contrast" rows only.
  geom_line(
    subset = .(plotColor == "Contrast"),
    aes(group = 1),
    stat = "summary",
    fun.y = "mean",
    color = "grey50",
    size = 1
  ) +
  # Smaller white dot drawn on top of each coloured dot.
  geom_point(
    aes(group = Parameter),
    stat = "summary",
    fun.y = "mean",
    size = 2.5,
    color = "white"
  ) +
  # Slant the x-axis labels so the parameter names do not overlap.
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))
print(p)

correct plot

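I think the problem lies in plotting the subsetted data first: the x-axis is then set up from the subset alone, which does not contain all of the original factor levels, so when the points from the full data are added afterwards, ggplot no longer knows where the missing levels belong in the intended order. When you plot the original (full) data first, the complete level order is established and maintained. I have not verified this in the ggplot2 source, though, so treat it as a plausible explanation rather than a confirmed one.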

Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top