Question

I want to visualize a mosaic plot in form of a tree. For example

mosaicplot(~ Sex + Age + Survived, data = Titanic, color = TRUE)

Now what I want is to represent this in a tree form where the first node for example be sex the second node be age and at the terminal node be number of people survived. May be it should something like http://addictedtor.free.fr/graphiques/RGraphGallery.php?graph=84 where instead of p giving the number of counts. Is there an function in R to do this or should I write it on my own by taking at a look at the party:::plot.BinaryTree function

Was it helpful?

Solution

Here is how I managed to get what I wanted with the lovely igraph package. The code is an ugly hack. It will be great to have you suggestions

library(igraph)
rm(list=ls())
req.data <- as.data.frame(Titanic)
lookup <- c("M","F","C","A","N","Y")
names(lookup) <- c("Male","Female","Child","Adult","Yes","No")

req.data$board <- "board"
req.data$Class.m <- paste(req.data$board,req.data$Class,sep="_")
req.data$Sex.m <- paste(req.data$board,req.data$Class,req.data$Sex,
                        sep="_")
req.data$Age.m <- paste(req.data$board,req.data$Class,req.data$Sex,
                        req.data$Age,sep="_")
req.data$Survived.m <- paste(req.data$board,req.data$Class,req.data$Sex,
                           req.data$Age,req.data$Survived,sep="_")

tmp <- data.frame(from=
                  do.call("c",lapply(req.data[,c("board",
                                                 "Class.m",
                                                 "Sex.m",
                                                 "Age.m")],as.character)),
                  to=do.call("c",lapply(req.data[,c("Class.m",
                    "Sex.m",
                    "Age.m",
                    "Survived.m")],as.character)),
                  stringsAsFactors=FALSE)

tmp  <- tmp [!duplicated(tmp ),];rownames(tmp) <- NULL
tmp$num <- unlist(lapply(strsplit(tmp$to,"_"),
                         FUN=function(x){
                           check1 <- req.data$Class==x[2]
                           check2 <- req.data$Sex == x[3]
                           check3 <- req.data$Age == x[4]
                           check4 <- req.data$Survived == x[5]
                           sum(req.data$Freq[ifelse(is.na(check1),TRUE,check1)   &
                                             ifelse(is.na(check2),TRUE,check2)   &
                                             ifelse(is.na(check3),TRUE,check3)   &
                                             ifelse(is.na(check4),TRUE,check4)])}))


g <- graph.data.frame(tmp, directed=TRUE)
V(g)$label <- unlist(lapply(strsplit(V(g)$name,"_"),
                            FUN=function(y){ifelse(y[length(y)] %in% names(lookup),
                              lookup[y[length(y)]],y[length(y)])}))
E(g)$label <- tmp$num
plot(g,layout=layout.reingold.tilford,ylim=c(1,-1),edge.arrow.size=0.5,vertex.size=7)
legend("topleft", paste(lookup ,names(lookup),sep=" : "),ncol=2,bty="n",cex=0.7) 
### To find the case for crew members 
tmp1  <- tmp [grepl("Crew",tmp$from),];rownames(tmp1) <- NULL
g <- graph.data.frame(tmp1, directed=TRUE)
V(g)$label <- unlist(lapply(strsplit(V(g)$name,"_"),
                            FUN=function(y){ifelse(y[length(y)] %in% names(lookup),
                              lookup[y[length(y)]],y[length(y)])}))
E(g)$label <- tmp1$num
plot(g,layout=layout.reingold.tilford,ylim=c(1,-1),edge.arrow.size=0.5)
legend("topleft", paste(lookup ,names(lookup),sep=" : "),ncol=2,bty="n",cex=0.7) 

Here is the plot I generate. You can modify the vertex/edge colors/size as you want required plot

OTHER TIPS

This is pretty close and looks a lot easier to me.. I post it here in case it may be of use. First I convert the ftable to a more traditional long data frame using expand.dft https://stat.ethz.ch/pipermail/r-help/2009-January/185561.html Then I just use the plot.dendrite function from the plotrix package.

 expand.dft <- function(x, var.names = NULL, freq = "Freq", ...)
{
  #  allow: a table object, or a data frame in frequency form
  if(inherits(x, "table"))
    x <- as.data.frame.table(x, responseName = freq)

  freq.col <- which(colnames(x) == freq)
  if (length(freq.col) == 0)
      stop(paste(sQuote("freq"), "not found in column names"))

  DF <- sapply(1:nrow(x),
               function(i) x[rep(i, each = x[i, freq.col]), ],
               simplify = FALSE)

  DF <- do.call("rbind", DF)[, -freq.col]

  for (i in 1:ncol(DF))
  {
    DF[[i]] <- type.convert(as.character(DF[[i]]), ...)

  }

  rownames(DF) <- NULL

  if (!is.null(var.names))
  {
    if (length(var.names) < dim(DF)[2])
    {
      stop(paste("Too few", sQuote("var.names"), "given."))
    } else if (length(var.names) > dim(DF)[2]) {
      stop(paste("Too many", sQuote("var.names"), "given."))
    } else {
      names(DF) <- var.names
    }
  }

  DF
}

library(plotrix)
r = ftable(Titanic)
plot.dendrite(makeDendrite(expand.dft(data.frame(r))))

enter image description here

Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top