Question

I have a dataframe that looks like this:

       a        b       c         d
ab    0        0        1         0
cd   -0.415    1.415    0         0
ef    0        0        0.0811    0.918

Is there an easy way to transform this table into:

       a        b       c         d
ab    0        0        1         0
cd   -0.415    0        0         0
cd    0    1.415        0         0
ef    0        0        0.0811    0
ef    0        0        0         0.918

If a row of the original table contains two or more non-zero numbers, I want to split it into the corresponding number of rows, each keeping just one of those numbers and zeros everywhere else. I haven't got a clue how to do it, so any help would be appreciated.

Was it helpful?

Solution

Borrowing some from @AnandaMahto and melting per your request. Keep in mind how the dcast formula works: every unique combination you want as a separate row goes on the left-hand side of the ~, and the variable whose values should become columns goes on the right. In this case, the original column names became the values of that variable.

library(reshape2)

# Example data from the question: four numeric columns, with the labels
# "ab", "cd" and "ef" stored as row names.
mydf <- data.frame(
  a = c(0, -0.415, 0),
  b = c(0, 1.415, 0),
  c = c(1, 0, 0.0811),
  d = c(0, 0, 0.918),
  row.names = c("ab", "cd", "ef")
)

# melt() does not carry row names over, so copy them into an ordinary column
# that can serve as the id variable.
mydf$rows <- rownames(mydf)

# Long format: one record per (rows, variable) pair. Every column other than
# the id column is melted by default, so no measure.vars argument is needed.
m1 <- melt(mydf, id.vars = "rows")

# Putting `value` on the left-hand side gives every distinct (rows, value)
# pair its own output row; cells without a matching record are filled with 0.
# NOTE: two equal non-zero values in the same original row end up sharing a
# single output row.
m2 <- dcast(m1, rows + value ~ variable, value.var = "value", fill = 0)

# The zeros of each original row collapse into one (rows, 0) record; drop it,
# then remove the helper column.
m2 <- m2[m2$value != 0, ]
m2$value <- NULL

#rows      a     b      c     d
#2   ab  0.000 0.000 1.0000 0.000
#3   cd -0.415 0.000 0.0000 0.000
#5   cd  0.000 1.415 0.0000 0.000
#7   ef  0.000 0.000 0.0811 0.000
#8   ef  0.000 0.000 0.0000 0.918

OTHER TIPS

Here is one way, using matrix indexing. (The data is converted to a matrix, so it works best if all of your columns are of one type, as seems to be the case in your example.)

# Split every row of `dat` into one row per non-zero entry.
#
# dat: a data frame or matrix of numbers.
# Returns a numeric matrix with one row for each non-zero cell of `dat`; that
# row holds the cell's value in its original column and 0 elsewhere. Row
# names repeat the name of the source row, column names are those of `dat`.
reformat.dat <- function(dat) {
  # Work on the transpose so that column-major traversal visits the cells of
  # `dat` row by row, which fixes the order of the output rows.
  flipped <- t(dat)
  keep <- flipped != 0
  n_hits <- sum(keep)

  # Row and column of `dat` that each kept cell came from.
  src_row <- col(flipped)[keep]
  src_col <- row(flipped)[keep]

  result <- matrix(0, nrow = n_hits, ncol = ncol(dat))
  # Matrix indexing: the k-th kept value goes to row k, in its source column.
  target <- cbind(seq_len(n_hits), src_col)
  result[target] <- flipped[keep]

  rownames(result) <- rownames(dat)[src_row]
  colnames(result) <- colnames(dat)
  result
}

# NOTE(review): `dat` is not defined in this snippet - presumably the data
# frame from the question; confirm before running.
reformat.dat(dat)
#         a     b      c     d
# ab  0.000 0.000 1.0000 0.000
# cd -0.415 0.000 0.0000 0.000
# cd  0.000 1.415 0.0000 0.000
# ef  0.000 0.000 0.0811 0.000
# ef  0.000 0.000 0.0000 0.918

Here's a straightforward solution using diag:

# Expand each row of `df` into one row per non-zero entry, with zeros in all
# other cells. The result `o` is a matrix whose row names repeat the source
# row's name.
# NOTE(review): `df` is not defined in this snippet - presumably the question's
# data frame; confirm before running.
mat <- as.matrix(df)

# lapply() always returns a list. apply() would simplify to a matrix whenever
# every row yields the same number of output rows, which breaks the steps below.
o <- lapply(seq_len(nrow(mat)), function(i) {
  x <- mat[i, ]
  # Pin the size: diag() on a length-one vector would otherwise build an
  # identity matrix of that order instead of a 1 x 1 matrix.
  d <- diag(x, nrow = length(x))
  colnames(d) <- colnames(mat)
  # Row k of `d` carries only x[k]; keep the rows whose entry is non-zero.
  d[x != 0, , drop = FALSE]
})

# Repeat each source row name once per row it produced.
ids <- rep(rownames(mat), vapply(o, nrow, integer(1)))
o <- do.call(rbind, o)
rownames(o) <- ids

#         a     b      c     d
# ab  0.000 0.000 1.0000 0.000
# cd -0.415 0.000 0.0000 0.000
# cd  0.000 1.415 0.0000 0.000
# ef  0.000 0.000 0.0811 0.000
# ef  0.000 0.000 0.0000 0.918

This is a matrix. Use as.data.frame(.) if you require a data.frame.

Here's one approach, but you'll need to follow up with some cosmetic changes for fixing the row names.

Your data in a reproducible form:

# Example data from the question: numeric columns a-d, with the labels
# "ab", "cd" and "ef" stored as row names.
mydf <- data.frame(
  a = c(0, -0.415, 0),
  b = c(0, 1.415, 0),
  c = c(1, 0, 0.0811),
  d = c(0, 0, 0.918),
  row.names = c("ab", "cd", "ef")
)

Replace zeroes with NAs:

# Mark every zero cell of mydf as missing.
is.na(mydf) <- mydf == 0

Stack your data.frame to make it a "long" data.frame:

# Long format: stack() supplies `values` and `ind` (the source column name);
# the row names are recycled down the stacked columns as `Rows`.
mydf1 <- cbind(Rows = rownames(mydf), stack(mydf))

Generate unique values for "Rows":

# Give every record its own label: repeated labels get ".1", ".2", ...
# appended, so each (row, column) pair can become a separate output row.
mydf1[["Rows"]] <- make.unique(as.character(mydf1[["Rows"]]))
# Inspect the intermediate result.
mydf1
#    Rows  values ind
# 1    ab      NA   a
# 2    cd -0.4150   a
# 3    ef      NA   a
# 4  ab.1      NA   b
# 5  cd.1  1.4150   b
# 6  ef.1      NA   b
# 7  ab.2  1.0000   c
# 8  cd.2      NA   c
# 9  ef.2  0.0811   c
# 10 ab.3      NA   d
# 11 cd.3      NA   d
# 12 ef.3  0.9180   d

Now, just use xtabs to get the output you're looking for. Wrap it in as.data.frame.matrix if you want a data.frame, and clean up the row names if you need to.

# Cross-tabulate the values by unique row label and original column, then
# convert the resulting table into a data.frame.
as.data.frame.matrix(
  xtabs(values ~ Rows + ind, data = mydf1)
)
#           a     b      c     d
# ab.2  0.000 0.000 1.0000 0.000
# cd   -0.415 0.000 0.0000 0.000
# cd.1  0.000 1.415 0.0000 0.000
# ef.2  0.000 0.000 0.0811 0.000
# ef.3  0.000 0.000 0.0000 0.918

I don't think there's an elegant version of what you're asking for precisely, but maybe you can use melt from reshape2 instead? It will give you one line per row/column pair:

> library(reshape2) 
> # add row names as column
> # melt() does not keep row names, so store them in a regular column
> df <- cbind(df, names = rownames(df))
> # spell out id.vars in full instead of relying on partial argument matching
> df <- melt(df, id.vars = "names")
Using  as id variables
> df[df$value != 0,]
   names variable   value
2     cd        a -0.4150
5     cd        b  1.4150
7     ab        c  1.0000
9     ef        c  0.0811
12    ef        d  0.9180
Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top