سؤال

Sorry, I'm not particularly articulate in the post title. I hope my example will be clearer!

If I start out with a data frame:

# Example data: two groups ("a", "b") with four `var` levels each, plus four
# random summary columns drawn from adjacent uniform ranges so that
# min < q25 < q75 < max in every row.
test.df <- data.frame(
  group = rep(c("a", "b"), each = 4),
  var = rep(1:4, times = 2),
  min = runif(8),
  q25 = runif(8, min = 1, max = 2),
  q75 = runif(8, min = 2, max = 3),
  max = runif(8, min = 3, max = 4)
)
# Show the first two rows.
head(test.df, n = 2)
  group var        min      q25      q75      max
1     a   1 0.59078504 1.199138 2.119283 3.869486
2     a   2 0.06131107 1.676109 2.603068 3.739955

I know I can melt it with id = c("group", "var"):

# melt() comes from reshape2.
library(reshape2)
# Keep group/var as identifiers, stack every other column into
# variable/value, and show the first two rows of the long result.
head(melt(test.df, id.vars = c("group", "var")), n = 2)
  group var variable      value
1     a   1      min 0.59078504
2     a   2      min 0.06131107

But what I'm looking for is a way to get two "value" columns by pairing min-max and q25-q75 so that it looks like:

  group var variable  value1     value2  
1     a   1 min-max   0.59078504 3.869486
1     a   1 q25-q75   1.199138   2.119283
2     a   2 min-max   0.06131107 3.739955
2     a   2 q25-q75   1.676109   2.603068 

I got a bit stuck on melt/cast and can't pull myself out. I'm sure there must be a neat way to accomplish this.

edit: this is a simplified example with only two pairs of variables - the idea is to solve this for larger numbers of pairs with minimal 'manual' work.

هل كانت مفيدة؟

المحلول

Another attempt:

# Stack each (lower, upper) column pair of test.df into two shared value
# columns. To handle more pairs, add entries to `pairs`: the list names become
# the labels in the `variable` column and the values name the two columns.
newnames <- c("value1", "value2")
pairs <- list(
  "min-max" = c("min", "max"),
  "q25-q75" = c("q25", "q75")
)
data.frame(
  # The id columns have nrow(test.df) rows; data.frame() recycles them once
  # per pair so they line up with the stacked value columns.
  test.df[c("group", "var")],
  variable = rep(names(pairs), each = nrow(test.df)),
  # unname() stops rbind() from prefixing the row names with the pair labels.
  do.call(
    rbind,
    unname(lapply(pairs, function(cols) setNames(test.df[cols], newnames)))
  )
)

Result:

   group var variable    value1   value2
1      a   1  min-max 0.6939545 3.479807
2      a   2  min-max 0.5646825 3.564637
3      a   3  min-max 0.3509824 3.928308
4      a   4  min-max 0.4217888 3.376821
5      b   1  min-max 0.6493916 3.933157
6      b   2  min-max 0.3978330 3.129940
7      b   3  min-max 0.4407376 3.707715
8      b   4  min-max 0.1651875 3.798546
9      a   1  q25-q75 1.3531055 2.242076
10     a   2  q25-q75 1.1811900 2.240188
11     a   3  q25-q75 1.3043822 2.695175
12     a   4  q25-q75 1.3315480 2.542576
13     b   1  q25-q75 1.2397527 2.107442
14     b   2  q25-q75 1.1973467 2.545511
15     b   3  q25-q75 1.9193746 2.502551
16     b   4  q25-q75 1.0425474 2.225601

نصائح أخرى

There are a few approaches, e.g. multiple melts, dcast, recast, etc. The following approach uses data.table:

# data.table approach: duplicate the rows once per pair, then fill in the pair
# label and the two value columns by reference.
library(data.table)
test.dt <- data.table(rbind(test.df, test.df))

# Row indices of the original data; test.dt holds two stacked copies of it.
ind <- seq_len(nrow(test.df))
test.dt[, c("variable", "value1", "value2") :=
          list(rep(c("min-max", "q25-q75"), each = nrow(test.df)),
               c(min[ind], q25[ind]),
               c(max[ind], q75[ind])
               )]

## drop the columns you don't need
test.dt[, c("min", "max", "q25", "q75") := NULL]

## To sort the result, use either `order` or `setkey`:
##   `order` is slower, but the order of test.dt itself is preserved;
##   `setkey` is quicker, but changes test.dt.
test.dt[order(group, var)]
# or
setkey(test.dt, group, var)

Results

test.dt

    group var variable     value1   value2
 1:     a   1  min-max 0.63256600 3.514519
 2:     a   1  q25-q75 1.66013227 2.543394
 3:     a   2  min-max 0.53387108 3.029701
 4:     a   2  q25-q75 1.71870889 2.620395
 5:     a   3  min-max 0.41179300 3.210039
 6:     a   3  q25-q75 1.28926891 2.539023
 7:     a   4  min-max 0.58886768 3.419263
 8:     a   4  q25-q75 1.95738512 2.368881
 9:     b   1  min-max 0.06941305 3.047981
10:     b   1  q25-q75 1.03638939 2.341807
11:     b   2  min-max 0.64073458 3.774208
12:     b   2  q25-q75 1.04405064 2.164377
13:     b   3  min-max 0.57886703 3.703984
14:     b   3  q25-q75 1.95881989 2.039100
15:     b   4  min-max 0.25317366 3.870050
16:     b   4  q25-q75 1.53970571 2.093513

From data.table v1.9.5+, melt can handle melting to multiple columns. We can simply do:

library(data.table) # v1.9.5+
# NOTE: setDT() converts test.df to a data.table by reference (no copy).
# Each list element becomes one value column: value1 stacks min then q25,
# value2 stacks max then q75, so the rows pair up as min-max and q25-q75.
# Columns are selected by name so the call does not depend on column order.
ans <- melt(
  setDT(test.df),
  id.vars = c("group", "var"),
  measure.vars = list(c("min", "q25"), c("max", "q75"))
)
# Relabel the levels of `variable` in place with the pair names.
setattr(ans$variable, "levels", c("min-max", "q25-q75"))
#     group var variable     value1   value2
#  1:     a   1  min-max 0.10099765 3.462315
#  2:     a   2  min-max 0.08818443 3.051679
#  3:     a   3  min-max 0.53342060 3.737234
#  4:     a   4  min-max 0.07875220 3.560566
#  5:     b   1  min-max 0.25263233 3.137646
#  6:     b   2  min-max 0.10126603 3.451190
#  7:     b   3  min-max 0.18560800 3.819189
#  8:     b   4  min-max 0.65526231 3.721172
#  9:     a   1  q25-q75 1.79852967 2.542847
# 10:     a   2  q25-q75 1.41175623 2.560623
# 11:     a   3  q25-q75 1.01159079 2.931624
# 12:     a   4  q25-q75 1.60892118 2.392346
# 13:     b   1  q25-q75 1.06122928 2.229654
# 14:     b   2  q25-q75 1.13817060 2.751216
# 15:     b   3  q25-q75 1.26490475 2.400336
# 16:     b   4  q25-q75 1.65309127 2.213093

You can install it by following the installation instructions in the data.table project documentation.

A (clumsy!) solution:

# Melt to long format, then pair up rows whose original column names share the
# same first letter ("m" for min/max, "q" for q25/q75).
df2 <- melt(test.df, id.vars = c("group", "var"))

# Pairing key: the first character of the original column name.
df3 <- df2
df3$v <- substr(df3$variable, 1, 1)

# Split the long rows into lower bounds (min, q25) and upper bounds (max, q75).
minq25 <- df3$variable %in% c("min", "q25")
maxq75 <- !minq25

# Join the two halves on the id columns plus the pairing key.
df4 <- merge(df3[minq25, ], df3[maxq75, ], by = c("group", "var", "v"))

# Build the combined label, e.g. "min-max".
df5 <- df4
df5$variable <- paste(df5$variable.x, df5$variable.y, sep = "-")

df5[, c("group", "var", "variable", "value.x", "value.y")]

Result:

   group var variable    value.x  value.y
1      a   1  min-max 0.05265472 3.879102
2      a   1  q25-q75 1.38637281 2.083420
3      a   2  min-max 0.04639894 3.656561
4      a   2  q25-q75 1.38182351 2.652118
5      a   3  min-max 0.49758025 3.227566
6      a   3  q25-q75 1.07336356 2.193336
7      a   4  min-max 0.08901318 3.018771
8      a   4  q25-q75 1.11195873 2.754820
9      b   1  min-max 0.49791501 3.494425
10     b   1  q25-q75 1.17453226 2.221016
11     b   2  min-max 0.66731388 3.942841
12     b   2  q25-q75 1.71656969 2.317466
13     b   3  min-max 0.88250686 3.258755
14     b   3  q25-q75 1.93997804 2.191714
15     b   4  min-max 0.32003321 3.060668
16     b   4  q25-q75 1.05766626 2.757572
مرخصة بموجب: CC-BY-SA مع الإسناد
لا تنتمي إلى StackOverflow
scroll top