How to convert dataframe to matrix based on 2 factor & 1 numeric columns

https://stackoverflow.com/questions/23508574

matrix
r

16-07-2023
|

Question

I have the following data frame:

# dput-style literal of the example data frame (50 rows); the rest of the post
# refers to it as `ddf`.
# Columns: vnum1 (numeric), vint1 (integer), vfac1 (factor, levels "1" to "4").
structure(list(vnum1 = c(-1.38, 1.22, -0.17, -0.47, -0.08, -1.11, 
-1.56, -0.14, 0.55, -0.43, 0.25, 0.8, 0.77, -0.1, -0.21, -0.62, 
-0.6, -0.19, -0.41, 0.11, -0.46, -3.08, -2.09, 1.27, -1.5, 0.57, 
-1.69, 0.86, -0.12, -0.22, -0.85, 0.66, 0.11, -1.15, 0.32, -0.36, 
-0.42, -1.17, -0.71, 0.45, -0.41, 0.43, 2.18, 0.39, 0.1, -0.12, 
1.64, -1.24, -1.14, 1.22), vint1 = c(7L, 7L, 9L, 6L, 6L, 2L, 
8L, 10L, 8L, 8L, 10L, 5L, 7L, 4L, 7L, 4L, 2L, 9L, 3L, 7L, 4L, 
9L, 3L, 4L, 10L, 10L, 1L, 6L, 4L, 2L, 1L, 6L, 10L, 9L, 3L, 9L, 
3L, 8L, 7L, 7L, 3L, 4L, 5L, 6L, 5L, 9L, 3L, 10L, 10L, 4L), vfac1 = structure(c(2L, 
4L, 2L, 1L, 1L, 2L, 3L, 3L, 3L, 2L, 4L, 2L, 2L, 3L, 2L, 3L, 3L, 
3L, 1L, 2L, 1L, 2L, 3L, 3L, 3L, 1L, 2L, 2L, 3L, 2L, 1L, 3L, 3L, 
2L, 4L, 2L, 4L, 3L, 1L, 1L, 2L, 4L, 3L, 4L, 1L, 1L, 2L, 1L, 1L, 
4L), .Label = c("1", "2", "3", "4"), class = "factor")), .Names = c("vnum1", 
"vint1", "vfac1"), row.names = c(NA, -50L), class = "data.frame")


> head(ddf)
  vnum1 vint1 vfac1
1 -1.38     7     2
2  1.22     7     4
3 -0.17     9     2
4 -0.47     6     1
5 -0.08     6     1
6 -1.11     2     2
>

I want to create a matrix with the unique values of vint1 as rows and the unique values of vfac1 as columns. Each cell should contain the mean of vnum1 for the corresponding vint1 and vfac1 combination. I tried the following function:

# Prints, via cat(), a tab-separated table meant to show the mean of vnum1 for
# each vint1 / vfac1 combination of `gdf`. It only writes text to the console;
# no matrix object is built or returned.
#
# NOTE(review): this is the question's original, non-working attempt. The
# defects below explain why every printed row is identical:
#   * `vint1` and `vfac1` are free variables here, not columns taken from
#     `gdf`, so they are looked up outside the function (presumably globals or
#     an attach()ed data frame -- confirm).
#   * `i` is only assigned by the header loop; afterwards it keeps its last
#     value, so every subset below filters on that single vint1 value.
#   * The inner loop reuses the name `j` and runs over 1:10, so the factor
#     level chosen by the outer loop never reaches the subset condition.
df2mat = function(gdf){
        # Header row: one tab-prefixed label per distinct vint1 value.
        # When this loop ends, `i` still holds the last (largest) value.
        for(i in sort(unique(vint1))) cat("\t",i)
        cat("\n")
        # Intended: one output row per level of vfac1.
        for(j in sort(levels(vfac1))) {
                cat("j:",j)
                # `sum` is assigned but never used.
                sum =0
                # Overwrites the outer `j` with 1..10 on every pass.
                for(j in 1:10){
                        # Row filter uses the stale `i` and the inner `j`; an empty
                        # subset makes mean() return NaN, which is what gets printed.
                        cat(with(gdf[vint1==i & vfac1==j,], mean(vnum1, na.rm=T)),"\t")
                        #cat("\t")
                        }
                cat("\n")
        }
        cat("\n")
}   

> df2mat(ddf)
         1       2       3       4       5       6       7       8       9       10
j: 1-0.6033333  NaN     -0.51   0.25    NaN     NaN     NaN     NaN     NaN     NaN 
j: 2-0.6033333  NaN     -0.51   0.25    NaN     NaN     NaN     NaN     NaN     NaN 
j: 3-0.6033333  NaN     -0.51   0.25    NaN     NaN     NaN     NaN     NaN     NaN 
j: 4-0.6033333  NaN     -0.51   0.25    NaN     NaN     NaN     NaN     NaN     NaN

It produces incorrect output: the values of the first row are repeated in every row. Moreover, combinations with no data show up as NaN. Also, how can I get the result into a proper matrix object? How can I correct these problems? Thanks for your help.

Solution

This would be a good use of the standard tapply function with two grouping factors. You can do:

# Mean of vnum1 for every vint1 (rows) x vfac1 (columns) combination, returned
# as a matrix; combinations with no observations come back as NA.
with(ddf, tapply(vnum1, list(vint1,vfac1), mean))

#            1       2          3      4
#1  -0.8500000 -1.6900         NA     NA
#2          NA -0.6650 -0.6000000     NA
#3  -0.4100000  0.6150 -2.0900000 -0.050
#4  -0.4600000      NA  0.1075000  0.825
#5   0.1000000  0.8000  2.1800000     NA
#6  -0.2750000  0.8600  0.6600000  0.390
#7  -0.1300000 -0.1775         NA  1.220
#8          NA -0.4300 -0.7266667     NA
#9  -0.1200000 -1.1900 -0.1900000     NA
#10 -0.6033333      NA -0.5100000  0.250

OTHER TIPS

You can use the acast function from the reshape2 package, which gives you what you need:

library(reshape2)
# Cast ddf into a vint1 (rows) x vfac1 (columns) array holding the mean of vnum1.
# NOTE(review): empty combinations print as NaN below, presumably because
# mean() of a zero-length vector is NaN; acast's `fill` argument should allow
# NA instead -- confirm against the reshape2 documentation.
acast(ddf, vint1 ~ vfac1, fun.aggregate = mean, value.var = 'vnum1')

             1       2          3      4
1  -0.8500000 -1.6900        NaN    NaN
2         NaN -0.6650 -0.6000000    NaN
3  -0.4100000  0.6150 -2.0900000 -0.050
4  -0.4600000     NaN  0.1075000  0.825
5   0.1000000  0.8000  2.1800000    NaN
6  -0.2750000  0.8600  0.6600000  0.390
7  -0.1300000 -0.1775        NaN  1.220
8         NaN -0.4300 -0.7266667    NaN
9  -0.1200000 -1.1900 -0.1900000    NaN
10 -0.6033333     NaN -0.5100000  0.250

Licensed under: CC-BY-SA with attribution

Not affiliated with StackOverflow