Question

I have the following data frame, which gives a logical response for a number of criteria arranged across columns:

    structure(list(Householdref = c("003015002024001", "003016003006001", 
"003016004013001", "003016006002001", "003017003009002", "003017003009003", 
"003017003009004", "003017003037001", "003017003049001", "003070001026001", 
"003070003042001", "003070005002001", "003070005021001", "003070009005001", 
"003071002055001", "003071003037001", "003072003029001", "003072006002001", 
"003072006018001"), B5_Glass = c(FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE), B5_Glass.bottles = c(TRUE, 
TRUE, FALSE, FALSE, TRUE, TRUE, FALSE, TRUE, FALSE, FALSE, TRUE, 
TRUE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE), B5_Paper.cardboard = c(FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE
), B5_Metal.cans = c(TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, 
FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, 
TRUE, TRUE, TRUE), B5_Food.biodegradables = c(FALSE, FALSE, FALSE, 
TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE), B5_Plastic.polybags = c(TRUE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE
), B5_Plastic.bottles.containers = c(FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, TRUE, FALSE, TRUE, TRUE, FALSE, FALSE, TRUE, FALSE, 
FALSE, TRUE, FALSE, FALSE, FALSE), B5_other = c(FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE)), .Names = c("Householdref", 
"B5_Glass", "B5_Glass.bottles", "B5_Paper.cardboard", "B5_Metal.cans", 
"B5_Food.biodegradables", "B5_Plastic.polybags", "B5_Plastic.bottles.containers", 
"B5_other"), row.names = c("13", "21", "25", "29", "55", "56", 
"57", "60", "61", "69", "76", "81", "83", "90", "101", "108", 
"120", "125", "127"), class = "data.frame")

I need to reshape this data so that I have just three columns: Householdref, TRUE and FALSE. The names of the original columns should appear in either the TRUE or the FALSE column, according to their value, so that there are multiple rows for each Householdref.

I have been experimenting with reshape() as follows:

reshape(ar_waste_sep, direction="long", varying=2:9, sep=c("_"))

but end up with this:

                                 Householdref                       time  B5   id
1.Glass                       003015002024001                      Glass FALSE  1
2.Glass                       003016003006001                      Glass FALSE  2
3.Glass                       003016004013001                      Glass FALSE  3
4.Glass                       003016006002001                      Glass FALSE  4
5.Glass                       003017003009002                      Glass FALSE  5
6.Glass                       003017003009003                      Glass FALSE  6
7.Glass                       003017003009004                      Glass FALSE  7
8.Glass                       003017003037001                      Glass FALSE  8
9.Glass                       003017003049001                      Glass FALSE  9
10.Glass                      003070001026001                      Glass FALSE 10
11.Glass                      003070003042001                      Glass FALSE 11
12.Glass                      003070005002001                      Glass FALSE 12

So, as you can see, this is not quite what I'm after. I have searched high and low for an example that would show me how to do what I want, and I am sure it's not overly complicated, but I am just going round in circles. Would someone know a quick and easy way to achieve what I want, i.e. this:

  Householdref                       TRUE          FALSE
003015002024001                      Glass          NA
003015002024001                      Glass.bottles  NA
003015002024001                      NA             Paper.cardboard
003015002024001                      NA             Metal.cans
003015002024001                      NA             Plastic.bottles
003015002024001                      NA             Plastic
003017003009004                      Glass          NA
003017003009004                      NA             Glass.bottles
003017003009004                      Paper.cardboard NA

thanks in advance,

Marty

Was it helpful?

Solution

You can use melt and dcast from "reshape2" to get what (I think) you want:

# reshape2 supplies melt() and dcast(), used below.
library(reshape2)

# Step 1: stack the criterion columns into long format, keeping
# Householdref as the identifier. The stacked column names land in
# `variable` and their logical responses in `value`.
mydf <- melt(data = ar_waste_sep, id.vars = "Householdref")
head(mydf)
#      Householdref variable value
# 1 003015002024001 B5_Glass FALSE
# 2 003016003006001 B5_Glass FALSE
# 3 003016004013001 B5_Glass FALSE
# 4 003016006002001 B5_Glass FALSE
# 5 003017003009002 B5_Glass FALSE
# 6 003017003009003 B5_Glass FALSE

# Step 2: cast back to wide with one column per logical value
# (FALSE / TRUE). Each cell holds the criterion name when the response
# matches that column and NA otherwise.
out <- dcast(
  data = mydf,
  formula = Householdref + variable ~ value,
  value.var = "variable"
)
head(out)
#      Householdref               variable                  FALSE                TRUE
# 1 003015002024001               B5_Glass               B5_Glass                <NA>
# 2 003015002024001       B5_Glass.bottles                   <NA>    B5_Glass.bottles
# 3 003015002024001     B5_Paper.cardboard     B5_Paper.cardboard                <NA>
# 4 003015002024001          B5_Metal.cans                   <NA>       B5_Metal.cans
# 5 003015002024001 B5_Food.biodegradables B5_Food.biodegradables                <NA>
# 6 003015002024001    B5_Plastic.polybags                   <NA> B5_Plastic.polybags
Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top