Question

I have two datasets that I'm trying to merge. The first contains information for every test subject, one row per subject, keyed by a unique ID. The second contains measurements for every test subject, one column per measurement; each subject was measured twice, so its column names are the unique ID followed by "A" or "B" (for example "Tree001A" and "Tree001B"). I'd like to find a way to merge these two tables based on the unique ID, regardless of whether it is measurement A or B.

Here's a small sample of the 2 datasets, and a table of the intended output. Any help would be appreciated!

UniqueID        Site        State       Age     Height  
Tree001           FK           OR         23    70  
Tree002           FK           OR         45    53  
Tree003           NM           OR         35    84  


UniqueID    Tree001A    Tree001B    Tree002A    Tree002B    Tree003A    Tree003B  
1996    4       2           
1997    7   8   7       3   
1998    3   2   9   4   7   
1999    11  9   2   12  3   13  
2010    8   8   4   6   11  4  
2011    10  5   6   3   8   9


UniqueID    Tree001A    Tree001B    Tree002A    Tree002B    Tree003A    Tree003B  
Site    FK  FK  FK  FK  NM  NM  
State   OR  OR  OR  OR  OR  OR  
Age     23  23  45  45  35  35  
Height  70  70  53  53  84  84  
1996    4       2             
1997    7   8   7       3     
1998    3   2   9   4   7     
1999    11  9   2   12  3   13  
2010    8   8   4   6   11  4  
2011    10  5   6   3   8   9 
Was it helpful?

Solution

This can be one approach.

# Subject metadata: one row per tree, keyed by UniqueID.
# Factor levels are spelled out so they do not depend on sort order.
df1 <- data.frame(
  UniqueID = factor(
    c("Tree001", "Tree002", "Tree003"),
    levels = c("Tree001", "Tree002", "Tree003")
  ),
  Site = factor(c("FK", "FK", "NM"), levels = c("FK", "NM")),
  State = factor(c("OR", "OR", "OR"), levels = "OR"),
  Age = c(23L, 45L, 35L),
  Height = c(70L, 53L, 84L)
)


# Yearly measurements in wide form: one row per year (held in UniqueID),
# one integer column per tree and measurement ("A" / "B" suffix).
# NA marks years without a measurement.
df2 <- data.frame(
  UniqueID = c(1996L, 1997L, 1998L, 1999L, 2010L, 2011L),
  Tree001A = c(4L, 7L, 3L, 11L, 8L, 10L),
  Tree001B = c(NA, 8L, 2L, 9L, 8L, 5L),
  Tree002A = c(2L, 7L, 9L, 2L, 4L, 6L),
  Tree002B = c(NA, NA, 4L, 12L, 6L, 3L),
  Tree003A = c(NA, 3L, 7L, 3L, 11L, 8L),
  Tree003B = c(NA, NA, NA, 13L, 4L, 9L)
)


> df1
  UniqueID Site State Age Height
1  Tree001   FK    OR  23     70
2  Tree002   FK    OR  45     53
3  Tree003   NM    OR  35     84
> df2
  UniqueID Tree001A Tree001B Tree002A Tree002B Tree003A Tree003B
1     1996        4       NA        2       NA       NA       NA
2     1997        7        8        7       NA        3       NA
3     1998        3        2        9        4        7       NA
4     1999       11        9        2       12        3       13
5     2010        8        8        4        6       11        4
6     2011       10        5        6        3        8        9

# Stack the per-subject metadata from df1 on top of the wide measurement
# table df2. Each df2 column ("Tree001A", "Tree001B", ...) is matched to its
# subject in df1 by name, so nothing depends on column order or on the number
# of subjects / metadata fields.

# Convert every metadata column to character before transposing. t() on a
# mixed data frame goes through as.matrix(), which format()s numeric columns
# and pads them to a common width (e.g. " 5" next to "10").
df3 <- df1[-1]
df3[] <- lapply(df3, as.character)
df3 <- as.data.frame(t(df3), stringsAsFactors = FALSE)
colnames(df3) <- as.character(df1[[1]])

# Map each measurement column of df2 to its subject by dropping the trailing
# measurement suffix ("A" or "B").
measure_cols <- setdiff(colnames(df2), "UniqueID")
subject_ids <- sub("[AB]$", "", measure_cols)

# Fail loudly instead of silently producing NA columns for unknown subjects.
unknown_ids <- setdiff(subject_ids, colnames(df3))
if (length(unknown_ids) > 0) {
  stop(
    "No metadata in df1 for: ", paste(unknown_ids, collapse = ", "),
    call. = FALSE
  )
}

# Repeat each subject's metadata once per measurement column, then move the
# field names (Site, State, ...) out of the row names into UniqueID.
df3 <- df3[, match(subject_ids, colnames(df3)), drop = FALSE]
colnames(df3) <- measure_cols
df3 <- cbind(UniqueID = rownames(df3), df3, stringsAsFactors = FALSE)
rownames(df3) <- NULL

# Metadata rows are text, so the measurement columns must be text as well
# before the two tables can share columns.
df2[] <- lapply(df2, as.character)

# Now combine the two data frames
new <- rbind(df3, df2)
> new
   UniqueID Tree001A Tree001B Tree002A Tree002B Tree003A Tree003B
1      Site       FK       FK       FK       FK       NM       NM
2     State       OR       OR       OR       OR       OR       OR
3       Age       23       23       45       45       35       35
4    Height       70       70       53       53       84       84
5      1996        4     <NA>        2     <NA>     <NA>     <NA>
6      1997        7        8        7     <NA>        3     <NA>
7      1998        3        2        9        4        7     <NA>
8      1999       11        9        2       12        3       13
9      2010        8        8        4        6       11        4
10     2011       10        5        6        3        8        9
Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top