Pergunta

I have the following data frame. I want to recast it on one variable and keep a single value per cell. I do not simply want to keep the first value; I want to set the priority (drop order) manually.

For example, for Variant_Classification.recode I want to keep values in descending order of priority: SNVs, Indel, Splice_Site, Translation_Start_Site and, last, Noncoding.

I have been struggling with this for a while but have not found a solution!

Thanks!

# Example input (dput output): 20 rows, one per observed variant, holding the
# individual, the recoded variant class and the gene symbol, all as factors.
mydata.df.test_3_mutations_col <- structure(
  list(
    Individual = structure(
      c(
        1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 5L,
        5L, 5L, 6L, 6L, 6L, 6L, 8L, 8L, 8L, 8L
      ),
      .Label = c("p1", "p2", "p3", "p4", "p5", "p6", "p7", "p8", "p9"),
      class = "factor"
    ),
    Variant_Classification.recode = structure(
      c(
        3L, 1L, 2L, 4L, 5L, 3L, 4L, 2L, 1L, 3L,
        2L, 2L, 3L, 3L, 5L, 1L, 3L, 5L, 1L, 3L
      ),
      .Label = c(
        "Noncoding", "Indel", "SNVs", "Splice_Site", "Translation_Start_Site"
      ),
      class = "factor"
    ),
    Canonical_Hugo_Symbol = structure(
      c(
        1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
        2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
      ),
      .Label = c("Gene1", "Gene2"),
      class = "factor"
    )
  ),
  .Names = c(
    "Individual", "Variant_Classification.recode", "Canonical_Hugo_Symbol"
  ),
  row.names = c(
    50L, 51L, 116L, 166L, 167L, 168L, 169L, 292L, 293L, 342L,
    343L, 344L, 357L, 358L, 359L, 462L, 522L, 523L, 524L, 631L
  ),
  class = "data.frame"
)

# Spread to wide format: one row per Individual, one column per gene.
# The input holds several rows for some Individual/gene pairs, so only one
# variant class per pair can end up in the wide table.
mydata.reshape <- reshape(
  mydata.df.test_3_mutations_col,
  direction = "wide",
  idvar = "Individual",
  timevar = "Canonical_Hugo_Symbol"
)

# Strip the literal "Variant_Classification.recode." prefix so the wide columns
# are named after the gene only. The dots are escaped and the pattern is
# anchored, so nothing but that exact leading prefix can match.
colnames(mydata.reshape) <- sub(
  "^Variant_Classification\\.recode\\.",
  "",
  colnames(mydata.reshape)
)

# Back to long format for plotting. `id.vars` is spelled out instead of relying
# on partial matching of `id`.
# NOTE(review): assumes melt() is reshape2's, whose argument is `variable.name`;
# the former `variable_name = ...` was then silently ignored, so it is dropped.
# The gene column keeps the default name `variable`, which the plot code uses.
df <- melt(mydata.reshape, id.vars = "Individual")
df <- na.omit(df)

# Raster heat map of `df`: Individual on x, the `variable` column on y
# (labelled "Gene Symbol" below), tiles filled by `value`
# (legend "Variant Classification").
q <- ggplot(df, aes(Individual, variable, fill = value)) +
  geom_raster()

# Evaluating this sum at top level draws the themed plot; `q` itself is left
# without the theme, labels and fill scale.
q +
  theme(
    axis.text.x = element_text(angle = -90, hjust = 1, size = 4),
    text = element_text(size = 8),
    panel.background = element_blank(),
    panel.grid.major = element_line(colour = "black", size = 0.03)
  ) +
  labs(x = "Patients ID", y = "Gene Symbol") +
  scale_fill_brewer(palette = "Set1", name = "Variant Classification")

Now I have found the solution!

First I have to reorder the factor levels and sort the data frame by that factor:

# Re-level the variant class as an ordered factor, highest priority first ...
mydata.df.test_3_mutations_col[["Variant_Classification.recode"]] <- ordered(
  mydata.df.test_3_mutations_col[["Variant_Classification.recode"]],
  levels = c(
    "SNVs", "Indel", "Splice_Site", "Translation_Start_Site", "Noncoding"
  )
)

# ... then sort the rows by that factor so higher-priority classes come first.
mydata.df.test_3_mutations_col <- mydata.df.test_3_mutations_col[
  with(mydata.df.test_3_mutations_col, order(Variant_Classification.recode)),
]

And then I can do the reshape for ggplot:

# Spread the (already priority-sorted) data to wide format: one row per
# Individual, one column per gene.
mydata.reshape <- reshape(
  mydata.df.test_3_mutations_col,
  direction = "wide",
  idvar = "Individual",
  timevar = "Canonical_Hugo_Symbol"
)

# Strip the literal "Variant_Classification.recode." prefix so the wide columns
# are named after the gene only. The dots are escaped and the pattern is
# anchored, so nothing but that exact leading prefix can match.
colnames(mydata.reshape) <- sub(
  "^Variant_Classification\\.recode\\.",
  "",
  colnames(mydata.reshape)
)

# Back to long format for ggplot. `id.vars` is spelled out instead of relying
# on partial matching of `id`.
# NOTE(review): assumes melt() is reshape2's, whose argument is `variable.name`;
# the former `variable_name = ...` was then silently ignored, so it is dropped
# and the gene column keeps the default name `variable`.
df <- melt(mydata.reshape, id.vars = "Individual")
df <- na.omit(df)
Foi útil?

Solução

You can try dcast from the reshape2 package, which lets you specify a custom aggregation function that picks the value according to your preference, as done here:

# Variant classes from most to least preferred.
pref.order <- c(
  "SNVs", "Indel", "Splice_Site", "Translation_Start_Site", "Noncoding"
)

# Collapse a vector of variant classes to the single most preferred one.
#
# x: character vector or factor of variant classes (may be empty).
# Returns a length-one character vector: the element of `x` that appears
# earliest in `pref.order`, or NA_character_ when `x` is empty. Values missing
# from `pref.order` rank last, because order() places NA positions at the end.
my.fun <- function(x) {
  # Guard for empty input.
  # NOTE(review): presumably needed because dcast() probes the aggregator with
  # a zero-length vector to derive its fill value; confirm in the reshape2 docs.
  if (!length(x)) {
    return(NA_character_)
  }
  rank_in_pref <- match(x, pref.order)
  best <- order(rank_in_pref)[1L]
  as.character(x[best])
}

library(reshape2)
# Cast to one row per Individual and one column per gene symbol; my.fun reduces
# the variant classes that fall into each cell to a single value.
dcast(
  data = mydata.df.test_3_mutations_col,
  formula = Individual ~ Canonical_Hugo_Symbol,
  fun.aggregate = my.fun,
  value.var = "Variant_Classification.recode"
)

Produces:

  Individual Gene1 Gene2
1         p1  SNVs  <NA>
2         p3  SNVs Indel
3         p5  <NA>  SNVs
4         p6  SNVs  SNVs
5         p8  SNVs  <NA>
Licenciado em: CC-BY-SA com atribuição
Não afiliado a StackOverflow
scroll top