You can do it as follows:
library(igraph)
# Sample input: each row pairs a type1 interval (chrx, startx, endx) with a
# type2 interval (chry, starty, endy).
# NOTE(review): row 4 has chry "ch20" while its chrx is "chr20" -- possibly a
# typo in the sample data; confirm. chry does not appear to be used by the
# grouping code.
DF <- read.csv(
  text =
"type1,chrx,startx,endx,chry,starty,endy,type2
gain_765,chr15,9681969,9685418,chr15,9660912,9712719,loss_1136
gain_766,chr15,9706682,9852347,chr15,9660912,9712719,loss_1136
gain_766,chr15,9706682,9852347,chr15,9765125,9863990,loss_765
gain_780,chr20,9706682,9852347,ch20,9765125,9863990,loss_769
gain_760,chr15,9706682,9852347,chr15,9660912,9712719,loss_1137
gain_760,chr15,9706682,9852347,chr15,9765125,9863990,loss_763",
  # Keep the id/chromosome columns as character vectors rather than factors.
  # Spelled FALSE because F is an ordinary variable that can be reassigned.
  stringsAsFactors = FALSE
)
# Build a graph with one edge type1 --> type2 per row of DF.
# You can visualize it using: plot(g)
# graph_from_data_frame() is the current name of the deprecated
# graph.data.frame().
g <- graph_from_data_frame(DF[, c("type1", "type2")])
# Split the graph into its weakly connected components, i.e. groups of ids
# that are linked to each other when edge direction is ignored.
# decompose() is the current name of the deprecated decompose.graph(); the
# igraph:: prefix makes sure stats::decompose() is not picked up instead.
subgraphs <- igraph::decompose(g, mode = "weak")
# For every connected component, pull out the rows of DF that belong to it:
# a row belongs to a component when its type1 or its type2 id is one of the
# component's vertex names.
subDFs <- lapply(subgraphs, function(component) {
  members <- V(component)$name
  in_component <- DF$type1 %in% members | DF$type2 %in% members
  DF[in_component, ]
})
# Collapse each group to a single row: the chrx of the group's first row,
# the smallest start and the largest end over both the x and y intervals.
subRes <- lapply(subDFs, function(group) {
  starts <- c(group$startx, group$starty)
  ends <- c(group$endx, group$endy)
  data.frame(chrx = group$chrx[1], start = min(starts), end = max(ends))
})
# Stack the one-row data.frames into the final result and display it.
res <- do.call(what = rbind.data.frame, args = subRes)
res
>
chrx start end
1 chr15 9660912 9863990
2 chr20 9706682 9863990
3 chr15 9660912 9863990
Steps 2 and 3 (the creation of subgraphs and subDFs) can be combined into a single step by putting the body of the function from the 3rd step into the function in the 2nd step.
I kept them separate for clarity.