Question

I am trying to have 2 "shadows" on the background of the below plot. These shadows should represent the density of the orange and blue points separately. Does it make sense?

Here is the ggplot to improve: enter image description here

Here is the code and the data (matrix df) I used to create this plot:

                         PC1           PC2 aa
A_akallopisos    0.043272525  0.0151023307  2
A_akindynos     -0.020707141 -0.0158198405  1
A_allardi       -0.020277664 -0.0221016281  2
A_barberi       -0.023165596  0.0389906701  2
A_bicinctus     -0.025354572 -0.0059122384  2
A_chrysogaster   0.012608835 -0.0339330213  2
A_chrysopterus  -0.022402365 -0.0092476009  1
A_clarkii       -0.014474658 -0.0127024469  1
A_ephippium     -0.016859412  0.0320034231  2
A_frenatus      -0.024190876  0.0238499714  2
A_latezonatus   -0.010718845 -0.0289904165  1
A_latifasciatus -0.005645811 -0.0183202248  2
A_mccullochi    -0.031664307 -0.0096059126  2
A_melanopus     -0.026915545  0.0308399009  2
A_nigripes       0.023420045  0.0293801537  2
A_ocellaris      0.052042539  0.0126144250  2
A_omanensis     -0.020387101  0.0010944998  2
A_pacificus      0.042406273 -0.0260308092  2
A_percula        0.034591721  0.0071153133  2
A_perideraion    0.052830132  0.0064495142  2
A_polymnus       0.030902254 -0.0005091421  2
A_rubrocinctus  -0.033318659  0.0474995722  2
A_sandaracinos   0.055839755  0.0093724082  2
A_sebae          0.021767793 -0.0218640814  2
A_tricinctus    -0.016230301 -0.0018526482  1
P_biaculeatus   -0.014466403  0.0024864574  2



 # Scatter plot of PC1 vs PC2 coloured by mutation (aa), with one linear fit
 # per colour group and a text label taken from the row names of df.
 # Sites_names and j are not defined in this snippet; they are presumably set
 # by the surrounding script (e.g. a loop over sites).
 ggplot(data = df,
        aes(x = PC1, y = PC2, color = factor(aa), label = rownames(df))) +
   ggtitle(paste0("Site n° ", Sites_names[j])) +
   geom_smooth(se = FALSE, method = "lm") +  # FALSE, not F: F can be reassigned
   geom_point() +
   scale_color_manual(name = "mutation",
                      values = c("darkorange2", "cornflowerblue"),
                      labels = c("A", "S")) +
   geom_text(hjust = 0.5, vjust = -1, size = 3) +
   xlim(-0.05, 0.07)
Was it helpful?

Solution

Here are some possible approaches using stat_density2d() with geom="polygon" and mapping or setting alpha transparency for the density fill regions. If you are willing to experiment with some of the parameters, I think you can get some very useful plots. Specifically, you may want to adjust the following:

  1. n controls the smoothness of the density polygon.
  2. h is the bandwidth of the density estimation.
  3. bins controls the number of density levels.

enter image description here

# Rebuild the question's data frame from its printed form. The header line
# names three columns while every data line has four fields, so read.table()
# uses the first field (the species label) as the row names.
df <- read.table(
  header = TRUE,
  text =
"                         PC1           PC2 aa
A_akallopisos    0.043272525  0.0151023307  2
A_akindynos     -0.020707141 -0.0158198405  1
A_allardi       -0.020277664 -0.0221016281  2
A_barberi       -0.023165596  0.0389906701  2
A_bicinctus     -0.025354572 -0.0059122384  2
A_chrysogaster   0.012608835 -0.0339330213  2
A_chrysopterus  -0.022402365 -0.0092476009  1
A_clarkii       -0.014474658 -0.0127024469  1
A_ephippium     -0.016859412  0.0320034231  2
A_frenatus      -0.024190876  0.0238499714  2
A_latezonatus   -0.010718845 -0.0289904165  1
A_latifasciatus -0.005645811 -0.0183202248  2
A_mccullochi    -0.031664307 -0.0096059126  2
A_melanopus     -0.026915545  0.0308399009  2
A_nigripes       0.023420045  0.0293801537  2
A_ocellaris      0.052042539  0.0126144250  2
A_omanensis     -0.020387101  0.0010944998  2
A_pacificus      0.042406273 -0.0260308092  2
A_percula        0.034591721  0.0071153133  2
A_perideraion    0.052830132  0.0064495142  2
A_polymnus       0.030902254 -0.0005091421  2
A_rubrocinctus  -0.033318659  0.0474995722  2
A_sandaracinos   0.055839755  0.0093724082  2
A_sebae          0.021767793 -0.0218640814  2
A_tricinctus    -0.016230301 -0.0018526482  1
P_biaculeatus   -0.014466403  0.0024864574  2"
)


library(ggplot2)

# p1: density "shadows" whose opacity follows the density level.
# A filled 2D-density polygon layer (one fill colour per aa group) is drawn
# first, so the regression lines, points and labels sit on top of it.
# NOTE: `..level..` is kept for compatibility with older ggplot2 releases;
# ggplot2 >= 3.4 warns about it and recommends `after_stat(level)` instead.
p1 <- ggplot(data = df,
             aes(x = PC1, y = PC2, color = factor(aa), label = rownames(df))) +
  ggtitle("Site n° ") +
  stat_density2d(aes(fill = factor(aa), alpha = ..level..),
                 geom = "polygon", color = NA, n = 200, h = 0.03, bins = 4) +
  geom_smooth(se = FALSE, method = "lm") +  # FALSE, not F: F can be reassigned
  geom_point() +
  scale_color_manual(name = "mutation",
                     values = c("darkorange2", "cornflowerblue"),
                     labels = c("A", "S")) +
  scale_fill_manual(name = "mutation",
                    values = c("darkorange2", "cornflowerblue"),
                    labels = c("A", "S")) +
  geom_text(hjust = 0.5, vjust = -1, size = 3, color = "black") +
  scale_x_continuous(expand = c(0.3, 0)) + # Zooms out so that density polygons
  scale_y_continuous(expand = c(0.3, 0)) + # don't reach edges of plot.
  coord_cartesian(xlim = c(-0.05, 0.07),
                  ylim = c(-0.04, 0.05)) # Zooms back in for the final plot.


# p2: density "shadows" with one constant transparency (alpha = 0.2) and only
# two density levels, giving a flatter, simpler region per aa group than a
# level-mapped alpha would.
# NOTE: `size = 1` on geom_smooth() is kept for compatibility with older
# ggplot2 releases; ggplot2 >= 3.4 prefers `linewidth` for line widths.
p2 <- ggplot(data = df,
             aes(x = PC1, y = PC2, color = factor(aa), label = rownames(df))) +
  ggtitle("Site n° ") +
  stat_density2d(aes(fill = factor(aa)), alpha = 0.2,
                 geom = "polygon", color = NA, n = 200, h = 0.045, bins = 2) +
  geom_smooth(se = FALSE, method = "lm", size = 1) +  # FALSE, not F
  geom_point(size = 2) +
  scale_color_manual(name = "mutation",
                     values = c("darkorange2", "cornflowerblue"),
                     labels = c("A", "S")) +
  scale_fill_manual(name = "mutation",
                    values = c("darkorange2", "cornflowerblue"),
                    labels = c("A", "S")) +
  geom_text(hjust = 0.5, vjust = -1, size = 3) +
  scale_x_continuous(expand = c(0.3, 0)) + # Zooms out so that density polygons
  scale_y_continuous(expand = c(0.3, 0)) + # don't reach edges of plot.
  coord_cartesian(xlim = c(-0.05, 0.07),
                  ylim = c(-0.04, 0.05)) # Zooms back in for the final plot.

library(gridExtra)
# Arrange p1 above p2 in a single column and write the result to plots.png.
ggsave(
  filename = "plots.png",
  plot = arrangeGrob(p1, p2, ncol = 1),
  width = 8,
  height = 11,
  dpi = 120
)

OTHER TIPS

Here's my suggestion. Using shadows or polygons is going to get pretty ugly when you overlay two colors and densities. A contour plot could be nicer to look at and is certainly easier to work with.

I've created some random data as a reproducible example and used a simple density function that uses the average distance of the nearest 5 points.

# Simulated stand-in for the real data: 20 points with uniform coordinates in
# [0, 1] and a binary group label aa (0 or 1, each with probability 0.5).
# The seed is fixed so the example yields the same points, and therefore the
# same contours, on every run.
set.seed(42)
df <- data.frame(
  PC1 = runif(20),
  PC2 = runif(20),
  aa = rbinom(20, 1, 0.5)
)


# Local density estimate at one location for one point type: the inverse of
# the mean Euclidean distance to the k nearest points of that type.
#
# row:  vector (or list) whose first three elements are the x coordinate, the
#       y coordinate and the point type, in that order. This is what
#       apply(plot.data, 1, ...) passes for each grid row.
# data: data frame with columns PC1, PC2 and aa holding the observed points.
#       Defaults to the global `df`, which is what the original version read.
# k:    number of nearest neighbours to average over (default 5).
#
# Returns a single numeric value, or NA if `data` has no point of that type.
# When fewer than k points of the type exist, all of them are used.
point.density <- function(row, data = df, k = 5) {
  same.type <- data[which(data$aa == row[[3]]), , drop = FALSE]
  if (nrow(same.type) == 0) {
    return(NA_real_)
  }

  # Distance must be computed per point *before* ranking. Sorting the x and y
  # offsets independently would pair offsets that belong to different points.
  dists <- sqrt((same.type$PC1 - row[[1]])^2 + (same.type$PC2 - row[[2]])^2)
  nearest <- sort(dists)[seq_len(min(k, length(dists)))]

  1 / mean(nearest)
}

# The density has to be evaluated over a whole grid, once per point type.
# res is the grid resolution: 100 steps per axis (0.01, 0.02, ..., 1), which
# gives a 100 x 100 grid.
res <- seq_len(100) / 100

# Every (x, y) grid node. y.val cycles fastest and x.val is held constant for
# each run of y values; the columns are then put in x, y order.
grid.xy <- expand.grid(y.val = res, x.val = res, KEEP.OUT.ATTRS = FALSE)
grid.xy <- grid.xy[, c("x.val", "y.val")]

# One copy of the grid per point type (0 and 1), stacked into a single frame.
plot.data0 <- cbind(grid.xy, type = 0)
plot.data1 <- cbind(grid.xy, type = 1)
plot.data <- rbind(plot.data0, plot.data1)

# plot.data holds one grid per point type, so evaluating point.density() on
# every row yields a density value for each type at each grid node. The
# result is attached as column z.val. All columns of plot.data are numeric,
# so the matrix coercion performed by apply() does not change any values.
densities <- apply(X = plot.data, MARGIN = 1, FUN = point.density)
plot.data[["z.val"]] <- densities

library(ggplot2)

# Draw the per-type density contours with stat_contour(), then overlay the
# observed points. Each layer names its own dataset explicitly because the
# contours come from plot.data while the points come from df.
ggplot() +
  stat_contour(
    data = plot.data,
    aes(x = x.val, y = y.val, z = z.val, colour = factor(type)),
    bins = 20,
    alpha = 0.4
  ) +
  geom_point(
    data = df,
    aes(x = PC1, y = PC2, colour = factor(aa))
  )

contour plot http://img34.imageshack.us/img34/6215/1yvb.png

Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top