Question

I am trying to create a figure in R. It consists of the contour plot of a bivariate normal distribution for the vector variable (x,y) along with the marginals f(x), f(y); the conditional distribution f(y|x) and the line through the conditioning value X=x (it will be a simple abline(v=x)). I already got the contour and the abline:

http://i62.tinypic.com/noyfls.png

but I don't know how to continue.

Here is the code I used so far:

bivariate.normal <- function(x, mu, Sigma) {
  exp(-.5 * t(x-mu) %*% solve(Sigma) %*% (x-mu)) / sqrt(2 * pi * det(Sigma))
}

mu <- c(0,0)
Sigma <- matrix(c(1,.8,.8,1), nrow=2)
x1 <- seq(-3, 3, length.out=50)
x2 <- seq(-3, 3, length.out=50)

z <- outer(x1, x2, FUN=function(x1, x2, ...){
             apply(cbind(x1,x2), 1, bivariate.normal, ...)
           }, mu=mu, Sigma=Sigma)

contour(x1, x2, z, col="blue", drawlabels=FALSE, nlevels=4,
        xlab=expression(x[1]), ylab=expression(x[2]), lwd=1)
abline(v=.7, col=1, lwd=2, lty=2)
text(2, -2, labels=expression(x[1]==0.7))
Was it helpful?

Solution

It would have been helpful if you had provided the function to calculate the marginal distribution. I may have got the marginal distribution function wrong, but I think this gets you what you want:

par(lwd=2,mgp=c(1,1,0))
# Modified to extract diagonal.
bivariate.normal <- function(x, mu, Sigma) 
  exp(-.5 * diag(t(x-mu) %*% solve(Sigma) %*% (x-mu))) / sqrt(2 * pi * det(Sigma))

mu <- c(0,0)
Sigma <- matrix(c(1,.8,.8,1), nrow=2)
x1 <- seq(-3, 3, length.out=50)
x2 <- seq(-3, 3, length.out=50)

plot(1:10,axes=FALSE,frame.plot=TRUE,lwd=1)

# z can now be calculated much easier.
z<-bivariate.normal(t(expand.grid(x1,x2)),mu,Sigma)
dim(z)<-c(length(x1),length(x2))
contour(x1, x2, z, col="#4545FF", drawlabels=FALSE, nlevels=4,
        xlab=expression(x[1]), ylab=expression(x[2]), lwd=2,xlim=range(x1),ylim=range(x2),frame.plot=TRUE,axes=FALSE,xaxs = "i", yaxs = "i")
axis(1,labels=FALSE,lwd.ticks=2)
axis(2,labels=FALSE,lwd.ticks=2)
abline(v=.7, col=1, lwd=2, lty=2)
text(2, -2, labels=expression(x[1]==0.7))

# Dotted
f<-function(x1,x2) bivariate.normal(t(cbind(x1,x2)),mu,Sigma)
x.s<-seq(from=min(x1),to=max(x1),by=0.1)
vals<-f(x1=0.7,x2=x.s)
lines(vals-abs(min(x1)),x.s,lty=2,lwd=2)

# Marginal probability distribution: http://mpdc.mae.cornell.edu/Courses/MAE714/biv-normal.pdf
# Please check this, I'm not sure it is correct.
marginal.x1<-function(x)  exp((-(x-mu[1])^2)/2*(Sigma[1,2]^2)) / (Sigma[1,2]*sqrt(2*pi))
marginal.x2<-function(x)  exp((-(x-mu[1])^2)/2*(Sigma[2,1]^2)) / (Sigma[2,1]*sqrt(2*pi))

# Left side solid
vals<-marginal.x2(x.s)
lines(vals-abs(min(x1)),x.s,lty=1,lwd=2)

# Bottom side solid
vals<-marginal.x1(x.s)
lines(x.s,vals-abs(min(x2)),lty=1,lwd=2)

enter image description here

OTHER TIPS

My solution in ggplot2, inspired in this post

rm(list=ls())
options(max.print=999999)
library(pacman)
p_load(tidyverse)
p_load(mvtnorm)

my_mean<-c(25,65)
mycors<-seq(-1,1,by=.25)
sd_vec<-c(5,7)

i<-3
temp_cor<-matrix(c(1,mycors[i],
                   mycors[i],1),
                 byrow = T,ncol=2)
V<-sd_vec %*% t(sd_vec) *temp_cor

###data for vertical curve
my_dnorm<- function(x, mean = 0, sd = 1, log = FALSE, new_loc, multplr){
  new_loc+dnorm(x, mean , sd, log)*multplr
}

##margina Y distribution
yden<-data.frame(y=seq(48,82,length.out = 100),x=my_dnorm(seq(48,82,length.out = 100),my_mean[2],sd_vec[2],new_loc=8,multplr=100))

##conditional distribution
my_int<-(my_mean[2]-(V[1,2]*my_mean[1]/V[1,1]))
my_slp<-V[1,2]/V[1,1]

givenX<-34
mu_givenX<-my_int+givenX*my_slp
sigma2_givenX<-(1-mycors[i]^2)*V[2,2]

y_givenX_range<-seq(mu_givenX-3*sqrt(sigma2_givenX),mu_givenX+3*sqrt(sigma2_givenX),length.out = 100)

yden_x<-data.frame(y=y_givenX_range,                   x=my_dnorm(y_givenX_range,mu_givenX,sqrt(sigma2_givenX),new_loc=givenX,multplr=80))

yden_x<-data.frame(y=y_givenX_range,                   x=my_dnorm(y_givenX_range,mu_givenX,sqrt(sigma2_givenX),new_loc=8,multplr=80))

###data for drawing ellipse
data.grid <- expand.grid(x = seq(my_mean[1]-3*sd_vec[1], my_mean[1]+3*sd_vec[1], length.out=200),
                         y = seq(my_mean[2]-3*sd_vec[2], my_mean[2]+3*sd_vec[2], length.out=200))
q.samp <- cbind(data.grid, prob = dmvnorm(data.grid, mean = my_mean, sigma = V))

###plot
ggplot(q.samp, aes(x=x, y=y, z=prob)) + 
  geom_contour() + theme_bw()+ 
  geom_abline(intercept = my_int, slope = my_slp, color="red", 
                                           linetype="dashed")+
  stat_function(fun = my_dnorm, n = 101, args = list(mean = my_mean[1], sd = sd_vec[1], new_loc=35,multplr=100),color=1) +
  geom_path(aes(x=x,y=y), data = yden,inherit.aes = FALSE) +
  geom_path(aes(x=x,y=y), data = yden_x,inherit.aes = FALSE,color=1,linetype="dashed") +
  
  geom_vline(xintercept = givenX,linetype="dashed")

Created on 2020-10-31 by the reprex package (v0.3.0)

Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top