Expected covariances in a causal network
This is a skeleton post
Standardized variables (Wright’s rules)
n <- 10^5
# z is the common cause of g1 and g2
z <- rnorm(n)
# effects of z on g1 and g2
b1 <- 0.7
b2 <- 0.7
r12 <- b1*b2
g1 <- b1*z + sqrt(1-b1^2)*rnorm(n)
g2 <- b2*z + sqrt(1-b2^2)*rnorm(n)
var(g1) # E(VAR(g1)) = 1
## [1] 1.001849
var(g2) # E(VAR(g2)) = 1
## [1] 1.006102
cor(g1, g2) # E(COR(g1,g2)) = b1*b2
## [1] 0.4892741
b1*b2
## [1] 0.49
# effects of g1 and g2 on y1 and y2
a11 <- 0.5 # effect of g1 on y1
a12 <- 0.5 # effect of g1 on y2
a21 <- 0.5 # effect of g2 on y1
a22 <- 0.5 # effect of g2 on y2
r2.1 <- a11^2 + a21^2 + 2*a11*a21*b1*b2 # systematic variance of y1
r2.2 <- a12^2 + a22^2 + 2*a12*a22*b1*b2 # systematic variance of y1
y1 <- a11*g1 + a21*g2 + sqrt(1-r2.1)*rnorm(n)
y2 <- a12*g1 + a22*g2 + sqrt(1-r2.2)*rnorm(n)
var(y1) # E() = 1
## [1] 0.9997248
var(y2) # E() = 1
## [1] 1.001052
cor(y1, y2) # E() = a11*a12 + a21*a22 + a11*r12*a22 + a21*r12*a12
## [1] 0.7468488
a11*a12 + a21*a22 + a11*r12*a22 + a21*r12*a12 # sum of all paths
## [1] 0.745
Non-standardized variables (Wright’s rules)
n <- 10^6
# z is the common cause of g1 and g2
sigma_z <- 2
z <- rnorm(n, sd=sigma_z)
# effects of z on g1 and g2
b1 <- 1.2
b2 <- 1.2
s12 <- b1*b2*sigma_z^2 # cov(g1, g2)
sigma.sq_g1.sys <- (b1*sigma_z)^2 # systematic variance of g1
sigma.sq_g2.sys <- (b2*sigma_z)^2 # systematic variance of g1
r2.g1 <- 0.9 # R^2 for g1
r2.g2 <- 0.9 # R^2 for g2
sigma.sq_g1 <- sigma.sq_g1.sys/r2.g1
sigma.sq_g2 <- sigma.sq_g2.sys/r2.g2
sigma_g1 <- sqrt(sigma.sq_g1)
sigma_g2 <- sqrt(sigma.sq_g2)
g1 <- b1*z + sqrt(sigma.sq_g1 - sigma.sq_g1.sys)*rnorm(n)
g2 <- b2*z + sqrt(sigma.sq_g2 - sigma.sq_g2.sys)*rnorm(n)
var(g1) # E(VAR(g1)) = sigma.sq_g1
## [1] 6.397673
var(g2) # E(VAR(g2)) = sigma.sq_g2
## [1] 6.406897
sigma.sq_g1
## [1] 6.4
sigma.sq_g2
## [1] 6.4
cov(g1,g2) # E() = b1*b2*sigma_z^2
## [1] 5.760453
s12 # b1*b2*sigma_z^2
## [1] 5.76
# effects of g1 and g2 on y1 and y2
a11 <- 1.5 # effect of g1 on y1
a12 <- 1.5 # effect of g1 on y2
a21 <- 3 # effect of g2 on y1
a22 <- 1.5 # effect of g2 on y2
sigma.sq_y1.sys <- a11^2*sigma.sq_g1 + a21^2*sigma.sq_g2 + 2*a11*a21*s12 # systematic variance of y1
sigma.sq_y2.sys <- a12^2*sigma.sq_g1 + a22^2*sigma.sq_g2 + 2*a12*a22*s12 # systematic variance of y2
r2.1 <- 0.9 # R^2 for y1
r2.2 <- 0.9 # R^2 for y2
sigma.sq_y1 <- sigma.sq_y1.sys/r2.1
sigma.sq_y2 <- sigma.sq_y2.sys/r2.2
sigma_y1 <- sqrt(sigma.sq_y1)
sigma_y2 <- sqrt(sigma.sq_y2)
y1 <- a11*g1 + a21*g2 + sqrt(sigma.sq_y1 - sigma.sq_y1.sys)*rnorm(n)
y2 <- a12*g1 + a22*g2 + sqrt(sigma.sq_y2 - sigma.sq_y2.sys)*rnorm(n)
var(y1) # E() = sigma.sq_y1
## [1] 137.721
var(y2) # E() = sigma.sq_y1
## [1] 60.808
sigma.sq_y1
## [1] 137.6
sigma.sq_y2
## [1] 60.8
a11.s <- a11*sigma_g1/sigma_y1
a12.s <- a12*sigma_g1/sigma_y2
a21.s <- a21*sigma_g2/sigma_y1
a22.s <- a22*sigma_g2/sigma_y2
r12 <- s12/sigma_g1/sigma_g2
r12 # expected correlation between g1 and g2
## [1] 0.9
cor(g1, g2) # actual correlation between g1 and g2
## [1] 0.8997498
# covariance y1 and y2
cov(y1, y2) #
## [1] 82.12391
(a11.s*a12.s + a21.s*a22.s + a11.s*r12*a22.s + a21.s*r12*a12.s)*sigma_y1*sigma_y2 # expected cov(y1, y2)
## [1] 82.08
# more compact and not using the standardized parameters
# = sum of paths where path coef = stand coef * parent sd
a11*a12*sigma.sq_g1 + a21*a22*sigma.sq_g2 + a11*s12*a22 + a21*s12*a12
## [1] 82.08
# correlation y1 and y2
cor(y1, y2)
## [1] 0.8974064
a11.s*a12.s + a21.s*a22.s + a11.s*r12*a22.s + a21.s*r12*a12.s
## [1] 0.8973799
Expected value of b with missing covariate
coef(summary(lm(y1 ~ g1 + g2)))
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.0001228728 0.003701744 -0.03319322 0.9735205
## g1 1.5023148063 0.003353549 447.97754641 0.0000000
## g2 2.9998699773 0.003351134 895.18045136 0.0000000
coef(summary(lm(y1 ~ g1)))
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.000198208 0.004968271 3.989476e-02 0.968177
## g1 4.203392309 0.001964239 2.139959e+03 0.000000
# expected = direct + cor*indirect using cor
a11 + cor(g1,g2)*a21
## [1] 4.199249
# expected = direct + cor*indirect, using cov as model param
a11 + (s12/sigma_g1/sigma_g2)*a21
## [1] 4.2
# need to correct if different variances in g1 and g2
# expected is a11 + rho*sigma_g2/sigma_g1*a21
rho <- s12/sigma_g1/sigma_g2
a11 + rho*sigma_g2/sigma_g1*a21
## [1] 4.2
# this is equal to
a11 + s12/sigma_g1^2*a21
## [1] 4.2
# which is equal to a11 + b21*a21