# Today we demonstrated the calculation of confidence intervals for # a regression slope. We pasted one of the slopes and its standard # error from the grades regression output in SAS: > 0.98784561 0.36088447 Error: unexpected numeric constant in " 0.98784561 0.36088447" # The regression had 97 degrees of freedom; we found the correct # critical value of t: > qt(.975, 97) [1] 1.984723 # Then we calculated the slope plus or minus its standard error # times the critical t value: > 0.98784561 + 0.36088447*1.984723 [1] 1.704101 > 0.98784561 - 0.36088447*1.984723 [1] 0.2715899 # Next, we demonstrated the performance of such confidence intervals # using a simulation. We generate data to fit a simple regression with # a slope of 20: > x <- rnorm(100) > y <- 10 + 20*x > y <- y + rnorm(100, 0, 40) # We calculate a confidence interval. This time, df = 98: we still have 100 cases, # but only one predictor plus the intercept, so df = 100 - 2 = 98. > tcrit <- qt(.975, 98) > regout <- lm(y~x) > se <- sqrt(var(regout$res)/98/var(x)) > low <- regout$coef[2] - se*tcrit > high <- regout$coef[2] + se*tcrit > low x 19.72490 > high x 38.03909 # Note that this particular confidence interval was successful, inasmuch as # it contained the true value of the slope. # Next, we simulated a set of 100 such intervals, producing graphics so that # the intervals that missed are color coded red. 
We repeated the exercise # several times, and noted that we tended to get about 5 misses in each # set of 100 intervals: > plot( c(0,40), c(0,101), type='n', axes=FALSE, xlab="", ylab="") > axis(side=1, at=seq(0,40,5)) > lines(c(20,20),c(0,101)) > > for( i in 1:100) { + par(col=1) + x <- rnorm(100) + y <- 10 + 20*x + y <- y + rnorm(100,0,40) + regout <- lm(y~x) + se <- sqrt(var(regout$res)/98/var(x)) + low <- regout$coef[2] - tcrit*se + high <- regout$coef[2] + tcrit*se + if(low > 20) par(col=2) + if(high < 20) par(col=2) + lines(c(low,high),c(i,i)) + } > plot( c(0,40), c(0,101), type='n', axes=FALSE, xlab="", ylab="") > axis(side=1, at=seq(0,40,5)) > lines(c(20,20),c(0,101)) > > for( i in 1:100) { + par(col=1) + x <- rnorm(100) + y <- 10 + 20*x + y <- y + rnorm(100,0,40) + regout <- lm(y~x) + se <- sqrt(var(regout$res)/98/var(x)) + low <- regout$coef[2] - tcrit*se + high <- regout$coef[2] + tcrit*se + if(low > 20) par(col=2) + if(high < 20) par(col=2) + lines(c(low,high),c(i,i)) + } > plot( c(0,40), c(0,101), type='n', axes=FALSE, xlab="", ylab="") > axis(side=1, at=seq(0,40,5)) > lines(c(20,20),c(0,101)) > > for( i in 1:100) { + par(col=1) + x <- rnorm(100) + y <- 10 + 20*x + y <- y + rnorm(100,0,40) + regout <- lm(y~x) + se <- sqrt(var(regout$res)/98/var(x)) + low <- regout$coef[2] - tcrit*se + high <- regout$coef[2] + tcrit*se + if(low > 20) par(col=2) + if(high < 20) par(col=2) + lines(c(low,high),c(i,i)) + } > plot( c(0,40), c(0,101), type='n', axes=FALSE, xlab="", ylab="") > axis(side=1, at=seq(0,40,5)) > lines(c(20,20),c(0,101)) > > for( i in 1:100) { + par(col=1) + x <- rnorm(100) + y <- 10 + 20*x + y <- y + rnorm(100,0,40) + regout <- lm(y~x) + se <- sqrt(var(regout$res)/98/var(x)) + low <- regout$coef[2] - tcrit*se + high <- regout$coef[2] + tcrit*se + if(low > 20) par(col=2) + if(high < 20) par(col=2) + lines(c(low,high),c(i,i)) + } > plot( c(0,40), c(0,101), type='n', axes=FALSE, xlab="", ylab="") > axis(side=1, at=seq(0,40,5)) > 
lines(c(20,20),c(0,101)) > > for( i in 1:100) { + par(col=1) + x <- rnorm(100) + y <- 10 + 20*x + y <- y + rnorm(100,0,40) + regout <- lm(y~x) + se <- sqrt(var(regout$res)/98/var(x)) + low <- regout$coef[2] - tcrit*se + high <- regout$coef[2] + tcrit*se + if(low > 20) par(col=2) + if(high < 20) par(col=2) + lines(c(low,high),c(i,i)) + } # Next, we demonstrated how the standard error of a conditional mean # is assembled from the variances and covariances of the (X'X)^-1 matrix, # using the formula for the variance of a linear combination of # random variables: > se <- sqrt(50.290426 * ( + 1^2 * .54593567 + 13^2 * 0.0029356502 + + 2^2 * 0.0025897097 + + 2 * (1 * 13 * -0.037302111 + + 1*2 * -0.002472899 + + 13 * 2 * -0.000763273 + ))) > se [1] 1.287911 # We calculated the predicted value by pasting in the intercept # and slopes from SAS: > pred <- 63.22702455 > pred <- pred + 13 * 0.87062304 > pred <- pred + 2* 0.98784561 > pred [1] 76.52082 > tcrit <- qt(.975,97) > pred - tcrit*se [1] 73.96467 > pred + tcrit*se [1] 79.07696 # Note that those values match the numbers produced by SAS. (This transcript corrects a false # start caused by a misplaced parenthesis in the standard error calculation.)