# We continued the example of collinearity that we had begun last time. # Recall that we had already created two variables, z1 and z2, that # were correlated .8: > cor(z1,z2) [1] 0.8008038 # We generate y to fit a regression model: > y <- 10 + 10*z1 - 10*z2 # We created some noise: > noise <- rnorm(100, 0, 100) # And we fit the regression of y (with the noise) on the two predictors: > summary(lm(y+noise~z1+z2)) Call: lm(formula = y + noise ~ z1 + z2) Residuals: Min 1Q Median 3Q Max -254.7833 -46.5296 -0.3359 71.3679 205.1611 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 23.966 51.738 0.463 0.644 z1 9.068 1.710 5.304 7.15e-07 *** z2 -9.138 1.671 -5.467 3.55e-07 *** --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Residual standard error: 97.8 on 97 degrees of freedom Multiple R-squared: 0.2497, Adjusted R-squared: 0.2342 F-statistic: 16.14 on 2 and 97 DF, p-value: 8.903e-07 # Note that the slopes were pretty close to their true values. Moreover, # if we repeat the process with a new noise sample, the estimates are # pretty stable. Here are four repetitions of the process, to make # that point. Note that the slope estimates stay close to 10 and -10: > noise <- rnorm(100, 0, 100) > summary(lm(y+noise~z1+z2)) Call: lm(formula = y + noise ~ z1 + z2) Residuals: Min 1Q Median 3Q Max -231.762 -52.723 -8.535 55.583 222.862 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) -16.800 49.715 -0.338 0.736 z1 9.691 1.643 5.899 5.35e-08 *** z2 -9.366 1.606 -5.832 7.21e-08 *** --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Residual standard error: 93.97 on 97 degrees of freedom Multiple R-squared: 0.2826, Adjusted R-squared: 0.2679 F-statistic: 19.11 on 2 and 97 DF, p-value: 1.007e-07 > noise <- rnorm(100, 0, 100) > summary(lm(y+noise~z1+z2)) Call: lm(formula = y + noise ~ z1 + z2) Residuals: Min 1Q Median 3Q Max -214.608 -61.809 1.382 67.693 218.164 Coefficients: Estimate Std. 
Error t value Pr(>|t|) (Intercept) -28.319 49.961 -0.567 0.572 z1 11.181 1.651 6.772 9.77e-10 *** z2 -10.401 1.614 -6.444 4.49e-09 *** --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Residual standard error: 94.44 on 97 degrees of freedom Multiple R-squared: 0.3346, Adjusted R-squared: 0.3209 F-statistic: 24.39 on 2 and 97 DF, p-value: 2.631e-09 > noise <- rnorm(100, 0, 100) > summary(lm(y+noise~z1+z2)) Call: lm(formula = y + noise ~ z1 + z2) Residuals: Min 1Q Median 3Q Max -273.5096 -62.0025 -0.6437 61.2825 271.0550 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 64.920 51.753 1.254 0.213 z1 8.212 1.710 4.802 5.71e-06 *** z2 -9.407 1.672 -5.627 1.78e-07 *** --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Residual standard error: 97.82 on 97 degrees of freedom Multiple R-squared: 0.2475, Adjusted R-squared: 0.232 F-statistic: 15.95 on 2 and 97 DF, p-value: 1.024e-06 > noise <- rnorm(100, 0, 100) > summary(lm(y+noise~z1+z2)) Call: lm(formula = y + noise ~ z1 + z2) Residuals: Min 1Q Median 3Q Max -218.386 -70.161 -4.683 55.634 238.015 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) -21.823 52.077 -0.419 0.676 z1 10.328 1.721 6.002 3.37e-08 *** z2 -9.655 1.682 -5.739 1.09e-07 *** --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Residual standard error: 98.44 on 97 degrees of freedom Multiple R-squared: 0.2838, Adjusted R-squared: 0.2691 F-statistic: 19.22 on 2 and 97 DF, p-value: 9.283e-08 # Now we create a third predictor that is actually the ratio # of the other two: > z3 <- z1/z2 # If we add this third variable to the regression model, the # estimates become quite unstable. 
Notice how, in the following # replications of the regression, the parameter estimates are # quite unstable, and the significance levels vary wildly: > summary(lm(y+noise~z1+z2+z3)) Call: lm(formula = y + noise ~ z1 + z2 + z3) Residuals: Min 1Q Median 3Q Max -222.813 -62.897 1.509 55.799 241.542 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 331.233 251.904 1.315 0.19167 z1 17.785 5.481 3.245 0.00162 ** z2 -17.508 5.733 -3.054 0.00292 ** z3 -330.953 231.088 -1.432 0.15535 --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Residual standard error: 97.91 on 96 degrees of freedom Multiple R-squared: 0.2988, Adjusted R-squared: 0.2769 F-statistic: 13.64 on 3 and 96 DF, p-value: 1.753e-07 > noise <- rnorm(100, 0, 100) > summary(lm(y+noise~z1+z2+z3)) Call: lm(formula = y + noise ~ z1 + z2 + z3) Residuals: Min 1Q Median 3Q Max -216.37 -64.70 -13.14 65.94 279.02 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) -274.954 252.711 -1.088 0.279 z1 4.312 5.498 0.784 0.435 z2 -3.782 5.751 -0.658 0.512 z3 263.147 231.828 1.135 0.259 Residual standard error: 98.22 on 96 degrees of freedom Multiple R-squared: 0.2985, Adjusted R-squared: 0.2766 F-statistic: 13.62 on 3 and 96 DF, p-value: 1.792e-07 > noise <- rnorm(100, 0, 100) > summary(lm(y+noise~z1+z2+z3)) Call: lm(formula = y + noise ~ z1 + z2 + z3) Residuals: Min 1Q Median 3Q Max -318.01 -60.49 5.38 74.58 236.55 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) -80.875 266.062 -0.304 0.762 z1 9.463 5.789 1.635 0.105 z2 -8.531 6.055 -1.409 0.162 z3 31.739 244.076 0.130 0.897 Residual standard error: 103.4 on 96 degrees of freedom Multiple R-squared: 0.2573, Adjusted R-squared: 0.2341 F-statistic: 11.09 on 3 and 96 DF, p-value: 2.587e-06 > noise <- rnorm(100, 0, 100) > summary(lm(y+noise~z1+z2+z3)) Call: lm(formula = y + noise ~ z1 + z2 + z3) Residuals: Min 1Q Median 3Q Max -228.817 -53.238 -4.891 60.117 271.161 Coefficients: Estimate Std. 
Error t value Pr(>|t|) (Intercept) -496.892 251.015 -1.980 0.0506 . z1 3.730 5.461 0.683 0.4962 z2 -1.130 5.713 -0.198 0.8436 z3 376.250 230.272 1.634 0.1055 --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Residual standard error: 97.56 on 96 degrees of freedom Multiple R-squared: 0.3607, Adjusted R-squared: 0.3407 F-statistic: 18.05 on 3 and 96 DF, p-value: 2.281e-09 > noise <- rnorm(100, 0, 100) > summary(lm(y+noise~z1+z2+z3)) Call: lm(formula = y + noise ~ z1 + z2 + z3) Residuals: Min 1Q Median 3Q Max -264.794 -59.015 -1.465 79.317 157.664 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) -569.6448 246.7130 -2.309 0.0231 * z1 2.9018 5.3679 0.541 0.5900 z2 -0.4568 5.6147 -0.081 0.9353 z3 444.5589 226.3257 1.964 0.0524 . --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Residual standard error: 95.89 on 96 degrees of freedom Multiple R-squared: 0.4003, Adjusted R-squared: 0.3815 F-statistic: 21.36 on 3 and 96 DF, p-value: 1.112e-10 # That is fairly typical behavior for a regression with collinearity. # We calculated the simplest form of the condition number. Note that # the magnitude of the index is very large: > X <- cbind(rep(1,100),z1,z2,z3) > sqrt( eigen(t(X)%*%X)$values[1] / eigen(t(X)%*%X)$values[4]) [1] 2375.445 # Next, we illustrated the problem of model misspecification. 
First, # we created two variables that were correlated .6: > xx1 <- rnorm(50,0,1) > xx2 <- rnorm(50,0,1) > x1 <- xx1*10+50 > x2 <- 100 + 20*(.6*xx1 + sqrt(1-.6^2)*xx2) > cor(x1,x2) [1] 0.569278 # We create a variable that perfectly fits a regression model, # add some noise, and estimate the regression: > y <- 10 + 5*x1 + 10*x2 > noise <- rnorm(50, 0, 100) > lm(y+noise~x1+x2) Call: lm(formula = y + noise ~ x1 + x2) Coefficients: (Intercept) x1 x2 146.827 3.858 9.185 # Note that across several replications, the slope estimates # stay close to the true values: > noise <- rnorm(50, 0, 100) > lm(y+noise~x1+x2) Call: lm(formula = y + noise ~ x1 + x2) Coefficients: (Intercept) x1 x2 25.317 5.715 9.475 > noise <- rnorm(50, 0, 100) > lm(y+noise~x1+x2) Call: lm(formula = y + noise ~ x1 + x2) Coefficients: (Intercept) x1 x2 -201.95 4.13 12.52 > noise <- rnorm(50, 0, 100) > lm(y+noise~x1+x2) Call: lm(formula = y + noise ~ x1 + x2) Coefficients: (Intercept) x1 x2 -85.948 4.833 11.174 # (We can also print out the regression with testing information:) > summary(lm(y+noise~x1+x2)) Call: lm(formula = y + noise ~ x1 + x2) Residuals: Min 1Q Median 3Q Max -252.72 -38.66 15.41 56.18 235.54 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) -85.9483 77.8973 -1.103 0.27549 x1 4.8331 1.6702 2.894 0.00576 ** x2 11.1745 0.8419 13.273 < 2e-16 *** --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Residual standard error: 88.02 on 47 degrees of freedom Multiple R-squared: 0.8778, Adjusted R-squared: 0.8726 F-statistic: 168.9 on 2 and 47 DF, p-value: < 2.2e-16 # Next, we removed one of the predictors (x2) even though it # was in fact a cause of y. (We know this not because it was # significant in the regression, but because we used it when # we created y.) # Notice that across several repetitions, the estimate of # the slope for x1 has nothing to do with the true slope for # x1. 
This is because, without x2, the model is misspecified, # which will tend to bias the estimates of other coefficients. > summary(lm(y+noise~x1)) Call: lm(formula = y + noise ~ x1) Residuals: Min 1Q Median 3Q Max -400.242 -82.745 4.404 105.238 649.647 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 399.994 148.262 2.698 0.0096 ** x1 17.453 2.961 5.895 3.63e-07 *** --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Residual standard error: 189.8 on 48 degrees of freedom Multiple R-squared: 0.4199, Adjusted R-squared: 0.4078 F-statistic: 34.75 on 1 and 48 DF, p-value: 3.631e-07 > noise <- rnorm(50, 0, 100) > summary(lm(y+noise~x1)) Call: lm(formula = y + noise ~ x1) Residuals: Min 1Q Median 3Q Max -299.16 -128.69 17.24 116.85 504.24 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 473.600 118.099 4.010 0.000211 *** x1 15.506 2.359 6.574 3.31e-08 *** --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Residual standard error: 151.2 on 48 degrees of freedom Multiple R-squared: 0.4738, Adjusted R-squared: 0.4628 F-statistic: 43.22 on 1 and 48 DF, p-value: 3.308e-08 > noise <- rnorm(50, 0, 100) > summary(lm(y+noise~x1)) Call: lm(formula = y + noise ~ x1) Residuals: Min 1Q Median 3Q Max -420.304 -124.494 4.501 118.328 537.300 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 398.813 141.973 2.809 0.00717 ** x1 17.133 2.835 6.043 2.16e-07 *** --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Residual standard error: 181.7 on 48 degrees of freedom Multiple R-squared: 0.432, Adjusted R-squared: 0.4202 F-statistic: 36.51 on 1 and 48 DF, p-value: 2.158e-07 > noise <- rnorm(50, 0, 100) > summary(lm(y+noise~x1)) Call: lm(formula = y + noise ~ x1) Residuals: Min 1Q Median 3Q Max -329.247 -111.173 6.969 78.393 502.848 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 473.005 129.065 3.665 0.000618 *** x1 15.561 2.578 6.037 2.2e-07 *** --- Signif. 
codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Residual standard error: 165.2 on 48 degrees of freedom Multiple R-squared: 0.4316, Adjusted R-squared: 0.4198 F-statistic: 36.45 on 1 and 48 DF, p-value: 2.201e-07 > noise <- rnorm(50, 0, 100) > summary(lm(y+noise~x1)) Call: lm(formula = y + noise ~ x1) Residuals: Min 1Q Median 3Q Max -383.157 -130.784 5.397 112.582 659.183 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 317.871 147.178 2.160 0.0358 * x1 18.561 2.939 6.315 8.27e-08 *** --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Residual standard error: 188.4 on 48 degrees of freedom Multiple R-squared: 0.4538, Adjusted R-squared: 0.4424 F-statistic: 39.88 on 1 and 48 DF, p-value: 8.267e-08 # Finally, we reviewed some diagnostic graphics for the Grades data set: > grades Grade Education Homework 1 78 13 2 2 79 14 6 3 79 13 1 4 89 13 5 5 82 16 3 6 77 13 4 7 88 13 5 8 70 13 3 9 86 15 5 10 80 14 5 11 76 16 1 12 72 13 5 13 66 12 3 14 79 14 4 15 76 10 4 16 80 20 6 17 91 15 8 18 85 16 6 19 79 13 8 20 82 14 3 21 94 12 8 22 91 12 6 23 80 14 5 24 73 14 7 25 77 12 5 26 76 16 5 27 84 12 3 28 81 16 5 29 97 16 10 30 80 15 6 31 74 16 5 32 83 15 11 33 78 13 6 34 64 17 7 35 82 13 4 36 81 17 1 37 73 13 4 38 87 13 7 39 81 14 2 40 86 11 4 41 92 14 4 42 79 13 1 43 69 15 4 44 66 14 5 45 67 13 7 46 76 13 7 47 83 13 6 48 73 15 3 49 85 15 6 50 80 17 5 51 83 13 4 52 82 15 3 53 90 12 7 54 69 12 5 55 95 17 8 56 79 13 2 57 72 13 3 58 83 16 7 59 81 12 4 60 86 15 5 61 78 11 2 62 84 13 9 63 81 14 5 64 72 12 4 65 64 12 5 66 83 14 8 67 94 14 5 68 77 13 6 69 80 14 5 70 74 18 5 71 89 16 8 72 87 15 6 73 82 16 7 74 87 15 4 75 71 12 2 76 88 17 5 77 80 13 7 78 89 16 6 79 87 15 2 80 72 10 5 81 79 17 4 82 93 14 7 83 100 18 7 84 90 13 4 85 69 10 4 86 85 14 5 87 75 14 7 88 82 12 2 89 72 10 2 90 87 14 5 91 81 15 7 92 80 15 9 93 79 17 7 94 79 14 5 95 94 17 8 96 71 11 4 97 77 14 8 98 68 15 4 99 87 16 6 100 74 12 4 # If we save the regression output as an object in R, we 
can easily # access components such as predicted values ("fit") and residuals ("res"). > regout <- lm(grades$Grade~ grades$Education + grades$Homework) > regout$fit 1 2 3 4 5 6 7 8 76.52082 81.34282 75.53297 79.48435 80.12053 78.49651 79.48435 77.50866 9 10 11 12 13 14 15 16 81.22560 80.35498 78.14484 79.48435 76.63804 79.36713 75.88464 86.56656 17 18 19 20 21 22 23 24 84.18914 83.08407 82.44789 78.37928 81.57727 79.60157 80.35498 82.33067 25 26 27 28 29 30 31 32 78.61373 82.09622 76.63804 82.09622 87.03545 82.21344 82.09622 87.15267 33 34 35 36 37 38 39 40 80.47220 84.94254 78.49651 79.01546 78.49651 81.46004 77.39144 76.75526 41 42 43 44 45 46 47 48 79.36713 75.53297 80.23775 80.35498 81.46004 81.46004 80.47220 79.24991 49 50 51 52 53 54 55 56 82.21344 82.96684 78.49651 79.24991 80.58942 78.61373 85.93038 76.52082 57 58 59 60 61 62 63 64 77.50866 84.07191 77.62588 81.22560 74.77957 83.43573 80.35498 77.62588 65 66 67 68 69 70 71 72 78.61373 83.31851 80.35498 80.47220 80.35498 83.83747 85.05976 82.21344 73 74 75 76 77 78 79 80 84.07191 80.23775 75.65019 82.96684 81.46004 83.08407 78.26206 76.87248 81 82 83 84 85 86 87 88 81.97900 82.33067 85.81316 78.49651 75.88464 80.35498 82.33067 75.65019 89 90 91 92 93 94 95 96 73.90895 80.35498 83.20129 85.17698 84.94254 80.35498 85.93038 76.75526 97 98 99 100 83.31851 80.23775 83.08407 77.62588 > regout$res 1 2 3 4 5 6 1.4791847 -2.3428208 3.4670303 9.5156478 1.8794699 -1.4965065 7 8 9 10 11 12 8.5156478 -7.5086609 4.7744018 -0.3549752 -2.1448388 -7.4843522 13 14 15 16 17 18 -10.6380379 -0.3671296 0.1153626 -6.5665591 6.8108649 1.9159331 19 20 21 22 23 24 -3.4478890 3.6207160 12.4227340 11.3984253 -0.3549752 -9.3306664 25 26 27 28 29 30 -1.6137291 -6.0962213 7.3619621 -1.0962213 9.9645507 -2.2134439 31 32 33 34 35 36 -8.0962213 -4.1526719 -2.4721978 -20.9425356 3.5034935 1.9845381 37 38 39 40 41 42 -5.4965065 5.5399566 3.6085616 9.2447395 12.6328704 3.4670303 43 44 45 46 47 48 -11.2377526 -14.3549752 -14.4600434 -5.4600434 
2.5278022 -6.2499070 49 50 51 52 53 54 2.7865561 -2.9668443 4.5034935 2.7500930 9.4105797 -9.6137291 55 56 57 58 59 60 9.0696188 2.4791847 -5.5086609 -1.0719125 3.3741165 4.7744018 61 62 63 64 65 66 3.2204308 0.5642654 0.6450248 -5.6258835 -14.6137291 -0.3185120 67 68 69 70 71 72 13.6450248 -3.4721978 -0.3549752 -9.8374674 3.9402419 4.7865561 73 74 75 76 77 78 -2.0719125 6.7622474 -4.6501923 5.0331557 -1.4600434 5.9159331 79 80 81 82 83 84 8.7379386 -4.8724830 -2.9789987 10.6693336 14.1868414 11.5034935 85 86 87 88 89 90 -6.8846374 4.6450248 -7.3306664 6.3498077 -1.9089462 6.6450248 91 92 93 94 95 96 -2.2012895 -5.1769807 -5.9425356 -1.3549752 8.0696188 -5.7552605 97 98 99 100 -6.3185120 -12.2377526 3.9159331 -3.6258835 # Using those, we could easily (or perhaps NOT so easily, if we want really # nice graphics with clean labels, sufficient white space, etc.) create a # plot of residuals versus predicted values. We didn't actually do that # in class. # The plot we DID do was a normal quantile-quantile plot of the residuals: > qqnorm(regout$res) > qqline(regout$res) # That plot is useful for evaluating the assumption that errors are # normally distributed. (They look pretty good here.) # We returned to R much later in the class when we were discussing # externally Studentized residuals. # Here, we calculate a p-value for the residual for observation number 34 # in the Grades data. The original df for the error term in the # regression was 97. For the outlier test, we use 96 because the external # Studentization is equivalent to dropping the observation or creating # a dummy code for it, both of which would reduce the error df by 1. > 2*pt(-3.14360, 96) [1] 0.002220702 # Finally, we used our Peabody and Raven data set to illustrate the idea # of leverage. We create an artificial data set that adds a fictitious # observation precisely at the mean of the Raven distribution (where the # leverage is zero), but with an outrageously large value for Peabody. 
# Note that the slope estimate is completely unchanged: > plot(Raven,Peabody) > mean(Raven) [1] 32.525 > newRaven <- c(Raven,32.525) > newPeabody <- c(Peabody,10000000) > lm(Peabody~Raven) Call: lm(formula = Peabody ~ Raven) Coefficients: (Intercept) Raven 63.7942 0.5498 > lm(newPeabody~newRaven) Call: lm(formula = newPeabody ~ newRaven) Coefficients: (Intercept) newRaven 2.440e+05 5.498e-01 >