# We reviewed what we know about ANOVA coding and contrasts using # the example from Howell (Eysenck's memory experiment). # Here's the data set. The first four groups are listed in what should # be the order of increasingly good performance if Eysenck is correct # about level of processing and memory. The fifth group is the one that # was simply instructed to remember the words; it really doesn't fit in # with the theory. > Eysenck <- read.csv("http://faculty.ucmerced.edu/jvevea/classes/202a/data/Eysenck.csv") > Eysenck Score Group 1 9 counting 2 8 counting 3 6 counting 4 8 counting 5 10 counting 6 4 counting 7 6 counting 8 5 counting 9 7 counting 10 7 counting 11 7 rhyming 12 9 rhyming 13 6 rhyming 14 6 rhyming 15 6 rhyming 16 11 rhyming 17 6 rhyming 18 3 rhyming 19 8 rhyming 20 7 rhyming 21 11 adjective 22 13 adjective 23 8 adjective 24 6 adjective 25 14 adjective 26 11 adjective 27 13 adjective 28 13 adjective 29 10 adjective 30 11 adjective 31 12 imagery 32 11 imagery 33 16 imagery 34 11 imagery 35 9 imagery 36 23 imagery 37 12 imagery 38 10 imagery 39 19 imagery 40 11 imagery 41 10 intent 42 19 intent 43 14 intent 44 5 intent 45 10 intent 46 11 intent 47 14 intent 48 15 intent 49 11 intent 50 11 intent # Here are the means. They seem somewhat consistent # with the theory: > attach(Eysenck) > tapply(Score,Group,mean) adjective counting imagery intent rhyming 11.0 7.0 13.4 12.0 6.9 # Here are parallel boxplots. (If we had time, it would have # been more useful to separate the groups so that we could force # them to appear in the order that matches how we are thinking # about them.) > boxplot(Score~Group) # And here's the easy way to do the ANOVA. (Remember that the # only test we want to interpret here is the overall F test.) > summary(lm(Score~Group)) Call: lm(formula = Score ~ Group) Residuals: Min 1Q Median 3Q Max -7.00 -1.85 -0.45 2.00 9.60 Coefficients: Estimate Std. 
Error t value Pr(>|t|) (Intercept) 11.0000 0.9835 11.184 1.39e-14 *** Groupcounting -4.0000 1.3909 -2.876 0.00614 ** Groupimagery 2.4000 1.3909 1.725 0.09130 . Groupintent 1.0000 1.3909 0.719 0.47589 Grouprhyming -4.1000 1.3909 -2.948 0.00506 ** --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Residual standard error: 3.11 on 45 degrees of freedom Multiple R-squared: 0.4468, Adjusted R-squared: 0.3976 F-statistic: 9.085 on 4 and 45 DF, p-value: 1.815e-05 # Here, we create a dummy coding system: > Count <- c(rep(1,10),rep(0,40)) > Count [1] 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 [39] 0 0 0 0 0 0 0 0 0 0 0 0 > Rhyme <- c(rep(0,10),rep(1,10),rep(0,30)) > Rhyme [1] 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 [39] 0 0 0 0 0 0 0 0 0 0 0 0 > Adject <- c(rep(0,20),rep(1,10),rep(0,20)) > Adject [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 [39] 0 0 0 0 0 0 0 0 0 0 0 0 > Image <- c(rep(0,30),rep(1,10),rep(0,10)) > Image [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 [39] 1 1 0 0 0 0 0 0 0 0 0 0 # We can see that each group is identified by a unique # pattern across the four variables: > cbind(as.character(Group),Count,Rhyme,Adject,Image) Count Rhyme Adject Image [1,] "counting" "1" "0" "0" "0" [2,] "counting" "1" "0" "0" "0" [3,] "counting" "1" "0" "0" "0" [4,] "counting" "1" "0" "0" "0" [5,] "counting" "1" "0" "0" "0" [6,] "counting" "1" "0" "0" "0" [7,] "counting" "1" "0" "0" "0" [8,] "counting" "1" "0" "0" "0" [9,] "counting" "1" "0" "0" "0" [10,] "counting" "1" "0" "0" "0" [11,] "rhyming" "0" "1" "0" "0" [12,] "rhyming" "0" "1" "0" "0" [13,] "rhyming" "0" "1" "0" "0" [14,] "rhyming" "0" "1" "0" "0" [15,] "rhyming" "0" "1" "0" "0" [16,] "rhyming" "0" "1" "0" "0" [17,] "rhyming" "0" "1" "0" "0" [18,] "rhyming" "0" "1" "0" "0" [19,] "rhyming" "0" "1" "0" "0" [20,] "rhyming" "0" "1" "0" "0" [21,] "adjective" "0" "0" "1" "0" 
[22,] "adjective" "0" "0" "1" "0" [23,] "adjective" "0" "0" "1" "0" [24,] "adjective" "0" "0" "1" "0" [25,] "adjective" "0" "0" "1" "0" [26,] "adjective" "0" "0" "1" "0" [27,] "adjective" "0" "0" "1" "0" [28,] "adjective" "0" "0" "1" "0" [29,] "adjective" "0" "0" "1" "0" [30,] "adjective" "0" "0" "1" "0" [31,] "imagery" "0" "0" "0" "1" [32,] "imagery" "0" "0" "0" "1" [33,] "imagery" "0" "0" "0" "1" [34,] "imagery" "0" "0" "0" "1" [35,] "imagery" "0" "0" "0" "1" [36,] "imagery" "0" "0" "0" "1" [37,] "imagery" "0" "0" "0" "1" [38,] "imagery" "0" "0" "0" "1" [39,] "imagery" "0" "0" "0" "1" [40,] "imagery" "0" "0" "0" "1" [41,] "intent" "0" "0" "0" "0" [42,] "intent" "0" "0" "0" "0" [43,] "intent" "0" "0" "0" "0" [44,] "intent" "0" "0" "0" "0" [45,] "intent" "0" "0" "0" "0" [46,] "intent" "0" "0" "0" "0" [47,] "intent" "0" "0" "0" "0" [48,] "intent" "0" "0" "0" "0" [49,] "intent" "0" "0" "0" "0" [50,] "intent" "0" "0" "0" "0" # The regression of Score on the four dummy variables # gets us the same F statistic as when we let R do # the coding. (Again, we do not want to interpret any # tests here except the overall F statistic.) > summary(lm(Score~Count+Rhyme+Adject+Image)) Call: lm(formula = Score ~ Count + Rhyme + Adject + Image) Residuals: Min 1Q Median 3Q Max -7.00 -1.85 -0.45 2.00 9.60 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 12.0000 0.9835 12.201 7.18e-16 *** Count -5.0000 1.3909 -3.595 0.000802 *** Rhyme -5.1000 1.3909 -3.667 0.000647 *** Adject -1.0000 1.3909 -0.719 0.475890 Image 1.4000 1.3909 1.007 0.319544 --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Residual standard error: 3.11 on 45 degrees of freedom Multiple R-squared: 0.4468, Adjusted R-squared: 0.3976 F-statistic: 9.085 on 4 and 45 DF, p-value: 1.815e-05 # If we change the coding system so that the Intentional # group is identified by the pattern -1 -1 -1 -1 instead # of 0 0 0 0... 
> Count[41:50] <- -1 > Rhyme[41:50] <- -1 > Adject[41:50] <- -1 > Image[41:50] <- -1 > cbind(as.character(Group),Count,Rhyme,Adject,Image) Count Rhyme Adject Image [1,] "counting" "1" "0" "0" "0" [2,] "counting" "1" "0" "0" "0" [3,] "counting" "1" "0" "0" "0" [4,] "counting" "1" "0" "0" "0" [5,] "counting" "1" "0" "0" "0" [6,] "counting" "1" "0" "0" "0" [7,] "counting" "1" "0" "0" "0" [8,] "counting" "1" "0" "0" "0" [9,] "counting" "1" "0" "0" "0" [10,] "counting" "1" "0" "0" "0" [11,] "rhyming" "0" "1" "0" "0" [12,] "rhyming" "0" "1" "0" "0" [13,] "rhyming" "0" "1" "0" "0" [14,] "rhyming" "0" "1" "0" "0" [15,] "rhyming" "0" "1" "0" "0" [16,] "rhyming" "0" "1" "0" "0" [17,] "rhyming" "0" "1" "0" "0" [18,] "rhyming" "0" "1" "0" "0" [19,] "rhyming" "0" "1" "0" "0" [20,] "rhyming" "0" "1" "0" "0" [21,] "adjective" "0" "0" "1" "0" [22,] "adjective" "0" "0" "1" "0" [23,] "adjective" "0" "0" "1" "0" [24,] "adjective" "0" "0" "1" "0" [25,] "adjective" "0" "0" "1" "0" [26,] "adjective" "0" "0" "1" "0" [27,] "adjective" "0" "0" "1" "0" [28,] "adjective" "0" "0" "1" "0" [29,] "adjective" "0" "0" "1" "0" [30,] "adjective" "0" "0" "1" "0" [31,] "imagery" "0" "0" "0" "1" [32,] "imagery" "0" "0" "0" "1" [33,] "imagery" "0" "0" "0" "1" [34,] "imagery" "0" "0" "0" "1" [35,] "imagery" "0" "0" "0" "1" [36,] "imagery" "0" "0" "0" "1" [37,] "imagery" "0" "0" "0" "1" [38,] "imagery" "0" "0" "0" "1" [39,] "imagery" "0" "0" "0" "1" [40,] "imagery" "0" "0" "0" "1" [41,] "intent" "-1" "-1" "-1" "-1" [42,] "intent" "-1" "-1" "-1" "-1" [43,] "intent" "-1" "-1" "-1" "-1" [44,] "intent" "-1" "-1" "-1" "-1" [45,] "intent" "-1" "-1" "-1" "-1" [46,] "intent" "-1" "-1" "-1" "-1" [47,] "intent" "-1" "-1" "-1" "-1" [48,] "intent" "-1" "-1" "-1" "-1" [49,] "intent" "-1" "-1" "-1" "-1" [50,] "intent" "-1" "-1" "-1" "-1" # ...we get the same overall F statistic (which, again, # is the only test we should interpret here), but the # coefficient estimates now represent the grand mean and # the estimated 
effects of being in each group: > summary(lm(Score~Count+Rhyme+Adject+Image)) Call: lm(formula = Score ~ Count + Rhyme + Adject + Image) Residuals: Min 1Q Median 3Q Max -7.00 -1.85 -0.45 2.00 9.60 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 10.0600 0.4398 22.872 < 2e-16 *** Count -3.0600 0.8797 -3.478 0.001131 ** Rhyme -3.1600 0.8797 -3.592 0.000808 *** Adject 0.9400 0.8797 1.069 0.290971 Image 3.3400 0.8797 3.797 0.000436 *** --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Residual standard error: 3.11 on 45 degrees of freedom Multiple R-squared: 0.4468, Adjusted R-squared: 0.3976 F-statistic: 9.085 on 4 and 45 DF, p-value: 1.815e-05 # We confirm that the intercept is the grand mean: > mean(Score) [1] 10.06 # The slopes represent the difference between each group # mean and the grand mean. For example, the mean of the # Counting group is 7.0, which is 3.06 below the grand mean # of 10.06. # The effect for the fifth group doesn't appear in the regression # output, but if we want it, we can calculate it as -1 times the # sum of the other effects: > -(-3.0600-3.1600+.94+3.34) [1] 1.94 # Here, we implement a contrast coding system that replicates the # first set of orthogonal contrasts in the Powerpoint for today. # The first contrast compares Intentional with everything else. # The second compares the two groups that should do best with the # two that should do worst. The third compares what should be the # two highest groups, and the fourth compares what should be the # two lowest groups. 
> c1 <- c(rep(-1,40),rep(4,10)) > c2 <- c(rep(-1,20),rep(1,20),rep(0,10)) > c3 <- c(rep(0,20),rep(-1,10),rep(1,10),rep(0,10)) > c4 <- c(rep(-1,10),rep(1,10),rep(0,30)) # Once again, we have created a coding system that identifies # the five groups by unique patterns on four variables: > cbind(as.character(Group),c1,c2,c3,c4) c1 c2 c3 c4 [1,] "counting" "-1" "-1" "0" "-1" [2,] "counting" "-1" "-1" "0" "-1" [3,] "counting" "-1" "-1" "0" "-1" [4,] "counting" "-1" "-1" "0" "-1" [5,] "counting" "-1" "-1" "0" "-1" [6,] "counting" "-1" "-1" "0" "-1" [7,] "counting" "-1" "-1" "0" "-1" [8,] "counting" "-1" "-1" "0" "-1" [9,] "counting" "-1" "-1" "0" "-1" [10,] "counting" "-1" "-1" "0" "-1" [11,] "rhyming" "-1" "-1" "0" "1" [12,] "rhyming" "-1" "-1" "0" "1" [13,] "rhyming" "-1" "-1" "0" "1" [14,] "rhyming" "-1" "-1" "0" "1" [15,] "rhyming" "-1" "-1" "0" "1" [16,] "rhyming" "-1" "-1" "0" "1" [17,] "rhyming" "-1" "-1" "0" "1" [18,] "rhyming" "-1" "-1" "0" "1" [19,] "rhyming" "-1" "-1" "0" "1" [20,] "rhyming" "-1" "-1" "0" "1" [21,] "adjective" "-1" "1" "-1" "0" [22,] "adjective" "-1" "1" "-1" "0" [23,] "adjective" "-1" "1" "-1" "0" [24,] "adjective" "-1" "1" "-1" "0" [25,] "adjective" "-1" "1" "-1" "0" [26,] "adjective" "-1" "1" "-1" "0" [27,] "adjective" "-1" "1" "-1" "0" [28,] "adjective" "-1" "1" "-1" "0" [29,] "adjective" "-1" "1" "-1" "0" [30,] "adjective" "-1" "1" "-1" "0" [31,] "imagery" "-1" "1" "1" "0" [32,] "imagery" "-1" "1" "1" "0" [33,] "imagery" "-1" "1" "1" "0" [34,] "imagery" "-1" "1" "1" "0" [35,] "imagery" "-1" "1" "1" "0" [36,] "imagery" "-1" "1" "1" "0" [37,] "imagery" "-1" "1" "1" "0" [38,] "imagery" "-1" "1" "1" "0" [39,] "imagery" "-1" "1" "1" "0" [40,] "imagery" "-1" "1" "1" "0" [41,] "intent" "4" "0" "0" "0" [42,] "intent" "4" "0" "0" "0" [43,] "intent" "4" "0" "0" "0" [44,] "intent" "4" "0" "0" "0" [45,] "intent" "4" "0" "0" "0" [46,] "intent" "4" "0" "0" "0" [47,] "intent" "4" "0" "0" "0" [48,] "intent" "4" "0" "0" "0" [49,] "intent" "4" "0" "0" "0" 
[50,] "intent" "4" "0" "0" "0" # We still get the same F statistic, but now, because # the contrasts are planned and orthogonal, we can interpret # the t statistics as tests of the questions asked by the # contrasts. > summary(lm(Score~c1+c2+c3+c4)) Call: lm(formula = Score ~ c1 + c2 + c3 + c4) Residuals: Min 1Q Median 3Q Max -7.00 -1.85 -0.45 2.00 9.60 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 10.0600 0.4399 22.872 < 2e-16 *** c1 0.4850 0.2199 2.205 0.0326 * c2 2.6250 0.4918 5.338 2.96e-06 *** c3 1.2000 0.6955 1.725 0.0913 . c4 -0.0500 0.6955 -0.072 0.9430 --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Residual standard error: 3.11 on 45 degrees of freedom Multiple R-squared: 0.4468, Adjusted R-squared: 0.3976 F-statistic: 9.085 on 4 and 45 DF, p-value: 1.815e-05 # It's easier, though, to let R set up that coding system. # Here's how we are thinking about the groups: > # count rhyme adject image intent # But R thinks in alphabetical order: > # adject count image intent rhyme # Here are the contrasts: > mycontrasts <- matrix(c( + -1, -1, -1, 4, -1, + 1, -1, 1, 0, -1, + -1, 0, 1, 0, 0, + 0, -1, 0, 0, 1),4,5,byrow=TRUE) > mycontrasts [,1] [,2] [,3] [,4] [,5] [1,] -1 -1 -1 4 -1 [2,] 1 -1 1 0 -1 [3,] -1 0 1 0 0 [4,] 0 -1 0 0 1 # Here, we force R to use those contrasts to implement the ANOVA: > contrasts(Group) <- t(mycontrasts) # Note that the results are identical to what we got from our # contrast coding system: > summary(lm(Score~Group)) Call: lm(formula = Score ~ Group) Residuals: Min 1Q Median 3Q Max -7.00 -1.85 -0.45 2.00 9.60 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 10.0600 0.4399 22.872 < 2e-16 *** Group1 0.4850 0.2199 2.205 0.0326 * Group2 2.6250 0.4918 5.338 2.96e-06 *** Group3 1.2000 0.6955 1.725 0.0913 . Group4 -0.0500 0.6955 -0.072 0.9430 --- Signif. 
codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Residual standard error: 3.11 on 45 degrees of freedom Multiple R-squared: 0.4468, Adjusted R-squared: 0.3976 F-statistic: 9.085 on 4 and 45 DF, p-value: 1.815e-05 # When we checked that our set of contrasts was mutually # orthogonal, what we were actually doing was confirming # that the variables in the contrast coding system are # perfectly uncorrelated: > cor(c1,c2) [1] 0 > cor(c1,c3) [1] 0 > cor(c1,c4) [1] 0 > cor(c2,c3) [1] 0 > cor(c2,c4) [1] 0 > cor(c3,c4) [1] 0 # Here's another approach to the problem. This happens to # be a case where Eysenck's theory is well represented by # Helmert contrasts. (Helmert contrasts are an approach in # which one group is compared with the others and then ignored # in subsequent contrasts. Then, from the remaining groups, one # is compared with the others and subsequently ignored. This # pattern continues until we run out of groups to compare.) > # adject count image intent rhyme > mycontrasts <- matrix(c( + -1, -1, -1, 4, -1, + -1, -1, 3, 0, -1, + 2, -1, 0, 0, -1, + 0, -1, 0, 0, 1),4,5,byrow=TRUE) > contrasts(Group) <- t(mycontrasts) # The results provide even better support for Eysenck's theory: > summary(lm(Score~Group)) Call: lm(formula = Score ~ Group) Residuals: Min 1Q Median 3Q Max -7.00 -1.85 -0.45 2.00 9.60 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 10.0600 0.4399 22.872 < 2e-16 *** Group1 0.4850 0.2199 2.205 0.03258 * Group2 1.2750 0.2839 4.491 4.91e-05 *** Group3 1.3500 0.4015 3.362 0.00159 ** Group4 -0.0500 0.6955 -0.072 0.94300 --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Residual standard error: 3.11 on 45 degrees of freedom Multiple R-squared: 0.4468, Adjusted R-squared: 0.3976 F-statistic: 9.085 on 4 and 45 DF, p-value: 1.815e-05 >