# We began by illustrating the principle of maximum likelihood, # graphically finding the maximum likelihood estimate of the # mean of the Peabody distribution. > Peabody [1] 69 72 94 64 80 77 96 86 89 69 92 71 81 90 84 76 100 57 61 [20] 84 81 65 87 92 89 79 91 65 91 81 86 85 95 93 83 76 84 90 [39] 95 67 > like <- function(mu, x, variance) { + return(sum(-1/2 * log(1*pi*variance) - 1/2*(x-mu)^2 / variance)) } # In this particular case, the variance (which doesn't interest us # at the moment) doesn't matter. I can estimate it: > var(Peabody) [1] 119.2506 #...but my estimate of the mean will be the same regardless of what # value I plug in for the variance: > variance <- 100 # We calculate the log likelihood for a few different values of the mean: > like( 75, Peabody, variance) [1] -147.163 > like( 76, Peabody, variance) [1] -144.693 > like( 78, Peabody, variance) [1] -140.953 # The likelihood is highest for 78, which means that, according to the # likelihood principle, 78 is a better estimate of the mean than the # other two values we tried. # Let's look at a sequence of many possible values: > mu <- seq(50,100,.1) > loglike <- rep(0,length(mu)) > for(i in 1:length(mu)) loglike[i] <- like(mu[i], Peabody, variance) > plot(mu, loglike, type='l') # Note that the likelihood peaks a little over 80. We can try another # search in that neighborhood: > mu <- seq(70,82,.0001) > loglike <- rep(0,length(mu)) > for(i in 1:length(mu)) loglike[i] <- like(mu[i], Peabody, variance) > plot(mu, loglike, type='l') # Here, we move even closer to the solution: > mu <- seq(81,83,.0001) > loglike <- rep(0,length(mu)) > for(i in 1:length(mu)) loglike[i] <- like(mu[i], Peabody, variance) > plot(mu, loglike, type='l') # The curve peaks at 81.675, which happens to be the value of the sample # mean. Under the assumption of normality, the sample mean is the # maximum likelihood estimate of the mean. > mean(Peabody) [1] 81.675 # Here, we see that we get the same estimate, even if we set the # variance to a very different value: > for(i in 1:length(mu)) loglike[i] <- like(mu[i], Peabody, 1) > plot(mu, loglike, type='l') # Next, we illustrated the Box-Cox proceedure. We worked first # with a case where we know that the optimal normalizing transformation # is the square root: > y <- rnorm(250, 50, 10)^2 > hist(y) > qqnorm(y) > hist(sqrt(y)) > qqnorm(sqrt(y)) # We set up a grid of possible values of lambda: > lambda <- seq(-1,1,.01) > lambda [1] -1.00 -0.99 -0.98 -0.97 -0.96 -0.95 -0.94 -0.93 -0.92 -0.91 -0.90 -0.89 [13] -0.88 -0.87 -0.86 -0.85 -0.84 -0.83 -0.82 -0.81 -0.80 -0.79 -0.78 -0.77 [25] -0.76 -0.75 -0.74 -0.73 -0.72 -0.71 -0.70 -0.69 -0.68 -0.67 -0.66 -0.65 [37] -0.64 -0.63 -0.62 -0.61 -0.60 -0.59 -0.58 -0.57 -0.56 -0.55 -0.54 -0.53 [49] -0.52 -0.51 -0.50 -0.49 -0.48 -0.47 -0.46 -0.45 -0.44 -0.43 -0.42 -0.41 [61] -0.40 -0.39 -0.38 -0.37 -0.36 -0.35 -0.34 -0.33 -0.32 -0.31 -0.30 -0.29 [73] -0.28 -0.27 -0.26 -0.25 -0.24 -0.23 -0.22 -0.21 -0.20 -0.19 -0.18 -0.17 [85] -0.16 -0.15 -0.14 -0.13 -0.12 -0.11 -0.10 -0.09 -0.08 -0.07 -0.06 -0.05 [97] -0.04 -0.03 -0.02 -0.01 0.00 0.01 0.02 0.03 0.04 0.05 0.06 0.07 [109] 0.08 0.09 0.10 0.11 0.12 0.13 0.14 0.15 0.16 0.17 0.18 0.19 [121] 0.20 0.21 0.22 0.23 0.24 0.25 0.26 0.27 0.28 0.29 0.30 0.31 [133] 0.32 0.33 0.34 0.35 0.36 0.37 0.38 0.39 0.40 0.41 0.42 0.43 [145] 0.44 0.45 0.46 0.47 0.48 0.49 0.50 0.51 0.52 0.53 0.54 0.55 [157] 0.56 0.57 0.58 0.59 0.60 0.61 0.62 0.63 0.64 0.65 0.66 0.67 [169] 0.68 0.69 0.70 0.71 0.72 0.73 0.74 0.75 0.76 0.77 0.78 0.79 [181] 0.80 0.81 0.82 0.83 0.84 0.85 0.86 0.87 0.88 0.89 0.90 0.91 [193] 0.92 0.93 0.94 0.95 0.96 0.97 0.98 0.99 1.00 # We create a function to calculate the geometric mean (needed in the # likelihood): > gmean <- function(y) { + for(i in 1:length(y)) if(y[i] <= 0) { + message("Error: argument must be strictly positive!") + return(NaN) } + return(exp(mean(log(y)))) } # Here are some comparisons of geometric and arithmetic means: > mean(Peabody) [1] 81.675 > gmean(Peabody) [1] 80.91876 > mean(y) [1] 2414.658 > gmean(y) [1] 2205.419 # In general, Box-Cox is used to normalize residuals in a regression. # Here, we want to normalize a data set. That's equivalent to normalizing # residuals from an intercept-only regression. So we set up a matrix # of regression predictors that is simply a column of ones: > X <- rep(1, length(y)) # Note that, apart from R's choice of rounding, the intercept in # the regression is the mean: > mean(y) [1] 2414.658 > lm(y~X-1) Call: lm(formula = y ~ X - 1) Coefficients: X 2415 # Now we create our Box-Cox likelihood function: > boxcox <- function(lambda,y,X) { + n = length(y) + retval <- rep(0,length(lambda)) + for(i in 1:length(lambda)) { + L = lambda[i] + if(L==0) { + yy <- log(y) + resids <- lm(yy~X-1)$res + RSS <- (n-1)*var(resids) + retval[i] <- (-n/2 * log(RSS) - n*log(gmean(y))) + } + else { + yy <- y^L + resids <- lm(yy~X-1)$res + RSS <- (n-1)*var(resids) + retval[i] <- (n*log(abs(L)) -n/2 * log(RSS) + n*(L-1)*log(gmean(y))) + } + } + return(retval) + } > plot(lambda,boxcox(lambda,y,X), type='l',xlab="Lambda",ylab="Likelihood") # The procedure selected the square root transformation, as we hoped. # We did another example with the polishing time data from last class. # Recall that for those data, a log transformation on time worked well. > Time [1] 47.65 63.13 58.76 34.88 55.53 43.14 54.86 44.14 17.46 21.04 [11] 109.38 17.67 16.41 12.02 49.48 48.74 23.21 28.64 44.95 23.77 [21] 20.21 32.62 17.84 22.82 29.48 15.61 13.25 45.78 26.53 37.11 [31] 45.12 26.09 68.63 33.71 44.45 23.74 86.42 39.71 26.52 33.89 [41] 64.30 22.55 31.86 53.18 74.48 34.16 31.46 21.34 20.83 20.59 [51] 33.70 32.90 27.76 30.20 20.85 26.25 21.87 23.88 16.66 > Diameter [1] 10.7 14.0 9.0 8.0 10.0 10.5 16.0 15.0 6.5 5.0 25.0 10.4 7.4 5.4 15.4 [16] 12.4 6.0 9.0 9.0 12.4 7.5 14.0 7.0 9.0 12.0 5.5 6.0 12.0 5.5 14.2 [31] 11.0 16.0 13.5 11.1 9.8 10.0 13.0 13.0 11.7 12.3 19.5 15.2 10.0 11.0 17.8 [46] 11.5 12.7 8.0 7.5 9.0 14.0 12.4 8.8 8.5 6.0 11.0 11.1 14.5 5.0 > length(Diameter) [1] 59 > X <- cbind(rep(1,59),Diameter) > X Diameter [1,] 1 10.7 [2,] 1 14.0 [3,] 1 9.0 [4,] 1 8.0 [5,] 1 10.0 [6,] 1 10.5 [7,] 1 16.0 [8,] 1 15.0 [9,] 1 6.5 [10,] 1 5.0 [11,] 1 25.0 [12,] 1 10.4 [13,] 1 7.4 [14,] 1 5.4 [15,] 1 15.4 [16,] 1 12.4 [17,] 1 6.0 [18,] 1 9.0 [19,] 1 9.0 [20,] 1 12.4 [21,] 1 7.5 [22,] 1 14.0 [23,] 1 7.0 [24,] 1 9.0 [25,] 1 12.0 [26,] 1 5.5 [27,] 1 6.0 [28,] 1 12.0 [29,] 1 5.5 [30,] 1 14.2 [31,] 1 11.0 [32,] 1 16.0 [33,] 1 13.5 [34,] 1 11.1 [35,] 1 9.8 [36,] 1 10.0 [37,] 1 13.0 [38,] 1 13.0 [39,] 1 11.7 [40,] 1 12.3 [41,] 1 19.5 [42,] 1 15.2 [43,] 1 10.0 [44,] 1 11.0 [45,] 1 17.8 [46,] 1 11.5 [47,] 1 12.7 [48,] 1 8.0 [49,] 1 7.5 [50,] 1 9.0 [51,] 1 14.0 [52,] 1 12.4 [53,] 1 8.8 [54,] 1 8.5 [55,] 1 6.0 [56,] 1 11.0 [57,] 1 11.1 [58,] 1 14.5 [59,] 1 5.0 # Note that the Box-Cox graph suggests a value of lambda near zero, which # implies a log transformation. > plot(lambda,boxcox(lambda,Time,X), type='l',xlab="Lambda",ylab="Likelihood") # If we want to know exactly where the curve peaks, we can examine a table: > cbind(lambda,boxcox(lambda,Time,X)) lambda [1,] -1.00 -267.2598 [2,] -0.99 -267.1189 [3,] -0.98 -266.9795 [4,] -0.97 -266.8416 [5,] -0.96 -266.7053 [6,] -0.95 -266.5704 [7,] -0.94 -266.4371 [8,] -0.93 -266.3053 [9,] -0.92 -266.1750 [10,] -0.91 -266.0463 [11,] -0.90 -265.9191 [12,] -0.89 -265.7934 [13,] -0.88 -265.6693 [14,] -0.87 -265.5467 [15,] -0.86 -265.4257 [16,] -0.85 -265.3062 [17,] -0.84 -265.1883 [18,] -0.83 -265.0720 [19,] -0.82 -264.9572 [20,] -0.81 -264.8440 [21,] -0.80 -264.7324 [22,] -0.79 -264.6224 [23,] -0.78 -264.5139 [24,] -0.77 -264.4071 [25,] -0.76 -264.3018 [26,] -0.75 -264.1982 [27,] -0.74 -264.0961 [28,] -0.73 -263.9956 [29,] -0.72 -263.8968 [30,] -0.71 -263.7996 [31,] -0.70 -263.7039 [32,] -0.69 -263.6099 [33,] -0.68 -263.5176 [34,] -0.67 -263.4268 [35,] -0.66 -263.3377 [36,] -0.65 -263.2502 [37,] -0.64 -263.1644 [38,] -0.63 -263.0802 [39,] -0.62 -262.9977 [40,] -0.61 -262.9168 [41,] -0.60 -262.8376 [42,] -0.59 -262.7600 [43,] -0.58 -262.6841 [44,] -0.57 -262.6099 [45,] -0.56 -262.5373 [46,] -0.55 -262.4664 [47,] -0.54 -262.3972 [48,] -0.53 -262.3296 [49,] -0.52 -262.2638 [50,] -0.51 -262.1996 [51,] -0.50 -262.1372 [52,] -0.49 -262.0764 [53,] -0.48 -262.0174 [54,] -0.47 -261.9600 [55,] -0.46 -261.9044 [56,] -0.45 -261.8504 [57,] -0.44 -261.7982 [58,] -0.43 -261.7477 [59,] -0.42 -261.6989 [60,] -0.41 -261.6519 [61,] -0.40 -261.6065 [62,] -0.39 -261.5630 [63,] -0.38 -261.5211 [64,] -0.37 -261.4810 [65,] -0.36 -261.4426 [66,] -0.35 -261.4060 [67,] -0.34 -261.3711 [68,] -0.33 -261.3380 [69,] -0.32 -261.3067 [70,] -0.31 -261.2771 [71,] -0.30 -261.2493 [72,] -0.29 -261.2232 [73,] -0.28 -261.1989 [74,] -0.27 -261.1764 [75,] -0.26 -261.1557 [76,] -0.25 -261.1367 [77,] -0.24 -261.1196 [78,] -0.23 -261.1042 [79,] -0.22 -261.0906 [80,] -0.21 -261.0788 [81,] -0.20 -261.0688 [82,] -0.19 -261.0606 [83,] -0.18 -261.0543 [84,] -0.17 -261.0497 [85,] -0.16 -261.0469 [86,] -0.15 -261.0460 [87,] -0.14 -261.0469 [88,] -0.13 -261.0496 [89,] -0.12 -261.0541 [90,] -0.11 -261.0604 [91,] -0.10 -261.0686 [92,] -0.09 -261.0786 [93,] -0.08 -261.0905 [94,] -0.07 -261.1041 [95,] -0.06 -261.1197 [96,] -0.05 -261.1371 [97,] -0.04 -261.1563 [98,] -0.03 -261.1774 [99,] -0.02 -261.2003 [100,] -0.01 -261.2251 [101,] 0.00 -261.2517 [102,] 0.01 -261.2803 [103,] 0.02 -261.3106 [104,] 0.03 -261.3429 [105,] 0.04 -261.3770 [106,] 0.05 -261.4130 [107,] 0.06 -261.4509 [108,] 0.07 -261.4906 [109,] 0.08 -261.5323 [110,] 0.09 -261.5758 [111,] 0.10 -261.6212 [112,] 0.11 -261.6685 [113,] 0.12 -261.7177 [114,] 0.13 -261.7688 [115,] 0.14 -261.8218 [116,] 0.15 -261.8767 [117,] 0.16 -261.9335 [118,] 0.17 -261.9922 [119,] 0.18 -262.0528 [120,] 0.19 -262.1153 [121,] 0.20 -262.1797 [122,] 0.21 -262.2461 [123,] 0.22 -262.3144 [124,] 0.23 -262.3845 [125,] 0.24 -262.4567 [126,] 0.25 -262.5307 [127,] 0.26 -262.6067 [128,] 0.27 -262.6845 [129,] 0.28 -262.7644 [130,] 0.29 -262.8461 [131,] 0.30 -262.9298 [132,] 0.31 -263.0154 [133,] 0.32 -263.1030 [134,] 0.33 -263.1925 [135,] 0.34 -263.2839 [136,] 0.35 -263.3773 [137,] 0.36 -263.4727 [138,] 0.37 -263.5700 [139,] 0.38 -263.6692 [140,] 0.39 -263.7704 [141,] 0.40 -263.8735 [142,] 0.41 -263.9786 [143,] 0.42 -264.0856 [144,] 0.43 -264.1947 [145,] 0.44 -264.3056 [146,] 0.45 -264.4185 [147,] 0.46 -264.5334 [148,] 0.47 -264.6503 [149,] 0.48 -264.7691 [150,] 0.49 -264.8899 [151,] 0.50 -265.0126 [152,] 0.51 -265.1373 [153,] 0.52 -265.2640 [154,] 0.53 -265.3926 [155,] 0.54 -265.5233 [156,] 0.55 -265.6559 [157,] 0.56 -265.7904 [158,] 0.57 -265.9270 [159,] 0.58 -266.0655 [160,] 0.59 -266.2059 [161,] 0.60 -266.3484 [162,] 0.61 -266.4928 [163,] 0.62 -266.6393 [164,] 0.63 -266.7876 [165,] 0.64 -266.9380 [166,] 0.65 -267.0903 [167,] 0.66 -267.2447 [168,] 0.67 -267.4010 [169,] 0.68 -267.5592 [170,] 0.69 -267.7195 [171,] 0.70 -267.8817 [172,] 0.71 -268.0459 [173,] 0.72 -268.2121 [174,] 0.73 -268.3803 [175,] 0.74 -268.5504 [176,] 0.75 -268.7226 [177,] 0.76 -268.8967 [178,] 0.77 -269.0728 [179,] 0.78 -269.2508 [180,] 0.79 -269.4308 [181,] 0.80 -269.6129 [182,] 0.81 -269.7968 [183,] 0.82 -269.9828 [184,] 0.83 -270.1707 [185,] 0.84 -270.3606 [186,] 0.85 -270.5525 [187,] 0.86 -270.7464 [188,] 0.87 -270.9422 [189,] 0.88 -271.1400 [190,] 0.89 -271.3397 [191,] 0.90 -271.5414 [192,] 0.91 -271.7451 [193,] 0.92 -271.9508 [194,] 0.93 -272.1584 [195,] 0.94 -272.3680 [196,] 0.95 -272.5795 [197,] 0.96 -272.7930 [198,] 0.97 -273.0084 [199,] 0.98 -273.2259 [200,] 0.99 -273.4452 [201,] 1.00 -273.6665 >