# Today we worked on iterative solutions for maximum likelihood # estimation when there are interdependent parameters involved # in the likelihood. We illustrated the problem with an example # of random-effects meta-analysis, in which our goal is to estimate # the mean and variance of a population of true correlations. # The sample data set we used was a large (N=755) sample of studies # that correlated the General Aptitude Test Battery's General Ability # scale with various job-related outcomes. # Here's the data set. You'll notice that the "V" column is 1/(N-3) # for each study. The "Z" column contains correlations that have # been transformed so that their sampling variance IS 1/(N-3). > gatb <- read.csv(file.choose()) > head(gatb) X N Z V 1 1 38 0.321 0.02857143 2 2 31 0.192 0.03571429 3 3 32 0.266 0.03448276 4 4 27 0.255 0.04166667 5 5 23 0.288 0.05000000 6 6 47 0.161 0.02272727 # Here, we illustrate the idea that V is just 1/(N-3) by # calculating that for the first case in the data set: > 1/35 [1] 0.02857143 # Here's a function that implements the log likelihood: > loglike <- function(Z, V, pars) { + mu <- pars[1] + vc <- pars[2] + + return( sum( -1/2*log(V+vc) - 1/2*(Z-mu)^2/(V+vc) ) ) + } > attach(gatb) # Using that log-likelihood function, we can see that the # combination mu=.3, vc=.1... 
> pars <- c(.3, .1) > loglike(Z, V, pars) [1] 720.0979 # ...is less likely than mu=.2, vc=.05: > pars <- c(.2, .05) > loglike(Z, V, pars) [1] 847.9547 # We create a form of the log-likelihood that allows us # to try different values for mu: > loglikeMu <- function(Z, V, pars) { + vc <- pars[2] + mu <- x[i] + return( sum( -1/2*log(V+vc) - 1/2*(Z-mu)^2/(V+vc) ) ) + } # And here's a version that lets us try different values for # the variance component: > loglikeVC <- function(Z, V, pars) { + mu <- pars[1] + vc <- x[i] + return( sum( -1/2*log(V+vc) - 1/2*(Z-mu)^2/(V+vc) ) ) + } # We pick a pair of more-or-less arbitrary starting values: > pars [1] 0.20 0.05 # We create a range of mu's to consider, while keeping the # vc=.05: > x <- seq(.1,.3,.01) > y <- NULL # We calculate the log-likelihood for each trial value of mu: > for(i in 1:length(x)) y[i] <- loglikeMu(Z,V,pars) > plot(x,y,type='l') # We can see that the curve peaks somewhere near 0.26, so we # try a narrower range of values in that neighborhood: > x <- seq(.25,.27,.001) > y <- NULL > for(i in 1:length(x)) y[i] <- loglikeMu(Z,V,pars) > plot(x,y,type='l') # We narrow it down further: > x <- seq(.263,.265,.0001) > y <- NULL > for(i in 1:length(x)) y[i] <- loglikeMu(Z,V,pars) > plot(x,y,type='l') # We can see that the likelihood peaks at mu=.2641 and # mu=.2642, so we use .26415 as our estimated value of mu: > length(x) [1] 21 > cbind(x,y) x y [1,] 0.2630 871.8065 [2,] 0.2631 871.8078 [3,] 0.2632 871.8090 [4,] 0.2633 871.8101 [5,] 0.2634 871.8111 [6,] 0.2635 871.8119 [7,] 0.2636 871.8126 [8,] 0.2637 871.8133 [9,] 0.2638 871.8138 [10,] 0.2639 871.8141 [11,] 0.2640 871.8144 [12,] 0.2641 871.8146 [13,] 0.2642 871.8146 [14,] 0.2643 871.8145 [15,] 0.2644 871.8143 [16,] 0.2645 871.8140 [17,] 0.2646 871.8136 [18,] 0.2647 871.8131 [19,] 0.2648 871.8124 [20,] 0.2649 871.8116 [21,] 0.2650 871.8107 > pars[1] <- .26415 > pars [1] 0.26415 0.05000 # Now, we do the same thing for the vc, using the special # log-likelihood 
function that lets us vary the vc: > x <- seq(.01,.09,.001) > y <- NULL > for(i in 1:length(x)) y[i] <- loglikeVC(Z,V,pars) > plot(x,y,type='l') # We narrow it down: > x <- seq(.001,.019,.0001) > y <- NULL > for(i in 1:length(x)) y[i] <- loglikeVC(Z,V,pars) > plot(x,y,type='l') # And narrow it down some more: > x <- seq(.010,.013,.0001) > y <- NULL > for(i in 1:length(x)) y[i] <- loglikeVC(Z,V,pars) > plot(x,y,type='l') # The log-likelihood peaks at vc=0.0110, so we use that # as our new estimate for the variance component: > length(x) [1] 31 > cbind(x,y) x y [1,] 0.0100 992.9724 [2,] 0.0101 993.0363 [3,] 0.0102 993.0925 [4,] 0.0103 993.1413 [5,] 0.0104 993.1829 [6,] 0.0105 993.2172 [7,] 0.0106 993.2446 [8,] 0.0107 993.2650 [9,] 0.0108 993.2787 [10,] 0.0109 993.2858 [11,] 0.0110 993.2864 [12,] 0.0111 993.2806 [13,] 0.0112 993.2686 [14,] 0.0113 993.2504 [15,] 0.0114 993.2262 [16,] 0.0115 993.1961 [17,] 0.0116 993.1602 [18,] 0.0117 993.1186 [19,] 0.0118 993.0714 [20,] 0.0119 993.0187 [21,] 0.0120 992.9606 [22,] 0.0121 992.8972 [23,] 0.0122 992.8286 [24,] 0.0123 992.7550 [25,] 0.0124 992.6763 [26,] 0.0125 992.5927 [27,] 0.0126 992.5042 [28,] 0.0127 992.4110 [29,] 0.0128 992.3131 [30,] 0.0129 992.2107 [31,] 0.0130 992.1037 > > pars[2] <- .011 > pars [1] 0.26415 0.01100 # But mu and vc are interdependent, so now that we've changed # the vc, our estimate for mu might change: > x <- seq(.24,.28,.001) > y <- NULL > for(i in 1:length(x)) y[i] <- loglikeMu(Z,V,pars) > plot(x,y,type='l') > length(x) [1] 41 > cbind(x,y) x y [1,] 0.240 987.3125 [2,] 0.241 987.9140 [3,] 0.242 988.4849 [4,] 0.243 989.0253 [5,] 0.244 989.5350 [6,] 0.245 990.0141 [7,] 0.246 990.4627 [8,] 0.247 990.8806 [9,] 0.248 991.2680 [10,] 0.249 991.6247 [11,] 0.250 991.9509 [12,] 0.251 992.2464 [13,] 0.252 992.5114 [14,] 0.253 992.7457 [15,] 0.254 992.9495 [16,] 0.255 993.1227 [17,] 0.256 993.2653 [18,] 0.257 993.3772 [19,] 0.258 993.4586 [20,] 0.259 993.5094 [21,] 0.260 993.5296 [22,] 0.261 993.5192 [23,] 
0.262 993.4782 [24,] 0.263 993.4066 [25,] 0.264 993.3044 [26,] 0.265 993.1716 [27,] 0.266 993.0082 [28,] 0.267 992.8142 [29,] 0.268 992.5896 [30,] 0.269 992.3345 [31,] 0.270 992.0487 [32,] 0.271 991.7323 [33,] 0.272 991.3853 [34,] 0.273 991.0078 [35,] 0.274 990.5996 [36,] 0.275 990.1609 [37,] 0.276 989.6915 [38,] 0.277 989.1916 [39,] 0.278 988.6610 [40,] 0.279 988.0999 [41,] 0.280 987.5081 > pars [1] 0.26415 0.01100 # We narrow the range of trial values for mu: > x <- seq(.259,.261,.0001) > y <- NULL > for(i in 1:length(x)) y[i] <- loglikeMu(Z,V,pars) > plot(x,y) > plot(x,y,type='l') # Now the function peaks at mu=.2602, so we take that as our # new maximum likelihood estimate for mu: > cbind(x,y) x y [1,] 0.2590 993.5094 [2,] 0.2591 993.5128 [3,] 0.2592 993.5159 [4,] 0.2593 993.5187 [5,] 0.2594 993.5211 [6,] 0.2595 993.5233 [7,] 0.2596 993.5252 [8,] 0.2597 993.5267 [9,] 0.2598 993.5280 [10,] 0.2599 993.5289 [11,] 0.2600 993.5296 [12,] 0.2601 993.5299 [13,] 0.2602 993.5300 [14,] 0.2603 993.5297 [15,] 0.2604 993.5291 [16,] 0.2605 993.5282 [17,] 0.2606 993.5270 [18,] 0.2607 993.5255 [19,] 0.2608 993.5237 [20,] 0.2609 993.5216 [21,] 0.2610 993.5192 > pars [1] 0.26415 0.01100 > pars[1] <- .2602 > pars [1] 0.2602 0.0110 # But now the variance component might change again: > x <- seq(.010,.012,.0001) > y <- NULL > for(i in 1:length(x)) y[i] <- loglikeVC(Z,V,pars) > plot(x,y,type='l') > cbind(x,y) x y [1,] 0.0100 993.2709 [2,] 0.0101 993.3288 [3,] 0.0102 993.3792 [4,] 0.0103 993.4223 [5,] 0.0104 993.4581 [6,] 0.0105 993.4870 [7,] 0.0106 993.5089 [8,] 0.0107 993.5240 [9,] 0.0108 993.5325 [10,] 0.0109 993.5344 [11,] 0.0110 993.5300 [12,] 0.0111 993.5192 [13,] 0.0112 993.5023 [14,] 0.0113 993.4793 [15,] 0.0114 993.4504 [16,] 0.0115 993.4157 [17,] 0.0116 993.3752 [18,] 0.0117 993.3292 [19,] 0.0118 993.2776 [20,] 0.0119 993.2206 [21,] 0.0120 993.1583 # Our new value for vc is .0109: > pars[2] <- .0109 > pars [1] 0.2602 0.0109 # We could keep doing that until nothing changed, 
but it would # become very tedious (and actually wouldn't work for some problems), # so instead, we use an optimizer. The one I have chosen, nlminb(), # is actually a minimizer rather than a maximizer, so I create a # function that is the NEGATIVE of the log-likelihood. (This is sloppy # programming: I assumed that Z and V are present as variables called # Z and V). > neglike <- function(pars) { + mu <- pars[1] + vc <- pars[2] + return(sum( 1/2*log(V+vc) + 1/2*(Z-mu)^2/(V+vc))) + } # Note that the neglike() function is just -1 times the log-likelihood function: > loglikeMu function(Z, V, pars) { vc <- pars[2] mu <- x[i] return( sum( -1/2*log(V+vc) - 1/2*(Z-mu)^2/(V+vc) ) ) } # The optimizer tells us that the maximum likelihood estimates # for mu and vc are 0.26012031 and 0.01087881: > nlminb(pars, neglike) $par [1] 0.26012031 0.01087881 $objective [1] -993.5346 $convergence [1] 0 $iterations [1] 8 $evaluations function gradient 24 29 $message [1] "relative convergence (4)" Warning messages: 1: In log(V + vc) : NaNs produced 2: In nlminb(pars, neglike) : NA/NaN function evaluation 3: In log(V + vc) : NaNs produced 4: In nlminb(pars, neglike) : NA/NaN function evaluation # Those warning messages are just indicating that somewhere during # the course of iterating, the program tried the impossible task # of taking the log of a negative number. # The estimates round to the same values we had when we stopped our # manual iteration process: > pars [1] 0.2602 0.0109 # The following R code was automatically generated when I used the "Packages" # menu. 
> utils:::menuInstallPkgs() --- Please select a CRAN mirror for use in this session --- Warning in install.packages(lib = .libPaths()[1L], dependencies = NA, type = type) : 'lib = "C:/Program Files/R/R-4.0.3/library"' is not writable trying URL 'https://ftp.osuosl.org/pub/cran/bin/windows/contrib/4.0/metafor_2.4-0.zip' Content type 'application/zip' length 2988491 bytes (2.9 MB) downloaded 2.9 MB package ‘metafor’ successfully unpacked and MD5 sums checked The downloaded binary packages are in C:\Users\jvevea\AppData\Local\Temp\RtmpARcEYQ\downloaded_packages # I now have the "metafor" package for performing meta-analysis in my R space. # In order to use it, I have to employ the library() command to load it: > library(metafor) Loading required package: Matrix Loading 'metafor' package (version 2.4-0). For an overview and introduction to the package please type: help(metafor). Warning message: package ‘metafor’ was built under R version 4.0.4 # Here are the values for mu and vc we got using the optimizer: > pars > [1] 0.26012031 0.01087881 # Here, we use metafor to get the same estimates: > rma(Z, V, method='ML') Random-Effects Model (k = 755; tau^2 estimator: ML) tau^2 (estimated amount of total heterogeneity): 0.0109 (SE = 0.0012) tau (square root of estimated tau^2 value): 0.1043 I^2 (total heterogeneity / total variability): 50.86% H^2 (total variability / sampling variability): 2.03 Test for Heterogeneity: Q(df = 754) = 1493.7347, p-val < .0001 Model Results: estimate se zval pval ci.lb ci.ub 0.2601 0.0057 45.6237 <.0001 0.2489 0.2713 *** --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 > # Note that the variance component (tau^2 in the output) and the # mean ("estimate" in the output) match what we got using the optimizer. # Metafor, behind the scenes, was doing the same thing that we did.