# Today we worked on iterative solutions for maximum likelihood # estimation when there are interdependent parameters involved # in the likelihood. We illustrated the problem with an example # of random-effects meta-analysis, in which our goal is to estimate # the mean and variance of a population of true correlations. # The sample data set we used was a large (N=755) sample of studies # that correlated the General Aptitude Test Battery's General Ability # scale with various job-related outcomes. # Here's the data set. You'll notice that the "V" column is 1/(N-3) # for each study. The "Z" column contains correlations that have # been transformed so that their sampling variance IS 1/(N-3). > gatb <- read.csv(file.choose()) > head(gatb) X N Z V 1 1 38 0.321 0.02857143 2 2 31 0.192 0.03571429 3 3 32 0.266 0.03448276 4 4 27 0.255 0.04166667 5 5 23 0.288 0.05000000 6 6 47 0.161 0.02272727 # Here, we illustrate the idea that V is just 1/(N-3) by # calculating that for the first case in the data set: > 1/35 [1] 0.02857143 # Here's a function that implements the log likelihood: > loglike <- function(Z, V, pars) { + mu <- pars[1] + vc <- pars[2] + + return( sum( -1/2*log(V+vc) - 1/2*(Z-mu)^2/(V+vc) ) ) + } > attach(gatb) # Using that log-likelihood function, we can see that the # combination mu=.3, vc=.1... 
> pars <- c(.3, .1) > loglike(Z, V, pars) [1] 720.0979 # ...is less likely than mu=.2, vc=.05: > pars <- c(.2, .05) > loglike(Z, V, pars) [1] 847.9547 # We create a form of the log-likelihood that allows us # to try different values for mu: > loglikeMu <- function(Z, V, pars) { + vc <- pars[2] + mu <- x[i] + return( sum( -1/2*log(V+vc) - 1/2*(Z-mu)^2/(V+vc) ) ) + } # And here's a version that lets us try different values for # the variance component: > loglikeVC <- function(Z, V, pars) { + mu <- pars[1] + vc <- x[i] + return( sum( -1/2*log(V+vc) - 1/2*(Z-mu)^2/(V+vc) ) ) + } # We pick a pair of more-or-less arbitrary starting values: > pars [1] 0.20 0.05 # We create a range of mu's to consider, while keeping the # vc=.05: > x <- seq(.1,.3,.01) > y <- NULL # We calculate the log-likelihood for each trial value of mu: > for(i in 1:length(x)) y[i] <- loglikeMu(Z,V,pars) > plot(x,y,type='l') # We can see that the curve peaks somewhere near 0.26, so we # try a narrower range of values in that neighborhood: > x <- seq(.25,.27,.001) > y <- NULL > for(i in 1:length(x)) y[i] <- loglikeMu(Z,V,pars) > plot(x,y,type='l') # We narrow it down further: > x <- seq(.263,.265,.0001) > y <- NULL > for(i in 1:length(x)) y[i] <- loglikeMu(Z,V,pars) > plot(x,y,type='l') # We can see that the likelihood peaks at mu=.2641 and # mu=.2642, so we use .26415 as our estimated value of mu: > length(x) [1] 21 > cbind(x,y) x y [1,] 0.2630 871.8065 [2,] 0.2631 871.8078 [3,] 0.2632 871.8090 [4,] 0.2633 871.8101 [5,] 0.2634 871.8111 [6,] 0.2635 871.8119 [7,] 0.2636 871.8126 [8,] 0.2637 871.8133 [9,] 0.2638 871.8138 [10,] 0.2639 871.8141 [11,] 0.2640 871.8144 [12,] 0.2641 871.8146 [13,] 0.2642 871.8146 [14,] 0.2643 871.8145 [15,] 0.2644 871.8143 [16,] 0.2645 871.8140 [17,] 0.2646 871.8136 [18,] 0.2647 871.8131 [19,] 0.2648 871.8124 [20,] 0.2649 871.8116 [21,] 0.2650 871.8107 > pars[1] <- .26415 > pars [1] 0.26415 0.05000 # Now, we do the same thing for the vc, using the special # log-likelihood 
function that lets us vary the vc: > x <- seq(.01,.09,.001) > y <- NULL > for(i in 1:length(x)) y[i] <- loglikeVC(Z,V,pars) > plot(x,y,type='l') # We narrow it down: > x <- seq(.001,.019,.0001) > y <- NULL > for(i in 1:length(x)) y[i] <- loglikeVC(Z,V,pars) > plot(x,y,type='l') # And narrow it down some more: > x <- seq(.010,.013,.0001) > y <- NULL > for(i in 1:length(x)) y[i] <- loglikeVC(Z,V,pars) > plot(x,y,type='l') # The log-likelihood peaks at vc=0.0110, so we use that # as our new estimate for the variance component: > length(x) [1] 31 > cbind(x,y) x y [1,] 0.0100 992.9724 [2,] 0.0101 993.0363 [3,] 0.0102 993.0925 [4,] 0.0103 993.1413 [5,] 0.0104 993.1829 [6,] 0.0105 993.2172 [7,] 0.0106 993.2446 [8,] 0.0107 993.2650 [9,] 0.0108 993.2787 [10,] 0.0109 993.2858 [11,] 0.0110 993.2864 [12,] 0.0111 993.2806 [13,] 0.0112 993.2686 [14,] 0.0113 993.2504 [15,] 0.0114 993.2262 [16,] 0.0115 993.1961 [17,] 0.0116 993.1602 [18,] 0.0117 993.1186 [19,] 0.0118 993.0714 [20,] 0.0119 993.0187 [21,] 0.0120 992.9606 [22,] 0.0121 992.8972 [23,] 0.0122 992.8286 [24,] 0.0123 992.7550 [25,] 0.0124 992.6763 [26,] 0.0125 992.5927 [27,] 0.0126 992.5042 [28,] 0.0127 992.4110 [29,] 0.0128 992.3131 [30,] 0.0129 992.2107 [31,] 0.0130 992.1037 > > pars[2] <- .011 > pars [1] 0.26415 0.01100 # But mu and vc are interdependent, so now that we've changed # the vc, our estimate for mu might change: > x <- seq(.24,.28,.001) > y <- NULL > for(i in 1:length(x)) y[i] <- loglikeMu(Z,V,pars) > plot(x,y,type='l') > length(x) [1] 41 > cbind(x,y) x y [1,] 0.240 987.3125 [2,] 0.241 987.9140 [3,] 0.242 988.4849 [4,] 0.243 989.0253 [5,] 0.244 989.5350 [6,] 0.245 990.0141 [7,] 0.246 990.4627 [8,] 0.247 990.8806 [9,] 0.248 991.2680 [10,] 0.249 991.6247 [11,] 0.250 991.9509 [12,] 0.251 992.2464 [13,] 0.252 992.5114 [14,] 0.253 992.7457 [15,] 0.254 992.9495 [16,] 0.255 993.1227 [17,] 0.256 993.2653 [18,] 0.257 993.3772 [19,] 0.258 993.4586 [20,] 0.259 993.5094 [21,] 0.260 993.5296 [22,] 0.261 993.5192 [23,] 
0.262 993.4782 [24,] 0.263 993.4066 [25,] 0.264 993.3044 [26,] 0.265 993.1716 [27,] 0.266 993.0082 [28,] 0.267 992.8142 [29,] 0.268 992.5896 [30,] 0.269 992.3345 [31,] 0.270 992.0487 [32,] 0.271 991.7323 [33,] 0.272 991.3853 [34,] 0.273 991.0078 [35,] 0.274 990.5996 [36,] 0.275 990.1609 [37,] 0.276 989.6915 [38,] 0.277 989.1916 [39,] 0.278 988.6610 [40,] 0.279 988.0999 [41,] 0.280 987.5081 > pars [1] 0.26415 0.01100 # We narrow the range of trial values for mu: > x <- seq(.259,.261,.0001) > y <- NULL > for(i in 1:length(x)) y[i] <- loglikeMu(Z,V,pars) > plot(x,y) > plot(x,y,type='l') # Now the function peaks at mu=.2602, so we take that as our # new maximum likelihood estimate for mu: > cbind(x,y) x y [1,] 0.2590 993.5094 [2,] 0.2591 993.5128 [3,] 0.2592 993.5159 [4,] 0.2593 993.5187 [5,] 0.2594 993.5211 [6,] 0.2595 993.5233 [7,] 0.2596 993.5252 [8,] 0.2597 993.5267 [9,] 0.2598 993.5280 [10,] 0.2599 993.5289 [11,] 0.2600 993.5296 [12,] 0.2601 993.5299 [13,] 0.2602 993.5300 [14,] 0.2603 993.5297 [15,] 0.2604 993.5291 [16,] 0.2605 993.5282 [17,] 0.2606 993.5270 [18,] 0.2607 993.5255 [19,] 0.2608 993.5237 [20,] 0.2609 993.5216 [21,] 0.2610 993.5192 > pars [1] 0.26415 0.01100 > pars[1] <- .2602 > pars [1] 0.2602 0.0110 # But now the variance component might change again: > x <- seq(.010,.012,.0001) > y <- NULL > for(i in 1:length(x)) y[i] <- loglikeVC(Z,V,pars) > plot(x,y,type='l') > cbind(x,y) x y [1,] 0.0100 993.2709 [2,] 0.0101 993.3288 [3,] 0.0102 993.3792 [4,] 0.0103 993.4223 [5,] 0.0104 993.4581 [6,] 0.0105 993.4870 [7,] 0.0106 993.5089 [8,] 0.0107 993.5240 [9,] 0.0108 993.5325 [10,] 0.0109 993.5344 [11,] 0.0110 993.5300 [12,] 0.0111 993.5192 [13,] 0.0112 993.5023 [14,] 0.0113 993.4793 [15,] 0.0114 993.4504 [16,] 0.0115 993.4157 [17,] 0.0116 993.3752 [18,] 0.0117 993.3292 [19,] 0.0118 993.2776 [20,] 0.0119 993.2206 [21,] 0.0120 993.1583 # Our new value for vc is .0109: > pars[2] <- .0109 > pars [1] 0.2602 0.0109 # We could keep doing that until nothing changed, 
but it would # become very tedious (and actually wouldn't work for some problems), # so instead, we use an optimizer. The one I have chosen, nlminb(), # is actually a minimizer rather than a maximizer, so I create a # function that is the NEGATIVE of the log-likelihood. (This is sloppy # programming: I assumed that Z and V are present as variables called # Z and V). > neglike <- function(pars) { + mu <- pars[1] + vc <- pars[2] + return(sum( 1/2*log(V+vc) + 1/2*(Z-mu)^2/(V+vc))) + } # Note that the neglike() function is just -1 times the log-likelihood function: > loglikeMu function(Z, V, pars) { vc <- pars[2] mu <- x[i] return( sum( -1/2*log(V+vc) - 1/2*(Z-mu)^2/(V+vc) ) ) } # The optimizer tells us that the maximum likelihood estimates # for mu and vc are 0.26012031 and 0.01087881: > nlminb(pars, neglike) $par [1] 0.26012031 0.01087881 $objective [1] -993.5346 $convergence [1] 0 $iterations [1] 8 $evaluations function gradient 24 29 $message [1] "relative convergence (4)" Warning messages: 1: In log(V + vc) : NaNs produced 2: In nlminb(pars, neglike) : NA/NaN function evaluation 3: In log(V + vc) : NaNs produced 4: In nlminb(pars, neglike) : NA/NaN function evaluation # Those warning messages are just indicating that somewhere during # the course of iterating, the program tried the impossible task # of taking the log of a negative number. # The estimates round to the same values we had when we stopped our # manual iteration process: > pars [1] 0.2602 0.0109 # The following R code was automatically generated when I used the "Packages" # menu. 
> utils:::menuInstallPkgs() --- Please select a CRAN mirror for use in this session --- Warning in install.packages(lib = .libPaths()[1L], dependencies = NA, type = type) : 'lib = "C:/Program Files/R/R-4.0.3/library"' is not writable trying URL 'https://ftp.osuosl.org/pub/cran/bin/windows/contrib/4.0/metafor_2.4-0.zip' Content type 'application/zip' length 2988491 bytes (2.9 MB) downloaded 2.9 MB package ‘metafor’ successfully unpacked and MD5 sums checked The downloaded binary packages are in C:\Users\jvevea\AppData\Local\Temp\RtmpARcEYQ\downloaded_packages # I now have the "metafor" package for performing meta-analysis in my R space. # In order to use it, I have to employ the library() command to load it: > library(metafor) Loading required package: Matrix Loading 'metafor' package (version 2.4-0). For an overview and introduction to the package please type: help(metafor). Warning message: package ‘metafor’ was built under R version 4.0.4 # Here are the values for mu and vc we got using the optimizer: > pars > [1] 0.26012031 0.01087881 # Here, we use metafor to get the same estimates: > rma(Z, V, method='ML') Random-Effects Model (k = 755; tau^2 estimator: ML) tau^2 (estimated amount of total heterogeneity): 0.0109 (SE = 0.0012) tau (square root of estimated tau^2 value): 0.1043 I^2 (total heterogeneity / total variability): 50.86% H^2 (total variability / sampling variability): 2.03 Test for Heterogeneity: Q(df = 754) = 1493.7347, p-val < .0001 Model Results: estimate se zval pval ci.lb ci.ub 0.2601 0.0057 45.6237 <.0001 0.2489 0.2713 *** --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 > # Note that the variance component (tau^2 in the output) and the # mean ("estimate" in the output) match what we got using the optimizer. # Metafor, behind the scenes, was doing the same thing that we did.