# We have previously seen that R's probability functions that
# start with 'r' randomly sample from the distribution. For example,
# here we sample three draws from a uniform distribution over the
# interval (0, 1):

> runif(3)
[1] 0.1632170 0.7090183 0.8799919

# And here we take three draws from a standard normal distribution
# (which is to say, a normal distribution with a mean of 0 and a
# standard deviation of 1):

> rnorm(3)
[1] -1.3311472 0.5296140 -0.8114408

# These functions also have a form with a 'p' prefix. Such functions
# give the probability of drawing a value less than the specified value.
# For example, the probability that a single draw from a uniform (0, 1)
# distribution falls below 1/2 is 1/2:

> punif(1/2)
[1] 0.5

# The probability that a single draw from a standard normal distribution
# falls below 0 is 1/2:

> pnorm(0)
[1] 0.5

# In our fifth PowerPoint slide today, we were interested in the
# probability that an IQ drawn from a normal distribution with mean=100
# and standard deviation=12 falls above 136. The pnorm() function can
# give us the probability that it falls BELOW 136:

> pnorm(136, 100, 12)
[1] 0.9986501

# So the probability that it falls ABOVE 136 is one minus that probability:

> 1 - pnorm(136, 100, 12)
[1] 0.001349898

# In the PowerPoint, we saw that the probability of one randomly observed
# IQ falling above 136, OR a second IQ falling above 136, was .0027. Here,
# we verify that result using the complex form of the addition rule:

> (1 - pnorm(136, 100, 12)) + (1 - pnorm(136, 100, 12)) - (1 - pnorm(136, 100, 12))^2
[1] 0.002697974

# But we can also easily get the same answer by simulating two IQs:

> iq1 <- rnorm(100000, 100, 12)
> iq2 <- rnorm(100000, 100, 12)
> mean(iq1 > 136)
[1] 0.00133

# In class, I accidentally used the simulation to combine the events
# with the AND concept rather than the OR, getting an answer that was
# too small:

> mean(iq1 > 136 & iq2 > 136)
[1] 1e-05

# Here's what I should have done. Note that the answer is near our
# analytical result of .0027:

> mean(iq1 > 136 | iq2 > 136)
[1] 0.00252

# Sometimes simulation can be easier than doing a problem analytically.
# For example, if we want the probability that a roll of two dice gives a
# total of 7, we need to know that there are 6 ways we can get that result:
# 6,1  1,6  5,2  2,5  4,3  3,4. Each of those outcomes occurs with
# probability 1/6 * 1/6 = 1/36, so the addition rule for these mutually
# exclusive outcomes gives the answer 6/36 = 1/6:

> 1/36 + 1/36 + 1/36 + 1/36 + 1/36 + 1/36
[1] 0.1666667

# But it's much easier to get that answer by simulating two rolls and
# simply seeing how often they sum to 7:

> roll1 <- sample(1:6, 1000000, replace=TRUE)
> roll2 <- sample(1:6, 1000000, replace=TRUE)
> mean(roll1 + roll2 == 7)
[1] 0.166217

# Figuring out the probability of rolling a 7 or an 11 is also easy:

> mean(roll1 + roll2 == 7 | roll1 + roll2 == 11)
[1] 0.22173

# We can also use simulation to learn about the properties of
# probability distributions. We consider the chi-square distribution,
# which is characterized by a single parameter, the degrees of freedom.
# I give myself a place to track the degrees of freedom that I use...

> df <- NULL

# ...as well as the mean, variance, standard deviation, pskew, and mskew
# of a simulated large sample from the distribution:

> mn <- NULL
> v <- NULL
> s <- NULL
> ps <- NULL
> ms <- NULL
> df
NULL

# That will make it easy to create a table that helps me see the pattern
# later on.
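# A note before we start: pskew() and mskew() are not built-in R functions;
# they come from code we have used earlier in the course. If you don't have
# them loaded, the definitions below are one plausible version (pskew() as
# Pearson's median-based skew, 3*(mean - median)/sd, and mskew() as the
# third standardized moment) that reproduces the kinds of values shown
# below:

> pskew <- function(x) 3 * (mean(x) - median(x)) / sd(x)
> mskew <- function(x) mean((x - mean(x))^3) / sd(x)^3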
# We start with one degree of freedom:

> df[1] <- 1
> df
[1] 1
> junk <- rchisq(1000000, 1)
> mean(junk)
[1] 1.001402
> mn[1] <- mean(junk)
> var(junk)
[1] 2.009905
> v[1] <- var(junk)
> s[1] <- sd(junk); sd(junk)
[1] 1.417711

# Visually, the distribution is strongly positively skewed:

> hist(junk)

# And that is reflected in our skew statistics:

> ps[1] <- pskew(junk); pskew(junk)
[1] 1.154306
> ms[1] <- mskew(junk); mskew(junk)
[1] 2.849668

# Now we consider 2 df:

> df[2] <- 2
> df
[1] 1 2
> junk <- rchisq(1000000, 2)
> mn[2] <- mean(junk); mean(junk)
[1] 2.003956
> v[2] <- var(junk); var(junk)
[1] 4.018177
> s[2] <- sd(junk); sd(junk)
[1] 2.004539
> ps[2] <- pskew(junk); pskew(junk)
[1] 0.9190081
> ms[2] <- mskew(junk); mskew(junk)
[1] 2.001402
> hist(junk)

# Here's 3 df:

> df[3] <- 3
> df
[1] 1 2 3
> junk <- rchisq(1000000, 3)
> mn[3] <- mean(junk); mean(junk)
[1] 2.998769
> v[3] <- var(junk); var(junk)
[1] 5.981074
> s[3] <- sd(junk); sd(junk)
[1] 2.445623
> ps[3] <- pskew(junk); pskew(junk)
[1] 0.7752378
> ms[3] <- mskew(junk); mskew(junk)
[1] 1.623649
> hist(junk)

# 5 df:

> df[4] <- 5
> junk <- rchisq(1000000, 5)
> mn[4] <- mean(junk); mean(junk)
[1] 5.00189
> v[4] <- var(junk); var(junk)
[1] 10.00091
> s[4] <- sd(junk); sd(junk)
[1] 3.162421
> ps[4] <- pskew(junk); pskew(junk)
[1] 0.6161249
> ms[4] <- mskew(junk); mskew(junk)
[1] 1.25947
> hist(junk)

# 10 df:

> df[5] <- 10
> junk <- rchisq(1000000, 10)
> mn[5] <- mean(junk); mean(junk)
[1] 9.990885
> v[5] <- var(junk); var(junk)
[1] 19.91036
> s[5] <- sd(junk); sd(junk)
[1] 4.462103
> ps[5] <- pskew(junk); pskew(junk)
[1] 0.4405679
> ms[5] <- mskew(junk); mskew(junk)
[1] 0.8863015
> hist(junk)

# And finally, we try 100 degrees of freedom:

> df[6] <- 100
> junk <- rchisq(1000000, 100)
> mn[6] <- mean(junk); mean(junk)
[1] 100.0109
> v[6] <- var(junk); var(junk)
[1] 200.0317
> s[6] <- sd(junk); sd(junk)
[1] 14.14326
> ps[6] <- pskew(junk); pskew(junk)
[1] 0.1405981
> ms[6] <- mskew(junk); mskew(junk)
[1] 0.2848502
> hist(junk)

# Here's a table of our results:

> cbind(df, mn, v, s, ps, ms)
      df         mn          v         s        ps        ms
[1,]   1   1.001402   2.009905  1.417711 1.1543061 2.8496679
[2,]   2   2.003956   4.018177  2.004539 0.9190081 2.0014024
[3,]   3   2.998769   5.981074  2.445623 0.7752378 1.6236488
[4,]   5   5.001890  10.000907  3.162421 0.6161249 1.2594695
[5,]  10   9.990885  19.910361  4.462103 0.4405679 0.8863015
[6,] 100 100.010941 200.031693 14.143256 0.1405981 0.2848502

# We can see that the mean of a chi-square distribution is the same
# as its degrees of freedom, and the variance is twice the degrees of
# freedom. Note that if we had tracked the standard deviation instead
# of the variance, it would have been hard to see the pattern; we don't
# instantly recognize that 4.462 is roughly the square root of 20. We
# can also see that the skew decreases as the df increases.
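# (A check we didn't run in class: the theoretical standard deviation of a
# chi-square distribution is sqrt(2*df), and its moment skew works out to
# sqrt(8/df). A line like
#
#   cbind(df, s, sqrt(2*df), ms, sqrt(8/df))
#
# would put those theoretical values next to our simulated ones so you can
# see how closely they agree.)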
# We illustrated Bayes' theorem using information about Covid rapid
# tests. For the rapid antigen test, the sensitivity (i.e., the
# probability of a positive test if you have the disease) is...

> sens <- .562

# The specificity (the probability of a negative test if you DON'T
# have the disease) is...

> spec <- .995

# In Merced County, the current base rate for the disease is...

> base <- .099

# We can use Bayes' theorem to tell us what we really want to know,
# which is the probability that I have Covid if I have tested positive:

> sens*base / (sens*base + (1-spec)*(1-base))
[1] 0.9250952

# Note that this result is very sensitive to the base rate. If we
# suppose that the prevalence of the disease is only 3%, the probability
# that I have Covid goes down quite a bit:

> base <- .03
> sens*base / (sens*base + (1-spec)*(1-base))
[1] 0.7766006

# And it goes down even more if the base rate is still lower:

> base <- .01
> sens*base / (sens*base + (1-spec)*(1-base))
[1] 0.5316935

# That's important to understand, as many medical diagnostic tests are
# for rare conditions, so that even with a positive test, the probability
# of having the condition can still be quite low.

# Another test, the rapid molecular test, has higher sensitivity but
# slightly lower specificity:

> sens <- .952
> spec <- .989
> base <- .099

# Counterintuitively, the probability of having Covid given a positive
# result on this test is slightly lower than for the other test:

> sens*base / (sens*base + (1-spec)*(1-base))
[1] 0.9048474

# Again, this is very sensitive to the base rate:

> base <- .01
> sens*base / (sens*base + (1-spec)*(1-base))
[1] 0.466438

> base <- .099
> sens*base / (sens*base + (1-spec)*(1-base))
[1] 0.9048474
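# Since we computed that same Bayes' theorem expression several times, it
# can be handy to wrap it in a small function. (We didn't do this in
# class; prob_pos is just an illustrative name.) It reproduces our first
# antigen-test result:

> prob_pos <- function(sens, spec, base) {
+   # probability of having the disease given a positive test
+   sens*base / (sens*base + (1-spec)*(1-base))
+ }
> prob_pos(.562, .995, .099)
[1] 0.9250952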