# We have previously seen that R's probability functions that
# start with 'r' randomly sample from the distribution. For example,
# here we sample three draws from a uniform distribution over the
# interval (0, 1):

> runif(3)
[1] 0.1632170 0.7090183 0.8799919

# And here we take three draws from a standard normal distribution
# (which is to say, a normal distribution with a mean of 0 and a
# standard deviation of 1):

> rnorm(3)
[1] -1.3311472 0.5296140 -0.8114408

# These functions also have a form with a 'p' prefix. Such functions
# give the probability of drawing a value less than the specified value.
# For example, the probability that a single draw from a uniform (0, 1)
# distribution falls below 1/2 is 1/2:

> punif(1/2)
[1] 0.5

# The probability that a single draw from a standard normal distribution
# falls below 0 is 1/2:

> pnorm(0)
[1] 0.5

# In our fifth PowerPoint slide today, we were interested in the
# probability that an IQ drawn from a normal distribution with mean=100
# and standard deviation=12 falls above 136. The pnorm() function can
# give us the probability that it falls BELOW 136:

> pnorm(136, 100, 12)
[1] 0.9986501

# So the probability that it falls ABOVE 136 is one minus that probability:

> 1 - pnorm(136, 100, 12)
[1] 0.001349898

# In the PowerPoint, we saw that the probability of one randomly observed
# IQ falling above 136, OR a second IQ falling above 136, was .0027. Here,
# we verify that result using the complex form of the addition rule:

> (1 - pnorm(136, 100, 12)) + (1 - pnorm(136, 100, 12)) - (1 - pnorm(136, 100, 12))^2
[1] 0.002697974

# But we can also easily get the same answer by simulating two IQs:

> iq1 <- rnorm(100000, 100, 12)
> iq2 <- rnorm(100000, 100, 12)
> mean(iq1 > 136)
[1] 0.00133

# In class, I accidentally used the simulation to combine the events
# with the AND concept rather than the OR, getting an answer that was
# too small:

> mean(iq1 > 136 & iq2 > 136)
[1] 1e-05

# Here's what I should have done. Note that the answer is near our
# analytical result of .0027:

> mean(iq1 > 136 | iq2 > 136)
[1] 0.00252

# Sometimes simulation can be easier than doing a problem analytically.
# For example, if we want the probability that a roll of two dice gives a
# total of 7, we need to know that there are 6 ways we can get that result:
# 6,1  1,6  5,2  2,5  4,3  3,4. Each of those outcomes occurs with
# probability 1/6 * 1/6 = 1/36, so the addition rule for these mutually
# exclusive outcomes gives the answer 6/36 = 1/6:

> 1/36 + 1/36 + 1/36 + 1/36 + 1/36 + 1/36
[1] 0.1666667

# But it's much easier to get that answer by simulating two rolls and
# simply seeing how often they sum to 7:

> roll1 <- sample(1:6, 1000000, replace=TRUE)
> roll2 <- sample(1:6, 1000000, replace=TRUE)
> mean(roll1 + roll2 == 7)
[1] 0.166217

# Figuring out the probability of rolling a 7 or an 11 is also easy:

> mean(roll1 + roll2 == 7 | roll1 + roll2 == 11)
[1] 0.22173

# We can also use simulation to learn about the properties of
# probability distributions. We consider the chi-square distribution,
# which is characterized by a single parameter, the degrees of freedom.
# I give myself a place to track the degrees of freedom that I use...

> df <- NULL

# ...as well as the mean, variance, standard deviation, pskew, and mskew
# of a simulated large sample from the distribution:

> mn <- NULL
> v <- NULL
> s <- NULL
> ps <- NULL
> ms <- NULL
> df
NULL

# That will make it easy to create a table that helps me see the pattern
# later on.
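# A note before we start: pskew() and mskew() are not built-in R functions;
# they come from code we have used earlier in the course. If you don't have
# them loaded, the definitions below are one plausible version (pskew() as
# Pearson's median-based skew, 3*(mean - median)/sd, and mskew() as the
# third standardized moment) that reproduces the kinds of values shown
# below:

> pskew <- function(x) 3 * (mean(x) - median(x)) / sd(x)
> mskew <- function(x) mean((x - mean(x))^3) / sd(x)^3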
# We start with one degree of freedom:

> df[1] <- 1
> df
[1] 1
> junk <- rchisq(1000000, 1)
> mean(junk)
[1] 1.001402
> mn[1] <- mean(junk)
> var(junk)
[1] 2.009905
> v[1] <- var(junk)
> s[1] <- sd(junk); sd(junk)
[1] 1.417711

# Visually, the distribution is strongly positively skewed:

> hist(junk)

# And that is reflected in our skew statistics:

> ps[1] <- pskew(junk); pskew(junk)
[1] 1.154306
> ms[1] <- mskew(junk); mskew(junk)
[1] 2.849668

# Now we consider 2 df:

> df[2] <- 2
> df
[1] 1 2
> junk <- rchisq(1000000, 2)
> mn[2] <- mean(junk); mean(junk)
[1] 2.003956
> v[2] <- var(junk); var(junk)
[1] 4.018177
> s[2] <- sd(junk); sd(junk)
[1] 2.004539
> ps[2] <- pskew(junk); pskew(junk)
[1] 0.9190081
> ms[2] <- mskew(junk); mskew(junk)
[1] 2.001402
> hist(junk)

# Here's 3 df:

> df[3] <- 3
> df
[1] 1 2 3
> junk <- rchisq(1000000, 3)
> mn[3] <- mean(junk); mean(junk)
[1] 2.998769
> v[3] <- var(junk); var(junk)
[1] 5.981074
> s[3] <- sd(junk); sd(junk)
[1] 2.445623
> ps[3] <- pskew(junk); pskew(junk)
[1] 0.7752378
> ms[3] <- mskew(junk); mskew(junk)
[1] 1.623649
> hist(junk)

# 5 df:

> df[4] <- 5
> junk <- rchisq(1000000, 5)
> mn[4] <- mean(junk); mean(junk)
[1] 5.00189
> v[4] <- var(junk); var(junk)
[1] 10.00091
> s[4] <- sd(junk); sd(junk)
[1] 3.162421
> ps[4] <- pskew(junk); pskew(junk)
[1] 0.6161249
> ms[4] <- mskew(junk); mskew(junk)
[1] 1.25947
> hist(junk)

# 10 df:

> df[5] <- 10
> junk <- rchisq(1000000, 10)
> mn[5] <- mean(junk); mean(junk)
[1] 9.990885
> v[5] <- var(junk); var(junk)
[1] 19.91036
> s[5] <- sd(junk); sd(junk)
[1] 4.462103
> ps[5] <- pskew(junk); pskew(junk)
[1] 0.4405679
> ms[5] <- mskew(junk); mskew(junk)
[1] 0.8863015
> hist(junk)

# And finally, we try 100 degrees of freedom:

> df[6] <- 100
> junk <- rchisq(1000000, 100)
> mn[6] <- mean(junk); mean(junk)
[1] 100.0109
> v[6] <- var(junk); var(junk)
[1] 200.0317
> s[6] <- sd(junk); sd(junk)
[1] 14.14326
> ps[6] <- pskew(junk); pskew(junk)
[1] 0.1405981
> ms[6] <- mskew(junk); mskew(junk)
[1] 0.2848502
> hist(junk)

# Here's a table of our results:

> cbind(df, mn, v, s, ps, ms)
      df         mn          v         s        ps        ms
[1,]   1   1.001402   2.009905  1.417711 1.1543061 2.8496679
[2,]   2   2.003956   4.018177  2.004539 0.9190081 2.0014024
[3,]   3   2.998769   5.981074  2.445623 0.7752378 1.6236488
[4,]   5   5.001890  10.000907  3.162421 0.6161249 1.2594695
[5,]  10   9.990885  19.910361  4.462103 0.4405679 0.8863015
[6,] 100 100.010941 200.031693 14.143256 0.1405981 0.2848502

# We can see that the mean of a chi-square distribution is the same
# as its degrees of freedom, and the variance is twice the degrees of
# freedom. Note that if we had tracked the standard deviation instead
# of the variance, it would have been hard to see the pattern; we don't
# instantly recognize that 4.462 is roughly the square root of 20. We
# can also see that the skew decreases as the df increases.
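# (A check we didn't run in class: the theoretical standard deviation of a
# chi-square distribution is sqrt(2*df), and its moment skew works out to
# sqrt(8/df). A line like
#
#   cbind(df, s, sqrt(2*df), ms, sqrt(8/df))
#
# would put those theoretical values next to our simulated ones so you can
# see how closely they agree.)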
# We illustrated Bayes' theorem using information about Covid rapid
# tests. For the rapid antigen test, the sensitivity (i.e., the
# probability of a positive test if you have the disease) is...

> sens <- .562

# The specificity (the probability of a negative test if you DON'T
# have the disease) is...

> spec <- .995

# In Merced County, the current base rate for the disease is...

> base <- .099

# We can use Bayes' theorem to tell us what we really want to know,
# which is the probability that I have Covid if I have tested positive:

> sens*base / (sens*base + (1-spec)*(1-base))
[1] 0.9250952

# Note that this result is very sensitive to the base rate. If we
# suppose that the prevalence of the disease is only 3%, the probability
# that I have Covid goes down quite a bit:

> base <- .03
> sens*base / (sens*base + (1-spec)*(1-base))
[1] 0.7766006

# And it goes down even more if the base rate is still lower:

> base <- .01
> sens*base / (sens*base + (1-spec)*(1-base))
[1] 0.5316935

# That's important to understand, as many medical diagnostic tests are
# for rare conditions, so that even with a positive test, the probability
# of having the condition can still be quite low.

# Another test, the rapid molecular test, has higher sensitivity but
# slightly lower specificity:

> sens <- .952
> spec <- .989
> base <- .099

# Counterintuitively, the probability of having Covid given a positive
# result on this test is slightly lower than for the other test:

> sens*base / (sens*base + (1-spec)*(1-base))
[1] 0.9048474

# Again, this is very sensitive to the base rate:

> base <- .01
> sens*base / (sens*base + (1-spec)*(1-base))
[1] 0.466438

> base <- .099
> sens*base / (sens*base + (1-spec)*(1-base))
[1] 0.9048474
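# Since we computed that same Bayes' theorem expression several times, it
# can be handy to wrap it in a small function. (We didn't do this in
# class; prob_pos is just an illustrative name.) It reproduces our first
# antigen-test result:

> prob_pos <- function(sens, spec, base) {
+   # probability of having the disease given a positive test
+   sens*base / (sens*base + (1-spec)*(1-base))
+ }
> prob_pos(.562, .995, .099)
[1] 0.9250952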