# Here, we calculate the probability that Giants' player # Dickerson actually had Covid, given his positive test: > sens <- .99 > spec <- .95 > base <- .02 > sens * base / (sens*base + (1-spec)*(1-base)) [1] 0.2877907 # If we toss a fair coin 10 times, the probability of getting # 0 heads is: > (1/2)^10 [1] 0.0009765625 # There are 10 ways we can get exactly 1 head. Since those ways # are mutually exclusive, we can apply the addition rule (which, # here, amounts to multiplying the probability of a sequence of # 10 tosses by the 10 ways we could get a single head): > 10*(1/2)^10 [1] 0.009765625 # But there are lots of ways we could get 2 heads out of 10 tosses: > > # HHTTTTTTTT > # HTHTTTTTTT > # HTTHTTTTTT > # HTTTHTTTTT > # HTTTTHTTTT > # HTTTTTHTTT > # HTTTTTTHTT > # HTTTTTTTHT > # HTTTTTTTTH > # THHTTTTTTT > # THTHTTTTTT > # THTTHTTTTT > # THTTTHTTTT > # THTTTTHTTT > # THTTTTTHTT > # THTTTTTTHT > # THTTTTTTTH > # TTHHTTTTTT > # TTHTHTTTTT > # TTHTTHTTTT > # TTHTTTHTTT > # TTHTTTTHTT > # TTHTTTTTHT > # TTHTTTTTTH > # TTTHHTTTTT > # TTTHTHTTTT > # TTTHTTHTTT > # TTTHTTTHTT > # TTTHTTTTHT > # TTTHTTTTTH > # TTTTHHTTTT > # TTTTHTHTTT > # TTTTHTTHTT > # TTTTHTTTHT > # TTTTHTTTTH > # TTTTTHHTTT > # TTTTTHTHTT > # TTTTTHTTHT > # TTTTTHTTTH > # TTTTTTHHTT > # TTTTTTHTHT > # TTTTTTHTTH > # TTTTTTTHHT > # TTTTTTTHTH > # TTTTTTTTHH # That's 45 ways. You can imagine how long we would need # to go on to enumerate all the possible ways we would get # 3 heads out of 10 tosses. We need a more efficient way # of counting the possible ways. # Fortunately, there's a combinatorics formula known as # "choose" that gives us the answer. "N choose k" tells # us how many ways we can get k successes out of N trials. # The formula is N!/(k!(N-k)!), where "!" means "factorial." # The factorial of a number is the number times (the number - 1) # times (the number - 2), all the way down to 1. 
For example, # 10 factorial is 10*9*8*7*6*5*4*3*2*1: > > factorial(10) [1] 3628800 # So here we find, as before, that we can get 2 heads out of # 10 tosses in 45 different ways: > factorial(10) / (factorial(2)*factorial(8)) [1] 45 # Each of those ways occurs with probability (1/2)^10, so # the probability of 2 heads out of 10 tosses of a fair coin is: > 45 * (1/2)^10 [1] 0.04394531 # We don't have to work out the full "choose" formula... > factorial(10) / (factorial(2)*factorial(8)) [1] 45 # ...because R has a function that does it for us: > choose(10, 2) [1] 45 # Here, then, is the probability of getting 3 heads: > (1/2)^10 * choose(10,3) [1] 0.1171875 > choose(10,3) [1] 120 # Or 4 heads: > (1/2)^10 * choose(10,4) [1] 0.2050781 > choose(10,4) [1] 210 > choose(10,5) [1] 252 # 5 heads: > (1/2)^10 * choose(10,5) [1] 0.2460938 # 6 heads: > (1/2)^10 * choose(10,6) [1] 0.2050781 # And an even more efficient way to get these probabilities is # to use R's "dbinom" function. The syntax is dbinom(number of successes, # number of trials, probability of success on one trial): > dbinom(0, 10, 1/2) [1] 0.0009765625 > dbinom(1, 10, 1/2) [1] 0.009765625 > dbinom(2, 10, 1/2) [1] 0.04394531 > dbinom(5, 10, 1/2) [1] 0.2460938 # And we can apply that function to a vector of possible outcomes: > outcomes <- 0:10 > dbinom(outcomes, 10, 1/2) [1] 0.0009765625 0.0097656250 0.0439453125 0.1171875000 0.2050781250 [6] 0.2460937500 0.2050781250 0.1171875000 0.0439453125 0.0097656250 [11] 0.0009765625 # We can use that information to make a table of the probability distribution: > cbind(outcomes, dbinom(outcomes, 10, 1/2)) outcomes [1,] 0 0.0009765625 [2,] 1 0.0097656250 [3,] 2 0.0439453125 [4,] 3 0.1171875000 [5,] 4 0.2050781250 [6,] 5 0.2460937500 [7,] 6 0.2050781250 [8,] 7 0.1171875000 [9,] 8 0.0439453125 [10,] 9 0.0097656250 [11,] 10 0.0009765625 # Note that the probability of getting 8 or more heads in 10 # tosses is a little bit over .05: > sum(dbinom(8:10, 10, 1/2)) [1] 0.0546875 # Here, 
"successes" denotes the probability associated with # each of the possible "outcomes." But if we were using the # coin toss experiment to estimate the true probability of # heads, each "outcome" would lead to an estimated probability # of success ("probs") equal to the outcome divided by 10. # So if we consider the first two columns of the following # table, it's a probability distribution: possible outcomes, # associated probabilities. But if we consider the third and # second columns, it's a sampling distribution: possible values # of a statistic, associated probabilities. > successes <- dbinom(outcomes, 10, 1/2) > probs <- outcomes/10 > cbind(outcomes, successes, probs) outcomes successes probs [1,] 0 0.0009765625 0.0 [2,] 1 0.0097656250 0.1 [3,] 2 0.0439453125 0.2 [4,] 3 0.1171875000 0.3 [5,] 4 0.2050781250 0.4 [6,] 5 0.2460937500 0.5 [7,] 6 0.2050781250 0.6 [8,] 7 0.1171875000 0.7 [9,] 8 0.0439453125 0.8 [10,] 9 0.0097656250 0.9 [11,] 10 0.0009765625 1.0 # The probability distribution of a statistic is called # a SAMPLING DISTRIBUTION.