# It can be useful to replicate posted R sessions yourself. However,
# if you try to copy and paste, you will encounter problems:

> 
> > 3 * 3
Error: unexpected '>' in ">"
> [1] 9
Error: unexpected '[' in "["

# R interpreted the prompt character (">") as a greater-than operator, and had
# no idea what to do with "> [1] 9". On a Windows machine, this issue can be
# overcome by using "paste commands only" from the edit menu:

> 3 * 3
[1] 9
 
# Here's a way to read a CSV (comma-separated values) file from a web source:

> Peabody <- read.csv("http://faculty.ucmerced.edu/jvevea/classes/202a/data/Peabody.csv")

# The result is a data frame, not a plain vector. The numbers printed on the
# left are row labels, and the scores are stored in a column that is also named
# Peabody:

> Peabody
   Peabody
1       69
2       72
3       94
4       64
5       80
6       77
7       96
8       86
9       89
10      69
11      92
12      71
13      81
14      90
15      84
16      76
17     100
18      57
19      61
20      84
21      81
22      65
23      87
24      92
25      89
26      79
27      91
28      65
29      91
30      81
31      86
32      85
33      95
34      93
35      83
36      76
37      84
38      90
39      95
40      67
 
# We can use "$" to extract the variable that actually interests us:
 
> Peabody$Peabody
 [1]  69  72  94  64  80  77  96  86  89  69  92  71  81  90  84  76 100  57  61
[20]  84  81  65  87  92  89  79  91  65  91  81  86  85  95  93  83  76  84  90
[39]  95  67

# I don't want to have to keep doing that for this variable, so I overwrite the
# Peabody data frame with just the part I want to keep (the vector of scores):

> Peabody <- Peabody$Peabody
> Peabody
 [1]  69  72  94  64  80  77  96  86  89  69  92  71  81  90  84  76 100  57  61
[20]  84  81  65  87  92  89  79  91  65  91  81  86  85  95  93  83  76  84  90
[39]  95  67

# Let's see what's in our data space...

> ls()
[1] "MyVariable" "Peabody"   

# ...and get rid of something from last time that we don't care about:

> rm(MyVariable)
> ls()
[1] "Peabody"

# R's "length" function tells us how many observations a variable has:

> length(Peabody)
[1] 40

# We can sort the variable into ascending order using the "sort" function:

> sort(Peabody)
 [1]  57  61  64  65  65  67  69  69  71  72  76  76  77  79  80  81  81  81  83
[20]  84  84  84  85  86  86  87  89  89  90  90  91  91  92  92  93  94  95  95
[39]  96 100

# R does pretty good stem-and-leaf plots. This can be useful for making decisions
# about how to group the data.

> stem(Peabody)

  The decimal point is 1 digit(s) to the right of the |

   5 | 7
   6 | 14
   6 | 55799
   7 | 12
   7 | 6679
   8 | 01113444
   8 | 566799
   9 | 00112234
   9 | 556
  10 | 0

# The mean of a variable is its arithmetic average:

> sum(Peabody) / length(Peabody)
[1] 81.675
> mean(Peabody)
[1] 81.675

# The median is the unique centermost observation in a data set with
# an odd number of cases, or the mean of the two centermost observations
# in a data set with an even number of cases:

> median(Peabody)
[1] 84

# As we've seen before, the "mode" function doesn't give us the central-tendency
# definition of the mode; it reports the storage mode (type) of the object:

> mode(Peabody)
[1] "numeric"

# Stem-and-leaf plots are not useful for very large data sets:

> junk <- rnorm(10000)
> stem(junk)

  The decimal point is at the |

  -4 | 2
  -3 | 7
  -3 | 332211000000
  -2 | 99998888888777777777776666666666655555555555555555555
  -2 | 44444444444444444444443333333333333333333333333333222222222222222222+114
  -1 | 99999999999999999999999999999999999999999999999999999988888888888888+385
  -1 | 44444444444444444444444444444444444444444444444444444444444444444444+944
  -0 | 99999999999999999999999999999999999999999999999999999999999999999999+1512
  -0 | 44444444444444444444444444444444444444444444444444444444444444444444+1633
   0 | 00000000000000000000000000000000000000000000000000000000000000000000+1656
   0 | 55555555555555555555555555555555555555555555555555555555555555555555+1470
   1 | 00000000000000000000000000000000000000000000000000000000000000000000+874
   1 | 55555555555555555555555555555555555555555555555555555555555555555555+417
   2 | 00000000000000000000000000000000000000000000001111111111111111111111+76
   2 | 55555555555555555555666666677777777888899
   3 | 0001112233
   3 | 8

> rm(junk)
 
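# R's "hist" function doesn't do a particularly good job with its default
# settings. The "nclass" argument requests a number of bars, but R treats the
# request only as a suggestion: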

> hist(Peabody)
> help(hist)
starting httpd help server ... done
> hist(Peabody,nclass=5)
> hist(Peabody,nclass=4)

# Here, we force R to put the bars where we want them. (See today's PowerPoint
# for an alternative to the "seq" function.)

> hist(Peabody,breaks=seq(54.5,104.5,5))

# R has put the X-axis labels in what are now particularly strange locations.
# We can correct this by suppressing the creation of axes...

> hist(Peabody, breaks=seq(54.5,104.5,5), axes=F)

# ...and drawing them in a way that makes more sense:

> axis(side=1,at=seq(57,102,5))
> axis(side=2,at=seq(0,8,2))
 
# I like to force an aspect ratio in which the plot field is about
# 2/3 as tall as it is wide. Here, "par" tells R that I want to set a graphical
# parameter, and "pin" is the plot size in inches, given as c(width, height).

> par(pin=c(6,4))
> hist(Peabody, breaks=seq(54.5,104.5,5), axes=F)

# This time, I add "pos=0" to draw the X axis at y = 0, so that it sits exactly
# at the bottom of the histogram bars:

> axis(side=1,at=seq(57,102,5), pos=0)
> axis(side=2,at=seq(0,8,2))

# Finally, I give the bars a horizontal line to sit on:

> abline(h=0)
>