###################################################################### # Introduction to Probability in R ##################################################################### ##### A few reminders about R ##### # R is case-sensitive # Use # to start a comment line. # Command-enter to evaluate # It's not cool to use = for assignment. Use <- # R likes vectors. To create a fector, use "c(...)". For example: # nums<-c(2,3,67,11) # Like Mathematica, R is happiest when used as a functional language. # It too likes to operate on lists as a single entity whenver possible # The basic R is ok but to make R really useful you need to use packages. ###################################################################### ##### R makes it very easy to generate random values. ##### # For example, suppose we want to simulate flipping a coin a large number of times. # To do so, we can *sample* from the *list* c("H","T") a large number of times, # with replacement (so we get to use "H" and "T" over and #over again). N <- 100 flips <- sample( c("H","T"), N, replace=T) flips # Question: How many H and T did we get? # One way to find the answer: Use the "table" function. table(flips) # To find out more about the table function, use ?: ?table # You can get the max value from a table like this: max(table(flips)) # Let's find the proportion of H's: propH <- mean(flips=="H") propH # If we did this repeatedly, how would the proportion of H's vary? M <- 10000 # number of times to repeat vals <- rep(0,M) for(i in 1:M){ flips <- sample(c("H","T"), N, replace=T) vals[i] <- mean(flips == "H") } vals # We can plot a histogram easily. hist(vals, breaks = 100) min(vals) max(vals) #################################################################### ## This is an example of the *Central Limit Theorem* in action! ## #################################################################### # Here's another way to generate a histogram, this time using the more advanced ggplot package. # The ggplot commands work best with what R calls a *data frame.* library(ggplot2) var.df <- data.frame(p = vals) ggplot(var.df, aes(p)) + geom_histogram(color="blue", fill="white", binwidth=.01) + ggtitle("Proportion of H's in fair coin flip") ##### Rolling Dice ##### # The sample function can simulate rolling a die: sample(1:6, 1) sample(1:6, 1) sample(1:6, 1) # The sample function can also simulate rolling many dice: sample(1:6, 5, replace=T) # we can write our own function to make this a bit simpler to use: myDice <- function(n){ sample(1:6, n, replace=T) } myDice(8) # Suppose we want the sum of many dice: dice <- function(n=2){ vals <- myDice(n) sum(vals) } # The sum of two dice: dice() dice() dice() # The sum of ten dice: dice(10) dice(10) dice(10) # How about a distribution of the sum of two dice? M <- 10000 # number of repitions. vals <- rep(0,M) for(i in 1:M){ vals[i] <- dice() } hist(vals, main="Histogram of Totals of two dice") # Here's a slightly more illustrative plot using ggplot: vals.df <- data.frame(table(vals)) ggplot(vals.df) + geom_segment(aes(x=vals, xend=vals, y=0, yend=Freq)) + geom_point(aes(x=vals, y=Freq), size=7, color="red") + ggtitle("Histogram of totals of two dice")