# R demonstrations for level one statistics # # Copyright (C) 2002, P.S.P. Cowpertwait # You may use freely, modify, and distribute # under the GNU public licence. # # To use: # 1) Download this file to a directory # on your computer # 2) Start R # 3) From within R, type the command: # > source ('dirname/demos.R') # where `dirname' is the name of the directory # you downloaded this file to. # 4) To run a function type the name # of the function with parameters # in parenthesis (as in lecture demos) # Lecture 2 and 3 # Function to return a plot of the Normal distribution # for a given mean and sd. # For example, to get the plot of a # Normal RV with mean 10 and sd 5 use: # plot.norm(mean = 10, sd = 5) plot.norm <- function (mean = 0, sd = 1, nsd=3) { x <- seq(mean-nsd*sd, mean+nsd*sd, len=10000) plot(x, dnorm(x, mean, sd), typ='l', lty='solid', col='red', xlab='Normal variate', ylab='Normal density') } # plot the Binomial distribution plot.binom <- function (n = 10, p = 0.5) { barplot(dbinom(0:n, size = n, prob = p), col = 'lightblue', xlab = 'x', ylab='probability', names.arg = as.character(0:n)) } # Compare two Normal distributions # N(m1, s1) and N(m2, s2) side-by-side # on same plot comp.norm <- function (m1 = 0, s1 = 1, m2 = 0, s2 = 1) { x1 <- seq(m1 - 3 * s1, m1 + 3 * s1, len=1000) x2 <- seq(m2 - 3 * s2, m2 + 3 * s2, len=1000) y1 <- dnorm(x1, m1, s1) y2 <- dnorm(x2, m2, s2) plot(x1, dnorm(x1, m1, s1), xlab='x', ylab='density', typ='l', col='red', , ylim = range(y1, y2), xlim = range(x1, x2)) points(x2, dnorm(x2, m2, s2), typ='l', col='blue') } # The function below is used to show that # a Normal distribution N(np, np(1-p)) # approximates a Binomial B(n,p) distribution # (when np > 10). # Random variables can be generated or `simulated' # by prefixing the distribution name with `r', # e.g. rnorm(100) simulates 100 N(0,1) RVs # and rbinom(100, size=10, p=1/3) simulates # 100 B(10, 1/3) RVs binom.hist <- function(n = 100, p = 0.5, replicate = 100) { # simulate B(n,p) RVs x <- rbinom(replicate, size=n, prob=p) # create x values for the plot xvalues <- seq(min(x), max(x), len=1000) # plot `prob' histogram (fr = F gives `prob') hist(x, fr=F, col='lightblue') # add Normal density curve points ((xvalues-0.5), dnorm(xvalues, mean=mean(x), sd=sd(x)), type='l', lty=2, col='darkblue') } # Lectures 4 and 5 # Function to show the sample mean is Normal # when the population is Normal plot.rnorm.mean <- function(popmean = 0, popsd = 1, n = 20, repl=1000) { # generate a random sample of size n*repl # from an N(popmean, popsd) distribution # and put result in a `n' by `repl' matrix x x <- matrix( rnorm(n*repl, mean=popmean, sd=popsd), nr = n ) # calculate the sample mean of the columns: x.mean <- apply( x, 2, mean) xvalues <- seq(min(x.mean), max(x.mean), len=1000) # plot `prob' histogram old.par <- par(no.readonly = TRUE) par(col.sub = 'blue') par(col.main='red', cex.main=0.9, col.sub = 'darkgreen', cex.sub=1.4, col.lab = 'blue', cex.lab=1.4) hist.values <- hist(x.mean, fr=F, plot=F) y.max <- max(c(hist.values$density, dnorm(xvalues, mean=popmean, sd=popsd/sqrt(n)))) hist(x.mean, fr=F, col='lightblue', main="", ylim = c(0, y.max), xlab='Sample Means ') title(main=expression(paste("Normal Population: Distribution of Sample Mean", " ", bar(X), " ~ N(", mu,", ", frac(sigma, sqrt(n)), ")")), cex.main=1.5) title( sub=paste('(Random sample of size', as.character(n), 'taken from a Normal population with mean', as.character(popmean), 'and sd', as.character(popsd),')')) # add Normal density curve points (xvalues, dnorm(xvalues, mean=popmean, sd=popsd/sqrt(n)), type='l', lty=5, col='red') par(old.par) list(mean = mean(x.mean), sd=sd(x.mean) ) } # Examples showing that a linear combination # of Normal RVs is a Normal RV # e.g. if X ~ N(15, 10) and Y ~ N(20, 10) # then L = 4X - 3Y ~ N(0, 50) # # R commands (type them at the R prompt:>): # generate a random sample of 1000 values for X and Y: # x <- rnorm(1000, mean=15, sd=10) # y <- rnorm(1000, mean=20, sd=10) # qqnorm(x); qqline(x) # qqnorm(y); qqline(y) # hist(x) # hist(y) # L <- 4 * x - 3 * y # hist(L) # qqnorm(L); qqline(L) # L is a Normal RV # mean(L); sd(L) # the mean and sd are not # exactly 0 and 50 due # to L being a random variable # # Repeat for a larger sample # to get a mean and sd closer # to underlying values of 0 and 50: # x <- rnorm(1000000, mean=15, sd=10) # y <- rnorm(1000000, mean=20, sd=10) # L <- 4 * x - 3 * y # mean(L); sd(L) # Function to show the sample mean is approximately Normal # when the population is not Normal # (the exponential distribution is used for the underlying population) plot.rexp.mean <- function(popmean = 1, n = 20, repl=1000) { # generate a random sample of size n*repl # from an exp(popmean, popsd) distribution # and put result in a `n' by `repl' matrix x x <- matrix( rexp(n*repl, rate=1/popmean), nr = n ) popsd <- popmean # calculate the sample mean of the columns: x.mean <- apply( x, 2, mean) xvalues <- seq(min(x.mean), max(x.mean), len=1000) # plot `prob' histogram old.par <- par(no.readonly=TRUE) par(col.main='red', cex.main=0.9, col.sub = 'darkgreen', cex.sub=1.4, col.lab = 'blue', cex.lab=1.4) hist.values <- hist(x.mean, fr=F, plot=F) y.max <- max(c(hist.values$density, dnorm(xvalues, mean=popmean, sd=popsd/sqrt(n)))) hist(x.mean, fr=F, col='lightblue', main="", ylim = c(0, y.max), xlab='Sample Means') title(main=expression(paste("Central Limit Theorem: Sample Mean ", bar(X) %->% "N(", mu, " , ", frac(sigma, sqrt(n)), ")", " as n " %->% infinity)), cex.main=1.5) title( sub=paste('(Random sample of size', as.character(n), 'taken from a Non-Normal population with mean', as.character(popmean),')')) # add Normal density curve points (xvalues, dnorm(xvalues, mean=popmean, sd=popsd/sqrt(n)), type='l', lty=2, col='red') par(old.par) list(mean = mean(x.mean), sd=sd(x.mean) ) } # Lecture 6 # plot normal population plot.norm.pop <- function (popmean = 0, popsd = 1) { write (file='', "Use add.rmean() to add random sample and sample mean") mu <<- popmean; sigma <<- popsd x <- seq(mu - 3 * sigma, mu + 3 * sigma, len=1000) plot(x, dnorm(x, mean=mu, sd=sigma), typ='l', lty = 1, col='red', xlab='Normal variate', ylab='Normal density') segments(mu,0, mu, dnorm(mu, mean=mu, sd=sigma), lty=2, col='red') text(mu, dnorm(mu, mean=mu, sd=sigma)/2, expression(mu), pos=2, col='red', cex=2) } # add mean of random sample to plot add.rmean <- function (n = 20) { write (file='', "Use add.ci() to add a confidence interval") oldpar <- par(no.readonly=T) par(cex.sub = 1.2, col.sub = 'blue') samplesize <<- n rsamp <- rnorm(n, mean=mu, sd=sigma) rmean <<- mean(rsamp) points (rmean,0, col='blue', cex=1.5, pch=21, bg='darkblue') points (rsamp, rep(0, n), cex=0.8, col='blue', pch='|') text(rmean, 0, expression(bar(x)), col='blue', pos=3, cex=2) title( sub = paste('(Random sample of size ', n, ' and sample mean)') ) par(oldpar) } # add confidence interval to plot add.ci <- function(p=0.95) { oldpar <- par(no.readonly=T) par(lwd=2) marginerr <<- abs( qnorm( (1-p)/2 )) * sigma/sqrt(samplesize) segments(rmean, 0, rmean+marginerr, 0, col='darkblue') segments(rmean, 0, rmean-marginerr, 0, col='darkblue') par(oldpar) } # Lecture 7: Hypothesis tests # add point in hypothesis test # and return p-value of one-sided test # xbar is observed sample mean # popmean is the population mean (hypothesised value) plot.htest <- function(popmean, popsd, n, xbar, nsd=3) { plot.norm(popmean, popsd/sqrt(n), nsd=nsd) title(main=expression(paste('Under ', H[0], ', ', bar(X), " ~ N(", mu,", ", frac(sigma, sqrt(n)), ")")), cex.main=1.5) segments(popmean,0, popmean, dnorm(popmean, mean=popmean, sd=popsd/sqrt(n)), lty=2, col='red') text(popmean, dnorm(popmean, mean=popmean, sd=popsd/sqrt(n))/2, expression(mu), pos=2, col='red', cex=2) segments(xbar, -10, xbar, dnorm(xbar, mean=popmean, sd=popsd/sqrt(n)), col='blue') xpos <- ifelse(xbar < popmean, 4, 2) ppos <- ifelse(xbar < popmean, 2, 4) pvalue <- ifelse(xbar < popmean, pnorm(xbar, mean=popmean, sd=popsd/sqrt(n)), 1-pnorm(xbar, mean=popmean, sd=popsd/sqrt(n))) pvalue <- round(pvalue, 5) text(xbar, 0, expression(bar(x)), pos=xpos, col='darkblue', cex=1) text(xbar, 0, paste('p = ', pvalue), pos=ppos, col='darkblue', cex=1) list(pvalue = pvalue) } # Lecture 8: t-tests when population sd is unknown # A plot comparing the t-distribution # to the Normal distribution # nsd is just the `width' of the plot plot.tdist <- function(n = 10, nsd = 4) { write (file='', "Use add.critical.points() to add a critical point") # the no. of degrees of freedom df <<- n - 1 x <- seq(-nsd, nsd, len=10000) # plot normal curve plot(x, dnorm(x), typ='l', lty='dashed', col='blue', xlab='Normal variate (blue); t variate (red)', ylab='density') # plot t-dist points (x, dt(x, df), type = 'l', col = 'red') # add title title (main = paste('Plot of Normal and t-distributions for sample size', n)) } # add critical points to the above plot add.critical.points <- function(p = 0.95, twosided = TRUE) { # critical points: z <- ifelse(twosided, abs( qnorm( (1-p)/2 ) ), abs( qnorm( 1-p ) )) t <- ifelse(twosided, abs( qt( (1-p)/2, df ) ), abs( qt( 1-p, df ) )) # add lines if (twosided == T) { segments (-z, -10, -z, dnorm(z), col='blue') segments (-t, -10, -t, dt(t, df), col='red') } segments (z, -10, z, dnorm(z), col='blue') segments (t, -10, t, dt(t, df), col='red') # add labels if (twosided == T) { text (-z, 0, expression(-z^{"*"}), pos=4, col='blue') text (-t, 0, expression(-t^{"*"}), pos=2, col='red') } text (z, 0, expression(z^{"*"}), pos=2, col='blue') text (t, 0, expression(t^{"*"}), pos=4, col='red') list(z = z, t = t) } # Lecture 9-10 # data used in lectures read.engine.data <- function() c(224.120, 224.001, 224.017, 223.982, 223.989, 223.961, 223.960, 224.089, 223.987, 223.976, 223.902, 223.980, 224.098, 224.057, 223.913, 223.999) read.tuition.data <- function() { x1 <<- c(24,43,58,71,43,49,61,44,67) x2 <<- c(46,10,17,60,53,42,37) } read.spanish.data <- function() { x <<- c(30,28,31,26,20,30,34,15,28,20,30,29,31,29,34,20,26,25,31,29) y <<- c(29,30,32,30,16,25,31,18,33,25,32,28,34,32,32,27,28,29,32,32) } # Some commands used in the lecture # First read `spanish' data in using `read.spanish.data()' # Then try out the following # (R commands are preceded by `>' - remove the comments) #> t.test(y,x, alt="greater", paired=T) # # Paired t-test #data: y and x #t = 2.0244, df = 19, p-value = 0.02861 #alternative hypothesis: true difference in means is greater than 0 #95 percent confidence interval: # 0.2114938 Inf #sample estimates: #mean of the differences # 1.45 # plot data #> plot(x,y) # Fit regression (linear) model #> xy.lm <- lm(y ~ x) # Add fitted line #> abline(reg=xy.lm, col='blue') # identify some `unusual' values # (click mouse near values of interest) #> identify(x,y, col='red') #[1] 5 6 # To get regression output use: #> summary(xy.lm) #Call: #lm(formula = y ~ x) # #Residuals: # Min 1Q Median 3Q Max #-7.3378 -1.7259 0.6189 1.9896 3.7310 # #Coefficients: # Estimate Std. Error t value Pr(>|t|) #(Intercept) 8.5100 3.7977 2.241 0.0379 * #x 0.7414 0.1369 5.415 3.82e-05 *** #--- #Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 # #Residual standard error: 3.006 on 18 degrees of freedom #Multiple R-Squared: 0.6196, Adjusted R-squared: 0.5985 #F-statistic: 29.32 on 1 and 18 DF, p-value: 3.815e-05 # 95% confidence interval for slope #> ci <- c( 0.7414 + qt(0.025, df=18) * 0.1369, # 0.7414 - qt(0.025, df=18) * 0.1369) #> ci #[1] 0.4537838 1.0290162 # the CI for the slope does not contain zero # therefore there is statistical evidence that # the slope is positive and a linear relationship # exists between pre- and post-summer school test scores # Plot t dist with test statistic and 5% critical point # (shows where the test value lies in the distribution) plot.t <- function(t, df = 10, onesided = T, nsd = 5) { seg.len <- 50 seg.a <- a.seg <- ifelse (onesided, qt(.95, df), qt(.975, df)) b.seg <- nsd oldpar <- par(no.readonly = T) par (yaxt = "n", bty = "n", xaxt = "n", lwd=1) x <- seq(-nsd,nsd, len=10000) y <- dt(x, df=df) y.lim <- c(-0.3*max(y), max(y)) par (yaxt = "n", bty = "n", xaxt = "n", lwd=2) plot(x, y, typ='l', xlab="", ylab="", ylim = y.lim) par (yaxt = "n", bty = "n", xaxt = "n", lwd=1) abline(h = 0) segments(a.seg, 0, a.seg, dt(a.seg, df=df)) x.seg <- seq(a.seg, b.seg, len=seg.len) for (i in 1:seg.len) segments(x.seg[i], 0, x.seg[i], dt(x.seg[i], df), lty = 'dotted', col='red') text(a.seg, 0, "t*", pos = 1, cex = 2, col='red') text(abs(t), 0, "t", pos = 1, cex = 2, col='darkblue') par (yaxt = "n", bty = "n", xaxt = "n", lwd=2) points(abs(t), 0, cex = 1.5, pch = 4, col='darkblue') par (yaxt = "n", bty = "n", xaxt = "n", lwd=1) { if (onesided) { maintitle <- "5% Critical Point t* for One-Sided t-Test and Test Statistic t" plabel <- "Probability p = 0.05" } else { maintitle <- "5% Critical Point t* for Two-Sided t-Test and Test Statistic t" plabel <- "Probability p = 0.025" } } text(1.9*seg.a, dt(seg.a, df=df), plabel, pos = 3, cex = 1.2, col='red') title (main = maintitle, cex = 1.2) segments(1.5*seg.a, dt(seg.a, df=df), 1.2*seg.a, 0.5*dt(1.2*seg.a,df=df)) par(oldpar) if (onesided) { t.pvalue <- 1 - pt( abs(t), df ) t.star <- qt(.95, df) } else { t.pvalue <- 2*(1 - pt( abs(t), df )) t.star <- qt(.975, df) } list( t.star = t.star, t.ratio = t, pvalue = t.pvalue) } # Lecture 11 # Plot Normal dist with z-test statistic and 5% critical point # for testing proportions # p.0 is hypothesied value # p.sample is sample estimate # n is sample size plot.z <- function(p.sample, p.0, n, onesided = F, nsd = 5) { z <- abs ( p.sample - p.0 ) / sqrt ( p.0 * (1 - p.0) / n ) seg.len <- 50 seg.a <- a.seg <- ifelse (onesided, qnorm(.95), qnorm(.975)) b.seg <- nsd oldpar <- par(no.readonly = T) par (yaxt = "n", bty = "n", xaxt = "n", lwd=1) x <- seq(-nsd,nsd, len=10000) y <- dnorm(x) y.lim <- c(-0.3*max(y), max(y)) par (yaxt = "n", bty = "n", xaxt = "n", lwd=2) plot(x, y, typ='l', xlab="", ylab="", ylim = y.lim) par (yaxt = "n", bty = "n", xaxt = "n", lwd=1) abline(h = 0) segments(a.seg, 0, a.seg, dnorm(a.seg)) x.seg <- seq(a.seg, b.seg, len=seg.len) for (i in 1:seg.len) segments(x.seg[i], 0, x.seg[i], dnorm(x.seg[i]), lty = 'dotted', col='red') text(a.seg, 0, "z*", pos = 1, cex = 2, col='red') text(abs(z), 0, "z", pos = 1, cex = 2, col='darkblue') par (yaxt = "n", bty = "n", xaxt = "n", lwd=2) points(abs(z), 0, cex = 1.5, pch = 4, col='darkblue') par (yaxt = "n", bty = "n", xaxt = "n", lwd=1) { if (onesided) { maintitle <- "5% Critical Point z* for one-sided z-test for proportion" plabel <- "Probability = 0.05" } else { maintitle <- "5% Critical Point z* for two-sided z-test for proportion" plabel <- "Probability = 0.025" } } text(1.9*seg.a, dnorm(seg.a), plabel, pos = 3, cex = 1.2, col='red') title (main = maintitle, cex = 1.2) segments(1.5*seg.a, dnorm(seg.a), 1.2*seg.a, 0.5*dnorm(1.2*seg.a)) par(oldpar) if (onesided) { z.pvalue <- 1 - pnorm( abs(z)) z.star <- qnorm(.95) } else { z.pvalue <- 2*(1 - pnorm( abs(z))) z.star <- qnorm(.975) } list( z.star = z.star, z.stat = z, pvalue = z.pvalue) } # Plot Normal dist with z-test statistic and 5% critical point # for testing difference in two proportions plot.z.diff <- function(p1, p2, n1, n2, onesided = F, nsd = 5) { z <- abs ( p1 - p2 ) / sqrt ( p1*(1-p1)/n1 + p2*(1-p2)/n2 ) seg.len <- 50 seg.a <- a.seg <- ifelse (onesided, qnorm(.95), qnorm(.975)) b.seg <- nsd oldpar <- par(no.readonly = T) par (yaxt = "n", bty = "n", xaxt = "n", lwd=1) x <- seq(-nsd,nsd, len=10000) y <- dnorm(x) y.lim <- c(-0.3*max(y), max(y)) par (yaxt = "n", bty = "n", xaxt = "n", lwd=2) plot(x, y, typ='l', xlab="", ylab="", ylim = y.lim) par (yaxt = "n", bty = "n", xaxt = "n", lwd=1) abline(h = 0) segments(a.seg, 0, a.seg, dnorm(a.seg)) x.seg <- seq(a.seg, b.seg, len=seg.len) for (i in 1:seg.len) segments(x.seg[i], 0, x.seg[i], dnorm(x.seg[i]), lty = 'dotted', col='red') text(a.seg, 0, "z*", pos = 1, cex = 2, col='red') text(abs(z), 0, "z", pos = 1, cex = 2, col='darkblue') par (yaxt = "n", bty = "n", xaxt = "n", lwd=2) points(abs(z), 0, cex = 1.5, pch = 4, col='darkblue') par (yaxt = "n", bty = "n", xaxt = "n", lwd=1) { if (onesided) { maintitle <- "5% Critical Point z* for one-sided z-test for difference in proportions" plabel <- "Probability = 0.05" } else { maintitle <- "5% Critical Point z* for two-sided z-test for difference in proportions" plabel <- "Probability = 0.025" } } text(1.9*seg.a, dnorm(seg.a), plabel, pos = 3, cex = 1.2, col='red') title (main = maintitle, cex = 1.2) segments(1.5*seg.a, dnorm(seg.a), 1.2*seg.a, 0.5*dnorm(1.2*seg.a)) par(oldpar) if (onesided) { z.pvalue <- 1 - pnorm( abs(z)) z.star <- qnorm(.95) } else { z.pvalue <- 2*(1 - pnorm( abs(z))) z.star <- qnorm(.975) } list( z.star = z.star, z.stat = z, pvalue = z.pvalue) } # The function below is used to show that # a Normal distribution N(p, sqrt(p(1-p)/n)) # approximates a sample proportion for large n # (under central limit theorem) prop.hist <- function(n = 100, p = 0.5, replicate = 1000) { # simulate B(n,p) RVs divided by n x <- rbinom(replicate, size=n, prob=p)/n # create x values for the plot xvalues <- seq(min(x), max(x), len=1000) #y.max <- max(dnorm(xvalues, dnorm(xvalues, mean=p, sd=sqrt(p*(1-p)/n)))) y.his <- hist(x, fr=F, plot=F) y.max <- 1.2*max(y.his$density) # plot `prob' histogram (fr = F gives `prob') hist(x, fr=F, col='lightblue', xlab='', main='', ylim=c(0,y.max)) title(main=expression(paste("Sample proportion ", hat(p) %->% "N(", p, " , ", sqrt(frac(p*(1-p), n)), ")", " as n " %->% infinity)), cex.main=1.5) title( sub=expression(hat(p)), cex.sub=1.5) # add Normal density curve points (xvalues, dnorm(xvalues, mean=p, sd=sqrt(p*(1-p)/n)), type='l', lty=2, col='darkblue') } # Lecture 12 # Chi-squared test and 5% critical point plot.chisq <- function(chisq = -1, df = 10) { seg.len <- 50 seg.a <- qchisq(.95, df) seg.b <- max(chisq, qchisq(.99999, df)) oldpar <- par(no.readonly = T) par (yaxt = "n", bty = "n", xaxt = "n", lwd=1) x <- seq(1, seg.b, len=10000) y <- dchisq(x, df=df) x <- seq(1e-5, seg.b, len=10000) y.lim <- c(-0.3*max(y), max(y)) par (yaxt = "n", bty = "n", xaxt = "n", lwd=2) plot(x, y, typ='l', xlab="", ylab="", ylim = y.lim) par (yaxt = "n", bty = "n", xaxt = "n", lwd=1) abline(h = 0) abline(v = 0) segments(seg.a, 0, seg.a, dchisq(seg.a, df=df)) x.seg <- seq(seg.a, seg.b, len=seg.len) for (i in 1:seg.len) segments(x.seg[i], 0, x.seg[i], dchisq(x.seg[i], df), lty = 'dotted', col='red') text(seg.a, 0, expression(paste((chi^2),"*")), pos = 1, cex = 2, col='red') if (chisq >= 0) text(chisq, 0, expression(chi^2), pos = 1, cex = 2, col='darkblue') par (yaxt = "n", bty = "n", xaxt = "n", lwd=2) if (chisq >= 0) points(chisq, 0, cex = 1.5, pch = 4, col='darkblue') par (yaxt = "n", bty = "n", xaxt = "n", lwd=1) maintitle <- expression(paste("Chi-Squared: 5% Critical Point (", chi^2, ")* and test statistic ", chi^2)) plabel <- "5%" text(1.1*seg.a, 1.1*dchisq(seg.a, df=df), plabel, pos = 3, cex = 1.2, col='red') title (main = maintitle, cex = 1.2) segments(1.05*seg.a, 0.25*dchisq(seg.a, df=df), 1.1*seg.a, dchisq(seg.a,df=df)) par(oldpar) chisq.pvalue <- 1 - pchisq( chisq, df ) chisq.star <- qchisq(.95, df) list( chisq.star = chisq.star, pvalue = chisq.pvalue) } # Lecture 13-14: Regression # Plot x-y values with fitted regression line and # confidence interval for slope # with added prediction interval plot.reg <- function(x, y, p = .95, x.future = NULL) { # save par settings oldpar <- par(no.readonly = T) # make some changes to graphics params: par(cex.main = 1.5, col.main='blue', cex.sub = 1.5, col.sub='blue', cex.lab=1.5) # load library for CI function library(MASS) # put data into data frame xy.dat <<- data.frame(x=x, y=y) # fit regression model xy.lm <<- lm (y ~ x, data=xy.dat) # get predicted value for x.future x.new <<- c(x.future,x) x.l <<- sort.list(x.new) x.pos <- sort.list(x.l != 1)[1] x.new <<- data.frame(x = sort(x.new)) if (!is.null(x.future)) { xy.pred <<- as.data.frame(predict(xy.lm, newdata = x.new, level=p, interval="prediction", type="response")) ylim <- range( c(xy.dat$y, xy.pred$upr, xy.pred$lwr) ) xlim <- range( c(x.future, xy.dat$x) ) } else { xlim <- range( xy.dat$x ) ylim <- range( xy.dat$y ) } # plot data plot(x, y, xlab='x', ylab='y', xlim = xlim, ylim = ylim) # add fitted line to plot abline (reg = xy.lm, col = 'blue') # add predicted value (if required) if (!is.null(x.future)) { points(x.future, xy.pred$fit[x.pos], pch=8, col='red') segments(x.future, xy.pred$fit[x.pos], x.future, xy.pred$lwr[x.pos], col='red') segments(x.future, xy.pred$fit[x.pos], x.future, xy.pred$upr[x.pos], col='red') points(x.new$x, xy.pred$lwr, type='l', col='red', lty='dashed') points(x.new$x, xy.pred$upr, type='l', col='red', lty='dashed') } # extract coefficients a <- signif(coef(xy.lm)[1], 3) b <- signif(coef(xy.lm)[2], 3) # get CIs ci <- confint(xy.lm, level=p) # ci percent rounded ci.per <- 100*p # CI for slope ci.b <- ci[2,] # add title with fitted line # and CI for slope title(main = paste('Regression of y on x with fitted line: y = ', a, ' + ', b, 'x','\n', ci.per, '% CI for slope: (', signif(ci.b[1], 3), ', ', signif(ci.b[2], 3), ')', sep='')) # add subtitle with CI for slope if (!is.null(x.future)) { par(cex.main = 1.5, col.main='blue', cex.sub = 1.5, col.sub='red', cex.lab=1.5) title(sub = paste("Predicted y = ", signif(xy.pred$fit[x.pos], 3), "; ", ci.per, '% CI: (', signif(xy.pred$lwr[x.pos], 3), ', ', signif(xy.pred$upr[x.pos], 3), ')', sep='')) } # return summary information if (!is.null(x.future)) { write(file='', paste("Predicted value = ", signif(xy.pred$fit[x.pos], 5), "\n", sep='')) write(file='', paste(100*p, "% CI lower bound = ", signif(xy.pred$lwr[x.pos], 5), "\n", sep='')) write(file='', paste(100*p, "% CI upper bound = ", signif(xy.pred$upr[x.pos], 5), "\n", sep='')) } summary(xy.lm) } read.manatees <- function() { x <<- c(447,460,481,498,513,512,526,559,585,614,645,675,711,719) y <<- c( 13, 21, 24, 16, 24, 20, 15, 34, 33, 33, 39, 43, 50, 47) } read.hr <- function() { x <<- c( 94, 96, 95, 95, 94, 95, 94, 104, 104, 106, 108, 110, 113, 113, 118, 115, 121, 127, 131 ) y <<- c(.473,.753,.929,.939,.832,.983,1.049,1.178,1.176,1.292,1.403,1.499,1.529,1.599,1.749, 1.746,1.897,2.040,2.231) } ###### end of lecture demos #######