ILLUSTRATING THE COMPUTATIONS ON PAGES 31-34 IN NOTES USING KISHI DATA IN R. THE CAT COMMAND LIST THINGS WHICH CAN BE

A MIX OF TEXT AND VARIABLES

HERE IS THE SCRIPT

data<-read.table("f:/s505/kishi.dat") #no names

attach(data)

kcal<-V1; ni<-V2; niq<-V3; nbal<-V4 # rename the variables

#multiple commmands on line

#separated by ;

# run the regression of nbal on ni and save some quantities

regout<-lm(nbal ~ni)

sum<-summary(regout)

coeff<-coefficients(regout) #coeff[1] = b0 and coeff[2] = b1

b0=coeff[2]

b1 = coeff[2]

covb<-vcov(regout) #this gives the variance-covariance

#matrix of the estimated coefficients

s2b0<-covb[1,1]

s2b1<-covb[2,2]

sb0b1<-covb[1,2]

seb0<-sqrt(s2b0) # get standard errors; e.g. seb0=s{b0}

seb1<-sqrt(s2b1)

sse<-deviance(regout) # this computes the sum of squared residuals

dfe<-df.residual(regout)

n<-dfe+2 # gets sample size. If missing values only cases with

# both x and y get used so this n accounts for that.

mse<-sse/dfe

cat(b0, b1, s2b0, s2b1,sb0b1, mse,"\n") # list things

# COMPUTING PREDICTION INTERVALS AND CONFIDENCE INTERVALS FOR THE

# INTAKE-BALANCE DATA DIRECTLY. THIS SHOWS HOW TO CALCULATE WITHIN THE

# DATA STEP IN R AND TAKE ADVANTAGE OF SOME BUILT IN FEATURES OF IT

# GET 95% CONFIDENCE INTERVAL AND PREDICTION INTERVAL AT INTAKE = 31.6

# WHICH IS THE INTAKE FOR FIRST OBSERVATION IN THE SAMPLE.

alpha<-.05

Xh<-31.6

tval = qt(1- (alpha/2),dfe) # t value with alpha/2 to the right

yhat = b0 + b1*Xh

seyhat = sqrt(s2b0 + (Xh**2)*s2b1 + 2*Xh*sb0b1)

l95m= yhat - tval*seyhat

u95m = yhat + tval*seyhat

sepred = sqrt(seyhat**2 + mse)

l95i = yhat - tval*sepred

u95i = yhat + tval*sepred

cat("intervals at X = ", Xh,"\n")

cat("confidence interval",yhat,seyhat,l95m,u95m,"\n")

cat("prediction interval",yhat,sepred,l95i,u95i,"\n")

cat(" Simultaneous 95% ci's on beta0 and beta 1","\n")

tval2 = qt(1-(alpha/4),dfe)

lbeta0s=b0-(tval2*seb0)

ubeta0s=b0+(tval2*seb0)

lbeta1s=b1-(tval2*seb1)

ubeta1s=b1+(tval2*seb1)

cat(lbeta0s,ubeta0s,lbeta1s,ubeta1s,"\n")

cat("SIMULTANEOUS CONFIDENCE INTERVALS ON THE MEAN/EXPECTED VALUE

FOR BALANCE AT intake = 30,60 and 80","\n")

cat("using Bonferroni","\n")

tbon = qt(1-(alpha/2*3),dfe)

yhat30 = b0 + b1*30

se30 = sqrt(s2b0 + (30**2)*s2b1 + 2*30*sb0b1)

l30m = yhat30 - tbon*se30

u30m = yhat30 + tbon*se30

yhat60 = b0 + b1*60

se60 = sqrt(s2b0 + (60**2)*s2b1 + 2*60*sb0b1)

l60m = yhat60 - tbon*se60

u60m = yhat60 + tbon*se60

yhat80 = b0 + b1*80

se80 = sqrt(s2b0 + (80**2)*s2b1 + 2*80*sb0b1)

l80m = yhat80 - tbon*se80

u80m = yhat80 + tbon*se80

cat(l30m, u30m, l60m, u60m, l80m, u80m,"\n")

cat("USING SCHEFFE","\n")

f= qf(1-alpha,2,dfe)

mult = sqrt(2*f)

l30ms = yhat30 - mult*se30

u30ms = yhat30 + mult*se30

l60ms = yhat60 - mult*se60

u60ms = yhat60 + mult*se60

l80ms = yhat80 - mult*se80

u80ms = yhat80 + mult*se80

cat(l30ms, u30ms, l60ms, u60ms, l80ms, u80ms,"\n")

WHAT COMES OUT OF CONSOLE WINDOW IF RUN INTERACTIVELY

> data<-read.table("f:/s505/kishi.dat") #no names

> attach(data)

> kcal<-V1; ni<-V2; niq<-V3; nbal<-V4 # rename the variables

> #multiple commmands on line separated by ;

> # run the regression of nbal on ni and save some quantities

> regout<-lm(nbal ~ni)

> sum<-summary(regout)

> coeff<-coefficients(regout) #coeff[1] = b0 and coeff[2] = b1

> b0=coeff[2]

> b1 = coeff[2]

> covb<-vcov(regout) #this gives the variance-covariance

#matrix of the estimated coefficients

> s2b0<-covb[1,1]

> s2b1<-covb[2,2]

> sb0b1<-covb[1,2]

> seb0<-sqrt(s2b0) # get standard errors; e.g. seb0=s{b0}

> seb1<-sqrt(s2b1)

> sse<-deviance(regout) # this computes the sum of squared residuals

> dfe<-df.residual(regout)

> n<-dfe+2 # gets sample size. If missing values only cases with

> # both x and y get used so this n accounts for that.

> mse<-sse/dfe

> cat(b0, b1, s2b0, s2b1,sb0b1, mse,"\n") # list things

0.4161642 0.4161642 3.293353 0.0008880204 -0.05106117 11.0774

> # COMPUTING PREDICTION INTERVALS AND CONFIDENCE INTERVALS FOR THE

> # INTAKE-BALANCE DATA DIRECTLY. THIS SHOWS HOW TO CALCULATE WITHIN THE

> # DATA STEP IN R AND TAKE ADVANTAGE OF SOME BUILT IN FEATURES OF IT

>

> # GET 95% CONFIDENCE INTERVAL AND PREDICTION INTERVAL AT INTAKE = 31.6

> # WHICH IS THE INTAKE FOR FIRST OBSERVATION IN THE SAMPLE.

> alpha<-.05

> Xh<-31.6

> tval = qt(1- (alpha/2),dfe) # t value with alpha/2 to the right

> yhat = b0 + b1*Xh

> seyhat = sqrt(s2b0 + (Xh**2)*s2b1 + 2*Xh*sb0b1)

> l95m= yhat - tval*seyhat

> u95m = yhat + tval*seyhat

> sepred = sqrt(seyhat**2 + mse)

> l95i = yhat - tval*sepred

> u95i = yhat + tval*sepred

> cat("intervals at X = ", Xh,"\n")

intervals at X = 31.6

> cat("confidence interval",yhat,seyhat,l95m,u95m,"\n")

confidence interval 13.56695 0.9762317 11.57033 15.56357

> cat("prediction interval",yhat,sepred,l95i,u95i,"\n")

prediction interval 13.56695 3.46849 6.473092 20.66081

>

> cat(" Simultaneous 95% ci's on beta0 and beta 1","\n")

Simultaneous 95% ci's on beta0 and beta 1

> tval2 = qt(1-(alpha/4),dfe)

> lbeta0s=b0-(tval2*seb0)

> ubeta0s=b0+(tval2*seb0)

> lbeta1s=b1-(tval2*seb1)

> ubeta1s=b1+(tval2*seb1)

> cat(lbeta0s,ubeta0s,lbeta1s,ubeta1s,"\n")

-3.873648 4.705977 0.3457223 0.486606

>

> cat("SIMULTANEOUS CONFIDENCE INTERVALS ON THE MEAN/EXPECTED VALUE

+ FOR BALANCE AT intake = 30,60 and 80","\n")

SIMULTANEOUS CONFIDENCE INTERVALS ON THE MEAN/EXPECTED VALUE

FOR BALANCE AT intake = 30,60 and 80

>

> cat("using Bonferroni","\n")

using Bonferroni

>

> tbon = qt(1-(alpha/2*3),dfe)

> yhat30 = b0 + b1*30

> se30 = sqrt(s2b0 + (30**2)*s2b1 + 2*30*sb0b1)

> l30m = yhat30 - tbon*se30

> u30m = yhat30 + tbon*se30

> yhat60 = b0 + b1*60

> se60 = sqrt(s2b0 + (60**2)*s2b1 + 2*60*sb0b1)

> l60m = yhat60 - tbon*se60

> u60m = yhat60 + tbon*se60

> yhat80 = b0 + b1*80

> se80 = sqrt(s2b0 + (80**2)*s2b1 + 2*80*sb0b1)

> l80m = yhat80 - tbon*se80

> u80m = yhat80 + tbon*se80

> cat(l30m, u30m, l60m, u60m, l80m, u80m,"\n")

11.40117 14.40101 24.49524 26.27679 32.38102 35.03758

>

> cat("USING SCHEFFE","\n")

USING SCHEFFE

>

> f= qf(1-alpha,2,dfe)

> mult = sqrt(2*f)

> l30ms = yhat30 - mult*se30

> u30ms = yhat30 + mult*se30

> l60ms = yhat60 - mult*se60

> u60ms = yhat60 + mult*se60

> l80ms = yhat80 - mult*se80

> u80ms = yhat80 + mult*se80

> cat(l30ms, u30ms, l60ms, u60ms, l80ms, u80ms,"\n")

10.28429 15.51789 23.83195 26.94008 31.39194 36.02665

IF THE TEXT FILE WITH THE COMMANDS IS IN f:/s505/r/kishi.R and YOU USE

source("f:/s505/r/kishi.R")

HERE IS WHAT YOU GET IN THE CONSOLE. THERE ARE WAYS TO

LABEL THE OUTPUT WITH THE VARIABLE NAMES THAT I’M NOT SHOWING YOU NOW.

------

0.4161642 0.4161642 3.293353 0.0008880204 -0.05106117 11.0774

intervals at X = 31.6

confidence interval 13.56695 0.9762317 11.57033 15.56357

prediction interval 13.56695 3.46849 6.473092 20.66081

Simultaneous 95% ci's on beta0 and beta 1

-3.873648 4.705977 0.3457223 0.486606

SIMULTANEOUS CONFIDENCE INTERVALS ON THE MEAN/EXPECTED VALUE

FOR BALANCE AT intake = 30,60 and 80

using Bonferroni

11.40117 14.40101 24.49524 26.27679 32.38102 35.03758

USING SCHEFFE

10.28429 15.51789 23.83195 26.94008 31.39194 36.02665