ILLUSTRATING THE COMPUTATIONS ON PAGES 31-34 IN NOTES USING KISHI DATA IN R. THE CAT COMMAND LIST THINGS WHICH CAN BE
A MIX OF TEXT AND VARIABLES
HERE IS THE SCRIPT
data<-read.table("f:/s505/kishi.dat") #no names
attach(data)
kcal<-V1; ni<-V2; niq<-V3; nbal<-V4 # rename the variables
#multiple commmands on line
#separated by ;
# run the regression of nbal on ni and save some quantities
regout<-lm(nbal ~ni)
sum<-summary(regout)
coeff<-coefficients(regout) #coeff[1] = b0 and coeff[2] = b1
b0=coeff[2]
b1 = coeff[2]
covb<-vcov(regout) #this gives the variance-covariance
#matrix of the estimated coefficients
s2b0<-covb[1,1]
s2b1<-covb[2,2]
sb0b1<-covb[1,2]
seb0<-sqrt(s2b0) # get standard errors; e.g. seb0=s{b0}
seb1<-sqrt(s2b1)
sse<-deviance(regout) # this computes the sum of squared residuals
dfe<-df.residual(regout)
n<-dfe+2 # gets sample size. If missing values only cases with
# both x and y get used so this n accounts for that.
mse<-sse/dfe
cat(b0, b1, s2b0, s2b1,sb0b1, mse,"\n") # list things
# COMPUTING PREDICTION INTERVALS AND CONFIDENCE INTERVALS FOR THE
# INTAKE-BALANCE DATA DIRECTLY. THIS SHOWS HOW TO CALCULATE WITHIN THE
# DATA STEP IN R AND TAKE ADVANTAGE OF SOME BUILT IN FEATURES OF IT
# GET 95% CONFIDENCE INTERVAL AND PREDICTION INTERVAL AT INTAKE = 31.6
# WHICH IS THE INTAKE FOR FIRST OBSERVATION IN THE SAMPLE.
alpha<-.05
Xh<-31.6
tval = qt(1- (alpha/2),dfe) # t value with alpha/2 to the right
yhat = b0 + b1*Xh
seyhat = sqrt(s2b0 + (Xh**2)*s2b1 + 2*Xh*sb0b1)
l95m= yhat - tval*seyhat
u95m = yhat + tval*seyhat
sepred = sqrt(seyhat**2 + mse)
l95i = yhat - tval*sepred
u95i = yhat + tval*sepred
cat("intervals at X = ", Xh,"\n")
cat("confidence interval",yhat,seyhat,l95m,u95m,"\n")
cat("prediction interval",yhat,sepred,l95i,u95i,"\n")
cat(" Simultaneous 95% ci's on beta0 and beta 1","\n")
tval2 = qt(1-(alpha/4),dfe)
lbeta0s=b0-(tval2*seb0)
ubeta0s=b0+(tval2*seb0)
lbeta1s=b1-(tval2*seb1)
ubeta1s=b1+(tval2*seb1)
cat(lbeta0s,ubeta0s,lbeta1s,ubeta1s,"\n")
cat("SIMULTANEOUS CONFIDENCE INTERVALS ON THE MEAN/EXPECTED VALUE
FOR BALANCE AT intake = 30,60 and 80","\n")
cat("using Bonferroni","\n")
tbon = qt(1-(alpha/2*3),dfe)
yhat30 = b0 + b1*30
se30 = sqrt(s2b0 + (30**2)*s2b1 + 2*30*sb0b1)
l30m = yhat30 - tbon*se30
u30m = yhat30 + tbon*se30
yhat60 = b0 + b1*60
se60 = sqrt(s2b0 + (60**2)*s2b1 + 2*60*sb0b1)
l60m = yhat60 - tbon*se60
u60m = yhat60 + tbon*se60
yhat80 = b0 + b1*80
se80 = sqrt(s2b0 + (80**2)*s2b1 + 2*80*sb0b1)
l80m = yhat80 - tbon*se80
u80m = yhat80 + tbon*se80
cat(l30m, u30m, l60m, u60m, l80m, u80m,"\n")
cat("USING SCHEFFE","\n")
f= qf(1-alpha,2,dfe)
mult = sqrt(2*f)
l30ms = yhat30 - mult*se30
u30ms = yhat30 + mult*se30
l60ms = yhat60 - mult*se60
u60ms = yhat60 + mult*se60
l80ms = yhat80 - mult*se80
u80ms = yhat80 + mult*se80
cat(l30ms, u30ms, l60ms, u60ms, l80ms, u80ms,"\n")
WHAT COMES OUT OF CONSOLE WINDOW IF RUN INTERACTIVELY
> data<-read.table("f:/s505/kishi.dat") #no names
> attach(data)
> kcal<-V1; ni<-V2; niq<-V3; nbal<-V4 # rename the variables
> #multiple commmands on line separated by ;
> # run the regression of nbal on ni and save some quantities
> regout<-lm(nbal ~ni)
> sum<-summary(regout)
> coeff<-coefficients(regout) #coeff[1] = b0 and coeff[2] = b1
> b0=coeff[2]
> b1 = coeff[2]
> covb<-vcov(regout) #this gives the variance-covariance
#matrix of the estimated coefficients
> s2b0<-covb[1,1]
> s2b1<-covb[2,2]
> sb0b1<-covb[1,2]
> seb0<-sqrt(s2b0) # get standard errors; e.g. seb0=s{b0}
> seb1<-sqrt(s2b1)
> sse<-deviance(regout) # this computes the sum of squared residuals
> dfe<-df.residual(regout)
> n<-dfe+2 # gets sample size. If missing values only cases with
> # both x and y get used so this n accounts for that.
> mse<-sse/dfe
> cat(b0, b1, s2b0, s2b1,sb0b1, mse,"\n") # list things
0.4161642 0.4161642 3.293353 0.0008880204 -0.05106117 11.0774
> # COMPUTING PREDICTION INTERVALS AND CONFIDENCE INTERVALS FOR THE
> # INTAKE-BALANCE DATA DIRECTLY. THIS SHOWS HOW TO CALCULATE WITHIN THE
> # DATA STEP IN R AND TAKE ADVANTAGE OF SOME BUILT IN FEATURES OF IT
>
> # GET 95% CONFIDENCE INTERVAL AND PREDICTION INTERVAL AT INTAKE = 31.6
> # WHICH IS THE INTAKE FOR FIRST OBSERVATION IN THE SAMPLE.
> alpha<-.05
> Xh<-31.6
> tval = qt(1- (alpha/2),dfe) # t value with alpha/2 to the right
> yhat = b0 + b1*Xh
> seyhat = sqrt(s2b0 + (Xh**2)*s2b1 + 2*Xh*sb0b1)
> l95m= yhat - tval*seyhat
> u95m = yhat + tval*seyhat
> sepred = sqrt(seyhat**2 + mse)
> l95i = yhat - tval*sepred
> u95i = yhat + tval*sepred
> cat("intervals at X = ", Xh,"\n")
intervals at X = 31.6
> cat("confidence interval",yhat,seyhat,l95m,u95m,"\n")
confidence interval 13.56695 0.9762317 11.57033 15.56357
> cat("prediction interval",yhat,sepred,l95i,u95i,"\n")
prediction interval 13.56695 3.46849 6.473092 20.66081
>
> cat(" Simultaneous 95% ci's on beta0 and beta 1","\n")
Simultaneous 95% ci's on beta0 and beta 1
> tval2 = qt(1-(alpha/4),dfe)
> lbeta0s=b0-(tval2*seb0)
> ubeta0s=b0+(tval2*seb0)
> lbeta1s=b1-(tval2*seb1)
> ubeta1s=b1+(tval2*seb1)
> cat(lbeta0s,ubeta0s,lbeta1s,ubeta1s,"\n")
-3.873648 4.705977 0.3457223 0.486606
>
> cat("SIMULTANEOUS CONFIDENCE INTERVALS ON THE MEAN/EXPECTED VALUE
+ FOR BALANCE AT intake = 30,60 and 80","\n")
SIMULTANEOUS CONFIDENCE INTERVALS ON THE MEAN/EXPECTED VALUE
FOR BALANCE AT intake = 30,60 and 80
>
> cat("using Bonferroni","\n")
using Bonferroni
>
> tbon = qt(1-(alpha/2*3),dfe)
> yhat30 = b0 + b1*30
> se30 = sqrt(s2b0 + (30**2)*s2b1 + 2*30*sb0b1)
> l30m = yhat30 - tbon*se30
> u30m = yhat30 + tbon*se30
> yhat60 = b0 + b1*60
> se60 = sqrt(s2b0 + (60**2)*s2b1 + 2*60*sb0b1)
> l60m = yhat60 - tbon*se60
> u60m = yhat60 + tbon*se60
> yhat80 = b0 + b1*80
> se80 = sqrt(s2b0 + (80**2)*s2b1 + 2*80*sb0b1)
> l80m = yhat80 - tbon*se80
> u80m = yhat80 + tbon*se80
> cat(l30m, u30m, l60m, u60m, l80m, u80m,"\n")
11.40117 14.40101 24.49524 26.27679 32.38102 35.03758
>
> cat("USING SCHEFFE","\n")
USING SCHEFFE
>
> f= qf(1-alpha,2,dfe)
> mult = sqrt(2*f)
> l30ms = yhat30 - mult*se30
> u30ms = yhat30 + mult*se30
> l60ms = yhat60 - mult*se60
> u60ms = yhat60 + mult*se60
> l80ms = yhat80 - mult*se80
> u80ms = yhat80 + mult*se80
> cat(l30ms, u30ms, l60ms, u60ms, l80ms, u80ms,"\n")
10.28429 15.51789 23.83195 26.94008 31.39194 36.02665
IF THE TEXT FILE WITH THE COMMANDS IS IN f:/s505/r/kishi.R and YOU USE
source("f:/s505/r/kishi.R")
HERE IS WHAT YOU GET IN THE CONSOLE. THERE ARE WAYS TO
LABEL THE OUTPUT WITH THE VARIABLE NAMES THAT I’M NOT SHOWING YOU NOW.
------
0.4161642 0.4161642 3.293353 0.0008880204 -0.05106117 11.0774
intervals at X = 31.6
confidence interval 13.56695 0.9762317 11.57033 15.56357
prediction interval 13.56695 3.46849 6.473092 20.66081
Simultaneous 95% ci's on beta0 and beta 1
-3.873648 4.705977 0.3457223 0.486606
SIMULTANEOUS CONFIDENCE INTERVALS ON THE MEAN/EXPECTED VALUE
FOR BALANCE AT intake = 30,60 and 80
using Bonferroni
11.40117 14.40101 24.49524 26.27679 32.38102 35.03758
USING SCHEFFE
10.28429 15.51789 23.83195 26.94008 31.39194 36.02665