D.G. Bonett (3/2016)
R Functions
Part I. Sample Size Requirements
Function 1: Sample size to estimate one mean
sizeCImean1 <- function(alpha,var, w) {
# Computes sample size required to estimate a mean with desired precision
# Arguments:
# alpha: alpha level for 1-alpha confidence
# var: planning value of response variable variance
# w: desired confidence interval width
# Returns:
# required sample size
z <- qnorm(1 - alpha/2)
n <- ceiling(4*var*(z/w)^2)
return(n)
}
Example:
sizeCImean1(.05, 264.39, 10)
[1] 41
Function 2: Sample size to estimate a mean difference (2-group design)
sizeCImean2 <- function(alpha,var, w) {
# Computes sample size required to estimate a difference in means with
# desired precision in a 2-group design
# Arguments:
# alpha: alpha level for 1-alpha confidence
# var: planning value of average within-group response variable variance
# w: desired confidence interval width
# Returns:
# required sample size
z <- qnorm(1 - alpha/2)
n <- ceiling(8*var*(z/w)^2)
return(n)
}
Example:
sizeCImean2(.05,37.1,5)
[1] 47
Function 3: Sample size to estimate a mean difference (within-subjects design)
sizeCImeanWS <- function(alpha,var, corr, w) {
# Computes sample size required to estimate a difference in means with
# desired precision in a within-subjects design
# Arguments:
# alpha: alpha level for 1-alpha confidence
# var: planning value of average within-group response variable variance
# corr: planning value of correlation between response variables
# w: desired confidence interval width
# Returns:
# required sample size
z <- qnorm(1 - alpha/2)
n <- ceiling(8*(1-corr)*var*(z/w)^2)
return(n)
}
Example:
sizeCImeanWS(.05, 35.15, .867, 2.5)
[1] 23
Function 4: Sample size to estimate a linear contrast of means (between-subjects design)
sizeCImeanBS <- function(alpha, var, w, h) {
# Computes sample size required to estimate a linear contrast of means in a
# between-subjects design with desired precision
# Arguments:
# alpha: alpha level for 1-alpha confidence
# var: planning value of average within-group variance
# w: desired confidence interval width
# h: vector of contrast coefficients (zeros can be omitted)
# Returns:
# required sample size per group
z <- qnorm(1 - alpha/2)
k <- length(h)
n <- ceiling(4*var*(t(h)%*%h)*(z/w)^2)
return(n)
}
Example:
h =c(.5, .5, -1)
sizeCImeanBS(.05, 5.62, 2.0, h)
[,1]
[1,] 33
Function 5: Sample size to estimate a linear contrast of means (within-subjects design)
sizeCImeanWS <- function(alpha, var, corr, w, h) {
# Computes sample size required to estimate a linear contrast of means in a
# within-subjects design with desired precision
# Arguments:
# alpha: alpha level for 1-alpha confidence
# var: planning value of largest variance
# corr: planning value of smallest correlation
# w: desired confidence interval width
# h: vector of contrast coefficients (zeros can be omitted)
# Returns:
# required sample size per group
z <- qnorm(1 - alpha/2)
k <- length(h)
n <- ceiling(4*(1-corr)*var*(t(h)%*%h)*(z/w)^2)
return(n)
}
Example:
h =c(.5, .5, -.5, -.5)
sizeCImeanWS(.05, 275, .85, 4.0, h)
[,1]
[1,] 40
Function 6: Sample size to estimate aPearson correlation
sizeCIcorr <- function(alpha, corr, w) {
# Computes sample size required to estimate a correlation with desired precision
# Arguments:
# alpha: alpha level for 1-alpha confidence
# corr: planning value of correlation
# w: desired confidence interval width
# Returns:
# required sample size
z <- qnorm(1 - alpha/2)
n1 <- ceiling(4*(1 - corr^2)^2*(z/w)^2 + 3)
zr <- log((1 + corr)/(1 - corr))/2
se <- sqrt(1/(n1 - 3))
LL0 <- zr - z*se
UL0 <- zr + z*se
LL <- (exp(2*LL0) - 1)/(exp(2*LL0) + 1)
UL <- (exp(2*UL0) - 1)/(exp(2*UL0) + 1)
n <- ceiling((n1 - 3)*((UL - LL)/w)^2 + 3)
return(n)
}
Example:
sizeCIcorr(.05, .52, .2)
[1] 207
Function 7: Sample size to estimate a slope
sizeCIslope <- function(alpha, varresid, varx, w) {
# Computes sample size required to estimate slope with desired precision
# Arguments:
# alpha: alpha level for 1-alpha confidence
# varresid: planning value of within group variance
# varx: variance of fixed predictor variable
# w: desired confidence interval width
# Returns:
# required sample size
z <- qnorm(1 - alpha/2)
n <- ceiling(4*(varresid/varx)*(z/w)^2 + 1)
return(n)
}
Example:
sizeCIslope(.05, .92, 5.9, .25)
[1] 40
Function 8: Sample size to estimate a squared multiple correlation
sizeCImultcorr <- function(alpha, q, sqrcor, w) {
# Computes sample size required to estimate a squared multiple correlation
# with desired precision
# Arguments:
# alpha: alpha level for 1-alpha confidence
# q: number of predictor variables
# sqrcor: squared multiple correlation planning value
# w: desired confidence interval width
# Returns:
# required sample size
z <- qnorm(1 - alpha/2)
n <- ceiling(16*(sqrcor*(1-sqrcor)^2)*(z/w)^2 + q + 2)
return(n)
}
Example:
sizeCImultcorr(.05, 2, .3, .2)
[1] 230
Function 9: Sample size to estimate a proportion
sizeCIprop1 <- function(alpha,p, w) {
# Computes sample size required to estimate a proportion with desired precision
# Arguments:
# alpha: alpha level for 1-alpha confidence
# p: planning value of proportion
# w: desired confidence interval width
# Returns:
# required sample size
z <- qnorm(1 - alpha/2)
n <- ceiling(4*p*(1-p)*(z/w)^2)
return(n)
}
Example:
sizeCIprop1(.05, .2, .1)
[1] 246
Function 10: Sample size to estimate a proportion difference (2-group design)
sizeCIprop2 <- function(alpha,p1, p2, w) {
# Computes sample size per group required to estimate a difference
#of proportionsin 2-group design with desired precision
# Arguments:
# alpha: alpha level for 1-alpha confidence
# p1: planning value of proportion for group 1
# p2: planning value of proportion for group 2
# w: desired confidence interval width
# Returns:
# required sample size
z <- qnorm(1 - alpha/2)
n <- ceiling(4*(p1*(1-p1) + p2*(1-p2))*(z/w)^2)
return(n)
}
Example:
sizeCIprop2(.05, .2, .4, .2)
[1] 154
Function 11: Sample size to estimate a linear contrast of proportions (between-subjects design)
sizeCIpropBS <- function(alpha,p, h, w) {
# Computes sample size per group required to estimate a linear contrast
# of proportions in between-subjects design with desired precision
# Arguments:
# alpha: alpha level for 1-alpha confidence
# p: vector of proportion planning values
# h: vector of contrast coefficients
# w: desired confidence interval width
# Returns:
# required sample size
z <- qnorm(1 - alpha/2)
n <- ceiling(4*t(h)%*%diag(p*(1-p))%*%h)*(z/w)^2)
return(n)
}
Example:
p = c(.25, .30, .50, .50)
h = c(.5, .5, -.5, -.5)
sizeCIpropBS(.05, p, h, .2)
[,1]
[1,] 87
Function 12: Sample size to estimate a proportion difference (within-subjects design)
sizeCIpropWS <- function(alpha, p1, p2, corr, w) {
# Computes sample size required to estimate a difference of proportions
# in 2-level within-subjects design with desired precision
# Arguments:
# alpha: alpha level for 1-alpha confidence
# p1: planning value of proportion for group 1
# p2: planning value of proportion for group 2
# corr: planning value of correlation between measurements
# w: desired confidence interval width
# Returns:
# required sample size
z <- qnorm(1 - alpha/2)
n <- ceiling(4*(p1*(1-p1) + p2*(1-p2) - 2*corr*sqrt(p1*p2*(1-p1)*(1-p2)))*(z/w)^2)
return(n)
}
Example:
sizeCIpropWS(.05, .2, .3, .8, .1)
[1] 118
Part II: Confidence Intervals
Function 13: Confidence interval for a linear contrast of means (between-subjects design)
CIMeanBS <- function(alpha, m, sd, n, h) {
# Computes confidence interval and test statistic for a linear contrast of means
# in a between-subjects design
# Arguments:
# alpha: alpha level for 1-alpha confidence
# m: vector of sample means
# sd: vector of sample standard deviations
# n: vector of sample sizes
# h: vector of contrast coefficients
# Returns:
# estimate, SE, df, t-value, p-value, lower limit, upper limit
# for both equal variance and unequal variance methods
est <- t(h)%*%m
k <- length(m)
df1 <- sum(n) - k
v1 <-sum((n-1)*sd^2)/df1
se1 <- sqrt(v1*t(h)%*%solve(diag(n))%*%h)
t1 <- est/se1
p1 <- 2*(1 - pt(abs(t1),df1))
tcrit1 <- qt(1 - alpha/2, df1)
ll1 <- est - tcrit1*se1
ul1 <- est + tcrit1*se1
v2 <- diag(sd^2)%*%(solve(diag(n)))
se2 <- sqrt(t(h)%*%v2%*%h)
t2 <- est/se2
df2 <- (se2^4)/sum(((h^4)*(sd^4)/(n^2*(n-1))))
p2 <- 2*(1 - pt(abs(t2),df2))
tcrit2 <- qt(1 - alpha/2, df2)
ll2 <- est - tcrit2*se2
ul2 <- est + tcrit2*se2
out1 <- t(c(est, se1, t1, df1, p1, ll1, ul1))
out2 <- t(c(est, se2, t2, df2, p2, ll2, ul2))
out <- rbind(out1, out2)
colnames(out) <- c("Estimate", "SE", "t", "df", "p-value", "LL", "UL")
rownames(out) <- c("Equal Variances Assumed:", "Equal Variances Not Assumed:")
return(out)
}
Example:
m=c(11.833, 14.25, 18.167)
sd=c(2.037, 2.4168, 2.623)
n=c(12,12,12)
h = c(.5, .5, -1)
CIMeanBS(.05,m,sd,n,h)
Estimate SE t df p-value LL UL
Equal Variances Assumed: -5.125002 0.8384117 -6.112751 33.00000 6.926020e-07 -6.830763 -3.419241
Equal Variances Not Assumed: -5.125002 0.8840108 -5.797443 19.13865 1.345002e-05 -6.974351 -3.275653
Function 14: Confidence interval for a Pearson correlation
CIcorr <- function(alpha, corr, n) {
# Computes a confidence interval for a Pearson correlation
# Arguments:
# alpha: alpha level for 1-alpha confidence
# corr: sample value of correlation
# n: sample size
# Returns:
# confidence interval
z <- qnorm(1 - alpha/2)
se <- sqrt(1/((n - 3)))
zr <- log((1 + corr)/(1 - corr))/2
LL0 <- zr - z*se
UL0 <- zr + z*se
LL <- (exp(2*LL0) - 1)/(exp(2*LL0) + 1)
UL <- (exp(2*UL0) - 1)/(exp(2*UL0) + 1)
CI <- c(LL, UL)
return(CI)
}
Example:
CIcorr(.05, .802, 200)
[1] 0.7463000 0.8465456
Function 15: Confidence interval for semipartial correlation
CIsemipartcorr <- function(alpha, part, mult, n) {
# Computes a confidence interval for a semipartial correlation
# Arguments:
# alpha: alpha value for 1-alpha confidence
# part: sample semipartial correlation
# mult: sample squared multiple correlation in full model
# n: sample size
# Returns:
# confidence interval
z <- qnorm(1 - alpha/2)
mult0 <- mult - part^2
zr <- log((1 + part)/(1 - part))/2
a <- (mult^2 - 2*mult + mult0 - mult0^2 + 1)/(1 - part^2)^2
se <- sqrt(a/(n - 3))
LL0 <- zr - z*se
UL0 <- zr + z*se
LL <- (exp(2*LL0) - 1)/(exp(2*LL0) + 1)
UL <- (exp(2*UL0) - 1)/(exp(2*UL0) + 1)
CI <- c(LL, UL)
return(CI)
}
Example:
CIsemipartcorr(.05, .582, .699, 20)
[1] 0.2525662 0.7905182
Function 16: Confidence interval for average Pearson correlation
MetaAveCor <- function(alpha, n, cor) {
# Computes confidence interval for mean of Pearson correlations
# from two or more studies
# Arguments:
# alpha: alpha level for 1-alpha confidence
# n: m x 1 vector of sample sizes
# cor: m x 1 vector of sample correlations
# Returns:
# total sample size, estimate of mean correlation,
# standard error, lower and upper confidence limits
m <- length(n)
nt = sum(n)
z <- qnorm(1 - alpha/2)
var.cor <- (1 - cor^2)^2/(n - 3)
ave.cor <- sum(cor)/m
var.ave.cor <- sum(var.cor)/m^2
se.ave.cor <- sqrt(var.ave.cor)/(1 - ave.cor^2)
z.ave <- log((1 + ave.cor)/(1 - ave.cor))/2
ll0 <- z.ave - z*se.ave.cor
ul0 <- z.ave + z*se.ave.cor
ll = (exp(2*ll0) - 1)/(exp(2*ll0) + 1)
ul = (exp(2*ul0) - 1)/(exp(2*ul0) + 1)
out <- t(c(nt, ave.cor, se.ave.cor, ll, ul))
colnames(out) <- c("Total Sample", "Ave Correlation", "SE", "LL", "UL")
return (out)
}
Example:
n = c(210, 150, 180)
cor = c(.46, .39, .52)
MetaAveCor(.05, n, cor)
Total Sample Ave Correlation SE LL UL
[1,] 540 0.4566667 0.04397277 0.3858428 0.5221394
Function 17: Confidence interval for oneproportion
CIprop1 <- function(alpha, f, n) {
# Computes Agresti-Coull confidence interval for a population proportion
# Arguments:
# alpha: alpha level for 1-alpha confidence
# f: number of participants with trait
# n: sample size
# Returns:
# confidence interval
z <- qnorm(1 - alpha/2)
p <- (f+2)/(n+4)
se <- sqrt(p*(1-p)/(n+4))
LL <- p - z*se
UL <- p + z*se
CI <- c(LL, UL)
return(CI)
}
Example:
CIprop1(.05, 12, 100)
[1] 0.06901848 0.20021229
Function 18: Confidence interval for a proportion difference
CIprop2 <- function(alpha, f1, f2, n1, n2) {
# Computes Agresti-Caffo confidence interval for difference of
# population proportions in 2-group design
# Arguments:
# alpha: alpha level for 1-alpha confidence
# f1: number of participants in group 1 with trait
# f2: number of participants in group 2 with trait
# n1: sample size of group 1
# n2: sample size of group 2
# Returns:
# confidence interval
z <- qnorm(1 - alpha/2)
p1 <- (f1+1)/(n1+2)
p2 <- (f2+1)/(n2+2)
se <- sqrt(p1*(1-p1)/(n1+2) + p2*(1-p2)/(n2+2))
LL <- p1 - p2 - z*se
UL <- p1 - p2 + z*se
CI <- c(LL, UL)
return(CI)
}
Example:
CIprop2(.05, 35, 21, 150, 150)
[1] 0.004375769 0.179834757
Function 19: Confidence interval for a linear contrast of proportions (between-subjects design)
CIpropBS <- function(alpha, f, n, h) {
# Computes Price-Bonett confidence interval for a linear contrast
# of population proportions in a between-subjects design
# Arguments:
# alpha: alpha level for 1-alpha confidence
# f: vector of sample frequency counts
# n: vector of sample sizes
# h: vector of contrast coefficients
# Returns:
# confidence interval
z <- qnorm(1 - alpha/2)
m <- length(h) - length(which(h==0))
p <- (f+2/m)/(n+4/m)
se <- sqrt(t(h)%*%diag(p*(1-p))%*%solve(diag(n+4/m))%*%h)
LL <- t(h)%*%p - z*se
UL <- t(h)%*%p + z*se
CI <- c(LL, UL)
return(CI)
}
Example:
f = c(26, 24, 38)
n = c(60, 60, 60)
h = c(-.5, -.5, 1)
CIpropBS(.05,f,n,h)
[1] 0.06294259 0.36097046
Function 20: Confidence interval for a proportion difference (within-subjects design)
CIpropWS <- function(alpha, f12, f21, n) {
# Computes Bonett-Price confidence interval for difference of
# population proportions in 2-level within-subjects design
# Arguments:
# alpha: alpha level for 1-alpha confidence
# f12: number of sample participants who have trait
# in condition 1 but not condition 2
# f21: number of sample participants who have trait
# in condition 2 but not in condition 1
# n: sample size
# Returns:
# confidence interval
z <- qnorm(1 - alpha/2)
p12 <- (f12+1)/(n+2)
p21 <- (f21+1)/(n+2)
se <- sqrt(((p12 + p21) - (p12-p21)^2)/(n+2))
LL <- p12 - p21 - z*se
UL <- p12 - p21 + z*se
CI <- c(LL, UL)
return(CI)
}
Example:
CIpropWS(.05, 26, 4, 40)
[1] 0.3126438 0.7349752
Part III–Miscellaneous Procedures
Function 21: Generate Random Sample
RandomSample <- function(popsize, samsize){
# Generates a random sample of participant IDs
# Args:
# popsize: study population size
# samsize: sample sample
# Returns:
# participant IDs to sample
out <- sort(sample(popsize, samplesize, replace = FALSE, prob = NULL))
return(out)
}
Example:
RandomSample(3000, 25)
[1] 37 94 134 186 212 408 485 697 722 781 998 1055 1182 1224 1273
[16] 1335 1452 1552 1783 1817 2149 2188 2437 2850 2936
Function 22: Randomize Sample into Groups
Randomize <- function(k, n){
# Randomly assigns a sample of participants into k groups
# Args:
# k: number of groups
# n: kx1 vector of sample sizes per group
# Returns:
# assignment of participant IDs to groups
out <- sample(rep(1:k, n))
return(out)
}
Randomize(3,n)
[1] 1 2 3 2 2 1 3 3 1 3 3 1 3 1 2 3 1 2 2 1 3 3 2 2 3 3 2 3 2 1 1 3 3 3 1
Create Bar Chart with CI Lines
Example 1: (3 group bar chart)
[Install ggplot2 package]
library(ggplot2)
data <- read.table(text = "group mean LL UL
Treatment_A7.43 5.03 9.83
Treatment_B2.72 0.62 4.82
Treatment_C5.27 2.97 7.57
", header = T)
ggplot(data, aes(x = group, y = mean)) +
geom_bar(stat = "identity", position = position_dodge()) +
geom_errorbar(aes(ymin = LL, ymax = UL), width = .25)
Example 2: (clustered bar chart for 2x2 design)
library(ggplot2)
data <- read.table(text = "Gender Treatment Mean LL UL
Male A 7.43 5.03 9.83
Female A 2.72 0.62 4.82
Male B 4.27 1.97 6.57
Female B 5.27 2.97 7.57
", header = T)
dodge <- position_dodge(width=0.9)
ggplot(data, aes(fill = Gender, y = Mean, x = Treatment)) +
geom_bar(stat = "identity", position = dodge) +
geom_errorbar(aes(ymin = LL, ymax = UL), position = dodge, width = .25)