Part I. Sample Size Requirements

D.G. Bonett (3/2016)

R Functions

Part I. Sample Size Requirements

Function 1: Sample size to estimate one mean

sizeCImean1 <- function(alpha,var, w) {

# Computes sample size required to estimate a mean with desired precision

# Arguments:

# alpha: alpha level for 1-alpha confidence

# var: planning value of response variable variance

# w: desired confidence interval width

# Returns:

# required sample size

z <- qnorm(1 - alpha/2)

n <- ceiling(4*var*(z/w)^2)

return(n)

}

Example:

sizeCImean1(.05, 264.39, 10)

[1] 41

Function 2: Sample size to estimate a mean difference (2-group design)

sizeCImean2 <- function(alpha,var, w) {

# Computes sample size required to estimate a difference in means with

# desired precision in a 2-group design

# Arguments:

# alpha: alpha level for 1-alpha confidence

# var: planning value of average within-group response variable variance

# w: desired confidence interval width

# Returns:

# required sample size

z <- qnorm(1 - alpha/2)

n <- ceiling(8*var*(z/w)^2)

return(n)

}

Example:

sizeCImean2(.05,37.1,5)

[1] 47

Function 3: Sample size to estimate a mean difference (within-subjects design)

sizeCImeanWS <- function(alpha,var, corr, w) {

# Computes sample size required to estimate a difference in means with

# desired precision in a within-subjects design

# Arguments:

# alpha: alpha level for 1-alpha confidence

# var: planning value of average within-group response variable variance

# corr: planning value of correlation between response variables

# w: desired confidence interval width

# Returns:

# required sample size

z <- qnorm(1 - alpha/2)

n <- ceiling(8*(1-corr)*var*(z/w)^2)

return(n)

}

Example:

sizeCImeanWS(.05, 35.15, .867, 2.5)

[1] 23

Function 4: Sample size to estimate a linear contrast of means (between-subjects design)

sizeCImeanBS <- function(alpha, var, w, h) {

# Computes sample size required to estimate a linear contrast of means in a

# between-subjects design with desired precision

# Arguments:

# alpha: alpha level for 1-alpha confidence

# var: planning value of average within-group variance

# w: desired confidence interval width

# h: vector of contrast coefficients (zeros can be omitted)

# Returns:

# required sample size per group

z <- qnorm(1 - alpha/2)

k <- length(h)

n <- ceiling(4*var*(t(h)%*%h)*(z/w)^2)

return(n)

}

Example:

h =c(.5, .5, -1)

sizeCImeanBS(.05, 5.62, 2.0, h)

[,1]

[1,] 33

Function 5: Sample size to estimate a linear contrast of means (within-subjects design)

sizeCImeanWS <- function(alpha, var, corr, w, h) {

# Computes sample size required to estimate a linear contrast of means in a

# within-subjects design with desired precision

# Arguments:

# alpha: alpha level for 1-alpha confidence

# var: planning value of largest variance

# corr: planning value of smallest correlation

# w: desired confidence interval width

# h: vector of contrast coefficients (zeros can be omitted)

# Returns:

# required sample size per group

z <- qnorm(1 - alpha/2)

k <- length(h)

n <- ceiling(4*(1-corr)*var*(t(h)%*%h)*(z/w)^2)

return(n)

}

Example:

h =c(.5, .5, -.5, -.5)

sizeCImeanWS(.05, 275, .85, 4.0, h)

[,1]

[1,] 40

Function 6: Sample size to estimate aPearson correlation

sizeCIcorr <- function(alpha, corr, w) {

# Computes sample size required to estimate a correlation with desired precision

# Arguments:

# alpha: alpha level for 1-alpha confidence

# corr: planning value of correlation

# w: desired confidence interval width

# Returns:

# required sample size

z <- qnorm(1 - alpha/2)

n1 <- ceiling(4*(1 - corr^2)^2*(z/w)^2 + 3)

zr <- log((1 + corr)/(1 - corr))/2

se <- sqrt(1/(n1 - 3))

LL0 <- zr - z*se

UL0 <- zr + z*se

LL <- (exp(2*LL0) - 1)/(exp(2*LL0) + 1)

UL <- (exp(2*UL0) - 1)/(exp(2*UL0) + 1)

n <- ceiling((n1 - 3)*((UL - LL)/w)^2 + 3)

return(n)

}

Example:

sizeCIcorr(.05, .52, .2)

[1] 207

Function 7: Sample size to estimate a slope

sizeCIslope <- function(alpha, varresid, varx, w) {

# Computes sample size required to estimate slope with desired precision

# Arguments:

# alpha: alpha level for 1-alpha confidence

# varresid: planning value of within group variance

# varx: variance of fixed predictor variable

# w: desired confidence interval width

# Returns:

# required sample size

z <- qnorm(1 - alpha/2)

n <- ceiling(4*(varresid/varx)*(z/w)^2 + 1)

return(n)

}

Example:

sizeCIslope(.05, .92, 5.9, .25)

[1] 40

Function 8: Sample size to estimate a squared multiple correlation

sizeCImultcorr <- function(alpha, q, sqrcor, w) {

# Computes sample size required to estimate a squared multiple correlation

# with desired precision

# Arguments:

# alpha: alpha level for 1-alpha confidence

# q: number of predictor variables

# sqrcor: squared multiple correlation planning value

# w: desired confidence interval width

# Returns:

# required sample size

z <- qnorm(1 - alpha/2)

n <- ceiling(16*(sqrcor*(1-sqrcor)^2)*(z/w)^2 + q + 2)

return(n)

}

Example:

sizeCImultcorr(.05, 2, .3, .2)

[1] 230

Function 9: Sample size to estimate a proportion

sizeCIprop1 <- function(alpha,p, w) {

# Computes sample size required to estimate a proportion with desired precision

# Arguments:

# alpha: alpha level for 1-alpha confidence

# p: planning value of proportion

# w: desired confidence interval width

# Returns:

# required sample size

z <- qnorm(1 - alpha/2)

n <- ceiling(4*p*(1-p)*(z/w)^2)

return(n)

}

Example:

sizeCIprop1(.05, .2, .1)

[1] 246

Function 10: Sample size to estimate a proportion difference (2-group design)

sizeCIprop2 <- function(alpha,p1, p2, w) {

# Computes sample size per group required to estimate a difference

#of proportionsin 2-group design with desired precision

# Arguments:

# alpha: alpha level for 1-alpha confidence

# p1: planning value of proportion for group 1

# p2: planning value of proportion for group 2

# w: desired confidence interval width

# Returns:

# required sample size

z <- qnorm(1 - alpha/2)

n <- ceiling(4*(p1*(1-p1) + p2*(1-p2))*(z/w)^2)

return(n)

}

Example:

sizeCIprop2(.05, .2, .4, .2)

[1] 154

Function 11: Sample size to estimate a linear contrast of proportions (between-subjects design)

sizeCIpropBS <- function(alpha,p, h, w) {

# Computes sample size per group required to estimate a linear contrast

# of proportions in between-subjects design with desired precision

# Arguments:

# alpha: alpha level for 1-alpha confidence

# p: vector of proportion planning values

# h: vector of contrast coefficients

# w: desired confidence interval width

# Returns:

# required sample size

z <- qnorm(1 - alpha/2)

n <- ceiling(4*t(h)%*%diag(p*(1-p))%*%h)*(z/w)^2)

return(n)

}

Example:

p = c(.25, .30, .50, .50)

h = c(.5, .5, -.5, -.5)

sizeCIpropBS(.05, p, h, .2)

[,1]

[1,] 87

Function 12: Sample size to estimate a proportion difference (within-subjects design)

sizeCIpropWS <- function(alpha, p1, p2, corr, w) {

# Computes sample size required to estimate a difference of proportions

# in 2-level within-subjects design with desired precision

# Arguments:

# alpha: alpha level for 1-alpha confidence

# p1: planning value of proportion for group 1

# p2: planning value of proportion for group 2

# corr: planning value of correlation between measurements

# w: desired confidence interval width

# Returns:

# required sample size

z <- qnorm(1 - alpha/2)

n <- ceiling(4*(p1*(1-p1) + p2*(1-p2) - 2*corr*sqrt(p1*p2*(1-p1)*(1-p2)))*(z/w)^2)

return(n)

}

Example:

sizeCIpropWS(.05, .2, .3, .8, .1)

[1] 118

Part II: Confidence Intervals

Function 13: Confidence interval for a linear contrast of means (between-subjects design)

CIMeanBS <- function(alpha, m, sd, n, h) {

# Computes confidence interval and test statistic for a linear contrast of means

# in a between-subjects design

# Arguments:

# alpha: alpha level for 1-alpha confidence

# m: vector of sample means

# sd: vector of sample standard deviations

# n: vector of sample sizes

# h: vector of contrast coefficients

# Returns:

# estimate, SE, df, t-value, p-value, lower limit, upper limit

# for both equal variance and unequal variance methods

est <- t(h)%*%m

k <- length(m)

df1 <- sum(n) - k

v1 <-sum((n-1)*sd^2)/df1

se1 <- sqrt(v1*t(h)%*%solve(diag(n))%*%h)

t1 <- est/se1

p1 <- 2*(1 - pt(abs(t1),df1))

tcrit1 <- qt(1 - alpha/2, df1)

ll1 <- est - tcrit1*se1

ul1 <- est + tcrit1*se1

v2 <- diag(sd^2)%*%(solve(diag(n)))

se2 <- sqrt(t(h)%*%v2%*%h)

t2 <- est/se2

df2 <- (se2^4)/sum(((h^4)*(sd^4)/(n^2*(n-1))))

p2 <- 2*(1 - pt(abs(t2),df2))

tcrit2 <- qt(1 - alpha/2, df2)

ll2 <- est - tcrit2*se2

ul2 <- est + tcrit2*se2

out1 <- t(c(est, se1, t1, df1, p1, ll1, ul1))

out2 <- t(c(est, se2, t2, df2, p2, ll2, ul2))

out <- rbind(out1, out2)

colnames(out) <- c("Estimate", "SE", "t", "df", "p-value", "LL", "UL")

rownames(out) <- c("Equal Variances Assumed:", "Equal Variances Not Assumed:")

return(out)

}

Example:

m=c(11.833, 14.25, 18.167)

sd=c(2.037, 2.4168, 2.623)

n=c(12,12,12)

h = c(.5, .5, -1)

CIMeanBS(.05,m,sd,n,h)

Estimate SE t df p-value LL UL

Equal Variances Assumed: -5.125002 0.8384117 -6.112751 33.00000 6.926020e-07 -6.830763 -3.419241

Equal Variances Not Assumed: -5.125002 0.8840108 -5.797443 19.13865 1.345002e-05 -6.974351 -3.275653

Function 14: Confidence interval for a Pearson correlation

CIcorr <- function(alpha, corr, n) {

# Computes a confidence interval for a Pearson correlation

# Arguments:

# alpha: alpha level for 1-alpha confidence

# corr: sample value of correlation

# n: sample size

# Returns:

# confidence interval

z <- qnorm(1 - alpha/2)

se <- sqrt(1/((n - 3)))

zr <- log((1 + corr)/(1 - corr))/2

LL0 <- zr - z*se

UL0 <- zr + z*se

LL <- (exp(2*LL0) - 1)/(exp(2*LL0) + 1)

UL <- (exp(2*UL0) - 1)/(exp(2*UL0) + 1)

CI <- c(LL, UL)

return(CI)

}

Example:

CIcorr(.05, .802, 200)

[1] 0.7463000 0.8465456

Function 15: Confidence interval for semipartial correlation

CIsemipartcorr <- function(alpha, part, mult, n) {

# Computes a confidence interval for a semipartial correlation

# Arguments:

# alpha: alpha value for 1-alpha confidence

# part: sample semipartial correlation

# mult: sample squared multiple correlation in full model

# n: sample size

# Returns:

# confidence interval

z <- qnorm(1 - alpha/2)

mult0 <- mult - part^2

zr <- log((1 + part)/(1 - part))/2

a <- (mult^2 - 2*mult + mult0 - mult0^2 + 1)/(1 - part^2)^2

se <- sqrt(a/(n - 3))

LL0 <- zr - z*se

UL0 <- zr + z*se

LL <- (exp(2*LL0) - 1)/(exp(2*LL0) + 1)

UL <- (exp(2*UL0) - 1)/(exp(2*UL0) + 1)

CI <- c(LL, UL)

return(CI)

}

Example:

CIsemipartcorr(.05, .582, .699, 20)

[1] 0.2525662 0.7905182

Function 16: Confidence interval for average Pearson correlation

MetaAveCor <- function(alpha, n, cor) {

# Computes confidence interval for mean of Pearson correlations

# from two or more studies

# Arguments:

# alpha: alpha level for 1-alpha confidence

# n: m x 1 vector of sample sizes

# cor: m x 1 vector of sample correlations

# Returns:

# total sample size, estimate of mean correlation,

# standard error, lower and upper confidence limits

m <- length(n)

nt = sum(n)

z <- qnorm(1 - alpha/2)

var.cor <- (1 - cor^2)^2/(n - 3)

ave.cor <- sum(cor)/m

var.ave.cor <- sum(var.cor)/m^2

se.ave.cor <- sqrt(var.ave.cor)/(1 - ave.cor^2)

z.ave <- log((1 + ave.cor)/(1 - ave.cor))/2

ll0 <- z.ave - z*se.ave.cor

ul0 <- z.ave + z*se.ave.cor

ll = (exp(2*ll0) - 1)/(exp(2*ll0) + 1)

ul = (exp(2*ul0) - 1)/(exp(2*ul0) + 1)

out <- t(c(nt, ave.cor, se.ave.cor, ll, ul))

colnames(out) <- c("Total Sample", "Ave Correlation", "SE", "LL", "UL")

return (out)

}

Example:

n = c(210, 150, 180)

cor = c(.46, .39, .52)

MetaAveCor(.05, n, cor)

Total Sample Ave Correlation SE LL UL

[1,] 540 0.4566667 0.04397277 0.3858428 0.5221394

Function 17: Confidence interval for oneproportion

CIprop1 <- function(alpha, f, n) {

# Computes Agresti-Coull confidence interval for a population proportion

# Arguments:

# alpha: alpha level for 1-alpha confidence

# f: number of participants with trait

# n: sample size

# Returns:

# confidence interval

z <- qnorm(1 - alpha/2)

p <- (f+2)/(n+4)

se <- sqrt(p*(1-p)/(n+4))

LL <- p - z*se

UL <- p + z*se

CI <- c(LL, UL)

return(CI)

}

Example:

CIprop1(.05, 12, 100)

[1] 0.06901848 0.20021229

Function 18: Confidence interval for a proportion difference

CIprop2 <- function(alpha, f1, f2, n1, n2) {

# Computes Agresti-Caffo confidence interval for difference of

# population proportions in 2-group design

# Arguments:

# alpha: alpha level for 1-alpha confidence

# f1: number of participants in group 1 with trait

# f2: number of participants in group 2 with trait

# n1: sample size of group 1

# n2: sample size of group 2

# Returns:

# confidence interval

z <- qnorm(1 - alpha/2)

p1 <- (f1+1)/(n1+2)

p2 <- (f2+1)/(n2+2)

se <- sqrt(p1*(1-p1)/(n1+2) + p2*(1-p2)/(n2+2))

LL <- p1 - p2 - z*se

UL <- p1 - p2 + z*se

CI <- c(LL, UL)

return(CI)

}

Example:

CIprop2(.05, 35, 21, 150, 150)

[1] 0.004375769 0.179834757

Function 19: Confidence interval for a linear contrast of proportions (between-subjects design)

CIpropBS <- function(alpha, f, n, h) {

# Computes Price-Bonett confidence interval for a linear contrast

# of population proportions in a between-subjects design

# Arguments:

# alpha: alpha level for 1-alpha confidence

# f: vector of sample frequency counts

# n: vector of sample sizes

# h: vector of contrast coefficients

# Returns:

# confidence interval

z <- qnorm(1 - alpha/2)

m <- length(h) - length(which(h==0))

p <- (f+2/m)/(n+4/m)

se <- sqrt(t(h)%*%diag(p*(1-p))%*%solve(diag(n+4/m))%*%h)

LL <- t(h)%*%p - z*se

UL <- t(h)%*%p + z*se

CI <- c(LL, UL)

return(CI)

}

Example:

f = c(26, 24, 38)

n = c(60, 60, 60)

h = c(-.5, -.5, 1)

CIpropBS(.05,f,n,h)

[1] 0.06294259 0.36097046

Function 20: Confidence interval for a proportion difference (within-subjects design)

CIpropWS <- function(alpha, f12, f21, n) {

# Computes Bonett-Price confidence interval for difference of

# population proportions in 2-level within-subjects design

# Arguments:

# alpha: alpha level for 1-alpha confidence

# f12: number of sample participants who have trait

# in condition 1 but not condition 2

# f21: number of sample participants who have trait

# in condition 2 but not in condition 1

# n: sample size

# Returns:

# confidence interval

z <- qnorm(1 - alpha/2)

p12 <- (f12+1)/(n+2)

p21 <- (f21+1)/(n+2)

se <- sqrt(((p12 + p21) - (p12-p21)^2)/(n+2))

LL <- p12 - p21 - z*se

UL <- p12 - p21 + z*se

CI <- c(LL, UL)

return(CI)

}

Example:

CIpropWS(.05, 26, 4, 40)

[1] 0.3126438 0.7349752

Part III–Miscellaneous Procedures

Function 21: Generate Random Sample

RandomSample <- function(popsize, samsize){

# Generates a random sample of participant IDs

# Args:

# popsize: study population size

# samsize: sample sample

# Returns:

# participant IDs to sample

out <- sort(sample(popsize, samplesize, replace = FALSE, prob = NULL))

return(out)

}

Example:

RandomSample(3000, 25)

[1] 37 94 134 186 212 408 485 697 722 781 998 1055 1182 1224 1273

[16] 1335 1452 1552 1783 1817 2149 2188 2437 2850 2936

Function 22: Randomize Sample into Groups

Randomize <- function(k, n){

# Randomly assigns a sample of participants into k groups

# Args:

# k: number of groups

# n: kx1 vector of sample sizes per group

# Returns:

# assignment of participant IDs to groups

out <- sample(rep(1:k, n))

return(out)

}

Randomize(3,n)

[1] 1 2 3 2 2 1 3 3 1 3 3 1 3 1 2 3 1 2 2 1 3 3 2 2 3 3 2 3 2 1 1 3 3 3 1

Create Bar Chart with CI Lines

Example 1: (3 group bar chart)

[Install ggplot2 package]

library(ggplot2)

data <- read.table(text = "group mean LL UL

Treatment_A7.43 5.03 9.83

Treatment_B2.72 0.62 4.82

Treatment_C5.27 2.97 7.57

", header = T)

ggplot(data, aes(x = group, y = mean)) +

geom_bar(stat = "identity", position = position_dodge()) +

geom_errorbar(aes(ymin = LL, ymax = UL), width = .25)

Example 2: (clustered bar chart for 2x2 design)

library(ggplot2)

data <- read.table(text = "Gender Treatment Mean LL UL

Male A 7.43 5.03 9.83

Female A 2.72 0.62 4.82

Male B 4.27 1.97 6.57

Female B 5.27 2.97 7.57

", header = T)

dodge <- position_dodge(width=0.9)

ggplot(data, aes(fill = Gender, y = Mean, x = Treatment)) +

geom_bar(stat = "identity", position = dodge) +

geom_errorbar(aes(ymin = LL, ymax = UL), position = dodge, width = .25)