## Code for Imputing Mixed Data

## author: Irene Helenowski, Ph.D. ()

## function for assigning binary values based on quantiles of underlying normally distributed

## values.

bind.fxn=function(x){

l.x=length(x)

l.x2=l.x/2

y=x[1:l.x2]

z=x[(l.x2+1):l.x]

p0=1-mean(y, na.rm=T)

q.p0=quantile(z,p0, na.rm=T)

z2=rep(NA,l.x2); z.o=as.numeric(na.omit(z))

ind.0=as.numeric(na.omit((1:l.x2)[y==0]))

ind.1=as.numeric(na.omit((1:l.x2)[y==1]))

z2[ind.0]=z.o[z.o<q.p0]

z2[ind.1]=z.o[z.o>q.p0]

z2

}

## function for imputing mixed data

mv.mix.fxn=function(y,n,b, n2.init, cc, maxits=100, m.imp, j.simul){

APPENDIX (continued)

## determining conditional probabilities in binary variables that

## will be used to compute quantiles of the underlying normal variables.

y1=y[,1:n]

y2=y[,((n+1):(n+b))]

## determining entry positions of missing values

m.ind=apply(is.na(y2),1,sum)

m.ind2=(1:dim(y2)[1])[m.ind>0]

y2.m=y2[m.ind2,]

t.y2=table(y2[,1],y2[,2])

p.y2=c(t.y2[1]/sum(t.y2[1,]),t.y2[2]/sum(t.y2[2,]),

t.y2[1]/sum(t.y2[,1]), t.y2/sum(t.y2[,2]))

## computing eCDF values for continuous data

e1a=mv.ecdf(y1)

e1a[e1a==1]=round(1-10^-5,20)

e1a[e1a==0]=round(10^-5,20)

## obtaining N(0,1) values based on computed eCDF values

n1=qnorm(e1a)

## reordering values of underlying normal variables to correspond to the binary variables

n2=apply(n2.init,2,bind.fxn)

n2.1=n2[,1]; n2.2=n2[,2]

q.y2=c(quantile(n2.2[y2[,1]==0], p.y2[1], na.rm=T),

quantile(n2.2[y2[,1]==1], p.y2[2], na.rm=T),

quantile(n2.1[y2[,2]==0], p.y2, na.rm=T),

quantile(n2.1[y2[,2]==1], p.y2[4], na.rm=T))

k=dim(y)[2]; n=dim(y)[1]

## setting up matrices to store imputed data, their pairwise correlations,

## means of imputed continuous variables, and proportions of imputed binary variables

imp.yds=matrix(0,n,k*m*j)

imp.cor2=matrix(0,j.simul,60)

imp.m1=matrix(0,j.simul,10)

imp.m2=matrix(0,j.simul,10)

imp.p3=matrix(0,j.simul,10)

imp.p4=matrix(0,j.simul,10)

APPENDIX (continued)

for (j in 1:j.simul){

for (m in 1:m.imp){

## setting conditions for re-iteration

abd.cc=numeric(6); i=0

while (sum(abd.cc)<6 & i < maxits) {

i=i+1

## imputing normal data underlying original continuous and binary variables

## using the R norm package

z.n=cbind(n1,n2)

s <- prelim.norm(as.matrix(z.n)) #do preliminary manipulations

thetahat <- em.norm(s, showits=FALSE) #find the mle

rngseed(sample(10:1000,1)) #set random number generator seed

z.imp <- imp.norm(s,thetahat,as.matrix(z.n)) #impute missing data

pz.imp=pnorm(z.imp)

## back-transforming imputed normal data onto the scale of the original continuous

## data via the Barton and Schruben (1993) method

x1=dim(y1)[2]

na.test=apply(is.na(y1), 2, sum)

na.test2=(1:x1)[as.numeric(na.test)>0]

x2=matrix(na.test2,length(na.test2),1)

y.imp1.add=apply(x2,1,bart.k,test.data=y1, u.c=pz.imp, u.msg=as.matrix(e1a))

## back-transforming imputed normal data to binary data via quantiles

z.imp2=z.imp[m.ind2,((n+1):(n+b))]

y2.2=y2[m.ind2,]

yz.imp2=cbind(y2.2, z.imp2)

y.imp2.add=t(apply(yz.imp2,1,bin2,q.y2=q.y2))

y.imp2=y2

y.imp2[m.ind2,]=y.imp2.add

y.imp=cbind(y.imp1, y.imp2)

## computing correlations of imputed data

imp.cor.m=cor(y.imp,use='pairwise.complete.obs')

APPENDIX (continued)

imp.cor=l.mat(imp.cor.m)[l.mat(imp.cor.m)!=0]

abd=abs(imp.cor-init.cor)

dc=sign(imp.cor-init.cor)

abd.cc=(abd<cc)

## printing the ith iteration, mth imputation, jth simulation run, and how many pairwise

## correlations meet the convergence criteria

print(paste("ith iteration: ",format(i), " --*-- mth imputation: ",format(m),

" --*-- jth simulation run: ",format(j),

" --*-- close pairs: ",format(sum(abd.cc)),

" ** ",format(sign(dc[1])),

" ** ",format(sign(dc[4])),

" ** ",format(sign(dc[5])),

sep = ""))

}

## storing imputed data correlations and proportions involving imputed binary data

imp.yds[((j-1)*m.imp+(m-1)*dim(y)[2]+1):((j-1)*m.imp+(m-1)*dim(y)[2]+dim(y)[2])

ik.dim=length(imp.cor)

imp.cor2[j,(((m-1)*k.dim+1):(k.dim*m))]=imp.cor

imp.m1[j,m]=mean(y.imp[,1])

imp.m2[j,m]=mean(y.imp[,2])

imp.p3[j,m]=mean(y.imp[,3])

imp.p4[j,m]=mean(y.imp[,4])

}

}

## outputting results

print(list(imp.yds, imp.cor2, imp.m1, imp.m2, imp.p3,imp.p4))

}