/*LICENSE

This software is provided under the standard MIT License (below).

Copyright (c) 2011, The President and Fellows of Harvard College

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.*/

*AUTHORS OF THIS PROGRAM:;

*RAY GRINER, PROGRAMMER, AND JUDITH LOK, ASSISTANT PROFESSOR OF BIOSTATISTICS;

*HARVARD SCHOOL OF PUBLIC HEALTH, DEPARTMENT OF BIOSTATISTICS.;

* . .;

*

*******************************************************************;

* File: nonopt_macro_public.sas

* Version: 1.0

* Date: March, 2011

* Description: Macro for calculating non-optimal estimates of the

* psis. The important output dataset is BOOTPLOTDATA. This is

* ONE RECORD PER MONTH PER BOOTSTRAP REPLICATE. The estimate of

* the outcome for the given month is on the dataset. The

* estimates of psi are also on the dataset. These are the same

* for every month for a given replicate. The macro automatically

* estimates psis using three models: a two parameter model (first

* two of the four parameters), a three parameter model (first three

* of the four parameters), a four parameter model. The psis are

* named as follows:

* psihat21 = Estimate of psi_1 for two-parameter model,

* psihat22 = Estimate of psi_2 for two-parameter model,

* psihat31 = Estimate of psi_1 for three-parameter model, etc...

*******************************************************************;

********************************************************************;

* This macro is used to calculate the Bootstrap confidence intervals;

* for the three different models used in the paper ;

* OUTLIB = Output libname for the SAS datasets. ;

* DATA = Input dataset ;

* NUMREPS = # of replicates for the bootstrap ;

* Q1 - Q4 = Elements of the q vector ;

* ECHOREP = Echo a message to the terminal as each bootstrap ;

* replicate is run? (Y/N). Only choose Y on UNIX/Linux ;

* OUTCOME = Outcome ;

* MAXWGT = Maximum value of the censoring weight to handle inform- ;

* ative dropouts. Weights exceeding this value are truncated, and ;

* a warning is printed to the log ;

* REGTYPE = Create doubly robust estimators (DR) or not? (NONDR) ;

* PARAM4 = (TRT2, LVLTR, LVLAK11) - Indicates whether the ;

* fourth parameter of the model is Trt2, LVL*Tr, or LVL*(A_k+11) ;

* For whichever is not set, the program will not calculate the ;

* predicted models associated with these variables and will ;

* therefore run faster ;

* TRT_MODEL_VARS - List of variables used as a predictor of Trt, ;

* Trt^2, LVL*Trt, LVL*A_k+11, and the outcome. This macro ;

* implements the simplest case where the same variables are used ;

* as predictors in a number of different models. If different ;

* predictors are desired for different models, the macro can be ;

* modified to incorporate this. ;

* LASTMONTH_MODEL_VARS - List of variables used to predict whether ;

* a given month is the last observed for the patient. ;

* FIRSTTRT_MODEL_VARS - List of variables used to predict the ;

* probability of treatment initiation ;

********************************************************************;

%macro nonopt_macro(OUTLIB=, DATA=,

NUMREPS=2, MAXWGT=15, PARAM4=,

ECHOREP=N, REGTYPE=,

OUTCOME=, TRT_MODEL_VARS=, LASTMONTH_MODEL_VARS=, FIRSTTRT_MODEL_VARS=);

%local ISERR INC_TRT2 INC_LVLTR INC_LVLAK11;

%let INC_TRT2=N; %let INC_LVLTR=N; %let INC_LVLAK11=N;

%let ISERR=0;

********************************************************************;

* Set elements of the Q vector. The analyst can change this if s/he

* believes another model is appropriate, but note that the corresponding

* TREFF macro variable should also be changed ;

********************************************************************;

%let Q1=(12*(1-predtrt));

%let Q2=(12*(month-pred_TtimesTrtK));

%let Q3=(12*(month*month-pred_T2timesTrtK));

%if &PARAM4=KTRT %then %let Q4=(12*(1-predtrt)*month);

%else %if &PARAM4=TRT2 %then %let Q4=(144*(1-predtrt2));

%else %if &PARAM4=LVLTR %then %let Q4=(12*(lvl-predrnatr));

%else %if &PARAM4=LVLAK11 %then %let Q4=(lvl-pred_lvlk11);

%else %put %upcase(error): PARAM4 (&PARAM4) not valid;;

%let INC_&PARAM4=Y;

%macro check_var_exists(var);

%if %length(&VAR)=0 %then %do;

%put %upcase(error): Parameter (&VAR) must not be blank!;

%let ISERR=1;

%end;

%mend check_var_exists;

%check_var_exists(OUTLIB);

%check_var_exists(OUTCOME);

%check_var_exists(REGTYPE);

%check_var_exists(PARAM4);

%check_var_exists(TRT_MODEL_VARS);

%check_var_exists(LASTMONTH_MODEL_VARS);

%check_var_exists(FIRSTTRT_MODEL_VARS);

%if &ISERR=1 %then %goto exit;

%if &REGTYPE=NONDR %then %do;

%let HY=(_outcome);

%let TREFF1=(12*(trt));

%let TREFF2=(12*TtimesTrtK);

%let TREFF3=(12*T2timesTrtK);

%if &PARAM4=KTRT %then %let TREFF4=(12*(trt)*month);

%else %if &PARAM4=TRT2 %then %let TREFF4=(144*(trt2));

%else %if &PARAM4=LVLTR %then %let TREFF4=(12*lvl_t_trt);

%else %if &PARAM4=LVLAK11 %then %let TREFF4=(lvl_T_T_lt_kp12);;

%end;

%else %if &REGTYPE=DR %then %do;

%let HY=(_outcome-pred_outcome_akm1);

%let TREFF1=(12*(trt-predtrt_akm1));

%let TREFF2=(12*(TtimesTrtK-pred_TtimesTrtK_akm1));

%let TREFF3=(12*(T2timesTrtK-pred_T2timesTrtK_akm1));

%if &PARAM4=KTRT %then %let TREFF4=(12*(trt-predtrt_akm1)*month);

%else %if &PARAM4=TRT2 %then %let TREFF4=(144*(trt2-predtrt2_akm1));

%else %if &PARAM4=LVLTR %then %let TREFF4=12*(lvl_t_trt-predrnatr_akm1);

%else %if &PARAM4=LVLAK11 %then %let TREFF4=(lvl_T_T_lt_kp12-pred_lvlk11_akm1);;

%end;

%else %do;

%put %upcase(error): REGTYPE [&REGTYPE] must equal DR or NONDR;

%goto exit;

%end;

*******************************************************************;

* This macro and the following dataset stores in the output library

* the values the input parameters were set to.

*******************************************************************;

%macro svvar(MVAR);

name="&MVAR"; value="&MVAR"; output;

%mend svvar;

data &OUTLIB..config;

length name $30. value $200.;

%svvar(OUTLIB); %svvar(NUMREPS); %svvar(MAXWGT);

%svvar(Q1); %svvar(Q2); %svvar(Q3); %svvar(Q4);

%svvar(TREFF1); %svvar(TREFF2); %svvar(TREFF3); %svvar(TREFF4);

%svvar(ECHOREP); %svvar(OUTCOME); %svvar(REGTYPE); %svvar(PARAM4);

%svvar(INC_TRT2); %svvar(INC_LVLTR); %svvar(INC_LVLAK11);

%svvar(TRT_MODEL_VARS); %svvar(LASTMONTH_MODEL_VARS); %svvar(FIRSTTRT_MODEL_VARS);

run;

*******************************************************************;

* Do all printing in this macro to make it easy to turn on and off ;

* Currently only set to print when REP=0 (i.e. on the actual data;

* set, but not on any of the bootstrap replicates ;

*******************************************************************;

%macro printset(data=, obs=, vars=, wherest=, title2=);

%if &REP=0 %then %do;

proc print data=&DATA %if %length(&OBS) %then (obs=&OBS) ;;

%if %length(&VARS) %then var &VARS ;;

%if %length(&WHEREST) %then where (&WHEREST) ;;

title "%upcase(&DATA)";

%if %length(&TITLE2) %then title2 "&TITLE2" ;;

run;

title;

%end;

%mend printset;

* Get a dataset of unique patids;

data bootstrap;

set &DATA (keep=patid);

by patid;

if first.patid;

run;

*******************************************************************************;

* Create new dataset that resamples with replacement from our original patids *;

*******************************************************************************;

data bootsamp;

do sampnum=1 to &numreps;

do i=1 to nobs;

x=ceil(ranuni(12)*nobs);

set bootstrap nobs=nobs point=x;

newpatid=i;

output;

end;

end;

stop;

run;

****************************************************************************;

* This macro does everything for each replicate one at a time. Note that

* many of the procedures within the macro still do things "by sampnum",

* which really isnt necessary since all records on BOOTANALDATA will have

* the same sampnum. (The reason for this is I first tried doing things

* "by sampnum" and ran into some SAS errors regarding resource constraints.)

****************************************************************************;

%macro dobootstrap();

%local rep;

%let rep=0;

*********************************************************************************;

* REP=0 is the actual dataset and REP=1 to numreps are the bootstrap replicates *;

*********************************************************************************;

%do rep=0 %to &numreps;

%if &REP=0 %then %do;

data bootanaldata;

set &DATA;

sampnum=0;

run;

** PRINTIND is a macro variable set to noprint for REP>0;

%let PRINTIND=;

%end;

%else %do;

options nomprint nonotes;

*********************************************************;

** PRINTIND is a macro variable set to noprint for REP>0;

** It is used to turn off the PROC REG/LOGISTIC output, which can create a

** large LST file if running a large number of reps;

*********************************************************;

%let PRINTIND=noprint;

********************************************************************************;

* Create new analysis dataset. Need to do it in PROC SQL, because ANALYSISDATA

* and BOOTSAMP both have more than one record per patid.

********************************************************************************;

proc sql;

create table bootanaldata as

select a.*, b.newpatid, b.sampnum

from &DATA a, bootsamp (where=(sampnum=&REP)) b

where a.patid=b.patid

order by sampnum, newpatid, month;

quit;

data bootanaldata;

set bootanaldata (drop=patid);

rename newpatid=patid;

run;

%end;

*************************************************************************************;

** Write a line to the log so I can see where different bootstrap replicates start **;

*************************************************************************************;

data _null_;

file print;

put "------";

put "REP=&REP";

put "------";

run;

%printset(data=&DATA, obs=100, vars=patid month &TRT_MODEL_VARS &LASTMONTH_MODEL_VARS &FIRSTTRT_MODEL_VARS);

* Add some covariates used to calculate p(nodropout) and p(treated);

data analysisdata;

set bootanaldata;

if (dayssincelastvisit=.) then dayssincelastvisit=0;

if (month<=23);

dayssincelastvisitsqrd=dayssincelastvisit*dayssincelastvisit;

dayssincelastvisitxlnx=0;

if (dayssincelastvisit>0) then dayssincelastvisitxlnx=dayssincelastvisit*log(dayssincelastvisit);

if (month=0) then monthxlnx=0;

if (month>0) then monthxlnx=month*log(month);

run;

*******************************************************************;

* Get probability of not dropping out. (We have not specified DESCENDING, so this is modeling the

* probability lastobservedmonth=0, i.e. not dropping out at this month.

*******************************************************************;

proc logistic data=analysisdata &PRINTIND;

model lastobservedmonth=&LASTMONTH_MODEL_VARS;

output out=analysisdata p=p_nodropout;

run;

%printset(data=analysisdata, obs=100, vars=patid sampnum &LASTMONTH_MODEL_VARS, wherest=%str(p_nodropout=.));

************************************************;

** Estimate probability of treatment initiation ;

************************************************;

proc logistic data=analysisdata descending &PRINTIND;

by sampnum;

model firsttreated=&FIRSTTRT_MODEL_VARS;

output out=p_treated p=p_treated;

where ((treated=0 or firsttreated=1) and month<=12);

run;

%printset(data=p_treated, obs=100, vars=sampnum patid &FIRSTTRT_MODEL_VARS, wherest=%str(p_treated=.));

data ps;

merge analysisdata (in=a)

p_treated (in=b keep=sampnum patid month p_treated);

by sampnum patid month;

if not a then put "ERROR: not a! " sampnum= patid= month=;

if month>23 then put "ERROR: month>23 ! " sampnum= patid= month=;

run;

**********************************************************************************;

* Create variables nodrop_0-nodrop_23. ;

* nodrop_j=Pr(Month j is not last month|past)=Pr(Not censored at month (j+1)|past);

**********************************************************************************;

proc transpose data=ps out=nodrops(drop=_name_) prefix=nodrop_;

by sampnum patid;

var p_nodropout;

id month;

run;

****************************************************************************************;

* This is a little tricky to get Wk1. Only [] is used to refer to the elements of arrays.

* nodrop_j = Pr(Not censored at month (j+1)|past)

* nodrop_arr[i] = nodrop_(i-1) {b/c nodrop_0 is 1st element of array, nodrop_1 is 2nd element, ...}

* = Pr(Not censored at month i|past)

* So when we divide by nodrop_arr[i] from i=k+1 to i=k+12, we are dividing by

* Pr(Not censored at month (k+1)|past) through Pr(Not censored at month (k+12)|past),

* which is what we want.

****************************************************************************************;

data ps;

merge ps

nodrops;

by sampnum patid;

if (month<=12);

array nodrop_arr nodrop_0-nodrop_24;

Wk1new=1;

*************************************************************************************;

* This is the new weight, using the product from (k+1) to (k+12).

*************************************************************************************;

do i=month+1 to month+12;

if nodrop_arr[i] ne . then Wk1new=Wk1new/nodrop_arr[i];

end;

if Wk1new&MAXWGT then do;

put "WARNING: Truncated weight at &MAXWGT. " sampnum= patid= month= Wk1new=;

Wk1new=&MAXWGT;

end;

drop nodrop_:;

run;

*Weights range from 1.016 to 7.94, median 1.45;

*For bootstrap, trim weights at say 10;

*but check how that affects the resulting SEs;

%if &REP=0 %then %do;

proc means data=ps min p25 median p75 max;

class sampnum;

var Wk1new;

run;

%end;

data predtrtandtrt2;

set ps;

if (monthy ne .);

if (treated=0 or firsttreated=1);

_outcome=(&OUTCOME);

*********************************************************;

* The next four lines define some redundant variables.

* This helps in debugging because I can compare side-by-side with the macro

* that uses the optimal qs (which uses these variables).

*********************************************************;

k=month;

trt_k=trt;

trt2=trt*trt;

treated_m=treated;

if trt_k ne 0 then TrtKne0=1;

else TrtKne0=0;

trt2_k=trt_k*trt_k;

if treated_m=1 then Ameq0=0;

else if treated_m=0 then Ameq0=1;

if treated_m=0 and trt_k ne 0 then Ameq0TrtKne0=1;

else Ameq0TrtKne0=0;

**************************************************;

* Added for newpsi4 - rgg

**************************************************;

if month_firsttrt=. then do;

TtimesTrtK=0;

T2timesTrtK=0;

end;

else do;

TtimesTrtK=trt_k*month_firsttrt;

T2timesTrtK=trt_k*month_firsttrt*month_firsttrt;

end;

ksqrd=k*k;

%if &INC_LVLTR=Y %then %do;

if trt_k=0 then lvl_t_trt_k=0;

else lvl_t_trt_k=trt_k*lvl_firsttrt;

lvl_t_trt=lvl_t_trt_k;

%end;

%if &INC_LVLAK11=Y %then %do;

*lvl_T_T_lt_kp12=lvl_firsttrt*TrtKne0;

if (month_firsttrt=. or lvl_firsttrt=.) then lvl_T_T_lt_kp12=0;

else lvl_T_T_lt_kp12=(month_firsttrt<k+12)*lvl_firsttrt;

if (lvl_T_T_lt_kp12>0) ne (TrtKne0) then put "ERROR: something wrong! ";

%end;

run;

proc logistic data=predtrtandtrt2 descending &PRINTIND;

by sampnum;

model TrtKne0=&TRT_MODEL_VARS;

output out=predtrtandtrt2 p=p_TrtKne0;

** Build the model only on patients with TREATED_M=0, but apply to everyone;

weight Ameq0;

run;

proc reg data=predtrtandtrt2 &PRINTIND;

by sampnum;

model trt_k=&TRT_MODEL_VARS;

output out=predtrtandtrt2 p=predtrtcondno0_direct;

** Build the model only on patients with TREATED_M=0 and TRT_K ne 0;

weight Ameq0Trtkne0;

run;

quit;

************************************************;

** rgg - Added for newpsi4;

************************************************;

proc reg data=predtrtandtrt2 &PRINTIND;

by sampnum;

model TtimesTrtK=&TRT_MODEL_VARS;

output out=predtrtandtrt2 p=pred_TtimesTrtK_condno0;

** Build the model only on patients with TREATED_M=0 and TRT_K ne 0;

weight Ameq0Trtkne0;

run;

quit;

proc reg data=predtrtandtrt2 &PRINTIND;

by sampnum;

model T2timesTrtK=&TRT_MODEL_VARS;

output out=predtrtandtrt2 p=pred_T2timesTrtK_condno0;

** Build the model only on patients with TREATED_M=0 and TRT_K ne 0;

weight Ameq0Trtkne0;

run;

quit;

data predtrtandtrt2;

set predtrtandtrt2;

predtrt =p_TrtKne0*predtrtcondno0_direct;

pred_TtimesTrtK =p_TrtKne0*pred_TtimesTrtK_condno0;

pred_T2timesTrtK=p_TrtKne0*pred_T2timesTrtK_condno0;

run;

%printset(data=predtrtandtrt2, obs=100, vars=patid month predtrt month_firsttrt trt pred_TtimesTrtK pred_T2timesTrtK);

%if &REP=0 %then %do;

proc univariate data=predtrtandtrt2;

var predtrt;

*histogram predtrt;

run;

%end;

proc reg data=predtrtandtrt2 &PRINTIND;

by sampnum;

model trt2_k=&TRT_MODEL_VARS;

output out=predtrtandtrt2 p=predtrt2condno0_direct;

** Build regression on TREATED_M=0 and TRT_K ne 0;

weight Ameq0TrtKne0;

run;

quit;

data predtrtandtrt2;

set predtrtandtrt2;

predtrt2=p_TrtKne0*predtrt2condno0_direct;

run;

%printset(data=predtrtandtrt2, obs=200, vars=_all_);

*****************************************************;

* Calculate predicted RNA(T)*Tr ;

*****************************************************;

%if &INC_LVLTR=Y %then %do;

proc reg data=predtrtandtrt2 &PRINTIND;

by sampnum;

model lvl_t_trt_k = &TRT_MODEL_VARS;

weight Ameq0TrtKne0;

output out=predtrtandtrt2 p=p_rnatr_condno0;

run;

data predtrtandtrt2;

set predtrtandtrt2;

predrnatr=p_rnatr_condno0*p_TrtKne0;

run;

%printset(data=predtrtandtrt2, obs=100, vars=patid month lvl treated_m predrnatr lvl_t_trt_k);

%end;

*****************************************************;

* Calculate predicted E(lvl_T*A_k+11|A_m=0) ;

*****************************************************;

***************************;

* Get Pr(A_k+11=1|A_m=0) **;

***************************;

%if &INC_LVLAK11=Y %then %do;

proc reg data=predtrtandtrt2 &PRINTIND;

weight Ameq0TrtKne0;

model lvl_T_T_lt_kp12=&TRT_MODEL_VARS;

output out=predtrtandtrt2 p=pred_lvl_T_cond_Tltkp12;

run;

data predtrtandtrt2;

set predtrtandtrt2;

pred_lvlk11=p_TrtKne0*pred_lvl_T_cond_Tltkp12;

run;

%end;

******************************************************;

* These are needed for the doubly robust estimators ;

* Unlike the above models, which are built using ;

* patients with treatment history up to A_k=0, these;

* are built using patients with treatment history up;

* to A_k-1=0, i.e. all obs in our dataset ;

******************************************************;

proc logistic data=predtrtandtrt2 descending &PRINTIND;

by sampnum;

model TrtKne0=&TRT_MODEL_VARS;

output out=predtrtandtrt2 p=p_nonzero_akm1;

run;

proc reg data=predtrtandtrt2 &PRINTIND;

by sampnum;

weight TrtKne0;

model trt_k=&TRT_MODEL_VARS;

output out=predtrtandtrt2 p=predtrtcondno0_direct_akm1;

run;

proc reg data=predtrtandtrt2 &PRINTIND;

by sampnum;

weight TrtKne0;

model TtimesTrtK=&TRT_MODEL_VARS;

output out=predtrtandtrt2 p=pred_TtimesTrtK_condno0_akm1;

run;

proc reg data=predtrtandtrt2 &PRINTIND;

by sampnum;

weight TrtKne0;

model T2timesTrtK=&TRT_MODEL_VARS;

output out=predtrtandtrt2 p=pred_T2timesTrtK_condno0_akm1;

run;

%if &INC_LVLTR=Y %then %do;

proc reg data=predtrtandtrt2 &PRINTIND;

by sampnum;

weight TrtKne0;

model lvl_t_trt_k = &TRT_MODEL_VARS;

output out=predtrtandtrt2 p=p_rnatr_condno0_akm1;

run;

%end;

%if &INC_TRT2=Y %then %do;

proc reg data=predtrtandtrt2 &PRINTIND;

by sampnum;

weight TrtKne0;

model trt2_k=&TRT_MODEL_VARS;

output out=predtrtandtrt2 p=predtrt2condno0_direct_akm1;

run;

%end;

proc reg data=predtrtandtrt2 &PRINTIND;

by sampnum;

model _outcome=&TRT_MODEL_VARS;

output out=predtrtandtrt2 p=pred_outcome_akm1;

run;

%if &INC_LVLAK11=Y %then %do;

proc reg data=predtrtandtrt2 &PRINTIND;

by sampnum;

weight TrtKne0;

model lvl_T_T_lt_kp12=&TRT_MODEL_VARS;

output out=predtrtandtrt2 p=pred_lvl_T_cond_Tltkp12_akm1;

run;

%end;

data predtrtandtrt2;

set predtrtandtrt2;

predtrt_akm1 =p_nonzero_akm1*predtrtcondno0_direct_akm1;

pred_TtimesTrtK_akm1 = p_nonzero_akm1*pred_TtimesTrtK_condno0_akm1;

pred_T2timesTrtK_akm1 = p_nonzero_akm1*pred_T2timesTrtK_condno0_akm1;

%if &INC_TRT2=Y %then %do;

predtrt2_akm1 =p_nonzero_akm1*predtrt2condno0_direct_akm1;

%end;

%if &INC_LVLTR=Y %then %do;

predrnatr_akm1=p_nonzero_akm1*p_rnatr_condno0_akm1;

%end;

%if &INC_LVLAK11=Y %then %do;

pred_lvlk11_akm1=p_nonzero_akm1*pred_lvl_T_cond_Tltkp12_akm1;

%end;

run;

%if &REP=0 %then %do;

data &OUTLIB..savepred;

set predtrtandtrt2;

run;

proc means data=predtrtandtrt2 n nmiss;

title "Check for missing";

var _numeric_;

run;

%end;

%let DIMPSI=4;

%macro make_matrix();

%local QINDEX TRINDEX;

data matrix;

set predtrtandtrt2;

%do QINDEX=1 %to &DIMPSI;

vec&QINDEX=Wk1new*(treated-p_treated)*&HY*&Q&QINDEX;

%do TRINDEX=1 %to &DIMPSI;

mat&QINDEX.&TRINDEX=Wk1new*(treated-p_treated)*&Q&QINDEX*&TREFF&TRINDEX;

%end;

%end;

run;

proc means data=matrix;

by sampnum;

var vec1-vec4 mat11-mat14 mat21-mat24 mat31-mat34 mat41-mat44;

output out=matrixmean mean=;

title "MATRIX";

run;

%mend make_matrix;

%make_matrix;

%macro renamemm;

data matrixmean;

set matrixmean;

%local row col;

%do row=1 %to &DIMPSI;

rename vec&ROW=meanvec&ROW;

%do col=1 %to &DIMPSI;

rename mat&ROW.&COL=meanmat&ROW.&COL;

%end;

%end;

run;

%mend renamemm;

%renamemm;

data vectormean (keep=sampnum meanvec1-meanvec4)

matrixmean (drop=meanvec1-meanvec4);

set matrixmean;

run;

%let FIRSTSAMP=0;

proc sql noprint;

select max(sampnum) into :LASTSAMP from bootanaldata;

quit;

%if &ECHOREP=Y %then %do;

data _null_;

call system("echo Running sample: &REP \- &OUTLIB");

run;

%end;

data vectormean_i; set vectormean; where sampnum=&REP; run;

data matrixmean_i; set matrixmean; where sampnum=&REP; run;

*********************************************************;

* This macro solves the estimating equations in PROC IML ;

* The parameter NUM is the number of psis in the model ;

*********************************************************;

%macro solveeqs(num=);

%local row col;

proc iml;

use vectormean_i;

read all var {%do ROW=1 %to &NUM; meanvec&ROW %end;} into vector;

vector=t(vector);

%if &REP=0 %then print vector;;

close vectormean_i;

use matrixmean_i;

%do ROW=1 %to &NUM;

%do COL=1 %to &NUM;

read all var {meanmat&ROW.&COL} into m&ROW.&COL;

%end;

%end;

matrix=j(&NUM,&NUM,0);

*print matrix;

%do ROW=1 %to &NUM;

%do COL=1 %to &NUM;

matrix[&ROW, &COL]=m&ROW.&COl;

%end;

%end;

%if &REP=0 %then print matrix;;

close matrixmean_i;

psi=solve(matrix,vector);

*Check the difference with calculating inverse of matrix;

invmatrix=inv(matrix);

check=invmatrix*vector-psi;

%if &REP=0 %then %do;

print check;

print psi;

%end;

psi=t(psi);

detmat&NUM=det(matrix);

eigenvalues&NUM = eigval(matrix);

%if &REP=0 %then %do;

print psi;

print invmatrix;

print detmat&NUM;

print eigenvalues&NUM;

%end;

create psihat&NUM from psi [colname={%do ROW=1 %to &NUM; "psihat&NUM.&ROW" %end; }];

append from psi;

close psihat&NUM;

*End of proc IML;

abort;

%mend solveeqs;

%solveeqs(num=4);

%solveeqs(num=3);

%solveeqs(num=2);

*%printset(data=psihat6);

* Add a dummy variable so we have something to merge on;

data psihat4; set psihat4; dummy=1; run;

data psihat3; set psihat3; dummy=1; run;

data psihat2; set psihat2; dummy=1; run;

data psihat_i (drop=dummy);

merge psihat4 psihat3 psihat2;

by dummy;

if _N_>1 then put "ERROR: N>1";

run;

data psihat;

set %if &REP&FIRSTSAMP %then psihat ; psihat_i (in=a);

if a then sampnum=&REP;

run;

%end; /* do loop over REP=number of bootstrap samples */

%mend dobootstrap;

%dobootstrap;

options mprint notes;

data plotdata;

do sampnum=0 to &numreps;

do month=0 to 12;

output;

end;

end;

run;

data plotdata;

merge plotdata psihat;

by sampnum;

* If unit were years, this would be 1;

factor=12;

effectJRE2_12=factor*psihat21+psihat22*factor*factor*month/12;

label effectJRE2_12="Estimated treatment effect; 2 psi's";

effectJRE3_12=factor*psihat31+psihat32*factor*factor*month/12+psihat33*factor*factor*factor*month*month/144;

label effectJRE3_12="Estimated treatment effect; 3psi's";

effectJRE4_12=factor*psihat41+psihat42*factor*factor*month/12+psihat43*factor*factor*factor*month*month/144+psihat44*factor*factor;

label effectJRE4_12="Estimated treatment effect 4-par nonlinear in duration, if estimated";

label psihat21="Estimate of psi_1 in two parameter model"

psihat22="Estimate of psi_2 in two parameter model"

psihat31="Estimate of psi_1 in three parameter model"

psihat32="Estimate of psi_2 in three parameter model"

psihat33="Estimate of psi_3 in three parameter model"

psihat41="Estimate of psi_1 in four parameter model"

psihat42="Estimate of psi_2 in four parameter model"

psihat43="Estimate of psi_3 in four parameter model"

psihat44="Estimate of psi_4 in four parameter model"

sampnum ="Bootstrap replicate (0=original data)"

month ="Month";

drop factor;

run;

*Number of patients with a visit;

data analysisdata;

set &DATA;

if (treated=0 or firsttreated=1);

if (month<=12);

run;

proc sort data=analysisdata;

by month;

run;

proc means data=analysisdata;

by month;

var cd4;

output out=visits mean=meancd4 N=Nvisits;

run;

data &OUTLIB..bootplotdata;

set plotdata;

run;

proc sort data=plotdata; by month; run;

proc summary data=plotdata std min p5 median p95 max;

by month;

var effectJRE:;

output out=plotdata_summ mean= std= p5= median= p95= / autoname;

** only > 0 since sampnum=0 is the original dataset;

where sampnum>0;

run;

%let alphalev = .05;

%let a1 = %sysevalf(&alphalev/2*100);

%let a2 = %sysevalf((1 - &alphalev/2)*100);

* creating confidence interval, percentile method;

%let effectlist=effectJRE2_12 effectJRE3_12 effectJRE4_12;

proc univariate data = plotdata alpha = .05;

by month;

var &EFFECTLIST;

output out=pmethod mean = meaneffect pctlpts=&a1 &a2 pctlpre = &EFFECTLIST pctlname = _lb _ub ;

where sampnum>0;

run;

data plotdata;

merge plotdata_summ visits plotdata (where=(sampnum=0)) pmethod;

by month;

drop sampnum;

run;

proc print data=plotdata (drop=psihat:);

title "FINAL RESULTS";

run;

%exit:

%mend nonopt_macro;