*---------------------------- GEESIZE
------------------------------+
GEESIZE version 3.1 is a collection of
PROC IML routines designed to compute
the minimum sample sizes required in
preparation for an applied
research effort using correlated dependent
data. It is based on the macro GEESIZE,
version 2.1 by James Rochon, Ph.D., and has
been extended by Dipl.-Stat. Gerlinde
Dahmen to allow for an
independent working correlation structure.
For a detailed explanation see the
documentation file 'geesize3_1.txt'.
Disclaimer
----------
This software is provided by James
Rochon, Ph.D., and Dipl.-Stat. Gerlinde Dahmen as
a public service. It is provided 'as
is'. There are no warranties, expressed or implied,
as to merchantability or fitness for a
particular purpose or regarding the accuracy of
the materials or code contained herein.
************************************************************************
Date:
01/03/03 (Version 3.1)
Author:
James Rochon and Gerlinde Dahmen
Institut
fuer Medizinische Biometrie und Statistik
Universitaet
zu Luebeck, Deutschland
Original
version: Version 2.1
Date:
Author: James Rochon, Ph.D.
The
***********************************************************************;
*----------------------------- GEESIZE
----------------------------|
| The following program can be used to invoke the
macro. |
*--------------------------------------------------------------------|
*
* PROC IML;
*
*
%INCLUDE 'C:\...\GeeSize.sas' / NOSOURCE2;
*
* RUN BEGIN;
* _MU_ = 0; /* (SxT) matrix of expected
values */
* _STD_ = 1; /* (SxT) standard deviations */
* _PI_ = 1; /* (1xS) relative sizes in sub
pops */
* _TAU_ = 1; /* (SxT) prob of first t time points */
*
_TIMES_ = 0; /* (1xT) vector of time points */
* _RHO_ = 0; /* (scalar) autocorrelation */
* _PSI_ = 1; /* (scalar) damping parameter */
* _PHI_ = 1; /* (scalar) scale parameter */
* _X_ = 0; /* (STxr) design matrix */
* _H_ = 0; /* (hxr) hypothesis matrix */
* _h0_ = 0; /* (hx1) vector of constant
terms */
*
_LINK_ = 'Identity'; /* (Scalar) link function
{'Identity',
* 'Log', 'Recip', 'Logit', 'CLoglog' */
*
_VARI_ = 'Gaussian'; /* (Scalar) variance
function
* 'Gaussian', 'Poisson', 'Gamma' or
* 'Bernoulli'. */
*
*
_TYPE_I = 0.05;
/* Type I error rate */
*
_TYPE_II = 0.20; /* Type II error rate */
*
_PRINT_ = 1; /* Level of detail in printout */
* _FLAG_H = 0; /* (Scalar) header
flag */
*
*
_TNAME_ = { 'Time 1' 'Time 2'
'Time 3' 'Time 4' 'Time 5'
* 'Time 6' 'Time 7' 'Time 8'
'Time 9' 'Time 10'
* 'Time 11' 'Time 12' 'Time 13'
'Time 14' 'Time 15'
* 'Time 16' 'Time 17' 'Time 18' 'Time 19'
'Time 20' };
*
*
_SNAME_ = { 'SubPop 1' 'SubPop 2'
'SubPop 3' 'SubPop 4' 'SubPop 5'
* 'SubPop 6' 'SubPop 7' 'SubPop 8'
'SubPop 9' 'SubPop 10'
* 'SubPop 11' 'SubPop 12' 'SubPop 13'
'SubPop 14' 'SubPop 15'
* 'SubPop 16' 'SubPop 17' 'SubPop 18'
'SubPop 19' 'SubPop 20' };
*
* RUN
GEESIZE;
*
* quit;
*--------------------------------------------------------------------|
| Output description
|
*--------------------------------------------------------------------|
|
|
| _N_ ... (1x1)
The minimum sample size in EACH subpopulation.
|
| |
| _TOT_N ... (1x1)
The minimum TOTAL sample size across all sub-
|
| subpopulations
for the entire study. |
|
|
| _DF_HYP ... (1x1) The hypothesis degrees of freedom for
the Wald |
| chi-square
test statistic.
|
|
|
| _CRIT_ ... (1x1)
The critical value for significance from the
|
| Wald chi-square test
statistic. |
|
|
| _NC_ ... (1x1)
The value of the tablulated non-centrality
|
| parameter
from the Wald chi-square test statistic.
|
|
|
| _BETA_N ... (1x1) The computed type-II error rate at
the derived |
| sample size from the Wald
chi-square test statistic. |
|
|
| _STATS_ ... (1x9) An output vector gathering the most
important |
| input
and output parameters. It consists
of: |
| _RHO_, _PSI_, _PHI_, _N_, _TOT_N,
_DF_HYP, _CRIT_, |
| _NC_, and _BETA_N. |
|
|
+--------------------------------------------------------------------;
START BEGIN;
/* Initialize some
matrices at the beginning of the code
*/
_BLANK_ = ' '; /* (Scalar) blank character */
_MU_ = 0; /* (SxT) matrix of expected values */
_STD_ = 1; /* (SxT) standard deviations */
_PI_ = 1; /* (1xS) relative sizes in sub pops */
_TAU_ = 1; /* (SxT) prob of first t time points */
_TIMES_ = 0; /* (1xT) vector of time points */
_RHO_ = 0; /* (scalar) autocorrelation */
_PSI_ = 1; /* (scalar) damping parameter */
_PHI_ = 1; /* (scalar) scale parameter */
_X_ = 0; /* (STxr) design matrix */
_H_ = 0; /* (hxr) hypothesis matrix */
_h0_ = 0; /* (hx1) vector of constant terms */
_LINK_ = 'Identity'; /* (Scalar)
link function */
_VARI_ = 'Gaussian'; /* (Scalar)
variance function */
_TYPE_I = 0.05;
/* Type I error rate */
_TYPE_II = 0.20; /* Type II error rate */
_PRINT_ = 1; /* Level of detail in printout */
_FLAG_H = 0; /* (Scalar) header
flag */
_TNAME_ = { 'Time 1' 'Time
2' 'Time 3'
'Time 4' 'Time 5'
'Time 6'
'Time 7' 'Time 8'
'Time 9' 'Time 10'
'Time 11' 'Time 12' 'Time 13' 'Time 14' 'Time 15'
'Time 16' 'Time 17' 'Time 18' 'Time 19' 'Time 20' };
_SNAME_ = { 'SubPop 1' 'SubPop
2' 'SubPop 3' 'SubPop 4' 'SubPop 5'
'SubPop 6' 'SubPop 7' 'SubPop 8' 'SubPop 9' 'SubPop 10'
'SubPop 11' 'SubPop 12' 'SubPop 13' 'SubPop 14' 'SubPop 15'
'SubPop 16' 'SubPop 17' 'SubPop 18' 'SubPop 19' 'SubPop 20' };
RESET NONAME NOAUTONAME; /* Reset some options for IML */
FINISH; /* End of the BEGIN subroutine */
*---------------------------- HEADER
------------------------------;
START HEADER;
PRINT ,,
'+--------------------------------------------------------+',
'|
|',
'| G E E S I Z E |',
'|
|',
'|
Application of GEE Procedures to |',
'| |',
'|
Sample Size Calculations for |',
'|
|',
'|
Repeated Measures Experiments |',
'| |',
'|
|',
'|
V E R S I O N 3
. 1 |',
'| |',
'|
Copyright (c) 1999 and 2003 |',
'|
|',
'|
By James Rochon, Ph.D.,
|',
'| and Dipl.-Stat. Gerlinde
Dahmen |',
'|
|',
'+--------------------------------------------------------+';
_FLAG_H = 1; /* Trip the header flag */
FINISH;
/* End of the HEADER subroutine */
*---------------------------- DIVIDER
------------------------------;
START DIVIDER;
PRINT ,, '=
= = = = = = = = = = = = = = = = = = = = = =';
FINISH; /* End of the DIVIDER Subroutine */
*----------------------------- CHECKING
----------------------------;
START CHECKING;
/* Print out the header
and make it look impressive! */
IF _FLAG_H = 0
THEN RUN HEADER;
/* Some
basic housekeeping operations.
*/
E_FLAG = 0; /* Error flag */
_S_ = NROW(_MU_); /* No. subpopulations */
_T_ = NCOL(_MU_); /* No. time points
*/
_ST_ = _S_#_T_;
_LINK_ = UPCASE(COMPRESS(_LINK_));
IF (NROW(_LINK_)#NCOL(_LINK_)) > 1 THEN DO;
PRINT ,'CHECKING: The _LINK_ function is not of the correct',
'dimension. Only the (1,1) element will be used.';
_LINK_ = _LINK_[1,1];
END;
CHECK = _LINK_='IDENTITY' | _LINK_='LOGIT'
|
_LINK_='LOG' | _LINK_='RECIP'
|
_LINK_='CLOGLOG';
IF CHECK = 0 THEN DO;
E_FLAG = E_FLAG + 1;
/* Trip the error flag */
PRINT ,'CHECKING: The _LINK_ function is not',
'one of the appropriate
types';
END;
IF _MU_ = 0 THEN DO;
E_FLAG = E_FLAG + 1;
PRINT ,'CHECKING: The matrix of expected values _MU_',
'has not been specified';
END;
IF _LINK_ = 'LOGIT' | _LINK_ = 'CLOGLOG' THEN DO;
IF
ANY(_MU_ <= 0)
| ANY(_MU_ >= 1) THEN DO;
E_FLAG = E_FLAG + 1; /* Trip the
error flag */
PRINT ,'CHECKING: The _MU_ matrix takes on
inadmissible',
'values. 0 < _MU_ < 1 are the only appropriate
values';
END;
END;
ELSE IF _LINK_ = 'LOG'
| _LINK_ = 'RECIP' THEN DO;
IF
ANY(_MU_) <= 0
THEN DO;
E_FLAG = E_FLAG + 1; /* Trip the
error flag */
PRINT ,'CHECKING: The _MU_ matrix takes on
inadmissible',
'values. _MU_ > 0 are the only appropriate values';
END;
END;
IF (NROW(_PHI_)#NCOL(_PHI_)) > 1 THEN DO;
PRINT ,'CHECKING: The _PHI_ parameter should be a scalar.',
'Only the (1,1) element will
be used.';
_PHI_ = _PHI_[1,1];
END;
_VARI_ = UPCASE(COMPRESS(_VARI_));
IF (NROW(_VARI_)#NCOL(_VARI_)) > 1 THEN DO;
PRINT ,'CHECKING: The _VARI_ function is not
of the',
'correct dimension. Only the
(1,1) element will be used.';
_VARI_ = _VARI_[1,1];
END;
CHECK = _VARI_='GAUSSIAN' | _VARI_='POISSON' |
_VARI_='GAMMA' | _VARI_='BERNOULLI';
IF CHECK = 0 THEN DO;
E_FLAG = E_FLAG + 1; /* Trip the
error flag */
PRINT ,'CHECKING: The _VARI_ function is
not',
'one of the appropriate
types';
END;
IF _VARI_='GAUSSIAN' THEN DO;
IF ANY(_STD_ <= 0) THEN DO;
E_FLAG = E_FLAG + 1; /* Trip the
error flag */
PRINT ,'CHECKING: _STD_ contains invalid values.',
'_STD_ > 0 are the only legitimate values.';
END;
IF (NROW(_STD_)#NCOL(_STD_)
= 1) THEN /* Convert scalar */
_STD_ = J(_S_,_T_,_STD_); /* to (SxT) matrix */
ELSE
DO;
IF
NROW(_STD_) ^= _S_ | NCOL(_STD_) ^= _T_ THEN DO;
E_FLAG = E_FLAG + 1; /* Trip the error flag
*/
PRINT ,'CHECKING: _STD_ is not of the correct dimension.',
'It should be a scalar or an (SxT) matrix.';
END; /* Of ELSE loop
*/
IF (ANY(_STD_
^= 1) & _PHI_ ^= 1) THEN
PRINT 'CHECKING: Note that you have specified values for',
'both variance parameters,
_STD_ AND _PHI_. You may',
'want to use just one of
these parameters';
END; /* Of IF(ANY ... loop */
END; /* Of IF _VARI_ loop
*/
IF _X_=0 THEN DO;
IF
_PRINT_ >= 1 THEN
PRINT 'CHECKING: The design matrix _X_ has not been specified.',
'The one-way ANOVA between-subjects design is assumed';
_X_ = I(_S_) @
J(_T_,1,1);
END;
_R_ = NCOL(_X_); /* No. predictor variables in X */
IF _R_ > _ST_ | NROW(_X_) ^=
_ST_ THEN DO;
E_FLAG = E_FLAG + 1; /* Trip the
error flag */
PRINT ,'CHECKING: The design matrix _X_ is not of the',
'correct dimension. It should be an (STxR) matrix.';
END;
IF _TIMES_ = 0
THEN DO; /* Default value */
IF
_PRINT_ >= 1 THEN
PRINT 'CHECKING: The _TIMES_ vector is not specified',
'Equally-spaced time points are assumed';
_TIMES_ = (1:_T_); /* Equally spaced intervals */
END;
IF NROW(_TIMES_) > 1 | NCOL(_TIMES_) ^= _T_ THEN DO;
E_FLAG = E_FLAG + 1; /* Trip the
error flag */
PRINT ,'CHECKING: The _TIMES_ vector is not of the',
'correct dimension. It should be a (1xT) vector.';
END;
IF ALL(_PI_=1) THEN _PI_ = J(1,_S_,1);
ELSE DO;
IF NROW(_PI_) > 1 | NCOL(_PI_) ^= _S_ THEN DO;
E_FLAG = E_FLAG + 1; /* Trip the
error flag */
PRINT ,'CHECKING: The _PI_ vector is not of the',
'correct dimension. It should be a (1xS) vector.';
END;
IF
ANY(_PI_ <= 0)
THEN DO;
E_FLAG = E_FLAG + 1; /* Trip the
error flag */
PRINT ,'CHECKING: The _PI_ vector contains invalid',
'values. It should consist of strictly positive values.';
END;
END;
IF ANY(_TAU_^= 1) THEN DO;
IF
NROW(_TAU_) ^= _S_
| NCOL(_TAU_) ^= _T_ THEN DO;
E_FLAG = E_FLAG + 1; /* Trip the
error flag */
PRINT ,'CHECKING: The _TAU_ vector is not of the',
'correct dimension. It should be an (SxT) matrix.';
END;
IF
ANY(_TAU_ < 0)
| ANY(_TAU_ > 1) THEN DO;
E_FLAG = E_FLAG + 1; /* Trip the
error flag */
PRINT ,'CHECKING: The _TAU_ vector contains invalid values.',
'0 <= _TAU_ <= 1 are the only legitimate values.';
END;
_CHECK_ = _TAU_[,+];
IF
ANY(_CHECK_ ^= 1)
THEN DO;
E_FLAG = E_FLAG + 1; /* Trip the
error flag */
PRINT ,'CHECKING: The _TAU_ vector contains invalid values.',
'Within each row, the probabilities must sum to 1',
'across the columns.';
END; FREE _CHECK_;
END;
IF (NROW(_RHO_)#NCOL(_RHO_)) > 1 THEN DO;
PRINT ,'CHECKING: The _RHO_ parameter should be a scalar.',
'Only the (1,1) element will
be used.';
_RHO_ = _RHO_[1,1];
END;
IF (_RHO_ < 0 | _RHO_ >= 1) THEN DO;
E_FLAG = E_FLAG + 1; /* Trip the error
flag */
PRINT ,'CHECKING: The _RHO_ vector takes on inadmissible',
'values. 0 <= _RHO_ < 1 are the only appropriate
values';
END;
IF (NROW(_PSI_)#NCOL(_PSI_)) > 1 THEN DO;
PRINT ,'CHECKING: The _PSI_ parameter should be a scalar.',
'Only the (1,1) element will
be used.';
_PSI_ = _PSI_[1,1];
END;
IF _PSI_ < 0
THEN DO;
E_FLAG = E_FLAG + 1; /* Trip the
error flag */
PRINT ,'CHECKING: The _PSI_ vector takes on inadmissible',
'values. _PSI_ >= 0 are the only appropriate
values';
END;
IF _PHI_ < 0
THEN DO;
E_FLAG = E_FLAG + 1; /* Trip the
error flag */
PRINT ,'CHECKING: The _PHI_ vector takes on inadmissible',
'values. _PHI_ >= 0 are the only appropriate
values';
END;
IF _H_=0 THEN DO;
IF
_PRINT_ >= 1 THEN
PRINT 'CHECKING: An hypothesis
matrix _H_ has not been',
'specified. The default value will be used instead';
_H_ = J((_S_-1),1,1)
|| -I(_S_-1);
END;
_HH_ = NROW(_H_);
IF _HH_ > _R_ | NCOL(_H_) ^=
_R_ THEN DO;
E_FLAG = E_FLAG + 1; /* Trip the
error flag */
PRINT ,'CHECKING: The hypothesis matrix _H_ is not of the',
'correct dimension. It should be an (_HH_x_R_) matrix';
END;
IF _h0_=0 THEN DO;
IF
_PRINT_ >= 1 THEN
PRINT 'CHECKING: The hypothesis matrix _H0_ has not been',
'specified. The default value will be used instead';
_H0_ = J(_HH_,1,0);
END;
IF NROW(_h0_) ^= _HH_ |
NCOL(_h0_) > 1 THEN DO;
E_FLAG = E_FLAG + 1; /* Trip the
error flag */
PRINT ,'CHECKING: The hypothesis matrix _h0_ is not of the',
'correct dimension. It should be an (_HH_x1) matrix';
END;
IF E_FLAG >= 1
THEN DO;
PRINT ,'Because of errors in the input matrices, GEESIZE is',
'now terminating. Consult the output for the details.';
PRINT E_FLAG ' error(s) have been detected';
STOP;
END;
FINISH; /* End of the CHECKING Subroutine */
*----------------------------- PRINTBAK
----------------------------;
START PRINTBAK;
/* Print back the model
parameters with the appropriate details */
_STATS_ = _S_ || _T_;
_CLNAME_ = {'No. SubPopns' 'No. Times'};
PRINT ,'Basic model parameters:';
PRINT _STATS_ [ROWNAME=_BLANK_ COLNAME=_CLNAME_
FORMAT=8.0];
IF _PRINT_ >= 2
THEN DO;
_STATS_ = _TYPE_I || _TYPE_II;
_CLNAME_ = { 'Type I' 'Type
II'};
PRINT ,'Type I and type II error rates';
PRINT _STATS_ [ROWNAME=_BLANK_
COLNAME=_CLNAME_ FORMAT=8.2];
PRINT ,'The time points of the repeated measures';
PRINT _TIMES_ [ROWNAME=_BLANK_
COLNAME=_TNAME_ FORMAT=8.0];
END;
PRINT ,'The set of expected values by subpopulation over
time:';
PRINT _MU_ [ROWNAME=_SNAME_
COLNAME=_TNAME_ FORMAT=12.4];
IF _VARI_='GAUSSIAN' THEN DO;
PRINT ,
'The set of standard
deviations by subpopulation over time:';
PRINT _STD_ [ROWNAME=_SNAME_
COLNAME=_TNAME_ FORMAT=12.4];
END;
IF ANY(_PI_ ^= 1) THEN DO;
PRINT ,'Relative sample sizes in the different
subpopulations';
PRINT _PI_ [ROWNAME=_BLANK_
COLNAME=_SNAME_ FORMAT=8.2];
END;
IF ANY(_TAU_ ^= 1) THEN DO;
PRINT ,'The marginal probabilities in the censored sample
problem';
PRINT _TAU_ [ROWNAME=_SNAME_
COLNAME=_TNAME_ FORMAT=8.4];
END;
PRINT 'The link function and variance parameters:';
_STATS_ = _LINK_ || _VARI_;
_CLNAME_ = {'Link' 'Variance'};
PRINT _STATS_ [ROWNAME=_BLANK_
COLNAME=_CLNAME_];
FREE _STATS_;
PRINT 'The covariance parameters';
_STATS_ = _RHO_ || _PSI_ || _PHI_;
_CLNAME_ = { '
PRINT _STATS_ [ROWNAME=_BLANK_
COLNAME=_CLNAME_];
FREE _STATS_;
IF _PRINT_ >= 3
THEN DO;
PRINT 'The design matrix _X_ for
the GEE model';
PRINT _X_ [ROWNAME=_BLANK_ COLNAME=_BLANK_
FORMAT=10.4];
PRINT 'The hypothesis matrix _H_ of contrasts on BETA';
PRINT _H_ [ROWNAME=_BLANK_ COLNAME=_BLANK_
FORMAT=10.4];
IF ANY(_h0_^=0) THEN DO;
PRINT
'The conformable vector
of contant terms';
PRINT
_h0_ [ROWNAME=_BLANK_ COLNAME=_BLANK_ FORMAT =10.4];
END;
END;
FINISH; /* Of the PRINTBAK subroutine */
*----------------------------- PRELIMS -----------------------------;
START PRELIMS;
/* Construct the linear
predictor _ETA_ and the vector of
partial derivatives _DERIV_ according to the
link specified. */
IF _LINK_ = 'IDENTITY' THEN
DO; /* Identity */
_ETA_ = _MU_;
_DERIV_ = J(_S_,_T_,1); END;
ELSE IF _LINK_ = 'LOGIT'
THEN DO; /* Logit */
_ETA_ = LOG(_MU_/(1-_MU_));
_DERIV_ = _MU_ # (1-_MU_); END;
ELSE IF _LINK_ = 'CLOGLOG' THEN DO; /* C log-log */
_ETA_ = LOG(-LOG(1-_MU_));
_DERIV_ = EXP(_ETA_)#EXP(-EXP(_ETA_)); END;
ELSE IF _LINK_ = 'LOG' THEN DO;
/* Log link */
_ETA_ = LOG(_MU_);
_DERIV_ = _MU_; END;
ELSE IF _LINK_ = 'RECIP' THEN DO; /* Reciprocal*/
_ETA_ = 1
/ (_MU_);
_DERIV_ = -_MU_##2; END;
/* Next, compute the standard deviation _STD_ according to the
underlying stochastic model. */
IF
_VARI_ = 'BERNOULLI'
THEN _STD_ =
SQRT(_MU_#(1-_MU_));
ELSE IF _VARI_ = 'POISSON' THEN _STD_ = SQRT(_MU_);
ELSE IF _VARI_ = 'GAMMA' THEN _STD_ = _MU_;
/* compute the
correlation structure according to the values
of
_CORR_ = (1-_RHO_) # I(_T_)
+ J(_T_,_T_,_RHO_);
IF _PSI_ > 0
THEN DO; /* Not Exchangeable */
_DELTA_ = ABS(
(J(_T_,1,1)
@ _TIMES_) -
(_TIMES_` @ J(1,_T_,1) ) ); /* Time differences */
_CORR_ = _CORR_##(_DELTA_##_PSI_); /* Expon. damping */
FREE
_DELTA_;
END;
_EIG_ = EIGVAL(_CORR_);
IF ANY(_EIG_) <= 0 THEN DO;
PRINT ,'PRELIMS: This combination of
'resulted
in a correlation matrix which is not positive-definite',
'GEESIZE is now ending
...';
STOP;
END;
FREE _EIG_;
IF _PRINT_ >= 2
THEN DO;
PRINT ,'The correlation matrix among the repeated measures';
PRINT _CORR_ [ROWNAME=_TNAME_
COLNAME=_TNAME_ FORMAT=12.4];
END;
FINISH; /* Of the PRELIMS subroutine */
*-----------------------------
B_HAT -----------------------------;
START
B_HAT;
/* Begin, by computing the inverse of
the correlation matrix.
We must distinguish between the case where there is complete
follow up vs. that with censoring due to
staggered entry and
loss to follow-up. */
IF ALL(_TAU_= 1) THEN /* Complete FU
*/
_RINV_ = INV(_CORR_);
ELSE DO;
_RV_ = _CORR_;
_IV_ = I(nrow(_CORR_));
DO
_TP_ = 1 TO _T_; /* SWEEP _CORR_ for */
_RV_ = SWEEP(_RV_,_TP_); /* each time point. */
IF
_TP_ = 1 THEN DO;
_RINV_
= _RV_; /* Stack on top of */
_IINV_
= _IV_;
_RX_ = _CORR_;
END;
ELSE DO;
_RINV_
= _RINV_ // _RV_; /* each other. */
_IINV_
= _IINV_ // _IV_;
_RX_ = _RX_ // _CORR_;
END;
END; /* Of _TP_
loop */
END; /* Of ELSE loop */
/* Now, compute the GEE estimator
anticipated at the end
of the study.
Use (8.11) in McCullagh & Nelder (1983). */
_PRE_ = J(_R_,_R_,0); _POST_ = J(_R_,1,0);
_PREN_ = J(_R_,_R_,0);
_PREN2_= J(_R_,_R_,0);
_POSTN_ = J(_R_,1,0);
/* NEW for
differentiation between RW and R0 */
_START_ = 0; _STOP_ = 0;
DO SUBPOP = 1 TO _S_; /* Process ea. subpopulation in turn
*/
/* First, read the
appropriate submatrices of these quantities
*/
_START_ = _STOP_ + 1;
_STOP_ = _STOP_ + _T_;
_XS_ = _X_[_START_:_STOP_,]; /* Xs
*/
_WS_ = _DERIV_[SUBPOP,] / _STD_[SUBPOP,]; /* (dMU/dETA)(Ai^-1/2) */
_ES_ = _ETA_[SUBPOP,]; /* Deviations, Rs
*/
_ES_ = _ES_`;
/* (Tx1) vector */
/* Distinguish
between complete vs censored follow-up */
IF ALL(_TAU_= 1) THEN DO; /* Complete follow-up */
_RINV_ = INV(_CORR_);
_R0_ = _CORR_;
_RW_
= I(nrow(_CORR_)); /* NEW: Differentiation between RW and R0 */
_WSX_
= _RINV_ # (_WS_`*_WS_); /* (dMU/dETA)(A^-1/2)R_inv(A^-1/2)(dMU/dETA)
*/
_WSO_ = _RW_ # (_WS_`*_WS_); /* New:
(dMU/dETA)(A^-1/2)I(A^-1/2)(dMU/dETA) */
_WSI_
= _R0_ # (_WS_`*_WS_); /* New: (dMU/dETA)(A^-1/2)R0(A^-1/2)(dMU/dETA)
*/
_PRE_ = _PRE_ + _PI_[1,SUBPOP] #
(_XS_` * _WSX_ *
_XS_); /* omega_s Xs' Ws Xs
*/
_PREN_
= _PREN_ + _PI_[1,SUBPOP]
#
(_XS_` * _WSO_ *
_XS_); /* Same with working
independence */
_PREN2_
= _PREN2_ + _PI_[1,SUBPOP]
#
(_XS_` * _WSI_ *
_XS_);
_POST_
= _POST_ + _PI_[1,SUBPOP] #
(_XS_` * _WSX_ *
_ES_); /* omega_s Xs' Ws ETAs */
_POSTN_
= _POSTN_ + _PI_[1,SUBPOP] #
(_XS_` * _WSO_ *
_ES_); /* Same with working
independence */
END; /* Of complete follow-up loop
*/
ELSE DO; /* Censoring */
_STR_ = 0; _STP_ = 0; /* Initialize */
_PR_ = J(_R_,_R_,0); _PST_ = J(_R_,1,0);
_PRN_ = J(_R_,_R_,0); _PRN2_ = J(_R_,_R_,0);
_PSTN_ = J(_R_,1,0); /* NEW for differentiation between RW and R0 */
/* Now, we process each
time point in turn. */
DO
_TP_ = 1 TO _T_;
_STR_ = _STP_ + 1;
_STP_ =
_STP_ + _T_;
_RT_INV = _RINV_[_STR_:_STP_,]; /* Read Rt-inv */
_RT_INV
= _RT_INV[1:_TP_,1:_TP_];
_RWT_
= _IINV_[_STR_:_STP_,]; /* R0 and RW */
_RWT_
= _RWT_[1:_TP_,1:_TP_];
_R0T_
= _RX_[_STR_:_STP_,];
_R0T_
= _R0T_[1:_TP_,1:_TP_];
_XST_ = _XS_[1:_TP_,]; /* Xst */
_WST_ = _WS_[,1:_TP_];
_WIST_
= _R0T_ # (_WST_`*_WST_); /* W_outer for differentiation
between RW and R0 */
_WOST_
= _RWT_ # (_WST_`*_WST_); /* W_inner for differentiation
between RW and R0 */
_WXST_ = _RT_INV #
(_WST_`*_WST_);
_EST_ = _ES_[1:_TP_,];
_PR_ = _PR_ + _TAU_[SUBPOP,_TP_] #
(_XST_` * _WXST_ * _XST_); /* pi_st Xst' Wts Xst */
_PRN_ = _PRN_ + _TAU_[SUBPOP,_TP_]
# (_XST_` * _WOST_ * _XST_); /*
Same with working */
_PRN2_ = _PRN2_ + _TAU_[SUBPOP,_TP_] # (_XST_` * _WIST_ * _XST_); /* independence */
_PST_ = _PST_ + _TAU_[SUBPOP,_TP_] # (_XST_` * _WXST_
* _EST_); /* pi_st Xst' Wst ETAst
*/
_PSTN_ = _PSTN_ + _TAU_[SUBPOP,_TP_] # (_XST_` * _WOST_
* _EST_); /* pi_st Xst' Wst ETAst
*/
END; /* Of _TP_ loop */
FREE
_TP_ _STR_ _STP_ _RT_INV _XST_ _WXST_ _EST_ _R0T_ _RWT_ _WOST_ _WIST_;
_PRE_ = _PRE_ + _PI_[1,SUBPOP] # _PR_;
_POST_ = _POST_ + _PI_[1,SUBPOP] # _PST_;
_PREN_
= _PREN_ + _PI_[1,SUBPOP]
# _PRN_;
_PREN2_
= _PREN2_ + _PI_[1,SUBPOP]
# _PRN2_;
_POSTN_
= _POSTN_ + _PI_[1,SUBPOP]
# _PSTN_;
FREE _PR_ _PST_ _PRN_ _PRN2_ _PSTN_ ;
END; /* Of censored case loop */
END; /* Of SUBPOP loop */
FREE _START_ _STOP_ _XS_ _WS_ _ES_ _WSX_
_WSO_ _WSI_;
/* Now, invert the
(Xs'WsXs) matrix to generate BETA. This
is also contributes to the model based
covariance matrix. */
_EIG_ = EIGVAL(_PRE_);
IF ANY(_EIG_) <= 0 THEN DO;
PRINT ,'B_HAT: The quantity Ds`(Vs^-1)Ds is not full rank,',
'GEESIZE is now ending
...';
STOP;
END;
FREE
_EIG_;
/* Now, invert the
(Xs'WsXs) matrix to generate BETA. This
is also contributes to the model based
covariance matrix. */
_EIG_ = EIGVAL(_PREN_);
IF ANY(_EIG_) <= 0 THEN DO;
PRINT ,'B_HAT: The Fisher information matrix is not positive
definite,',
'GEESIZE is now ending ...';
STOP;
END; FREE
_EIG_;
_EIG_ = EIGVAL(_PREN2_);
IF ANY(_EIG_) <= 0 THEN DO;
PRINT ,'B_HAT: The OPG is not positive definite,',
'GEESIZE is now ending ...';
STOP;
END; FREE
_EIG_;
_EIG_ = EIGVAL(_PREN_ * inv(_PREN2_) * _PREN_);
IF ANY(_EIG_) <= 0 THEN DO;
PRINT ,'B_HAT: The robust variance matrix is not positive
definite,',
'GEESIZE is now ending ...';
STOP;
END; FREE
_EIG_;
_COV_B = SOLVE(_PRE_,I(_R_)); /* (Xs' Ws Xs)^-1
*/
_BETA_ = _COV_B * _POST_;
/* GEE Estimate of BETA
*/
_COV_B = _PHI_ # _COV_B;
/* Co (Ds'(Vs^-1)Ds)^1 */
_COV_BP = SOLVE(_PREN_,I(_R_));
_COV_BN = _COV_BP` * _PREN2_ * _COV_BP;
_BETAN_ = _COV_BP * _POSTN_;
_COV_BN = _PHI_ # _COV_BN;
FREE _PRE_ _PREN_ _PREN2_;
IF _PRINT_ >= 2
THEN DO;
PRINT ,'The GEE estimate of BETA anticipated at end of study';
_CLNAME_ = {'Estimate' };
PRINT 'Beta',
_BETA_ [ROWNAME=_BLANK_ COLNAME=_CLNAME_ FORMAT=12.4];
PRINT 'Betan',
_BETAN_ [ROWNAME=_BLANK_ COLNAME=_CLNAME_ FORMAT=12.4];
print 'COVB',_Cov_B;
print 'COVBN',_cov_BN;
END;
FINISH; /* Of the B_HAT subroutine */
*----------------------------- G_SIZE
----------------------------;
START G_SIZE;
/* This
module derives the values of the non-centrality parameter
and computes the corresponding sample
size. First, derive
the RHS in the expression for the
non-centrality parameter */
IF _PRINT_ >= 2
THEN DO;
PRINT ,'The hypothesis matrix _H_ of specific interest:';
PRINT _H_ [ROWNAME=_BLANK_ COLNAME=_BLANK_
FORMAT=12.4];
END;
_DF_HYP = NROW(_H_);
_XI_ = _H_*_BETA_ - _h0_;
_COV_XI = _H_*_COV_B*_H_`;
_XIN_ = _H_*_BETAN_ - _h0_;
_COV_XIN = _H_ * _COV_BN * _H_`;
/* Ensure
that the hypothesis matrix is full rank. */
_EIG_ = EIGVAL(_COV_XI);
IF ANY(_EIG_) <= 0 THEN DO;
PRINT ,'B_HAT: The hypothesis matrix H is not full rank,',
'resulting
in a non positive-definite covariance matrix',
'GEESIZE is now ending ...';
STOP;
END; FREE
_EIG_;
_EIG_ = EIGVAL(_COV_XIN);
IF ANY(_EIG_) <= 0 THEN DO;
PRINT ,'B_HAT: The hypothesis matrix H is not full rank,',
'resulting
in a non positive-definite robust covariance matrix',
'GEESIZE is now ending
...';
STOP;
END; FREE
_EIG_;
IF _PRINT_ >= 2
THEN DO;
PRINT ,'The estimate of the linear contrast H_BETA - h0';
_CLNAME_ = {'Estimate' };
PRINT
_XI_ [ROWNAME=_BLANK_ COLNAME=_CLNAME_ FORMAT=12.4];
END;
IF _PRINT_ >= 2
THEN DO;
PRINT ,'The estimate of the linear contrast H_BETAN - h0';
_CLNAME_ = {'Estimate' };
PRINT _XIN_ [ROWNAME=_BLANK_ COLNAME=_CLNAME_
FORMAT=12.4];
END;
/* Now, compute the
non-centrality parameter and generate n.
*/
_NONCEN_ = SOLVE(_COV_XI,I(_DF_HYP));
_NONCEN_ = _XI_`*_NONCEN_*_XI_;
FREE _XI_ _COV_XI;
_NONCENN_ = SOLVE(_COV_XIN,I(_DF_HYP));
_NONCENN_ = _XIN_`*_NONCENN_*_XIN_;
FREE _XIN_ _COV_XIN;
/* Now, find the critical
value and the tabulated non-cen value */
_CRIT_ = CINV((1-_TYPE_I),_DF_HYP); /* Critical value */
_NC_ = CNONCT(_CRIT_,_DF_HYP,_TYPE_II); /* Non-Cent parm */
_N_ = _NC_ / _NONCEN_; /* Unit size
*/
_N_ = CEIL(_N_); /* Round up */
SUB_SIZE = _N_ # _PI_; /* SubPop sizes
*/
SUB_SIZE = CEIL(SUB_SIZE); /* Rounded up
*/
_TOT_N = SUB_SIZE[,+]; /* Tot study size */
/*
Robust variance matrix */
_NN_ = _NC_ / _NONCENN_; /* Unit size
*/
_NN_ = CEIL(_NN_); /* Round up */
SUB_SIZEN = _NN_ # _PI_; /* SubPop sizes
*/
SUB_SIZEN = CEIL(SUB_SIZEN); /* Rounded up
*/
_TOT_NN = SUB_SIZEN[,+]; /* Tot study size */
/* Compute observed type II error
function at this sample size. */
_LAMBDA_ = _N_ # _NONCEN_;
/* Non-central parameter */
_BETA_N = PROBCHI(_CRIT_,_DF_HYP,_LAMBDA_);
FREE _NONCEN_ _LAMBDA_;
_STATS_ = _N_ || _TOT_N ||
_DF_HYP ||
_CRIT_ || _NC_ || _BETA_N;
/*
Robust variance matrix */
_LAMBDAN_ = _NN_ # _NONCENN_; /* Non-central parameter */
_BETA_NN = PROBCHI(_CRIT_,_DF_HYP,_LAMBDAN_);
FREE _NONCENN_ _LAMBDAN_;
_STATSN_ = _NN_ || _TOT_NN ||
_DF_HYP || _CRIT_ || _NC_
|| _BETA_NN;
/* Report the results of
the calculations as the user requests
*/
IF _PRINT_ > 0
THEN DO;
PRINT ,'The minimum required sample size at the specified',
'alpha and beta levels of
precision and associated statistics';
_CLNAME_ = { 'Unit Size' 'Total
N'
'Hyp df'
'Crit Val' 'NC Parm' 'Type II' };
PRINT _STATS_ [COLNAME=_CLNAME_
ROWNAME=_BLANK_ FORMAT=10.4];
PRINT
,'The minimum required
sample size at the specified',
'alpha
and beta levels of precision and associated statistics'
'with
independent working correlation';
_CLNAME_ = { 'Unit Size' 'Total
N'
'Hyp df'
'Crit Val' 'NC Parm' 'Type II' };
PRINT _STATSN_ [COLNAME=_CLNAME_
ROWNAME=_BLANK_ FORMAT=10.4];
IF ANY(_PI_ ^= 1) THEN DO;
PRINT ,'The sample sizes in the different subpopulations';
PRINT
SUB_SIZE [ROWNAME=_BLANK_ COLNAME=_SNAME_ FORMAT=8.0];
PRINT
SUB_SIZEN [ROWNAME=_BLANK_ COLNAME=_SNAME_ FORMAT=8.0];
END;
FREE _CLNAME_;
END;
_STATS_ = _RHO_ || _PSI_ || _PHI_ || _STATS_;
_STATSN_ = _RHO_ || _PSI_ || _PHI_ ||
_STATSN_;
IF _PRINT_ > 0
THEN RUN DIVIDER;
FINISH; /* Of the G_SIZE subroutine */
*----------------------------- GEESIZE
---------------------------;
START GEESIZE;
/* Check all the input
matrices and print any error messages. */
RUN CHECKING;
/* Print back the input
matrices as requested. */
IF _PRINT_ >= 1
THEN RUN PRINTBAK;
/* Perform
preliminary calculations and estimate of BETA. */
RUN PRELIMS;
RUN B_HAT;
/* Calculate
the required sample size and report the results. */
RUN G_SIZE;
FINISH; /* Of the GEESIZE subroutine */
OPTIONS
SOURCE;