*---------------------------- GEESIZE ------------------------------+ GEESIZE version 3.1 is a collection of PROC IML routines designed to compute the minimum sample sizes required in preparation for an applied research effort using correlated dependent data. It is based on the macro GEESIZE, version 2.1 by James Rochon, Ph.D., and has been extended by Dipl.-Stat. Gerlinde Dahmen to allow for an independent working correlation structure. For a detailed explanation see the documentation file 'geesize3_1.txt'. Disclaimer ---------- This software is provided by James Rochon, Ph.D., and Dipl.-Stat. Gerlinde Dahmen as a public service. It is provided 'as is'. There are no warranties, expressed or implied, as to merchantability or fitness for a particular purpose or regarding the accuracy of the materials or code contained herein. ************************************************************************ Date: 01/03/03 (Version 3.1) Author: James Rochon and Gerlinde Dahmen Institut fuer Medizinische Biometrie und Statistik Universitaet zu Luebeck, Deutschland Original version: Version 2.1 Date: 02/08/99 Author: James Rochon, Ph.D. Biostatistics Center The George Washington University ***********************************************************************; *----------------------------- GEESIZE ----------------------------| | The following program can be used to invoke the macro. | *--------------------------------------------------------------------| * * PROC IML; * * %INCLUDE 'C:\...\GeeSize.sas' / NOSOURCE2; * * RUN BEGIN; * _MU_ = 0; /* (SxT) matrix of expected values */ * _STD_ = 1; /* (SxT) standard deviations */ * _PI_ = 1; /* (1xS) relative sizes in sub pops */ * _TAU_ = 1; /* (SxT) prob of first t time points */ * _TIMES_ = 0; /* (1xT) vector of time points */ * _RHO_ = 0; /* (scalar) autocorrelation */ * _PSI_ = 1; /* (scalar) damping parameter */ * _PHI_ = 1; /* (scalar) scale parameter */ * _X_ = 0; /* (STxr) design matrix */ * _H_ = 0; /* (hxr) hypothesis matrix */ * _h0_ = 0; /* (hx1) vector of constant terms */ * _LINK_ = 'Identity'; /* (Scalar) link function {'Identity', * 'Log', 'Recip', 'Logit', 'CLoglog' */ * _VARI_ = 'Gaussian'; /* (Scalar) variance function * 'Gaussian', 'Poisson', 'Gamma' or * 'Bernoulli'. */ * * _TYPE_I = 0.05; /* Type I error rate */ * _TYPE_II = 0.20; /* Type II error rate */ * _PRINT_ = 1; /* Level of detail in printout */ * _FLAG_H = 0; /* (Scalar) header flag */ * * _TNAME_ = { 'Time 1' 'Time 2' 'Time 3' 'Time 4' 'Time 5' * 'Time 6' 'Time 7' 'Time 8' 'Time 9' 'Time 10' * 'Time 11' 'Time 12' 'Time 13' 'Time 14' 'Time 15' * 'Time 16' 'Time 17' 'Time 18' 'Time 19' 'Time 20' }; * * _SNAME_ = { 'SubPop 1' 'SubPop 2' 'SubPop 3' 'SubPop 4' 'SubPop 5' * 'SubPop 6' 'SubPop 7' 'SubPop 8' 'SubPop 9' 'SubPop 10' * 'SubPop 11' 'SubPop 12' 'SubPop 13' 'SubPop 14' 'SubPop 15' * 'SubPop 16' 'SubPop 17' 'SubPop 18' 'SubPop 19' 'SubPop 20' }; * * RUN GEESIZE; * * quit; *--------------------------------------------------------------------| | Output description | *--------------------------------------------------------------------| | | | _N_ ... (1x1) The minimum sample size in EACH subpopulation. | | | | _TOT_N ... (1x1) The minimum TOTAL sample size across all sub- | | subpopulations for the entire study. | | | | _DF_HYP ... (1x1) The hypothesis degrees of freedom for the Wald | | chi-square test statistic. | | | | _CRIT_ ... (1x1) The critical value for significance from the | | Wald chi-square test statistic. | | | | _NC_ ... (1x1) The value of the tablulated non-centrality | | parameter from the Wald chi-square test statistic. | | | | _BETA_N ... (1x1) The computed type-II error rate at the derived | | sample size from the Wald chi-square test statistic. | | | | _STATS_ ... (1x9) An output vector gathering the most important | | input and output parameters. It consists of: | | _RHO_, _PSI_, _PHI_, _N_, _TOT_N, _DF_HYP, _CRIT_, | | _NC_, and _BETA_N. | | | +--------------------------------------------------------------------; START BEGIN; /* Initialize some matrices at the beginning of the code */ _BLANK_ = ' '; /* (Scalar) blank character */ _MU_ = 0; /* (SxT) matrix of expected values */ _STD_ = 1; /* (SxT) standard deviations */ _PI_ = 1; /* (1xS) relative sizes in sub pops */ _TAU_ = 1; /* (SxT) prob of first t time points */ _TIMES_ = 0; /* (1xT) vector of time points */ _RHO_ = 0; /* (scalar) autocorrelation */ _PSI_ = 1; /* (scalar) damping parameter */ _PHI_ = 1; /* (scalar) scale parameter */ _X_ = 0; /* (STxr) design matrix */ _H_ = 0; /* (hxr) hypothesis matrix */ _h0_ = 0; /* (hx1) vector of constant terms */ _LINK_ = 'Identity'; /* (Scalar) link function */ _VARI_ = 'Gaussian'; /* (Scalar) variance function */ _TYPE_I = 0.05; /* Type I error rate */ _TYPE_II = 0.20; /* Type II error rate */ _PRINT_ = 1; /* Level of detail in printout */ _FLAG_H = 0; /* (Scalar) header flag */ _TNAME_ = { 'Time 1' 'Time 2' 'Time 3' 'Time 4' 'Time 5' 'Time 6' 'Time 7' 'Time 8' 'Time 9' 'Time 10' 'Time 11' 'Time 12' 'Time 13' 'Time 14' 'Time 15' 'Time 16' 'Time 17' 'Time 18' 'Time 19' 'Time 20' }; _SNAME_ = { 'SubPop 1' 'SubPop 2' 'SubPop 3' 'SubPop 4' 'SubPop 5' 'SubPop 6' 'SubPop 7' 'SubPop 8' 'SubPop 9' 'SubPop 10' 'SubPop 11' 'SubPop 12' 'SubPop 13' 'SubPop 14' 'SubPop 15' 'SubPop 16' 'SubPop 17' 'SubPop 18' 'SubPop 19' 'SubPop 20' }; RESET NONAME NOAUTONAME; /* Reset some options for IML */ FINISH; /* End of the BEGIN subroutine */ *---------------------------- HEADER ------------------------------; START HEADER; PRINT ,, '+--------------------------------------------------------+', '| |', '| G E E S I Z E |', '| |', '| Application of GEE Procedures to |', '| |', '| Sample Size Calculations for |', '| |', '| Repeated Measures Experiments |', '| |', '| |', '| V E R S I O N 3 . 1 |', '| |', '| Copyright (c) 1999 and 2003 |', '| |', '| By James Rochon, Ph.D., |', '| and Dipl.-Stat. Gerlinde Dahmen |', '| |', '+--------------------------------------------------------+'; _FLAG_H = 1; /* Trip the header flag */ FINISH; /* End of the HEADER subroutine */ *---------------------------- DIVIDER ------------------------------; START DIVIDER; PRINT ,, '= = = = = = = = = = = = = = = = = = = = = = ='; FINISH; /* End of the DIVIDER Subroutine */ *----------------------------- CHECKING ----------------------------; START CHECKING; /* Print out the header and make it look impressive! */ IF _FLAG_H = 0 THEN RUN HEADER; /* Some basic housekeeping operations. */ E_FLAG = 0; /* Error flag */ _S_ = NROW(_MU_); /* No. subpopulations */ _T_ = NCOL(_MU_); /* No. time points */ _ST_ = _S_#_T_; _LINK_ = UPCASE(COMPRESS(_LINK_)); IF (NROW(_LINK_)#NCOL(_LINK_)) > 1 THEN DO; PRINT ,'CHECKING: The _LINK_ function is not of the correct', 'dimension. Only the (1,1) element will be used.'; _LINK_ = _LINK_[1,1]; END; CHECK = _LINK_='IDENTITY' | _LINK_='LOGIT' | _LINK_='LOG' | _LINK_='RECIP' | _LINK_='CLOGLOG'; IF CHECK = 0 THEN DO; E_FLAG = E_FLAG + 1; /* Trip the error flag */ PRINT ,'CHECKING: The _LINK_ function is not', 'one of the appropriate types'; END; IF _MU_ = 0 THEN DO; E_FLAG = E_FLAG + 1; PRINT ,'CHECKING: The matrix of expected values _MU_', 'has not been specified'; END; IF _LINK_ = 'LOGIT' | _LINK_ = 'CLOGLOG' THEN DO; IF ANY(_MU_ <= 0) | ANY(_MU_ >= 1) THEN DO; E_FLAG = E_FLAG + 1; /* Trip the error flag */ PRINT ,'CHECKING: The _MU_ matrix takes on inadmissible', 'values. 0 < _MU_ < 1 are the only appropriate values'; END; END; ELSE IF _LINK_ = 'LOG' | _LINK_ = 'RECIP' THEN DO; IF ANY(_MU_) <= 0 THEN DO; E_FLAG = E_FLAG + 1; /* Trip the error flag */ PRINT ,'CHECKING: The _MU_ matrix takes on inadmissible', 'values. _MU_ > 0 are the only appropriate values'; END; END; IF (NROW(_PHI_)#NCOL(_PHI_)) > 1 THEN DO; PRINT ,'CHECKING: The _PHI_ parameter should be a scalar.', 'Only the (1,1) element will be used.'; _PHI_ = _PHI_[1,1]; END; _VARI_ = UPCASE(COMPRESS(_VARI_)); IF (NROW(_VARI_)#NCOL(_VARI_)) > 1 THEN DO; PRINT ,'CHECKING: The _VARI_ function is not of the', 'correct dimension. Only the (1,1) element will be used.'; _VARI_ = _VARI_[1,1]; END; CHECK = _VARI_='GAUSSIAN' | _VARI_='POISSON' | _VARI_='GAMMA' | _VARI_='BERNOULLI'; IF CHECK = 0 THEN DO; E_FLAG = E_FLAG + 1; /* Trip the error flag */ PRINT ,'CHECKING: The _VARI_ function is not', 'one of the appropriate types'; END; IF _VARI_='GAUSSIAN' THEN DO; IF ANY(_STD_ <= 0) THEN DO; E_FLAG = E_FLAG + 1; /* Trip the error flag */ PRINT ,'CHECKING: _STD_ contains invalid values.', '_STD_ > 0 are the only legitimate values.'; END; IF (NROW(_STD_)#NCOL(_STD_) = 1) THEN /* Convert scalar */ _STD_ = J(_S_,_T_,_STD_); /* to (SxT) matrix */ ELSE DO; IF NROW(_STD_) ^= _S_ | NCOL(_STD_) ^= _T_ THEN DO; E_FLAG = E_FLAG + 1; /* Trip the error flag */ PRINT ,'CHECKING: _STD_ is not of the correct dimension.', 'It should be a scalar or an (SxT) matrix.'; END; /* Of ELSE loop */ IF (ANY(_STD_ ^= 1) & _PHI_ ^= 1) THEN PRINT 'CHECKING: Note that you have specified values for', 'both variance parameters, _STD_ AND _PHI_. You may', 'want to use just one of these parameters'; END; /* Of IF(ANY ... loop */ END; /* Of IF _VARI_ loop */ IF _X_=0 THEN DO; IF _PRINT_ >= 1 THEN PRINT 'CHECKING: The design matrix _X_ has not been specified.', 'The one-way ANOVA between-subjects design is assumed'; _X_ = I(_S_) @ J(_T_,1,1); END; _R_ = NCOL(_X_); /* No. predictor variables in X */ IF _R_ > _ST_ | NROW(_X_) ^= _ST_ THEN DO; E_FLAG = E_FLAG + 1; /* Trip the error flag */ PRINT ,'CHECKING: The design matrix _X_ is not of the', 'correct dimension. It should be an (STxR) matrix.'; END; IF _TIMES_ = 0 THEN DO; /* Default value */ IF _PRINT_ >= 1 THEN PRINT 'CHECKING: The _TIMES_ vector is not specified', 'Equally-spaced time points are assumed'; _TIMES_ = (1:_T_); /* Equally spaced intervals */ END; IF NROW(_TIMES_) > 1 | NCOL(_TIMES_) ^= _T_ THEN DO; E_FLAG = E_FLAG + 1; /* Trip the error flag */ PRINT ,'CHECKING: The _TIMES_ vector is not of the', 'correct dimension. It should be a (1xT) vector.'; END; IF ALL(_PI_=1) THEN _PI_ = J(1,_S_,1); ELSE DO; IF NROW(_PI_) > 1 | NCOL(_PI_) ^= _S_ THEN DO; E_FLAG = E_FLAG + 1; /* Trip the error flag */ PRINT ,'CHECKING: The _PI_ vector is not of the', 'correct dimension. It should be a (1xS) vector.'; END; IF ANY(_PI_ <= 0) THEN DO; E_FLAG = E_FLAG + 1; /* Trip the error flag */ PRINT ,'CHECKING: The _PI_ vector contains invalid', 'values. It should consist of strictly positive values.'; END; END; IF ANY(_TAU_^= 1) THEN DO; IF NROW(_TAU_) ^= _S_ | NCOL(_TAU_) ^= _T_ THEN DO; E_FLAG = E_FLAG + 1; /* Trip the error flag */ PRINT ,'CHECKING: The _TAU_ vector is not of the', 'correct dimension. It should be an (SxT) matrix.'; END; IF ANY(_TAU_ < 0) | ANY(_TAU_ > 1) THEN DO; E_FLAG = E_FLAG + 1; /* Trip the error flag */ PRINT ,'CHECKING: The _TAU_ vector contains invalid values.', '0 <= _TAU_ <= 1 are the only legitimate values.'; END; _CHECK_ = _TAU_[,+]; IF ANY(_CHECK_ ^= 1) THEN DO; E_FLAG = E_FLAG + 1; /* Trip the error flag */ PRINT ,'CHECKING: The _TAU_ vector contains invalid values.', 'Within each row, the probabilities must sum to 1', 'across the columns.'; END; FREE _CHECK_; END; IF (NROW(_RHO_)#NCOL(_RHO_)) > 1 THEN DO; PRINT ,'CHECKING: The _RHO_ parameter should be a scalar.', 'Only the (1,1) element will be used.'; _RHO_ = _RHO_[1,1]; END; IF (_RHO_ < 0 | _RHO_ >= 1) THEN DO; E_FLAG = E_FLAG + 1; /* Trip the error flag */ PRINT ,'CHECKING: The _RHO_ vector takes on inadmissible', 'values. 0 <= _RHO_ < 1 are the only appropriate values'; END; IF (NROW(_PSI_)#NCOL(_PSI_)) > 1 THEN DO; PRINT ,'CHECKING: The _PSI_ parameter should be a scalar.', 'Only the (1,1) element will be used.'; _PSI_ = _PSI_[1,1]; END; IF _PSI_ < 0 THEN DO; E_FLAG = E_FLAG + 1; /* Trip the error flag */ PRINT ,'CHECKING: The _PSI_ vector takes on inadmissible', 'values. _PSI_ >= 0 are the only appropriate values'; END; IF _PHI_ < 0 THEN DO; E_FLAG = E_FLAG + 1; /* Trip the error flag */ PRINT ,'CHECKING: The _PHI_ vector takes on inadmissible', 'values. _PHI_ >= 0 are the only appropriate values'; END; IF _H_=0 THEN DO; IF _PRINT_ >= 1 THEN PRINT 'CHECKING: An hypothesis matrix _H_ has not been', 'specified. The default value will be used instead'; _H_ = J((_S_-1),1,1) || -I(_S_-1); END; _HH_ = NROW(_H_); IF _HH_ > _R_ | NCOL(_H_) ^= _R_ THEN DO; E_FLAG = E_FLAG + 1; /* Trip the error flag */ PRINT ,'CHECKING: The hypothesis matrix _H_ is not of the', 'correct dimension. It should be an (_HH_x_R_) matrix'; END; IF _h0_=0 THEN DO; IF _PRINT_ >= 1 THEN PRINT 'CHECKING: The hypothesis matrix _H0_ has not been', 'specified. The default value will be used instead'; _H0_ = J(_HH_,1,0); END; IF NROW(_h0_) ^= _HH_ | NCOL(_h0_) > 1 THEN DO; E_FLAG = E_FLAG + 1; /* Trip the error flag */ PRINT ,'CHECKING: The hypothesis matrix _h0_ is not of the', 'correct dimension. It should be an (_HH_x1) matrix'; END; IF E_FLAG >= 1 THEN DO; PRINT ,'Because of errors in the input matrices, GEESIZE is', 'now terminating. Consult the output for the details.'; PRINT E_FLAG ' error(s) have been detected'; STOP; END; FINISH; /* End of the CHECKING Subroutine */ *----------------------------- PRINTBAK ----------------------------; START PRINTBAK; /* Print back the model parameters with the appropriate details */ _STATS_ = _S_ || _T_; _CLNAME_ = {'No. SubPopns' 'No. Times'}; PRINT ,'Basic model parameters:'; PRINT _STATS_ [ROWNAME=_BLANK_ COLNAME=_CLNAME_ FORMAT=8.0]; IF _PRINT_ >= 2 THEN DO; _STATS_ = _TYPE_I || _TYPE_II; _CLNAME_ = { 'Type I' 'Type II'}; PRINT ,'Type I and type II error rates'; PRINT _STATS_ [ROWNAME=_BLANK_ COLNAME=_CLNAME_ FORMAT=8.2]; PRINT ,'The time points of the repeated measures'; PRINT _TIMES_ [ROWNAME=_BLANK_ COLNAME=_TNAME_ FORMAT=8.0]; END; PRINT ,'The set of expected values by subpopulation over time:'; PRINT _MU_ [ROWNAME=_SNAME_ COLNAME=_TNAME_ FORMAT=12.4]; IF _VARI_='GAUSSIAN' THEN DO; PRINT , 'The set of standard deviations by subpopulation over time:'; PRINT _STD_ [ROWNAME=_SNAME_ COLNAME=_TNAME_ FORMAT=12.4]; END; IF ANY(_PI_ ^= 1) THEN DO; PRINT ,'Relative sample sizes in the different subpopulations'; PRINT _PI_ [ROWNAME=_BLANK_ COLNAME=_SNAME_ FORMAT=8.2]; END; IF ANY(_TAU_ ^= 1) THEN DO; PRINT ,'The marginal probabilities in the censored sample problem'; PRINT _TAU_ [ROWNAME=_SNAME_ COLNAME=_TNAME_ FORMAT=8.4]; END; PRINT 'The link function and variance parameters:'; _STATS_ = _LINK_ || _VARI_; _CLNAME_ = {'Link' 'Variance'}; PRINT _STATS_ [ROWNAME=_BLANK_ COLNAME=_CLNAME_]; FREE _STATS_; PRINT 'The covariance parameters'; _STATS_ = _RHO_ || _PSI_ || _PHI_; _CLNAME_ = { 'Rho' 'Psi' 'Phi'}; PRINT _STATS_ [ROWNAME=_BLANK_ COLNAME=_CLNAME_]; FREE _STATS_; IF _PRINT_ >= 3 THEN DO; PRINT 'The design matrix _X_ for the GEE model'; PRINT _X_ [ROWNAME=_BLANK_ COLNAME=_BLANK_ FORMAT=10.4]; PRINT 'The hypothesis matrix _H_ of contrasts on BETA'; PRINT _H_ [ROWNAME=_BLANK_ COLNAME=_BLANK_ FORMAT=10.4]; IF ANY(_h0_^=0) THEN DO; PRINT 'The conformable vector of contant terms'; PRINT _h0_ [ROWNAME=_BLANK_ COLNAME=_BLANK_ FORMAT =10.4]; END; END; FINISH; /* Of the PRINTBAK subroutine */ *----------------------------- PRELIMS -----------------------------; START PRELIMS; /* Construct the linear predictor _ETA_ and the vector of partial derivatives _DERIV_ according to the link specified. */ IF _LINK_ = 'IDENTITY' THEN DO; /* Identity */ _ETA_ = _MU_; _DERIV_ = J(_S_,_T_,1); END; ELSE IF _LINK_ = 'LOGIT' THEN DO; /* Logit */ _ETA_ = LOG(_MU_/(1-_MU_)); _DERIV_ = _MU_ # (1-_MU_); END; ELSE IF _LINK_ = 'CLOGLOG' THEN DO; /* C log-log */ _ETA_ = LOG(-LOG(1-_MU_)); _DERIV_ = EXP(_ETA_)#EXP(-EXP(_ETA_)); END; ELSE IF _LINK_ = 'LOG' THEN DO; /* Log link */ _ETA_ = LOG(_MU_); _DERIV_ = _MU_; END; ELSE IF _LINK_ = 'RECIP' THEN DO; /* Reciprocal*/ _ETA_ = 1 / (_MU_); _DERIV_ = -_MU_##2; END; /* Next, compute the standard deviation _STD_ according to the underlying stochastic model. */ IF _VARI_ = 'BERNOULLI' THEN _STD_ = SQRT(_MU_#(1-_MU_)); ELSE IF _VARI_ = 'POISSON' THEN _STD_ = SQRT(_MU_); ELSE IF _VARI_ = 'GAMMA' THEN _STD_ = _MU_; /* compute the correlation structure according to the values of RHO and PSI, and ensure that it is positive-definite. */ _CORR_ = (1-_RHO_) # I(_T_) + J(_T_,_T_,_RHO_); IF _PSI_ > 0 THEN DO; /* Not Exchangeable */ _DELTA_ = ABS( (J(_T_,1,1) @ _TIMES_) - (_TIMES_` @ J(1,_T_,1) ) ); /* Time differences */ _CORR_ = _CORR_##(_DELTA_##_PSI_); /* Expon. damping */ FREE _DELTA_; END; _EIG_ = EIGVAL(_CORR_); IF ANY(_EIG_) <= 0 THEN DO; PRINT ,'PRELIMS: This combination of RHO and PSI has', 'resulted in a correlation matrix which is not positive-definite', 'GEESIZE is now ending ...'; STOP; END; FREE _EIG_; IF _PRINT_ >= 2 THEN DO; PRINT ,'The correlation matrix among the repeated measures'; PRINT _CORR_ [ROWNAME=_TNAME_ COLNAME=_TNAME_ FORMAT=12.4]; END; FINISH; /* Of the PRELIMS subroutine */ *----------------------------- B_HAT -----------------------------; START B_HAT; /* Begin, by computing the inverse of the correlation matrix. We must distinguish between the case where there is complete follow up vs. that with censoring due to staggered entry and loss to follow-up. */ IF ALL(_TAU_= 1) THEN /* Complete FU */ _RINV_ = INV(_CORR_); ELSE DO; _RV_ = _CORR_; _IV_ = I(nrow(_CORR_)); DO _TP_ = 1 TO _T_; /* SWEEP _CORR_ for */ _RV_ = SWEEP(_RV_,_TP_); /* each time point. */ IF _TP_ = 1 THEN DO; _RINV_ = _RV_; /* Stack on top of */ _IINV_ = _IV_; _RX_ = _CORR_; END; ELSE DO; _RINV_ = _RINV_ // _RV_; /* each other. */ _IINV_ = _IINV_ // _IV_; _RX_ = _RX_ // _CORR_; END; END; /* Of _TP_ loop */ END; /* Of ELSE loop */ /* Now, compute the GEE estimator anticipated at the end of the study. Use (8.11) in McCullagh & Nelder (1983). */ _PRE_ = J(_R_,_R_,0); _POST_ = J(_R_,1,0); _PREN_ = J(_R_,_R_,0); _PREN2_= J(_R_,_R_,0); _POSTN_ = J(_R_,1,0); /* NEW for differentiation between RW and R0 */ _START_ = 0; _STOP_ = 0; DO SUBPOP = 1 TO _S_; /* Process ea. subpopulation in turn */ /* First, read the appropriate submatrices of these quantities */ _START_ = _STOP_ + 1; _STOP_ = _STOP_ + _T_; _XS_ = _X_[_START_:_STOP_,]; /* Xs */ _WS_ = _DERIV_[SUBPOP,] / _STD_[SUBPOP,]; /* (dMU/dETA)(Ai^-1/2) */ _ES_ = _ETA_[SUBPOP,]; /* Deviations, Rs */ _ES_ = _ES_`; /* (Tx1) vector */ /* Distinguish between complete vs censored follow-up */ IF ALL(_TAU_= 1) THEN DO; /* Complete follow-up */ _RINV_ = INV(_CORR_); _R0_ = _CORR_; _RW_ = I(nrow(_CORR_)); /* NEW: Differentiation between RW and R0 */ _WSX_ = _RINV_ # (_WS_`*_WS_); /* (dMU/dETA)(A^-1/2)R_inv(A^-1/2)(dMU/dETA) */ _WSO_ = _RW_ # (_WS_`*_WS_); /* New: (dMU/dETA)(A^-1/2)I(A^-1/2)(dMU/dETA) */ _WSI_ = _R0_ # (_WS_`*_WS_); /* New: (dMU/dETA)(A^-1/2)R0(A^-1/2)(dMU/dETA) */ _PRE_ = _PRE_ + _PI_[1,SUBPOP] # (_XS_` * _WSX_ * _XS_); /* omega_s Xs' Ws Xs */ _PREN_ = _PREN_ + _PI_[1,SUBPOP] # (_XS_` * _WSO_ * _XS_); /* Same with working independence */ _PREN2_ = _PREN2_ + _PI_[1,SUBPOP] # (_XS_` * _WSI_ * _XS_); _POST_ = _POST_ + _PI_[1,SUBPOP] # (_XS_` * _WSX_ * _ES_); /* omega_s Xs' Ws ETAs */ _POSTN_ = _POSTN_ + _PI_[1,SUBPOP] # (_XS_` * _WSO_ * _ES_); /* Same with working independence */ END; /* Of complete follow-up loop */ ELSE DO; /* Censoring */ _STR_ = 0; _STP_ = 0; /* Initialize */ _PR_ = J(_R_,_R_,0); _PST_ = J(_R_,1,0); _PRN_ = J(_R_,_R_,0); _PRN2_ = J(_R_,_R_,0); _PSTN_ = J(_R_,1,0); /* NEW for differentiation between RW and R0 */ /* Now, we process each time point in turn. */ DO _TP_ = 1 TO _T_; _STR_ = _STP_ + 1; _STP_ = _STP_ + _T_; _RT_INV = _RINV_[_STR_:_STP_,]; /* Read Rt-inv */ _RT_INV = _RT_INV[1:_TP_,1:_TP_]; _RWT_ = _IINV_[_STR_:_STP_,]; /* R0 and RW */ _RWT_ = _RWT_[1:_TP_,1:_TP_]; _R0T_ = _RX_[_STR_:_STP_,]; _R0T_ = _R0T_[1:_TP_,1:_TP_]; _XST_ = _XS_[1:_TP_,]; /* Xst */ _WST_ = _WS_[,1:_TP_]; _WIST_ = _R0T_ # (_WST_`*_WST_); /* W_outer for differentiation between RW and R0 */ _WOST_ = _RWT_ # (_WST_`*_WST_); /* W_inner for differentiation between RW and R0 */ _WXST_ = _RT_INV # (_WST_`*_WST_); _EST_ = _ES_[1:_TP_,]; _PR_ = _PR_ + _TAU_[SUBPOP,_TP_] # (_XST_` * _WXST_ * _XST_); /* pi_st Xst' Wts Xst */ _PRN_ = _PRN_ + _TAU_[SUBPOP,_TP_] # (_XST_` * _WOST_ * _XST_); /* Same with working */ _PRN2_ = _PRN2_ + _TAU_[SUBPOP,_TP_] # (_XST_` * _WIST_ * _XST_); /* independence */ _PST_ = _PST_ + _TAU_[SUBPOP,_TP_] # (_XST_` * _WXST_ * _EST_); /* pi_st Xst' Wst ETAst */ _PSTN_ = _PSTN_ + _TAU_[SUBPOP,_TP_] # (_XST_` * _WOST_ * _EST_); /* pi_st Xst' Wst ETAst */ END; /* Of _TP_ loop */ FREE _TP_ _STR_ _STP_ _RT_INV _XST_ _WXST_ _EST_ _R0T_ _RWT_ _WOST_ _WIST_; _PRE_ = _PRE_ + _PI_[1,SUBPOP] # _PR_; _POST_ = _POST_ + _PI_[1,SUBPOP] # _PST_; _PREN_ = _PREN_ + _PI_[1,SUBPOP] # _PRN_; _PREN2_ = _PREN2_ + _PI_[1,SUBPOP] # _PRN2_; _POSTN_ = _POSTN_ + _PI_[1,SUBPOP] # _PSTN_; FREE _PR_ _PST_ _PRN_ _PRN2_ _PSTN_ ; END; /* Of censored case loop */ END; /* Of SUBPOP loop */ FREE _START_ _STOP_ _XS_ _WS_ _ES_ _WSX_ _WSO_ _WSI_; /* Now, invert the (Xs'WsXs) matrix to generate BETA. This is also contributes to the model based covariance matrix. */ _EIG_ = EIGVAL(_PRE_); IF ANY(_EIG_) <= 0 THEN DO; PRINT ,'B_HAT: The quantity Ds`(Vs^-1)Ds is not full rank,', 'GEESIZE is now ending ...'; STOP; END; FREE _EIG_; /* Now, invert the (Xs'WsXs) matrix to generate BETA. This is also contributes to the model based covariance matrix. */ _EIG_ = EIGVAL(_PREN_); IF ANY(_EIG_) <= 0 THEN DO; PRINT ,'B_HAT: The Fisher information matrix is not positive definite,', 'GEESIZE is now ending ...'; STOP; END; FREE _EIG_; _EIG_ = EIGVAL(_PREN2_); IF ANY(_EIG_) <= 0 THEN DO; PRINT ,'B_HAT: The OPG is not positive definite,', 'GEESIZE is now ending ...'; STOP; END; FREE _EIG_; _EIG_ = EIGVAL(_PREN_ * inv(_PREN2_) * _PREN_); IF ANY(_EIG_) <= 0 THEN DO; PRINT ,'B_HAT: The robust variance matrix is not positive definite,', 'GEESIZE is now ending ...'; STOP; END; FREE _EIG_; _COV_B = SOLVE(_PRE_,I(_R_)); /* (Xs' Ws Xs)^-1 */ _BETA_ = _COV_B * _POST_; /* GEE Estimate of BETA */ _COV_B = _PHI_ # _COV_B; /* Co (Ds'(Vs^-1)Ds)^1 */ _COV_BP = SOLVE(_PREN_,I(_R_)); _COV_BN = _COV_BP` * _PREN2_ * _COV_BP; _BETAN_ = _COV_BP * _POSTN_; _COV_BN = _PHI_ # _COV_BN; FREE _PRE_ _PREN_ _PREN2_; IF _PRINT_ >= 2 THEN DO; PRINT ,'The GEE estimate of BETA anticipated at end of study'; _CLNAME_ = {'Estimate' }; PRINT 'Beta', _BETA_ [ROWNAME=_BLANK_ COLNAME=_CLNAME_ FORMAT=12.4]; PRINT 'Betan', _BETAN_ [ROWNAME=_BLANK_ COLNAME=_CLNAME_ FORMAT=12.4]; print 'COVB',_Cov_B; print 'COVBN',_cov_BN; END; FINISH; /* Of the B_HAT subroutine */ *----------------------------- G_SIZE ----------------------------; START G_SIZE; /* This module derives the values of the non-centrality parameter and computes the corresponding sample size. First, derive the RHS in the expression for the non-centrality parameter */ IF _PRINT_ >= 2 THEN DO; PRINT ,'The hypothesis matrix _H_ of specific interest:'; PRINT _H_ [ROWNAME=_BLANK_ COLNAME=_BLANK_ FORMAT=12.4]; END; _DF_HYP = NROW(_H_); _XI_ = _H_*_BETA_ - _h0_; _COV_XI = _H_*_COV_B*_H_`; _XIN_ = _H_*_BETAN_ - _h0_; _COV_XIN = _H_ * _COV_BN * _H_`; /* Ensure that the hypothesis matrix is full rank. */ _EIG_ = EIGVAL(_COV_XI); IF ANY(_EIG_) <= 0 THEN DO; PRINT ,'B_HAT: The hypothesis matrix H is not full rank,', 'resulting in a non positive-definite covariance matrix', 'GEESIZE is now ending ...'; STOP; END; FREE _EIG_; _EIG_ = EIGVAL(_COV_XIN); IF ANY(_EIG_) <= 0 THEN DO; PRINT ,'B_HAT: The hypothesis matrix H is not full rank,', 'resulting in a non positive-definite robust covariance matrix', 'GEESIZE is now ending ...'; STOP; END; FREE _EIG_; IF _PRINT_ >= 2 THEN DO; PRINT ,'The estimate of the linear contrast H_BETA - h0'; _CLNAME_ = {'Estimate' }; PRINT _XI_ [ROWNAME=_BLANK_ COLNAME=_CLNAME_ FORMAT=12.4]; END; IF _PRINT_ >= 2 THEN DO; PRINT ,'The estimate of the linear contrast H_BETAN - h0'; _CLNAME_ = {'Estimate' }; PRINT _XIN_ [ROWNAME=_BLANK_ COLNAME=_CLNAME_ FORMAT=12.4]; END; /* Now, compute the non-centrality parameter and generate n. */ _NONCEN_ = SOLVE(_COV_XI,I(_DF_HYP)); _NONCEN_ = _XI_`*_NONCEN_*_XI_; FREE _XI_ _COV_XI; _NONCENN_ = SOLVE(_COV_XIN,I(_DF_HYP)); _NONCENN_ = _XIN_`*_NONCENN_*_XIN_; FREE _XIN_ _COV_XIN; /* Now, find the critical value and the tabulated non-cen value */ _CRIT_ = CINV((1-_TYPE_I),_DF_HYP); /* Critical value */ _NC_ = CNONCT(_CRIT_,_DF_HYP,_TYPE_II); /* Non-Cent parm */ _N_ = _NC_ / _NONCEN_; /* Unit size */ _N_ = CEIL(_N_); /* Round up */ SUB_SIZE = _N_ # _PI_; /* SubPop sizes */ SUB_SIZE = CEIL(SUB_SIZE); /* Rounded up */ _TOT_N = SUB_SIZE[,+]; /* Tot study size */ /* Robust variance matrix */ _NN_ = _NC_ / _NONCENN_; /* Unit size */ _NN_ = CEIL(_NN_); /* Round up */ SUB_SIZEN = _NN_ # _PI_; /* SubPop sizes */ SUB_SIZEN = CEIL(SUB_SIZEN); /* Rounded up */ _TOT_NN = SUB_SIZEN[,+]; /* Tot study size */ /* Compute observed type II error function at this sample size. */ _LAMBDA_ = _N_ # _NONCEN_; /* Non-central parameter */ _BETA_N = PROBCHI(_CRIT_,_DF_HYP,_LAMBDA_); FREE _NONCEN_ _LAMBDA_; _STATS_ = _N_ || _TOT_N || _DF_HYP || _CRIT_ || _NC_ || _BETA_N; /* Robust variance matrix */ _LAMBDAN_ = _NN_ # _NONCENN_; /* Non-central parameter */ _BETA_NN = PROBCHI(_CRIT_,_DF_HYP,_LAMBDAN_); FREE _NONCENN_ _LAMBDAN_; _STATSN_ = _NN_ || _TOT_NN || _DF_HYP || _CRIT_ || _NC_ || _BETA_NN; /* Report the results of the calculations as the user requests */ IF _PRINT_ > 0 THEN DO; PRINT ,'The minimum required sample size at the specified', 'alpha and beta levels of precision and associated statistics'; _CLNAME_ = { 'Unit Size' 'Total N' 'Hyp df' 'Crit Val' 'NC Parm' 'Type II' }; PRINT _STATS_ [COLNAME=_CLNAME_ ROWNAME=_BLANK_ FORMAT=10.4]; PRINT ,'The minimum required sample size at the specified', 'alpha and beta levels of precision and associated statistics' 'with independent working correlation'; _CLNAME_ = { 'Unit Size' 'Total N' 'Hyp df' 'Crit Val' 'NC Parm' 'Type II' }; PRINT _STATSN_ [COLNAME=_CLNAME_ ROWNAME=_BLANK_ FORMAT=10.4]; IF ANY(_PI_ ^= 1) THEN DO; PRINT ,'The sample sizes in the different subpopulations'; PRINT SUB_SIZE [ROWNAME=_BLANK_ COLNAME=_SNAME_ FORMAT=8.0]; PRINT SUB_SIZEN [ROWNAME=_BLANK_ COLNAME=_SNAME_ FORMAT=8.0]; END; FREE _CLNAME_; END; _STATS_ = _RHO_ || _PSI_ || _PHI_ || _STATS_; _STATSN_ = _RHO_ || _PSI_ || _PHI_ || _STATSN_; IF _PRINT_ > 0 THEN RUN DIVIDER; FINISH; /* Of the G_SIZE subroutine */ *----------------------------- GEESIZE ---------------------------; START GEESIZE; /* Check all the input matrices and print any error messages. */ RUN CHECKING; /* Print back the input matrices as requested. */ IF _PRINT_ >= 1 THEN RUN PRINTBAK; /* Perform preliminary calculations and estimate of BETA. */ RUN PRELIMS; RUN B_HAT; /* Calculate the required sample size and report the results. */ RUN G_SIZE; FINISH; /* Of the GEESIZE subroutine */ OPTIONS SOURCE;