/** \file Generate future incoming cohorts (51/52yr olds) based on census population projection and projected trends. - Use the 2004 as the base - Apr 1st, convert AIME into 2004 dollars - Apr 8th, modify mean of wlth_nonzero; mean and variance of loghatotax,simulation of the two - Apr 12nd, don't allow any feedback - Wealth distribution - use empirical distribution - Apr 15th, don't change the smoking trend - 9/8/2009 - Added new age variables, removed age splines from here - Simulate status_quo cohorts out to 2080. - 5/13/2014 - Use 2012 population projections. \todo convert the all the hard-coded lists of variables in to globals or locals as appropriate. \todo Resurrect the European SHARE weights, in case we ever want to compare against the European population again. \todo create a mapping between trends and scenarios to automate the generation of multiple scenarios with different trends */ include common.do ///!\bug Putting random junk in the scenario doesn't cause any errors or even any warnings local scr : env SCENARIO * Take bootstrap sample as an argument local bsamp : env BREP * Trend file to use for incoming cohorts. Named base_data/trend_all_`trend_name'.dta local trend_name : env TREND * Add an argument for biomarkers local biomarkers : env BIOMARKERS if `bsamp'==0 { local outfile $outdata/new51s_`scr'.dta } else { local outfile $outdata/input_rep`bsamp'/new51s_`scr'.dta } if `biomarkers'== 0 { local outfile $outdata/new51s_`scr'.dta } else { local outfile $outdata/new51s_`scr'_bio.dta } * Take the year as an argument local fy : env FYEAR local ly : env LYEAR * File to use for population projections global pop_proj pop5152_projection_2150 * Not adjust economic outcomes to indirect changes global indirect_adjust = -1 * Don't allow for any feedback * global indirect_adjust = -1 * Type of outcome global bin hibpe hearte diabe anyhi shlt work wlth_nonzero anydb anydc anyadl anyiadl global cont global iht iearnx hatotax dcwlthx global ordered wtstate smkstat rdb_ea_c rdb_na_c * For categorical outcomes global wtstate_cat overwt obese_1 obese_2 obese_3 global smkstat_cat smokev smoken global funcstat_cat iadl1 adl1p global rdb_ea_c_cat rdb_ea_2 rdb_ea_3 global rdb_na_c_cat rdb_na_2 rdb_na_3 rdb_na_4 global baselist $bin $cont $wtstate_cat $smkstat_cat $funcstat_cat $rdb_ea_c_cat $rdb_na_c_cat /* * Old way of simulating wealth ********************************************** * Dimension of wealth data for imputation ********************************************** drop _all use "$outdata/new51_wlth2004.dta" global wlth_impn = _N drop _all */ ********************************************** * Modify variance of loghatotax * No, draw hatota from empirical distribution * No, use Inverse hyperbolic sine transform ********************************************** * set seed 43241 clear global scenario = "`scr'" dis "Scenario is: " "$scenario" ********************************************** * Import population trends ********************************************** use "$outdata/$pop_proj.dta", clear qui sum year local begyr = r(min) local maxyr = r(max) sort year male hispan black, stable gen subgrp = 2^2 * male + 2^1 * hispan + black drop male hispan black reshape wide pop, i(year) j(subgrp) mkmat year pop*, mat(poptrend) * Row names global rowname "" forvalues i = `begyr'(2)`maxyr'{ global rowname $rowname yr`i' } matrix rownames poptrend = $rowname ******* Figure out adjustment factors for wealth - To do: calculate this if the user has restricted data access local res: env RES if `res'==1 { * use "$dua_rand_hrs/age5055_hrs1992r.dta" use "$outdata/age5055_hrs1992.dta" sum hatotax [aw=weight] local mean_hatota92 = r(mean) } else { local mean_hatota92 = 272.3572 } use "$outdata/age5055_hrs2010.dta" sum hatotax [aw=weight] local mean_hatota04 = r(mean) local wealth_adj = `mean_hatota04'/`mean_hatota92' di `wealth_adj' ****************** * The incoming cohort model is estimated from the 1992 cohort. So the means estimates will be for 1992, not 2010. * Therefore need to adjust trends to account for the shift from 1992 to 2010. ****************** global trend9210 hibpe hearte diabe anyhi work $wtstate_cat $smkstat_cat anyadl anyiadl * Get the 1992 means use "$outdata/age5055_hrs1992.dta", clear * Create indicator variables for weight status gen overwt = wtstate == 2 if wtstate < . gen obese_1 = wtstate == 3 if wtstate < . gen obese_2 = wtstate == 4 if wtstate < . gen obese_3 = wtstate == 5 if wtstate < . * Create indicator variables for smoking status gen smokev = smkstat == 3 | smkstat == 2 if smkstat < . gen smoken = smkstat == 3 if smkstat < . keep $trend9210 weight collapse $trend9210 [pw = weight] * collapse $trend9210 gen year = 1992 sort year, stable tempfile tmp save `tmp' if `bsamp'==0 { use "$outdata/age5055_hrs2010.dta", clear } else { use "$outdata/input_rep`bsamp'/age5055_hrs2010.dta", clear } * Create indicator variables for weight status gen overwt = wtstate == 2 if wtstate < . gen obese_1 = wtstate == 3 if wtstate < . gen obese_2 = wtstate == 4 if wtstate < . gen obese_3 = wtstate == 5 if wtstate < . * Create indicator variables for smoking status gen smokev = smkstat == 3 | smkstat == 2 if smkstat < . gen smoken = smkstat == 3 if smkstat < . keep $trend9210 weight collapse $trend9210 [aw = weight] * collapse $trend9210 gen year = 2010 sort year, stable append using `tmp' sort year, stable * For the incoming cohort simulation, we want proportion of EX SMOKERS, not EVER SMOKER replace smokev = smokev - smoken mkmat $trend9210, mat(trend9210) matrix colnames trend9210 = phibpe phearte pdiabe panyhi pwork pwtstate2 pwtstate3 pwtstate4 pwtstate5 psmkstat2 psmkstat3 panyadl panyiadl matrix rownames trend9210 = yr1992 yr2010 matlist trend9210 ********************************************** * Incoming cohort model predicts categorical BMI * Get centiles from 50-55 2010 data and use this to convert categorical to continous * Creates a dataset `bmi_centiles' in long format with the following variables: * centile - 0 to 100 indicating the centile * wtstate - the weight category for the centile, 1 to 5 * bmi - the bmi for that centile and weight state category * The dataset is sorted by centile wtstate ********************************************** if `bsamp'==0 { use "$outdata/age5055_hrs2010.dta", clear } else { use "$outdata/input_rep`bsamp'/age5055_hrs2010.dta", clear } gen bmi = exp(logbmi) local nwtstate_cats = wordcount("$wtstate_cat")+1 matrix bmi_centiles = J(101, `nwtstate_cats'+1, .) quietly{ forvalues i = 1/`nwtstate_cats'{ forvalues j = 0/100{ centile bmi if wtstate == `i', centile(`j') matrix bmi_centiles[`j'+1, `i'+1] = r(c_1) matrix bmi_centiles[`j'+1, 1] = `j' } } } local colnames centile forvalues i = 1/`nwtstate_cats'{ local colnames `colnames' bmi`i' } matrix colnames bmi_centiles = `colnames' clear svmat bmi_centiles, names(col) reshape long bmi, i(centile) j(wtstate) tempfile bmi_centiles save `bmi_centiles', replace * Process the SSA summary files and reconstruct the distributions by interpolation use $resmodels/ssa_dist.dta, replace * Move from the 50 categories to tenths of percentiles expand 20 sort q_grp gen id = (_n-1)/10 * Set values to be used for imputation at middle of each group replace q_dist = . if mod(id+1,2)>0 replace aime_dist = . if mod(id+1,2)>0 * Set lowest values replace q_dist = 0 if id == 0 replace aime_dist = 0 if id == 0 * In case we need a 100th expand 2 if _n == _N replace id = 100 if _n == _N ipolate q_dist id, gen(q_imp) epolate ipolate aime_dist id, gen(aime_imp) epolate gen aime_pct = id gen q_pct = id replace aime_dist = aime_imp replace q_dist = q_imp * quarters should be integers replace q_dist = round(q_dist) keep aime_pct aime_dist q_pct q_dist tempfile ssa save `ssa' ******************************************* * Import health trends - DON'T BRING IN HEALTH TRENDS.** ********************************************** drop _all use "$outdata/trend_all_`trend_name'" keep year phibpe phearte pdiabe panyhi pwtstate2 pwtstate3 pwtstate4 pwtstate5 psmkstat2 psmkstat3 panyadl panyiadl pwork pwlth_nonzero plogiearnx ploghatotax panydc panydb if "`scr'" == "obese_r"{ * Shut down the trends in obesity, diabetes, and hypertension foreach x in phibpe pdiabe pwtstate2 pwtstate3 { * foreach x in phibpe pdiabe{ replace `x' = 1 } } else if "`scr'" == "obs80"{ * Return to the 1976-1980 NHANES level merge m:1 year using $outdata/obs80_adjustments.dta, nogen update replace keepusing(pdiabe phibpe pwtstate3 pwtstate4 pwtstate5) * keep pwtstate2 constant replace pwtstate2 = 1 if year < = 2030 foreach x in pdiabe pwtstate2 pwtstate3 pwtstate4 pwtstate5 phibpe { qui sum `x' if year == 2030 replace `x' = r(mean) if year > 2030 } } else if "`scr'" == "obs_flat"{ **keep obesity flat.** replace pwtstate2 = 1 replace pwtstate3 = 1 replace pwtstate4 = 1 replace pwtstate5 = 1 replace pdiabe = 1 replace phibpe = 1 } else if "`scr'" == "smk80"{ * Return to the 1976-1980 NHANES level replace psmkstat3 = (1 + 0.016) ^ (year - 2010) if year <= 2030 qui sum psmkstat3 if year == 2030 replace psmkstat3 = r(mean) if year > 2030 } else if "`scr'" == "smk_flat"{ **Keep smoking at 2010 level replace psmkstat2 = 1 replace psmkstat3 = 1 } else if "`scr'" == "notrend"{ * Shut down the trends in everything foreach x in phibpe phearte pdiabe panyhi pwtstate2 pwtstate3 psmkstat2 psmkstat3 panyadl panyiadl pwork pwlth_nonzero plogiearnx ploghatotax panydc panydb { replace `x' = 1 } } * Generate other trends foreach x in shlt aime q rdb_na_2 rdb_na_3 rdb_na_4 rdb_ea_2 rdb_ea_3 hatotax iearnx dcwlthx { qui gen p`x' = 1 } * Rename variables forvalues i = 2/3{ ren prdb_ea_`i' prdb_ea_c`i' } forvalues i = 2/4{ ren prdb_na_`i' prdb_na_c`i' } ****************** * Adjust for the the trend between 1992 to 2010 (estimtion sample for initial cohort is from year 1992) ****************** local tlist : colnames trend9210 foreach x in `tlist' { local ratio =trend9210[rownumb(trend9210, "yr2010"), colnumb(trend9210, "`x'")]/ trend9210[rownumb(trend9210, "yr1992"), colnumb(trend9210, "`x'")] sum `x' replace `x' = `x' * `ratio' sum `x' } * Keep only even years keep if mod(year,2) == 0 qui sum year local begyr = r(min) local maxyr = r(max) mkmat * , mat(hlthtrend) * Row names global rowname "" forvalues i = `begyr'(2)`maxyr'{ global rowname $rowname yr`i' } matrix rownames hlthtrend = $rowname matlist hlthtrend ********************************************** * Interventions on the trends ********************************************** * Multiple trends shut down * if "$scenario" == "multi_r" | "$scenario" == "shareprev" { if "$scenario" == "multi_r" { local exclvar1 smkstat local exclvar2 wtstate local trow = rowsof(hlthtrend) local tcol = colsof(hlthtrend) local cyear = colnumb(hlthtrend,"year") local ckeep2 = colnumb(hlthtrend,"p`exclvar1'2") local ckeep3 = colnumb(hlthtrend,"p`exclvar1'3") local ckeep4 = colnumb(hlthtrend,"p`exclvar2'2") local ckeep5 = colnumb(hlthtrend,"p`exclvar2'3") local ckeep6 = colnumb(hlthtrend,"phibpe") * local ckeep7 = colnumb(hlthtrend,"phearte") * local ckeep8 = colnumb(hlthtrend,"pdiabe") local ckeep7 = colnumb(hlthtrend,"pdiabe") forvalues i = 1/`trow' { forvalues k = 4/7{ matrix hlthtrend[`i',`ckeep`k''] = 1 } } } ********************************************** * Import V-C matrix ********************************************** drop _all use "$indata/incoming_vcmatrix.dta" rename fhibpe hibpe rename fhearte hearte rename fdiabe diabe rename fanyhi anyhi rename fshlt shlt rename fwtstate wtstate rename fsmkstat smkstat rename fanyadl anyadl rename fanyiadl anyiadl rename fwork work rename fwlth_nonzero wlth_nonzero * rename fraime aime * rename frq q rename fanydc anydc rename fanydb anydb rename frdb_ea_c rdb_ea_c rename frdb_na_c rdb_na_c drop _rowname /* foreach i in $iht{ rename log`i' `i' } */ mkmat *, mat(omega) local d = colsof(omega) forvalues i = 1/`d'{ local k = `i'+1 forvalues j = `k'/`d'{ matrix omega[`i',`j'] = omega[`j',`i'] } } local colname: colnames omega noi dis "`colname'" matrix rownames omega = `colname' local num = rowsof(omega) * Get the variance of earnings, hatota, and dcwlth drop _all use "$indata/incoming_means_econ_tos.dta" mkmat theta omega ssr, matrix(iht_tos) rownames(_rowname) drop _all /* * This happens when the matrix is created. matrix omega[rownumb(omega,"iearnx"), colnumb(omega,"iearnx")] = iht_tos[rownumb(iht_tos,"iearnx"), colnumb(iht_tos,"ssr")] matrix omega[rownumb(omega,"hatotax"), colnumb(omega,"hatotax")] = iht_tos[rownumb(iht_tos,"hatotax"), colnumb(iht_tos,"ssr")] matrix omega[rownumb(omega,"dcwlthx"), colnumb(omega,"dcwlthx")] = iht_tos[rownumb(iht_tos,"dcwlthx"), colnumb(iht_tos,"ssr")] ********* * Covariance should be adjusted too forvalues i = 1/`num' { forvalues j = 1/`num'{ if `i' != `j' { matrix omega[`i', `j'] = tanh( omega[`i', `j']) * sqrt(omega[`i', `i'] * omega[`j', `j']) } } } */ matrix list omega * Different V-C matrices with each of the outcome as the first row foreach x in `colname' { takestring_old, oldlist("`colname'") newname("addlist") extlist("`x'") global vlist `x' $addlist matrix vc_`x' = J(`num',`num',.) matrix colnames vc_`x' = $vlist matrix rownames vc_`x' = $vlist foreach y in $vlist { foreach z in $vlist { matrix vc_`x'[rownumb(vc_`x',"`y'"), colnumb(vc_`x', "`z'")] = omega[rownumb(omega,"`y'"), colnumb(omega,"`z'")] } } } * Cholesky decomposition matrix L_omega = cholesky(omega) foreach x in `colname'{ matrix L_`x' = cholesky(vc_`x') } ********************************************** * Import mean-estimation matrix ********************************************** drop _all use "$indata/incoming_means.dta" gen order = _n drop _rowname * append using "$indata/incoming_means_econ.dta" /* foreach v in $iht { replace order = rownumb(omega,"`v'") if var == "`v'" } drop var */ sort order, stable if "`scr'" == "fem_work"{ replace constant = constant + male if inlist(order,9,11,14) replace male = 0 if inlist(order,9,11,14) } drop order mkmat *, mat(meanest) matrix rownames meanest = `colname' mat list meanest ************************************************ * Create matrices for SSA-related joint models * ************************************************ drop _all use "$resmodels/ssa_vcmatrix.dta" drop _rowname mkmat *, mat(ssa_omega) local d = colsof(ssa_omega) forvalues i = 1/`d'{ local k = `i'+1 forvalues j = `k'/`d'{ matrix ssa_omega[`i',`j'] = ssa_omega[`j',`i'] } } local ssa_colname: colnames ssa_omega noi dis "`ssa_colname'" matrix rownames ssa_omega = `ssa_colname' local num = rowsof(ssa_omega) matrix list ssa_omega * Different V-C matrices with each of the outcome as the first row foreach x in `ssa_colname' { takestring_old, oldlist("`ssa_colname'") newname("addlist") extlist("`x'") global ssavlist `x' $addlist matrix ssavc_`x' = J(`num',`num',.) matrix colnames ssavc_`x' = $ssavlist matrix rownames ssavc_`x' = $ssavlist foreach y in $ssavlist { foreach z in $ssavlist { matrix ssavc_`x'[rownumb(ssavc_`x',"`y'"), colnumb(ssavc_`x', "`z'")] = ssa_omega[rownumb(ssa_omega,"`y'"), colnumb(ssa_omega,"`z'")] } } } * Cholesky decomposition matrix L_ssaomega = cholesky(ssa_omega) foreach x in `ssa_colname'{ matrix L_ssa`x' = cholesky(ssavc_`x') } * Models for aime and quarters worked drop _all use "$resmodels/ssa_means.dta" gen order = _n drop _rowname sort order, stable drop order mkmat *, mat(ssa_meanest) matrix rownames ssa_meanest = `ssa_colname' mat list ssa_meanest * Read in the file with the cut points for the SSA ordered probit models, make a matrix for later use drop _all use "$resmodels/ssa_cut_points.dta" mkmat cut_*, mat(ssa_cutpoints) rownames(_rowname) mat list ssa_cutpoints local ssa_cuts = colsof(ssa_cutpoints) forvalues x = 1/`ssa_cuts' { foreach y in aime_pct q_pct { scalar `y'_cut`x' = ssa_cutpoints[rownumb(ssa_cutpoints,"`y'"),colnumb(ssa_cutpoints,"cut_`x'")] di `y'_cut`x' } } ********************************************** * Import Estimation for DI claim and DB tenure * Apr 8 2008, add ssiclaim ********************************************** * IMPORT ESTIMATES foreach var in init_diclaim init_logtenure init_ssiclaim init_hatota { mata: _getestimates("$resmodels/m`var'","$resmodels/s`var'", "coef_`var'") } ********************************************** * Import Estimation for deprsymp hearta heartae stroks ********************************************** * IMPORT ESTIMATES foreach var in init_deprsymp init_hearta init_heartae { mata: _getestimates("$outdata/m`var'","$outdata/s`var'", "coef_`var'") } ********************************************** * Import the incoming cohort, baseline prevalence and predicted prevalence * Baseline prevalence might be inaccurate ********************************************** * A list for random draws global drawlist "" foreach x in `colname' { global drawlist $drawlist d_`x' } drop _all * Read in the file with the cut points for the ordered probit models, make a matrix for later use use "$indata/incoming_cut_points.dta" mkmat cut_1 cut_2 cut_3 cut_4, mat(cutpoints) rownames(_rowname) use "$outdata/incoming_base.dta" * if missing("`bsamp'") & "`scr'" != "bootstrap" { * use "$outdata/incoming_base.dta", clear *} *else { * use "$outdata/input_rep`bsamp'/incoming_base.dta", clear *} sort male hispan black, stable gen subgrp = 2^2 * male + 2^1 * hispan + black * Set cutoffs for ordered outcomes scalar wtstate_cut1 = cutpoints[rownumb(cutpoints,"fwtstate"),colnumb(cutpoints,"cut_1")] scalar wtstate_cut2 = cutpoints[rownumb(cutpoints,"fwtstate"),colnumb(cutpoints,"cut_2")] scalar wtstate_cut3 = cutpoints[rownumb(cutpoints,"fwtstate"),colnumb(cutpoints,"cut_3")] scalar wtstate_cut4 = cutpoints[rownumb(cutpoints,"fwtstate"),colnumb(cutpoints,"cut_4")] scalar smkstat_cut1 = cutpoints[rownumb(cutpoints,"fsmkstat"),colnumb(cutpoints,"cut_1")] scalar smkstat_cut2 = cutpoints[rownumb(cutpoints,"fsmkstat"),colnumb(cutpoints,"cut_2")] scalar rdb_ea_c_cut1 = cutpoints[rownumb(cutpoints,"frdb_ea_c"),colnumb(cutpoints,"cut_1")] scalar rdb_ea_c_cut2 = cutpoints[rownumb(cutpoints,"frdb_ea_c"),colnumb(cutpoints,"cut_2")] scalar rdb_na_c_cut1 = cutpoints[rownumb(cutpoints,"frdb_na_c"),colnumb(cutpoints,"cut_1")] scalar rdb_na_c_cut2 = cutpoints[rownumb(cutpoints,"frdb_na_c"),colnumb(cutpoints,"cut_2")] scalar rdb_na_c_cut3 = cutpoints[rownumb(cutpoints,"frdb_na_c"),colnumb(cutpoints,"cut_3")] gen constant = 1 * Predicted prevalence local j = 1 foreach x in `colname' { matrix betamat = meanest[`j',1...] cap drop `x'_xb matrix score `x'_xb = betamat local j = `j' + 1 matrix mat_`x' = betamat mat list mat_`x' sum `x'_xb } drop constant ********************************************** * Form new linear combinations * A vector of incremental changes in linear combination ********************************************** ********************************************** * Simulate future years' data ********************************************** matrix share_us = (0.474 ,0.474,0.474,0.474, 0.72, 1.28, 0.31, 0.40, 0.44, 0.46, 0.62, 0.49) matrix colnames share_us = poverwt pobese_1 pobese_2 pobese_3 psmokev psmoken phearte pdiabe pstroke plunge pcancre phibpe cap rm `outfile' preserve forvalues yy=`fy'(2)`ly' { restore, preserve dis "***** Projection year is `yy' ***** " /* Generate random draws */ cap drop $drawlist drawnorm $drawlist, cov(omega) seed(987562) noisily sum d_* set seed 432422 if "$scenario" == "shareprev" { *SHARE prevalence foreach c in stroke lunge cancre { qui sum `c' cap drop p`c' qui gen p`c' = 1 - r(mean) } foreach c in pstroke plunge pcancre { local cnum = colnumb(share_us, "`c'") qui sum `c' local oldm = 1 - r(mean) local newm = share_us[1,`cnum'] replace `c' = 1 - (1 - `c') * `newm' } foreach c in stroke lunge cancre { cap drop draw qui gen draw = uniform() replace `c' = draw >= p`c' drop draw } } else if "$scenario" != "shareprev" { * Make sure RN sequence the same foreach c in stroke lunge cancre { cap drop draw qui gen draw = uniform() drop draw } } /* Determine the deviations */ foreach x in $bin { new_deviation, vname("`x'") vtype("b") cyr(`yy') data("hrs") } sum hearte_delta foreach x in $ordered { new_deviation, vname("`x'") vtype("o") cyr(`yy') data("hrs") } foreach x in $cont $iht { new_deviation, vname("`x'") vtype("c") cyr(`yy') data("hrs") } /* new latent variables */ * If adjust each outcome to all changes if $indirect_adjust == 0 { foreach x in $bin $ordered { cap drop inc`x' qui gen inc`x' = 0 foreach y in $bin $ordered { qui replace inc`x' = inc`x' + L_`y'[rownumb(L_`y',"`x'"), colnumb(L_`y', "`y'")] * `y'_delta di "inc`x' `y'_delta" sum inc`x' sum `y'_delta } cap drop `x'_xb_new qui gen `x'_xb_new = `x'_xb + inc`x' sum `x'_xb_new sum `x'_xb sum inc`x' } foreach x in $cont $iht{ cap drop inc`x' qui gen inc`x' = 0 qui replace inc`x' = inc`x' + L_`x'[rownumb(L_`x',"`x'"), colnumb(L_`x', "`x'")] * `x'_delta cap drop `x'_xb_new qui gen `x'_xb_new = `x'_xb + inc`x' } } * Allow for all feedbacks else if $indirect_adjust == 1 { foreach x in $bin $ordered $cont $iht{ cap drop inc`x' qui gen inc`x' = 0 foreach y in $bin $ordered $cont $iht { qui replace inc`x' = inc`x' + L_`y'[rownumb(L_`y',"`x'"), colnumb(L_`y', "`y'")] * `y'_delta } cap drop `x'_xb_new qui gen `x'_xb_new = `x'_xb + inc`x' } } * Don't allow for any feedback else if $indirect_adjust == -1 { foreach x in $bin $ordered $cont $iht { cap drop inc`x' qui gen inc`x' = 0 foreach y in `x' { qui replace inc`x' = inc`x' + `y'_delta } * Bryan's addition for diagnotic purposes * sum *_delta cap drop `x'_xb_new qui gen `x'_xb_new = `x'_xb + inc`x' sum `x'_xb_new sum `x'_xb sum inc`x' } } /* Predicted probabilities */ * Determine the probabilities * Binary outcomes foreach x in $bin{ cap drop p`x' gen p`x' = normal(-`x'_xb_new) } sum phearte **** For Weight and Smoking, we get new values by adjusting the cutoffs to match desired probabilities foreach v in wtstate smkstat { local ncuts = wordcount("$`v'_cat") local ncats = `ncuts'+1 /* summ `v'_xb_new local xb_mean = r(mean) */ * The hlthtrend matrix is the trend from 1992. So multiply by the proportion in 1992 to get proprotion in year yy local p1 = 1 forvalues probs = 2/`ncats' { local x p`v'`probs' local p`probs' = hlthtrend[rownumb(hlthtrend,"yr`yy'"), colnumb(hlthtrend, "p`v'`probs'")]*trend9210[rownumb(trend9210, "yr1992"), colnumb(trend9210, "`x'")] local p1 = `p1' - `p`probs'' } assert `p1' >= 0 assert `p1' <= 1 * Adjust the cuts forvalues c = 1/`ncuts' { local c_prev = `c' - 1 if `c' == 1 { dis "`v'" dis "`p1'" calc_cut_point `v'_xb_new, prob(`p1') prev_cut(-9999) } else { local prev_cut = `v'_cut`c_prev' dis "`v'" dis "`prev_cut'" dis "`p`c''" calc_cut_point `v'_xb_new, prob(`p`c'') prev_cut(`prev_cut') } matrix x = e(b) scalar `v'_cut`c' = x[1,1] * noi di "Cut `v'_cut`c' = " `v'_cut`c' } /* * Adjust the cuts forvalues c = 1/`ncuts' { local c_next = `c' + 1 local c_prev = `c' - 1 if `c' == 1 { scalar `v'_cut`c' = invnorm(`p1')+`xb_mean' } else if `c' == `ncuts' { scalar wtstate_cut`c' = invnorm(1-`p`c_next'')+`xb_mean' } else { scalar wtstate_cut`c' = invnorm(`p`c''+normal(`v'_cut`c_prev'-`xb_mean'))+`xb_mean' } } */ forvalues c = 1/`ncats' { di "`v'_`c' = `p`c''" } } * Ordered outcomes foreach x in $ordered{ if "`x'" == "rdb_na_c" { local numcut = 4 } else if "`x'" == "wtstate"{ local numcut = 5 } else{ local numcut = 3 } forvalues j = 1/`numcut'{ if `j' < `numcut'{ gen p`x'`j' = normal(`x'_cut`j' - `x'_xb_new) } else if `j' == `numcut'{ gen p`x'`j' = 1 } } } /* Predicted probabilities for shareprev scenario*/ if "$scenario" == "shareprev" { local clist: colnames share_us foreach c in pdiabe phibpe phearte { local cnum = colnumb(share_us, "`c'") qui sum `c' local oldm = 1 - r(mean) local newm = share_us[1,`cnum'] replace `c' = 1 - (1 - `c') * `newm' } * Ordinal outcomes gen poverwt = pwtstate2 - pwtstate1 gen pobese_1 = pwtstate3 - pwtstate2 gen pobese_2 = pwtstate4 - pwtstate3 gen pobese_3 = pwtstate5 - pwtstate4 gen psmokev = psmkstat2 - psmkstat1 gen psmoken = psmkstat3 - psmkstat2 foreach c in poverwt pobese_1 pobese_2 pobese_3 psmoken psmokev { local cnum = colnumb(share_us, "`c'") qui sum `c' local oldm = r(mean) local newm = share_us[1,`cnum'] replace `c' = `c' * `newm' } replace pwtstate4 = pwtstate5 - pobese_3 replace pwtstate3 = pwtstate4 - pobese_2 replace pwtstate2 = pwtstate3 - pobese_1 replace pwtstate1 = pwtstate2 - poverwt replace psmkstat2 = psmkstat3 - psmoken replace psmkstat1 = psmkstat2 - psmokev } /* Determine the status */ * Binary outcomes foreach x in $bin { cap drop `x' gen `x' = normal(d_`x') >= p`x' * For diagnostics sum `x' sum d_`x' sum `x' drop p`x' } * Ordered outcomes foreach x in $ordered { cap drop `x' gen `x' = . if "`x'" == "rdb_na_c" { local num_cut = 4 } else if "`x'" == "wtstate"{ local num_cut = 5 } else{ local num_cut = 3 } forvalues i = `num_cut'(-1)1 { replace `x' = `i' if normal(d_`x') <= p`x'`i' drop p`x'`i' } } * Continuous foreach x in $cont { cap drop `x' * Removing draw term for the $cont variables: gen `x' = `x'_xb_new + d_`x' * Added by Bryan for testing noisily sum `x' `x'_xb_new d_`x' } * Added by Bryan for testing noisily sum $cont **THETA and OMEGA for iearnx, hatotax, and dcwlthx foreach v in $iht { local `v'_omega = iht_tos[rownumb(iht_tos,"`v'"), colnumb(iht_tos,"omega")] local `v'_theta = iht_tos[rownumb(iht_tos,"`v'"), colnumb(iht_tos,"theta")] } * IHT **Instead of taking logs, take log(y+sqrt(y^2+1)) - gh(y,1,0) gen _ahg_temp = iearnx_xb_new + d_iearnx drop iearnx logiearnx iearnuc logiearnuc egen _iearnx = invgh(_ahg_temp), theta(`iearnx_theta') omega(`iearnx_omega') replace _iearnx = 0 if _iearnx < 0 | _iearnx == . /* Bryan's edits trying to incorporate iearnuc and logiearnuc */ gen iearnuc = _iearnx if !missing(_iearnx) egen logiearnuc = h(iearnuc) replace logiearnuc = logiearnuc/100 tabstat iearnuc, stat(mean, median, min, max) tabstat logiearnuc, stat(mean, median, min, max) replace _iearnx = min(_iearnx,200) if !missing(_iearnx) egen logiearnx = h(_iearnx) replace logiearnx = logiearnx/100 rename _iearnx iearnx **Force iearnx to be >=0.** drop _ahg_temp gen _ahg_temp = hatotax_xb_new + d_hatotax drop hatotax hatota loghatotax loghatota egen _hatotax = invgh(_ahg_temp) , theta(`hatotax_theta') omega(`hatotax_omega') replace _hatotax = 0 if wlth_nonzero != 1 **Adjust for growth from 1992 - 2010. replace _hatotax = _hatotax * `wealth_adj' gen hatota = _hatotax replace _hatotax = min(_hatotax,2000) if _hatotax!=. egen loghatotax = h(_hatotax) replace loghatotax = loghatotax/100 rename _hatotax hatotax drop _ahg_temp gen _ahg_temp = dcwlthx_xb_new + d_dcwlthx drop dcwlthx logdcwlthx egen _dcwlthx = invgh(_ahg_temp), theta(`dcwlthx_theta') omega(`dcwlthx_omega') replace _dcwlthx = 0 if _dcwlthx < 0 | _dcwlthx == . egen logdcwlthx = h(_dcwlthx) replace logdcwlthx = logdcwlthx/100 rename _dcwlthx dcwlthx drop _ahg_temp * New approach for aime and quarters worked - estimated a model on the 2004 50-55 year olds drawnorm d_aime_pct d_q_pct, cov(ssa_omega) seed(987562) sum d_aime_pct sum d_q_pct cap gen __cons = 1 foreach var in black hispan hsless college cancre diabe hibpe hearte lunge stroke anyadl anyiadl work age logiearnx loghatotax { gen male_`var' = male*`var' } * Predicted prevalence local j = 1 foreach x in `ssa_colname' { matrix betamat = ssa_meanest[`j',1...] cap drop `x'_xb matrix score `x'_xb = betamat local j = `j' + 1 matrix mat_`x' = betamat mat list mat_`x' di "mean of `x' is:" sum `x'_xb } drop __cons * Ordered outcomes local ssa_cats = `ssa_cuts' + 1 foreach x in aime_pct q_pct { forvalues j = 1/`ssa_cats'{ if `j' < `ssa_cats'{ gen p`x'`j' = normal(`x'_cut`j' - `x'_xb) } else if `j' == `ssa_cats'{ gen p`x'`j' = 1 } } } * Ordered outcomes foreach x in aime_pct q_pct { cap drop `x' gen `x' = . forvalues i = `ssa_cats'(-1)1 { replace `x' = `i' if normal(d_`x') <= p`x'`i' drop p`x'`i' } } ********************************************** * Incoming cohort model predicts categorical AIME and quarters worked in categories * Convert these categories into continuous values, then assign to values * Get 50-55 2004 distribution and use this to convert categorical to continous (we have 1000 points in our distribution) ********************************************** gen ssa_draw1 = runiform() gen ssa_draw2 = runiform() local adj = 100/`ssa_cats' replace aime_pct = round(`adj'*aime_pct - `adj'*ssa_draw1,0.1) replace q_pct = round(`adj'*q_pct - `adj'*ssa_draw2,0.1) * Fill in the AIME values merge m:1 aime_pct using `ssa', keepusing(aime_dist) nogen * Fill in the quarters worked values merge m:1 q_pct using `ssa', keepusing(q_dist) nogen rename aime_dist raime_test rename q_dist rq_test gen raime = raime_test gen rq = rq_test gen fraime = raime gen frq = rq drop aime_pct raime_test q_pct rq_test * Calculate rpia for each respondent * Calculate PIA for each respondent gen rl = 1 if died == 0 * No death year, since alive gen rdthyr = 2100 if died == 0 * Calculate PIA SsPIA_v2 raime rq rbyr rl rdthyr, gen(rpia) drop rl rdthyr /* foreach x in $iht { cap drop `x' egen `x' = log(invgh(`x'_xb_new + d_`x',theta, omega)) } */ * Binary and censored replace anydb = 0 if work != 1 replace anydc = 0 if work != 1 * Ordered and censored replace rdb_ea_c = 0 if anydb != 1 replace rdb_na_c = 0 if anydb != 1 * Continuous and truncated replace logiearnx = 0 if work != 1 replace logdcwlthx = 0 if anydc != 1 replace loghatotax = 0 if wlth_nonzero != 1 replace iearnx = 0 if work != 1 replace dcwlthx = 0 if anydc != 1 replace hatotax = 0 if wlth_nonzero != 1 * Bryan's edits to incorporate iearnuc and logiearnuc replace logiearnuc = 0 if work != 1 replace iearnuc = 0 if work != 1 * Fix inconsistencies in early versus normal retirement age (the former cannot be larger than the latter) replace rdb_ea_c = 2 if rdb_na_c == 1 & rdb_ea_c == 3 /* Dummies for ordered outcomes */ foreach x in $ordered { if "`x'" == "rdb_na_c" { local num_cut = 4 } else if "`x'" == "wtstate"{ local num_cut = 5 } else{ local num_cut = 3 } forvalues j = 2/`num_cut'{ local ovar = "`x'_cat" local v = word("$`ovar'", `j'-1) cap drop `v' gen `v' = `x' == `j' } } /* Adjust the population weight to reflect population size and structual change */ levelsof subgrp, local(subgrplv) dis "********** current year is: `yy' *************" gen weight`yy' = . foreach l of local subgrplv { local popratio = poptrend[rownumb(poptrend, "yr`yy'"), colnumb(poptrend,"pop`l'")]/poptrend[rownumb(poptrend, "yr2010"), colnumb(poptrend,"pop`l'")] replace weight`yy' = weight * `popratio' if subgrp == `l' } * Old HH total person level weight bys hhid: egen oldhhwt = total(weight) * New HH total person level weight bys hhid: egen newhhwt = total(weight`yy') * Adjust HH weight gen wthh`yy' = wthh * newhhwt/oldhhwt sum hearte sum hearte [aw=weight] drop weight wthh oldhhwt newhhwt ren weight`yy' weight ren wthh`yy' wthh sum hearte sum hearte [aw=weight] *smokev includes smoken replace smokev = 1 if smoken == 1 * Resort everything by HHIDPN. This fixes problems with the previous bys hhid that was unstable sorting sort hhidpn label var iearnx "Individual earnings in 1000s-max 200" label var hatotax "HH wlth in 1000s if positive,zero otherwise" label var dcwlthx "Individual DC wlth wv1-5 only in 1000s(=dcwlth)" label var logiearnx "(Log of earnings in 1000s)/100 if working,zero otherwise" label var logdcwlthx "(Log of DC wlth in 1000s)/100 if any DC,zero otherwise" label var loghatotax "(Log of hh wlth in 1000s if positive)/100,zero otherwise" label var iearnuc "Individual earnings in 1000s - uncapped" label var logiearnuc "IHT of earnings in 1000s/100 if working, zero otherwise" **PUT IN GROWTH FACTOR TO GET FROM 1992-2004.** *************************************** * Predict tenure for those with DB * Regression-based *************************************** matrix score logtenure = coef_init_logtenure drop db_tenure gen db_tenure = 10 * exp(logtenure) if anydb == 1 * Maximum tenure replace db_tenure = min(age_yrs - 15, db_tenure) if anydb == 1 replace db_tenure = -2 if anydb != 1 drop logtenure *************************************** * Predict DI claim * Regression-based *************************************** matrix score diclaim_xb = coef_init_diclaim drop diclaim gen x_diclaim = invnorm(uniform()) gen diclaim = x_diclaim + diclaim_xb >= 0 drop x_diclaim diclaim_xb *************************************** * Predict SSI claim * Regression-based *************************************** matrix score ssiclaim_xb = coef_init_ssiclaim drop ssiclaim gen x_ssiclaim = invnorm(uniform()) gen ssiclaim = x_ssiclaim + ssiclaim_xb >= 0 drop x_ssiclaim ssiclaim_xb *************************************** * Predict deprsymp * Regression-based *************************************** matrix score deprsymp_xb = coef_init_deprsymp drop deprsymp gen x_deprsymp = invnorm(uniform()) gen deprsymp = x_deprsymp + deprsymp_xb >=0 drop x_deprsymp deprsymp_xb *************************************** * Predict painstat * Regression-based *************************************** est use $outdata/new51_painstat.ster predict painstat_xb, xb gen x_painstat = invnorm(uniform()) gen painstat = 1 if x_painstat + painstat_xb <= _b[/cut1] replace painstat = 2 if x_painstat + painstat_xb > _b[/cut1] & x_painstat + painstat_xb <= _b[/cut2] replace painstat = 3 if x_painstat + painstat_xb > _b[/cut2] & x_painstat + painstat_xb <= _b[/cut3] replace painstat = 4 if x_painstat + painstat_xb > _b[/cut3] drop x_painstat painstat_xb est clear *************************************** * Predict adlstat conditional on anyadl * Regression-based *************************************** drop adlstat est use $outdata/new51_adlstat.ster predict adlstat_xb, xb gen x_adlstat = invnorm(uniform()) gen adlstat = 1 if !anyadl replace adlstat = 2 if x_adlstat + adlstat_xb <= _b[/cut1] & anyadl replace adlstat = 3 if x_adlstat + adlstat_xb > _b[/cut1] & x_adlstat + adlstat_xb <= _b[/cut2] & anyadl replace adlstat = 4 if x_adlstat + adlstat_xb > _b[/cut2] & anyadl drop x_adlstat adlstat_xb anyadl est clear *************************************** * Predict iadlstat conditional on anyiadl * Regression-based *************************************** drop iadlstat est use $outdata/new51_iadlstat.ster predict iadlstat_xb, xb gen x_iadlstat = invnorm(uniform()) gen iadlstat = 1 if !anyiadl replace iadlstat = 2 if x_iadlstat + iadlstat_xb < 0 & anyiadl replace iadlstat = 3 if x_iadlstat + iadlstat_xb >= 0 & anyiadl drop x_iadlstat iadlstat_xb anyiadl est clear *************************************** * Predict hearta * Regression-based *************************************** matrix score hearta_xb = coef_init_hearta drop hearta gen x_hearta = invnorm(uniform()) gen hearta = x_hearta + hearta_xb >=0 drop x_hearta hearta_xb replace hearta = 0 if hearte == 0 *************************************** * Predict heartae * Regression-based *************************************** matrix score heartae_xb = coef_init_heartae drop heartae gen x_heartae = invnorm(uniform()) gen heartae = x_heartae + heartae_xb >=0 drop x_heartae heartae_xb replace heartae = 0 if hearte == 0 replace heartae = 1 if hearta == 1 *** Impute early and normal DB retirement age drop era nra recode rdb_ea_c (1 = 50) (2 = 55) (3 = 60) , gen(era) recode rdb_na_c (1 = 55) (2 = 60) (3 = 62) (4 = 65) , gen(nra) drop rdb_ea_* rdb_na_* * Turn the categorical BMI into logbmi using centiles from 2004 data * These centiles were created earlier and are stored in the temp dataset `bmi_centiles' * Do a random draw from 0 - 100 for each person gen centile = round(uniform()*100) sort centile wtstate * Merge with the centile dataset merge n:1 centile wtstate using `bmi_centiles', keep(match) nogen * Resort everything by HHIDPN. sort hhidpn * Generate logbmi drop logbmi gen logbmi = log(bmi) * Remove helper vars drop centile obese_* overwt wtstate bmi ** Merge with biomarker data ** Generate 2-yr lag for the biomarker variables ** The treatment and physical activity variables were already included in the $timevariant global if `biomarkers'==1 { cap drop sysbp #d ; merge 1:1 hhidpn wave using "$dua_rand_hrs/bio_hrs_recoded_impfinal.dta", keepusing(hhidpn wave ldl_imp ldl_miss hdl_imp hdl_miss tchol_imp tchol_miss a1c_imp a1c_miss sysbp_imp sysbp_miss diabp crp_imp logcrp_imp crp_miss rxchol diaberx hibperx ) keep (master match) nogen ; #d cr foreach v in ldl hdl tchol a1c sysbp crp logcrp { rename `v'_imp `v' } foreach v in $timevariant cogstate selfmem { gen l2`v' = `v' local vlb: var label `v' label var l2`v' "Two-year lag of `vlb'" } foreach v in ldl tchol hdl a1c crp logcrp ldl_miss tchol_miss hdl_miss sysbp_miss a1c_miss crp_miss { gen l2`v' = `v' local vlb: var label `v' label var l2`v' "Two-year lag of `vlb'" } } else { foreach v in $timevariant cogstate selfmem { gen l2`v' = `v' local vlb: var label `v' label var l2`v' "Two-year lag of `vlb'" } } foreach v in $flist { gen f`v' = `v' local vlb: var label `v' label var f`v' "Init.of `vlb'" } gen period = 1 cap drop year gen year = `yy' drop d_* *_xb* *_delta inc* * Sep 9, 2008 **Change hatota, earnings variables.** drop l2loghatota l2loghatotax l2logiearnx l2logiearnuc * removed iearn from the following loop foreach i in hatota hatotax iearnx iearnuc{ egen l2log`i' = h(l2`i') replace l2log`i' = l2log`i'/100 } foreach i in iearnx iearnuc { drop flog`i' egen flog`i' = h(`i') replace flog`i' = flog`i'/100 } * Create fvars at age 50 foreach v in diabe smoken logbmi heart strok hibp lung canc { drop f`v'50 if "`v'" == "canc" { gen f`v'50 = `v're local vlb: var label `v're label var f`v'50 "Init. of `vlb' at age 50" } else if "`v'" == "heart" | "`v'" == "strok" | "`v'" == "hibp" | "`v'" == "lung" { gen f`v'50 = `v'e local vlb: var label `v'e label var f`v'50 "Init. of `vlb' at age 50" } else { gen f`v'50 = `v' local vlb: var label `v' label var f`v'50 "Init. of `vlb' at age 50" } } recast long hhid recast long hhidpn desc hhid* tostring hhid, gen(hhidstr) tostring hhidpn, gen(hhidpnstr) usedisplayformat drop hhid hhidpn gen hhid = "-`yy'" + hhidstr gen hhidpn = "-`yy'" + hhidpnstr destring hhid hhidpn, replace drop hhidpnstr hhidstr replace rbyr = rbyr + year - 2010 * Increment mean birth year of children *replace kid_byravg = kid_byravg + year - 2010 * make sure chf agrees with hearte replace chfe=0 if hearte==0 replace l2chfe=0 if l2hearte==0 if "`scr'" == "whiter"{ capt drop draw qui gen draw = uniform() summ weight if hispan == 1 local blah = (1- ((2030 - min(`yy',2030))/(2030-2010)))*r(sum) gsort -hispan draw gen sumw = sum(weight) replace hispan = 0 if sumw <= `blah' replace white = 1 if sumw <= `blah' drop sumw summ weight if black == 1 local blah = (1- ((2030 - min(`yy',2030))/(2030-2010)))*r(sum) gsort -black draw gen sumw = sum(weight) replace black = 0 if sumw <= `blah' replace white = 1 if sumw <= `blah' drop sumw capt drop draw if `yy'>=2030{ replace hispan = 0 replace black = 0 replace white = 1 } } if "`scr'" == "hsless_removed" { drop if hsless==1 } if "`scr'" == "hsless_ged" { replace hsless=0 if hsless==1 } gen entry = `yy' drop subgrp compress create_or_append `outfile' } exit, STATA clear