/* This file is for developing how new25 year olds are simulated.  
		Trying to impose trends on future cohorts.
		
		* BUG - need to make sure lage is correctly defined 

*/


quietly include common.do

local ster "$local_path/Estimates/incoming_separate_psid"

* Environmental variable for creating different scenarios (default, notrend, etc.)
local scr : env SCENARIO

local outfile $outdata/new25s_`scr'.dta

* Take the year as an argument
local fy : env FYEAR
local ly : env LYEAR


global indirect_adjust = 0

	* Try to trend smoking and weight
	global ordered1 educlvl
	global ordered2 wtstate smkstat numbiokids
	global ordered $ordered1 $ordered2
	
	* Removing work to try alternate specification
	global bin hibpe partnered partnertype inlaborforce

* For categorical outcomes 
	global educlvl_cat educ2 educ3 educ4  
	global wtstate_cat overwt obese_1 obese_2 obese_3
	global smkstat_cat smokev smoken 
	global numbiokids_cat numbiokids2 numbiokids3 numbiokids4 numbiokids5
*	global workstat_alt_cat workstat_alt2 workstat_alt3
*	global funcstat_cat iadl1 adl1p
*	global rdb_ea_c_cat rdb_ea_2 rdb_ea_3
*	global rdb_na_c_cat rdb_na_2 rdb_na_3 rdb_na_4
*	global baselist $bin $cont $wtstate_cat $smkstat_cat $funcstat_cat $rdb_ea_c_cat $rdb_na_c_cat

	global means09 hibpe $wtstate_cat $smkstat_cat $educlvl_cat partnered partnertype inlaborforce $numbiokids_cat

* Need to have mean values from original sample
use "$outdata/psid_all2009_pop_adjusted_2526", clear

* Move obese_3 to obese_2
* replace obese_2 = 1 if obese_3 == 1

* Generate summary variables for number of kids (variable numbering starts at 1).  Recode 5+ kids with to group with 4
gen numbiokids2 = (numbiokids == 1)
gen numbiokids3 = (numbiokids == 2)
gen numbiokids4 = (numbiokids == 3)
gen numbiokids5 = (numbiokids >= 4)

if "`scr'" == "hs_wtstate" {
	* This scenario will set the BMI distribution for everyone to be that of those who finished high school
	qui sum overwt if educlvl == 2 [aw=weight]
	local overwt_hs = r(mean)
	qui sum obese_1 if educlvl == 2 [aw=weight]
	local obese_1_hs = r(mean)
	qui sum obese_2 if educlvl == 2 [aw=weight]
	local obese_2_hs = r(mean)
	qui sum obese_3 if educlvl == 2 [aw=weight]
	local obese_3_hs = r(mean)
}


keep $means09 weight
collapse $means09 [aw = weight]
		
* For the incoming cohort simulation, we want proportion of EX SMOKERS, not EVER SMOKER
replace smokev = smokev - smoken

list educ2 educ3 educ4

if "`scr'" == "finish_hs" {
	replace educ2 = 1 - (educ3 + educ4)
	list educ2 educ3 educ4
}

gen educ1 = 1 - (educ2 + educ3 + educ4)
qui sum educ1
local hsless = r(mean)
drop educ1

if "`scr'" == "more_coll" {
	* Decrease high school by amount we will increase college -> amount is the same as the size of hsless population
	replace educ2 = educ2 - `hsless'
	replace educ3 = `hsless' + educ3
	list educ2 educ3 educ4

}

if "`scr'" == "no_obese" {
	* We will only have wtstate = 1 (normal) and 2 (overwt)
	* Scale up wtstates 1 and 2
	replace overwt = overwt/(1-obese_1-obese_2-obese_3)
	replace obese_1 = 0
	replace obese_2 = 0
	replace obese_3 = 0
}

if "`scr'" == "hs_wtstate" {
	* This scenario will set the BMI distribution for everyone to be that of those who finished high school
	replace overwt = `overwt_hs'
	replace obese_1 = `obese_1_hs'
	replace obese_2 = `obese_2_hs'
	replace obese_3 = `obese_3_hs'
}

* Just for testing
* replace numbiokids2 = .2
* replace numbiokids3 = .2
* replace numbiokids4 = .2
* replace numbiokids5 = .2
		
mkmat $means09, mat(means09)
matrix colnames means09 = phibpe pwtstate2 pwtstate3 pwtstate4 pwtstate5 psmkstat2 psmkstat3 peduclvl2 peduclvl3 peduclvl4 ppartnered ppartnertype pinlaborforce pnumbiokids2 pnumbiokids3 pnumbiokids4 pnumbiokids5
matrix rownames means09 = yr2009

matlist means09

save $outdata/psid_starting_means_`scr'.dta, replace


	**********************************************
	* Incoming cohort model predicts categorical BMI
	* Get centiles from 25-30 2009 data and use this to convert categorical to continous
	* Creates a dataset `bmi_centiles' in long format with the following variables:
	* 	centile - 0 to 100 indicating the centile
	*		wtstate - the weight category for the centile, 1 to 5
	* 	bmi			- the bmi for that centile and weight state category
	* The dataset is sorted by centile wtstate
	**********************************************
	
	use "$outdata/age2530_psid2009.dta", clear
	local nwtstate_cats = wordcount("$wtstate_cat")+1
	matrix bmi_centiles = J(101, `nwtstate_cats'+1, .)
	
	quietly{
		forvalues i = 1/`nwtstate_cats'{
			forvalues j = 0/100{
				centile bmi if wtstate == `i', centile(`j')
				matrix bmi_centiles[`j'+1, `i'+1] = r(c_1)
				matrix bmi_centiles[`j'+1, 1] = `j'
			}
		}
	}
	local colnames centile
	forvalues i = 1/`nwtstate_cats'{
		local colnames `colnames' bmi`i'
	}
	matrix colnames bmi_centiles = `colnames'
	matlist bmi_centiles
	clear
	svmat bmi_centiles, names(col)
	reshape long bmi, i(centile) j(wtstate)
	
	* Make sure each bmi value is unique
	bys wtstate bmi (centile): gen counter = _n
	replace bmi = bmi + counter*.00001 if counter >1
	drop counter
	sort centile wtstate
	
	tempfile bmi_centiles
	save `bmi_centiles', replace
	* Just to see what we are doing
	save $outdata/bmi_centiles.dta, replace


* Import population trends, make a matrix
use $outdata/pop2526_projection_2081.dta, clear

qui sum year
local begyr = r(min)
local maxyr = r(max)

sort year male hispan black, stable
gen subgrp = 2^2 * male + 2^1 * hispan + black
drop male hispan black
reshape wide pop, i(year) j(subgrp)
mkmat year pop*, mat(poptrend)
	
* Row names
global rowname ""
forvalues i = `begyr'(2)`maxyr'{
	global rowname $rowname yr`i'
}
matrix rownames poptrend = $rowname
matlist poptrend


* Bring in the health/demographic trends, make a matrix 
use $outdata/psid_trend_default.dta, replace
rename pworking pwork

* Place holder until we develop trends
gen pinlaborforce = 1
gen pworkstat_alt2 = 1
gen pworkstat_alt3 = 1

keep year pwtstate2 pwtstate3 pwtstate4 pwtstate5 psmkstat2 psmkstat3 peduc2 peduc3 peduc4 psingle pcohab pmarried phibpe ppartnered ppartnertype pinlaborforce pworkstat_alt2 pworkstat_alt3 pkids1 pkids2 pkids3 pkids4 pkids5

rename pkids1 pnumbiokids1
rename pkids2 pnumbiokids2
rename pkids3 pnumbiokids3
rename pkids4 pnumbiokids4
rename pkids5 pnumbiokids5


if "`scr'" == "notrend"{
	* Shut down the  trends in everything
	foreach x in pwtstate2 pwtstate3 pwtstate4 pwtstate5 psmkstat2 psmkstat3 peduc2 peduc3 peduc4 psingle pcohab pmarried phibpe ppartnered ppartnertype pinlaborforce pworkstat_alt2 pworkstat_alt3 pnumbiokids1 pnumbiokids2 pnumbiokids3 pnumbiokids4 pnumbiokids5 {
	replace `x' = 1
	}
}

if "`scr'" == "finish_hs"{
	* Shut down the  trends in education
	foreach x in peduc2 peduc3 peduc4 {
	replace `x' = 1
	}
}

if "`scr'" == "more_coll"{
	* Shut down the  trends in education
	foreach x in peduc2 peduc3 peduc4 {
	replace `x' = 1
	}
}

sum peduc2 peduc3 peduc4


forvalues i = 2/4 {
	rename peduc`i' peduclvl`i'
}

* Turn off education trends
/*
forvalues i = 2/4 {
	replace peduclvl`i' = 1
}
*/

qui sum year
local begyr = r(min)
local maxyr = r(max)

* keep odd years
keep if mod(year,2) > 0

mkmat * , mat(hlthtrend)
 
* Row names
global rowname ""
forvalues i = `begyr' (2) `maxyr'{
		global rowname $rowname yr`i'
} 

matrix rownames hlthtrend = $rowname
matlist hlthtrend	
save $outdata/hlthtrend.dta, replace


* Bring in the VC-matrix ("Omega")
drop _all
use "$outdata/psid_incoming_vcmatrix.dta"
drop _rowname

mkmat *, mat(omega)
local d = colsof(omega)
forvalues i = 1/`d'{
	local k = `i'+1
	forvalues j = `k'/`d'{
		matrix omega[`i',`j'] = omega[`j',`i']
	}
}
local colname: colnames omega
noi dis "`colname'"
matrix rownames omega = `colname'
local num = rowsof(omega)

matlist omega
	
* Different V-C matrices with each of the outcome as the first row

foreach x in `colname' {
	takestring_old, oldlist("`colname'") newname("addlist") extlist("`x'")
	global vlist `x' $addlist
	matrix vc_`x' = J(`num',`num',.)
	matrix colnames vc_`x' = $vlist
	matrix rownames vc_`x' = $vlist
	foreach y in $vlist {
		foreach z in $vlist {
			matrix vc_`x'[rownumb(vc_`x',"`y'"), colnumb(vc_`x', "`z'")] =  omega[rownumb(omega,"`y'"), colnumb(omega,"`z'")]
		}
	}
}
	
* Cholesky decomposition
matrix L_omega = cholesky(omega)
foreach x in `colname'{
	matrix L_`x' = cholesky(vc_`x')
}


* Bring in the models ("mean estimation matrix")
drop _all
use "$outdata/psid_incoming_means.dta"
* gen order = _n
drop _rowname
	
mkmat *, mat(meanest)
matrix rownames meanest = `colname'
mat list meanest


* A list for random draws
global drawlist ""
foreach x in `colname' {
  global drawlist $drawlist d_`x'
}

* Bring in the cut points for ordered probit models
drop _all
 	
* Read in the file with the cut points for the ordered probit models, make a matrix for later use 
use "$outdata/psid_incoming_cut_points.dta"
mkmat cut_1 cut_2 cut_3 cut_4 cut_5, mat(cutpoints) rownames(_rowname)
matlist cutpoints 


* Bring in the parameters (theta, omega, ssr) for assigning earnings 
	drop _all	
 	use "$outdata/iearnx_TOS.dta"
 	
	drop _rowname
 	
 	mkmat *, mat(iearnx_params)
	matlist iearnx_params


* Use the reweighted 2009 data
use "$outdata/psid_all2009_pop_adjusted_2526", replace

* This deals with variables that are not yet cleaned or are missing in PSID
do kludge.do

forvalues x = 1/4 {
	replace educ`x' = 0 if missing(educ`x')
}

count


gen subgrp = 2^2 * male + 2^1 * hispan + black

* Simulate the new 25-26 year olds.

gen weight_psid_09 = weight

* Person ID
sort hhid hhidpn, stable
gen hhidpn_cnt = _n 

gen double hhid_hold = hhid
gen double hhidpn_hold = hhidpn

foreach v of varlist hhid_hold hhidpn_hold {
    qui sum `v'
    local max = r(max)
    local max = round(log10(`max')) + 2
    format %`max'.0g `v'
  }

* Actual respondent birth year
gen rbyr_actual = rbyr


* Set cutoffs for ordered outcomes
	scalar educlvl_cut1 =     cutpoints[rownumb(cutpoints,"educlvl"),colnumb(cutpoints,"cut_1")]	
	scalar educlvl_cut2 =     cutpoints[rownumb(cutpoints,"educlvl"),colnumb(cutpoints,"cut_2")]	
	scalar educlvl_cut3 =     cutpoints[rownumb(cutpoints,"educlvl"),colnumb(cutpoints,"cut_3")]	
*	scalar educlvl_cut4 =     cutpoints[rownumb(cutpoints,"educlvl"),colnumb(cutpoints,"cut_4")]	
*	scalar educlvl_cut5 =     cutpoints[rownumb(cutpoints,"educlvl"),colnumb(cutpoints,"cut_5")]

	scalar wtstate_cut1 =     cutpoints[rownumb(cutpoints,"wtstate"),colnumb(cutpoints,"cut_1")]	        
	scalar wtstate_cut2 =     cutpoints[rownumb(cutpoints,"wtstate"),colnumb(cutpoints,"cut_2")]		
	scalar wtstate_cut3 =     cutpoints[rownumb(cutpoints,"wtstate"),colnumb(cutpoints,"cut_3")]	        
	scalar wtstate_cut4 =     cutpoints[rownumb(cutpoints,"wtstate"),colnumb(cutpoints,"cut_4")]		

	scalar smkstat_cut1 =     cutpoints[rownumb(cutpoints,"smkstat"),colnumb(cutpoints,"cut_1")]	        
	scalar smkstat_cut2 =     cutpoints[rownumb(cutpoints,"smkstat"),colnumb(cutpoints,"cut_2")]
	
	scalar numbiokids_cut1 =     cutpoints[rownumb(cutpoints,"numbiokids"),colnumb(cutpoints,"cut_1")]	        
	scalar numbiokids_cut2 =     cutpoints[rownumb(cutpoints,"numbiokids"),colnumb(cutpoints,"cut_2")]		
	scalar numbiokids_cut3 =     cutpoints[rownumb(cutpoints,"numbiokids"),colnumb(cutpoints,"cut_3")]	        
	scalar numbiokids_cut4 =     cutpoints[rownumb(cutpoints,"numbiokids"),colnumb(cutpoints,"cut_4")]		
	
*	scalar workstat_alt_cut1 = cutpoints[rownumb(cutpoints,"workstat_alt"),colnumb(cutpoints,"cut_1")]	       
*	scalar workstat_alt_cut2 = cutpoints[rownumb(cutpoints,"workstat_alt"),colnumb(cutpoints,"cut_2")]	       
	
	gen constant = 1
	gen age2526 = 1
	gen male_black = male*black
	gen male_hispan = male*hispan
	
cap rm `outfile'	
	
preserve
forvalues yy = `fy'(2)`ly' {
	restore, preserve
	di "**** Projection year is `yy' ****"
	
/* Generate random draws */ 	
	 cap drop $drawlist
	 drawnorm $drawlist, cov(omega) seed(8675309)	
	 noisily sum d_*


	* Drop variables we will assign
	drop logbmi bmi hatotax iearnx 
	
	
	* Return weight to 2009 value since weighting is used in the cut point adjustment
	sum weight
	replace weight = weight_psid_09
	sum weight


	* Predicted prevalence
	local j = 1
	foreach x in `colname' {
		matrix betamat = meanest[`j',1...]
		cap drop `x'_xb
		
		matrix score `x'_xb = betamat
		local j = `j' + 1
		matrix mat_`x' = betamat
		di "Initial assignment - used in education in `yy'"
		mat list mat_`x'
		sum `x'_xb
	}

	 
	/* Determine the deviations */
	foreach x in $ordered1 $ordered2  {
			new_deviation, vname("`x'") vtype("o") cyr(`yy') data("psid")
			sum `x'_delta
	}
	foreach x in $bin {
			new_deviation, vname("`x'") vtype("b") cyr(`yy') data("psid")
	}
	foreach x in $cont $iht {
			new_deviation, vname("`x'") vtype("c") cyr(`yy') data("psid")			 		
	}
	
	
	/* Assign education first, as this is used in assigning other conditions */
		if $indirect_adjust == 0 { 					
		foreach x in $ordered1 {
			cap drop inc`x'
			qui gen inc`x' = 0
			foreach y in $bin $ordered {
					qui replace inc`x' = inc`x' + L_`y'[rownumb(L_`y',"`x'"), colnumb(L_`y', "`y'")] * `y'_delta
			}
			cap drop `x'_xb_new
			qui gen `x'_xb_new = `x'_xb + inc`x'
			sum inc`x'
			sum `x'_xb_new
		}
	}
	
	foreach v in educlvl {
		local ncuts = wordcount("$`v'_cat")
		local ncats = `ncuts'+1
		
		local p1 = 1
		forvalues probs = 2/`ncats' {
			local x   p`v'`probs'
			local p`probs' = hlthtrend[rownumb(hlthtrend,"yr`yy'"), colnumb(hlthtrend, "p`v'`probs'")]*means09[rownumb(means09, "yr2009"), colnumb(means09, "`x'")]
			local p1 = `p1' - `p`probs''
		}
		di "`v' in `yy'"
		di "p1 = `p1'"
		di "p2 = `p2'"
		di "p3 = `p3'"
		di "p4 = `p4'"
		
		if "`scr'" == "finish_hs" {
				local p1 = 0
		}			
		
		di "p1 = `p1'"
		di "p2 = `p2'"
		di "p3 = `p3'"
		di "p4 = `p4'"
						
		assert `p1' >= 0
		assert `p1' <= 1
		
		* Adjust the cuts
	 	forvalues c = 1/`ncuts' {
			local c_prev = `c' - 1
			if `c' == 1 {
		    calc_cut_point `v'_xb_new,  prob(`p1') prev_cut(-9999)
			}
			else {
			  local prev_cut = `v'_cut`c_prev'
			  calc_cut_point `v'_xb_new,  prob(`p`c'') prev_cut(`prev_cut')
			}
			matrix x = e(b)
			scalar `v'_cut`c' = x[1,1]
			
			noi di "Cut `v'_cut`c' = " `v'_cut`c'
		}
		forvalues c = 1/`ncats' {
			di "`v'_`c' = `p`c''"
		}
		sum `v'_xb_new
	}
	
	
	foreach x in $ordered1{
		if "`x'" == "rdb_na_c"{
			local numcut = 4
		}
		else if "`x'" == "educlvl" {
			local numcut = 4
		}
		else if "`x'" == "wtstate"{
			local numcut = 5
		}
		else if "`x'" == "numbiokids"{
			local numcut = 5
		}
		else{
			local numcut = 3
		}
			
		forvalues j = 1/`numcut'{
			if `j' < `numcut'{
				gen p`x'`j' = normal(`x'_cut`j' - `x'_xb_new)
			}
			else if `j' == `numcut'{
				gen p`x'`j' = 1
			}
		di "`x' `j' ="
		sum p`x'*
		}
	}	
	* Ordered outcomes
	foreach x in $ordered1 {
		cap drop `x'
		gen `x' = .
		if "`x'" == "rdb_na_c" {
			local num_cut = 4
			}
		else if "`x'" == "educlvl" {
			local num_cut = 4
		}
		else if "`x'" == "wtstate"{
			local num_cut = 5
		}
		else if "`x'" == "numbiokids"{
			local num_cut = 5
		}
		else{
			local num_cut = 3
			}
		forvalues i = `num_cut'(-1)1 {
			replace `x' = `i' if normal(d_`x') <= p`x'`i'
			drop p`x'`i'
			}
		di "assigning ordered based on draw"
		noi tab `x'
	}	
		/* Dummies for ordered outcomes */	
	foreach x in $ordered1 {
		if "`x'" == "rdb_na_c" {
			local num_cut = 4
		}
		else if "`x'" == "educlvl" {
			local num_cut = 4
		}
		else if "`x'" == "wtstate"{
			local num_cut = 5
		}
		else if "`x'" == "numbiokids"{
			local num_cut = 5
		}
		else{
			local num_cut = 3
		}
		forvalues j = 2/`num_cut'{
			local ovar = "`x'_cat"
			local v = word("$`ovar'", `j'-1)
			cap drop `v'
			gen `v' = `x' == `j'
			noi sum `v'
		}
	}
	
	tab educlvl, m

	
	tab educlvl, m
	
	forvalues x = 1/4 {
		cap drop educ`x'
		gen educ`x' = (educlvl == `x')
	}
	sum educ1-educ4
	
	* Recalculate predicted prevalence with new education variables
	local j = 1
	foreach x in `colname' {
		matrix betamat = meanest[`j',1...]
		cap drop `x'_xb
		
		matrix score `x'_xb = betamat
		local j = `j' + 1
		matrix mat_`x' = betamat
		di "Second assignment - used for all other binary and ordered in `yy'"
		mat list mat_`x'
		sum `x'_xb
	}
	
	/*
	if "`scr'" == "hs_wtstate" {
		keep if educlvl == 1
	}
	*/
	
	if $indirect_adjust == 0 { 					
		foreach x in $bin $ordered2 {
			cap drop inc`x'
			qui gen inc`x' = 0
			foreach y in $bin $ordered {
					qui replace inc`x' = inc`x' + L_`y'[rownumb(L_`y',"`x'"), colnumb(L_`y', "`y'")] * `y'_delta
			}
			cap drop `x'_xb_new
			qui gen `x'_xb_new = `x'_xb + inc`x'
			sum inc`x'
			sum `x'_xb_new
		}
		foreach x in $cont $iht{
			cap drop inc`x'
			qui gen inc`x' = 0
			qui replace inc`x' = inc`x' + L_`x'[rownumb(L_`x',"`x'"), colnumb(L_`x', "`x'")] * `x'_delta
			cap drop `x'_xb_new
			qui gen `x'_xb_new = `x'_xb + inc`x'
		}							

	}
	
	/* Predicted probabilities */ 			

	* Determine the probabilities
	* Binary outcomes
	foreach x in $bin{
		cap drop p`x'
		gen p`x' = normal(-`x'_xb_new)
	}
	
		/* Determine the status */ 	 
	* Binary outcomes	
	foreach x in $bin {
		cap drop `x'
		gen `x' = normal(d_`x') >= p`x'
		drop p`x'
	}						

	**** For Weight and Smoking, we get new values by adjusting the cutoffs to match desired probabilities 

	foreach v in $ordered2 {
		
		if `v' == wtstate {
			local censor_var
		} 
		else if `v' == smkstat {
			local censor_var
		} 
		else if `v' == workstat_alt {
			local censor_var 
		}
		else if `v' == numbiokids {
			local censor_var
		}
		
		local ncuts = wordcount("$`v'_cat")
		local ncats = `ncuts'+1
		
		local p1 = 1
		forvalues probs = 2/`ncats' {
			local x   p`v'`probs'
			local p`probs' = hlthtrend[rownumb(hlthtrend,"yr`yy'"), colnumb(hlthtrend, "p`v'`probs'")]*means09[rownumb(means09, "yr2009"), colnumb(means09, "`x'")]
			local p1 = `p1' - `p`probs''
		}
		di "`v' in `yy'"
		di "p1 = `p1'"
		di "p2 = `p2'"
		assert `p1' >= 0
		assert `p1' <= 1
		
		* Adjust the cuts
		 forvalues c = 1/`ncuts' {
				local c_prev = `c' - 1
				if `c' == 1 {
			    calc_cut_point `v'_xb_new `censor_var',  prob(`p1') prev_cut(-9999)
				}
				else {
				  local prev_cut = `v'_cut`c_prev'
				  calc_cut_point `v'_xb_new `censor_var',  prob(`p`c'') prev_cut(`prev_cut')
				}
				matrix x = e(b)
				scalar `v'_cut`c' = x[1,1]
			
				noi di "Cut `v'_cut`c' = " `v'_cut`c'
			}
		forvalues c = 1/`ncats' {
			di "`v'_`c' = `p`c''"
		}
	}
	
	* Ordered outcomes
	foreach x in $ordered2{
		if "`x'" == "rdb_na_c"{
			local numcut = 4
		}
		else if "`x'" == "educlvl" {
			local numcut = 4
		}
		else if "`x'" == "wtstate"{
			local numcut = 5
		}
		else if "`x'" == "numbiokids"{
			local numcut = 5
		}
		else{
			local numcut = 3
		}
			
		forvalues j = 1/`numcut'{
			if `j' < `numcut'{
				gen p`x'`j' = normal(`x'_cut`j' - `x'_xb_new)
			}
			else if `j' == `numcut'{
				gen p`x'`j' = 1
			}
		di "`x' `j' ="
		sum p`x'*
		}
	}	


	/* Determine the status */ 	
	* Ordered outcomes
	foreach x in $ordered2 {
		cap drop `x'
		gen `x' = .
		if "`x'" == "rdb_na_c" {
			local num_cut = 4
			}
		else if "`x'" == "educlvl" {
			local num_cut = 4
		}
		else if "`x'" == "wtstate"{
			local num_cut = 5
		}
		else if "`x'" == "numbiokids"{
			local num_cut = 5
		}
		else{
			local num_cut = 3
			}
		forvalues i = `num_cut'(-1)1 {
			replace `x' = `i' if normal(d_`x') <= p`x'`i'
			* for testing purposes
			* gen p`x'`i'test = p`x'`i'
			drop p`x'`i'
			}
		di "assigning ordered based on draw"
		noi tab `x'
	}	

	* Continuous
	foreach x in $cont {
		cap drop `x'
		* Removing draw term for the $cont variables:   
		gen `x' = `x'_xb_new + d_`x'
	}

	
	/* Dummies for ordered outcomes */	
	foreach x in $ordered2 {
		if "`x'" == "rdb_na_c" {
			local num_cut = 4
		}
		else if "`x'" == "educlvl" {
			local num_cut = 4
		}
		else if "`x'" == "wtstate"{
			local num_cut = 5
		}
		else if "`x'" == "numbiokids"{
			local num_cut = 5
		}
		else{
			local num_cut = 3
		}
		forvalues j = 2/`num_cut'{
			local ovar = "`x'_cat"
			local v = word("$`ovar'", `j'-1)
			cap drop `v'
			gen `v' = `x' == `j'
			noi sum `v'
		}
	}
	
	tab wtstate, m
	tab educlvl, m
	tab smkstat, m
	tab numbiokids, m
*	tab workstat_alt, m
	
	tab inlaborforce, m
	
	cap drop hsless
	cap drop college
	gen hsless = (educlvl == 1)
	gen college = (educlvl >= 3)
	
	forvalues x = 1/4 {
		gen male_educ`x' = male*educ`x'
	}
	
	 /* Make sure single/cohab/married is logical */
	 replace mstat_new = 1 if (partnered == 0)
	 replace mstat_new = 2 if (partnertype == 0 & partnered == 1)
	 replace mstat_new = 3 if (partnertype == 1 & partnered == 1)
	 
	 cap drop single
	 cap drop cohab
	 cap drop married
	 gen single = mstat_new == 1
	 gen cohab = mstat_new == 2
	 gen married = mstat_new == 3
	 
	 cap drop male_single
	 cap drop male_cohab
	 cap drop male_married
	 gen male_single = male*single
	 gen male_cohab = male*cohab
	 gen male_married = male*married
	 
	 /* Current smokers are ever smokers */
	 replace smokev = 1 if smoken == 1
	 
	 /* Clean up numbiokids - assigned at n+1 levels */
	 replace numbiokids = numbiokids - 1
	 * Set lag to current
	 replace l2numbiokids = numbiokids
	 
	 
	 /* Make sure workstat_alt is consistent with censoring variable*/
*	 replace workstat_alt = 0 if inlaborforce == 0
*	 replace workstat_alt1 = (workstat_alt == 1)
*	 replace workstat_alt2 = (workstat_alt == 2)
*	 replace workstat_alt3 = (workstat_alt == 3)
*	 label define workstat_alt 0 "out of labor force" 1 "unemployed" 2 "part-time" 3 "full-time"
*	 label values workstat_alt workstat_alt 
	
	* BMI variables used in wealth and earnings assignment
	cap drop overwt 
	cap drop obese1 
	cap drop obese2p
	gen overwt = (wtstate == 2)
	gen obese1 = (wtstate == 3)
	gen obese2p = (wtstate == 4 | wtstate == 5)
	
	***** Populate earnings and wealth using models from incoming_separate_estimation.do  *****
	
	* need some additional draws - uncorrelated for now
	set seed 90210
	local drawlist2 d_hatota_cat d_iearn_cat d_labor_cat d_inscat
	foreach var of local drawlist2 {
		gen `var' = runiform()
	}
	
	local drawlist3 d_hatota_neg d_hatota_pos d_iearn_pos
	foreach var of local drawlist3 {
		drawnorm `var' 
	}
		
	*** Wealth ***
	* Predict category (negative wealth, zero wealth, positive wealth)
	est use "`ster'/hatota_cat.ster"
	predict p_hatota_cat1 p_hatota_cat2 p_hatota_cat3
	
	* Assign the category based on a random draw
	gen hatota_cat = .
	replace hatota_cat = 1 if d_hatota_cat < p_hatota_cat1  
	replace hatota_cat = 2 if d_hatota_cat >= p_hatota_cat1 & d_hatota_cat < p_hatota_cat1 + p_hatota_cat2 
	replace hatota_cat = 3 if d_hatota_cat >= p_hatota_cat1 + p_hatota_cat2
	
	tab hatota_cat
	
	* Predict negative wealth for indivduals in that category 
	est use "`ster'/lnhatotax_neg.ster"
	predict lnhatota_neg if hatota_cat == 1
	replace lnhatota_neg = lnhatota_neg + d_hatota_neg*e(rmse)

	* Predict positive wealth for individuals in that category 		
	est use "`ster'/lnhatotax_pos.ster"
	predict lnhatota_pos if hatota_cat == 3
	replace lnhatota_pos = lnhatota_pos + d_hatota_pos*e(rmse)
	
	* Populate the variable
	gen hatotax = .
	replace hatotax = -exp(-lnhatota_neg) if hatota_cat == 1
	replace hatotax = 0 if hatota_cat == 2
	replace hatotax = exp(lnhatota_pos) if hatota_cat == 3
	* Cap hatotax at $2M
	replace hatotax = min(2000,hatotax) if !missing(hatotax)
	
	replace hatota = hatotax
	
	*** Earnings ***
	est use "`ster'/iearn_cat.ster"
	predict p_iearn_cat 
	sum p_iearn_cat

	* Asssign the category based on a random draw
	gen iearn_cat = .
	replace iearn_cat = 1 if d_iearn_cat > p_iearn_cat 
	replace iearn_cat = 2 if d_iearn_cat <= p_iearn_cat   
	
	tab iearn_cat
	
	* IHT
	
	**Instead of taking logs, take log(y+sqrt(y^2+1)) - gh(y,1,0)
	cap drop iearnx_xb_new
	est use "`ster'/iearnx.ster"
	predict iearnx_xb_new
	
	* Draw term scaled by sqrt(ssr) of ghreg estimation) 
	cap drop _ahg_temp
	gen _ahg_temp = iearnx_xb_new + d_iearn_pos*sqrt(iearnx_params[1,colnumb(iearnx_params, "ssr")])
		
	cap drop iearnx_ihs
	local th = iearnx_params[1,colnumb(iearnx_params, "theta")]
	local om = iearnx_params[1,colnumb(iearnx_params, "omega")]
	egen _iearnx = invgh(_ahg_temp), theta(`th') omega(`om')
	replace _iearnx = 0 if _iearnx < 0 | _iearnx == .
	replace _iearnx = 0 if iearn_cat == 1
		
	replace _iearnx = min(_iearnx,200) if !missing(_iearnx)
	rename _iearnx iearnx
	replace iearn = iearnx
	

	* Unemployed, working part-time, working full-time if inlaborforce == 1
	est use "`ster'/laborcat.ster"
	predict p_labor_cat1 p_labor_cat2 p_labor_cat3
	* Assign the category based on a random draw
	gen labor_cat = .
	replace labor_cat = 1 if d_labor_cat < p_labor_cat1  
	replace labor_cat = 2 if d_labor_cat >= p_labor_cat1 & d_labor_cat < p_labor_cat1 + p_labor_cat2 
	replace labor_cat = 3 if d_labor_cat >= p_labor_cat1 + p_labor_cat2
	
	drop workcat
	gen workcat = .
	replace workcat = 1 if !(inlaborforce)
	replace workcat = 2 if inlaborforce & labor_cat == 1
	replace workcat = 3 if inlaborforce & labor_cat == 2
	replace workcat = 4 if inlaborforce & labor_cat == 3
		
		
	* Health insurance categorical variable (none, public only, any private)
	est use "`ster'/inscat.ster"
	predict p_inscat1 p_inscat2 p_inscat3
	* Assign the category based on the random draw	
	drop inscat
	gen inscat = .
	replace inscat = 1 if d_inscat < p_inscat1  
	replace inscat = 2 if d_inscat >= p_inscat1 & d_inscat < p_inscat1 + p_inscat2 
	replace inscat = 3 if d_inscat >= p_inscat1 + p_inscat2
		
	drop hatota_cat lnhatota_neg lnhatota_pos
	drop iearn_cat 
	drop `drawlist2' `drawlist3'
	drop p_hatota_cat1 p_hatota_cat2 p_hatota_cat3
	drop p_iearn_cat 	
	drop p_inscat1 p_inscat2 p_inscat3
		
		
	* Need to reweight individuals
	
	/* Adjust the population weight to reflect population size and structual change */
	levelsof subgrp, local(subgrplv)
	
	di "`subgrplv'"
	
	dis "********** current year is: `yy' *************"
	gen weight`yy' = .
	foreach l of local subgrplv {
	  local popratio = poptrend[rownumb(poptrend, "yr`yy'"), colnumb(poptrend,"pop`l'")]/poptrend[rownumb(poptrend, "yr2009"), colnumb(poptrend,"pop`l'")]
		replace weight`yy' = weight_psid_09 * `popratio' if subgrp == `l'
		di "`popratio'"
	}
	 
	* gen weight_`yy'_09 = weight	 
	tabstat weight, stat(sum)
	drop weight
	ren weight`yy' weight
	
	di "`yy'"
	tabstat weight, stat(sum)
	
	
	replace year = `yy'
	
	replace rbyr = rbyr_actual + `yy' - 2009
	
	codebook hhid_hold
	
  tostring hhidpn_hold, gen(hhidpnstr) usedisplayformat
  drop hhid hhidpn
  gen str hhidpn = "-`yy'" + hhidpnstr
  destring hhidpn, replace
	drop hhidpnstr 
	* Set hhid to be hhidpn
	gen double hhid = hhidpn
	
	* Assign hhidpn of head as hhid to both head and wife/"wife"
*	bys year hhid (relhd): gen long hhid_new = hhidpn[1]
*	drop hhid
*	rename hhid_new hhid
	
	
	* Drop iteration-specific variables
	*	drop d_* *_xb*  *_delta inc* constant
	drop  *_xb*  *_delta inc* 
	
	#d ;	
	*** POPULATE INITIAL VALUES AND LAGS WITH CURRENT VALUES ***
	* REMOVING: dcwlthx rdb_ea_c rdb_na_c rssclyr era nra
	* Initial condition only; 
	global zlist2 single shlt smokev anydb rdb_na_2 
		rdb_na_3 rdb_na_4 rdb_ea_2 rdb_ea_3 anydc db_tenure logdcwlthx ;	
	
	* Time-varying covariants and outcomes; 
	* REMOVING: iwstat memrye funcstat age_yrs age_1231 volhours gkcarehrs helphoursyr helphoursyr_sp helphoursyr_nonsp parhelphours ;
	global plist widowed married hearte stroke cancre hibpe diabe lunge anyhi diclaim ssiclaim oasiclaim dbclaim 
	nhmliv wlth_nonzero overwt obese_1 obese_2 obese_3 smoken iadlstat adlstat work retired 
	loghatotax logiearnx hatota hatotax iearnx smkstat wtstate iearnuc 
	age 
	inlaborforce 
	workcat
	educlvl
	mstat_new
	;

	#d cr		
		
	di "What the heck is going on here???"		
	di "plist is $plist"
		
		
	* 	global flist $plist; 


	foreach v of global plist {
		*	di "`v'"
		cap drop l2`v' f`v'
		gen l2`v' = `v'
		gen f`v' = `v'
		local vlb: var label `v'
		label var l2`v' "Lag of `vlb'"
		label var f`v' "Init. of `vlb'"
	}
	
	foreach v of global zlist2 {
		*	di "`v'"
		cap drop f`v'
		gen f`v' = `v'
		local vlb: var label `v'
		label var f`v' "Init.of `vlb'"
	}
	
	
	* Turn the categorical BMI into logbmi using centiles from 2009 data
	* These centiles were created earlier and are stored in the temp dataset `bmi_centiles'
	
	* Do a random draw from 0 - 100 for each person
	gen centile = round(uniform()*100)
	sum centile

	sort centile wtstate
	
	* Merge with the centile dataset for the new BMI values
	merge centile wtstate using `bmi_centiles', _merge(bmi_merge)

	* Resort everything by HHIDPN.
	sort hhidpn
	
	keep if bmi_merge == 3
	
	drop centile bmi_merge
	
	* Generate logbmi
	gen logbmi = log(bmi)
	sum logbmi
	
	di "Year is `yy'"
	tab wtstate
	
	*	desc
	
	gen scr = "`scr'"
	
	gen entry = `yy'
	
	/* Script to get rid of variables that are "present in data, but not in simulation"*/
	quietly include drop_vars.do
	
	* Save new25 files.
	compress
	create_or_append `outfile'  
	* save "$outdata/new25_`yy'_`scr'.dta", replace

}	

	
capture log close