/* Program to develop assignment of SSA variables for 2010 stock population */

include common.do
local bsamp : env BREP
if missing("`bsamp'") {
  local outdir $outdata
}
else {
  local outdir $outdata/input_rep`bsamp'
}

/* Get distributions of AIME and quarters worked */
use $resmodels/ssa_dist_50p.dta, replace
* Move from the 50 categories to tenths of percentiles
expand 20
sort q_grp
gen id = (_n-1)/10
* Set values to be used for imputation at middle of each group
replace q_dist = . if mod(id+1,2)>0
replace aime_dist = . if mod(id+1,2)>0
* Set lowest values
replace q_dist = 0 if id == 0
replace aime_dist = 0 if id == 0
* In case we need a 100th
expand 2 if _n == _N
replace id = 100 if _n == _N
	
ipolate q_dist id, gen(q_imp) epolate
ipolate aime_dist id, gen(aime_imp) epolate

gen aime_pct = id
gen q_pct = id
replace aime_dist = aime_imp
replace q_dist = q_imp
* quarters should be integers
replace q_dist = round(q_dist)
	
keep aime_pct aime_dist q_pct q_dist
tempfile ssa
save `ssa'

*** Separately for retired and not retired ***

foreach status in ret notret {

	************************************************
	* Create matrices for SSA-related joint models *
	************************************************
	drop _all
	use "$resmodels/`status'/ssa_vcmatrix.dta"

	drop _rowname

	mkmat *, mat(ssa_omega)
	local d = colsof(ssa_omega)
	forvalues i = 1/`d'{
		local k = `i'+1
		forvalues j = `k'/`d'{
			matrix ssa_omega[`i',`j'] = ssa_omega[`j',`i']
		}
	}
	local ssa_colname: colnames ssa_omega
	noi dis "`ssa_colname'"
	matrix rownames ssa_omega = `ssa_colname'
	local num = rowsof(ssa_omega)

	matrix list ssa_omega
  
	* Different V-C matrices with each of the outcome as the first row

	foreach x in `ssa_colname' {
		takestring, oldlist("`ssa_colname'") newname("addlist") extlist("`x'")
		global ssavlist `x' $addlist
		matrix ssavc_`x' = J(`num',`num',.)
		matrix colnames ssavc_`x' = $ssavlist
		matrix rownames ssavc_`x' = $ssavlist
		foreach y in $ssavlist {
			foreach z in $ssavlist {
				matrix ssavc_`x'[rownumb(ssavc_`x',"`y'"), colnumb(ssavc_`x', "`z'")] =  ssa_omega[rownumb(ssa_omega,"`y'"), colnumb(ssa_omega,"`z'")]
			}
		}
	}
	
	* Cholesky decomposition
	matrix L_ssaomega = cholesky(ssa_omega)
	foreach x in `ssa_colname'{
		matrix L_ssa`x' = cholesky(ssavc_`x')
	}

	* Models for aime and quarters worked
	drop _all
	use "$resmodels/`status'/ssa_means.dta"
	gen order = _n
	drop _rowname

	sort order, stable
	drop order
	mkmat *, mat(ssa_meanest)
	matrix rownames ssa_meanest = `ssa_colname'
	mat list ssa_meanest


	* Read in the file with the cut points for the SSA ordered probit models, make a matrix for later use 
	drop _all
	use "$resmodels/`status'/ssa_cut_points.dta"
	mkmat cut_*, mat(ssa_cutpoints) rownames(_rowname)

	mat list ssa_cutpoints
	local ssa_cuts = colsof(ssa_cutpoints)

	forvalues x = 1/`ssa_cuts' {
		foreach y in aime_pct q_pct {
			scalar `y'_cut`x' = ssa_cutpoints[rownumb(ssa_cutpoints,"`y'"),colnumb(ssa_cutpoints,"cut_`x'")]
			di `y'_cut`x'
		}
	}


	* New approach for aime and quarters worked - estimated a model on the 2004 50-55 year olds
	*Update for 13?
        
	
	foreach yr in 2010 2016{
	use `outdir'/all`yr'_pop_adjusted.dta, replace
        
	
	if "`status'" == "ret" {
		keep if isret > 0
	}
	else if "`status'" == "notret" {
		keep if isret == 0
	}
	
	count

	
	drawnorm d_aime_pct d_q_pct, cov(ssa_omega) seed(987562)	

	sum d_aime_pct
	sum d_q_pct

	cap gen __cons = 1

	* Interactions in model
	foreach var in black hispan hsless college work {
		gen male_`var' = male*`var'
	} 


	egen agecat = cut(age), at(0,50,55,60,65,70,75,80,85,200)
	
	* Nope - these people are spouses in the simulation, so we need SSA parameters for them.
	* drop if age < 50
	
	forvalues age = 50 (5) 85 {
		gen age`age' = (agecat == `age')	
	}
	
	gen isretx = isret/1000

	* Predicted prevalence
	local j = 1
	foreach x in `ssa_colname' {
		matrix betamat = ssa_meanest[`j',1...]
		cap drop `x'_xb
	
		matrix score `x'_xb = betamat
		local j = `j' + 1
		matrix mat_`x' = betamat
	
		mat list mat_`x'
		di "mean of `x' is:"
		sum `x'_xb
	}

 	drop __cons

	* Ordered outcomes
	local ssa_cats = `ssa_cuts' + 1

	foreach x in aime_pct q_pct {
		forvalues j = 1/`ssa_cats'{
			if `j' < `ssa_cats'{
				gen p`x'`j' = normal(`x'_cut`j' - `x'_xb)
			}
			else if `j' == `ssa_cats'{
				gen p`x'`j' = 1
			}
		}
	}		

	* Ordered outcomes
	foreach x in aime_pct q_pct {
		cap drop `x'
		gen `x' = .
		forvalues i = `ssa_cats'(-1)1 {
			replace `x' = `i' if normal(d_`x') <= p`x'`i'
			drop p`x'`i'
		}
	}	
		
	
	**********************************************
	* Incoming cohort model predicts categorical AIME and quarters worked in categories
	* Convert these categories into continuous values, then assign to values
	* Get 50-55 2004 distribution and use this to convert categorical to continous (we have 1000 points in our distribution)
	**********************************************
	gen ssa_draw1 = runiform()
	gen ssa_draw2 = runiform()

	local adj = 100/`ssa_cats'
	
	replace aime_pct = round(`adj'*aime_pct - `adj'*ssa_draw1,0.1)
	replace q_pct = round(`adj'*q_pct - `adj'*ssa_draw2,0.1)

	* Fill in the AIME values
	merge m:1 aime_pct using `ssa', keepusing(aime_dist) keep(matched) nogen
	* Fill in the quarters worked values
	merge m:1 q_pct using `ssa', keepusing(q_dist) keep(matched) nogen

	rename aime_dist raime_test
	rename q_dist rq_test		
		
	gen raime = raime_test
	gen rq = rq_test
	gen fraime = raime
	gen frq = rq
	
	sum rq, detail
	sum raime, detail
	
	save `outdir'/test_assign_`status'_`yr'.dta, replace
	
	* Assign the individuals with no quarters worked using the probit estimates
	est use "$resmodels/`status'/anyrq.ster"
	matrix anyrq = e(b)
	matrix score panyrq_xb = anyrq
	gen d_anyrq = invnorm(runiform())
	gen anyrq = (panyrq_xb > d_anyrq)
	replace rq = 0 if anyrq == 0
	replace frq = rq
	replace raime = 0 if rq == 0
	replace fraime = raime 
	
	drop aime_pct raime_test q_pct rq_test

	* We are missing some of the values in the AIME and quarters worked merge
	drop if missing(hhidpn)

	keep hhidpn wave raime rq fraime frq
	
	foreach var in raime rq fraime frq {
		rename `var' `var'_`status'
	}
	
	save `outdir'/imputed_ssa_`status'_`yr'.dta, replace

}
}

capture log close