/*
Goal: BMI imputation for respondents who are missing height, weight, or both
across all survey years.

See recode_health.sas for how weight/height are determined when they are not
missing for all years (based on known information).

This file (bmi_imputation.do) gets called by gen_analytic.do. The underweight,
overweight, obese, and wtstate categories are created there as well, after this
imputation has run.

Outline:
  1. Build piecewise-linear age terms used as regressors.
  2. For every survey year (stepping by 2) and each value of male (0/1), fit
     three OLS models for BMI and store out-of-sample predictions:
       h  : weight known, height missing  (adds totalwght)
       w  : height known, weight missing  (adds 1/height^2)
       hw : both missing                  (demographics/health only)
  3. Fill missing bmi from the matching prediction.
  4. For the both-missing case, predict weight from the (now filled) BMI.
  5. Back out the remaining missing height/weight values.
  6. Drop all intermediate variables. bmi_imp is kept.
*/

* ----------------------------------------------------------------------------
* Shared specification pieces.
* Local names carry a bmiimp_ prefix so they cannot collide with a caller's
* locals should this file ever be pulled in with -include- instead of -do-.
* ----------------------------------------------------------------------------
local bmiimp_alive  "died==0 & inyr==1"
local bmiimp_adult  "inrange(age,20,120) & `bmiimp_alive'"
local bmiimp_covars "age34l age3544 age4554 age5564 age65p black hispan other fpoor frich diabe hearte hibpe lunge married"

* Model-specific extra regressor for each BMI model variant (none for hw)
local bmiimp_x_h  "totalwght"
local bmiimp_x_w  "avg_totalheight_inv_sq"
local bmiimp_x_hw ""

* ----------------------------------------------------------------------------
* 1. Age splines for imputation
* NOTE(review): age34l stops rising at 34 while age3544 only starts rising
* above 35, so ages 34 and 35 receive identical spline values. Confirm that
* this one-year flat segment is intended.
* ----------------------------------------------------------------------------
gen age34l  = min(34, age)                     if !missing(age)
gen age3544 = min(max(0, age - 35), 45 - 35)   if !missing(age)
gen age4554 = min(max(0, age - 45), 55 - 45)   if !missing(age)
gen age5564 = min(max(0, age - 55), 65 - 55)   if !missing(age)
gen age65p  = max(0, age - 65)                 if !missing(age)

**** BMI IMPUTATION ****
* Three scenarios: missing height, missing weight, or missing both (across all
* survey years). See recode_health.sas for imputation of weight/height when
* they are not missing for all years.

* Holders for the stacked predictions. The first three collect BMI predictions
* per scenario; the last collects predicted weight for the both-missing case.
foreach holder in bmi_pred_miss_h bmi_pred_miss_w bmi_pred_miss_hw wght_pred_miss_hw {
    gen `holder' = .
}

* Regressor for the known-height model
gen avg_totalheight_inv_sq = 1/(avg_totalheight*avg_totalheight)

* BMI imputation flag: missing outside the adult/alive/in-year sample,
* 1 inside it when weight or height is missing, 0 otherwise.
gen bmi_imp = (missing(totalwght) | missing(avg_totalheight)) if `bmiimp_adult'

* ----------------------------------------------------------------------------
* 2. Fit the three BMI models within each year-by-sex cell and predict.
* Estimation uses rows with bmi in [15,50]; prediction targets rows of the
* same cell whose bmi is missing. bmi itself is not touched inside this loop,
* so all three models are fit on the same estimation sample.
* ----------------------------------------------------------------------------
forvalues yr = $firstyr(2)$lastyr {
    forvalues gndr = 0/1 {
        local bmiimp_cell "year==`yr' & male==`gndr'"
        foreach m in h w hw {
            regress bmi `bmiimp_x_`m'' `bmiimp_covars' ///
                if `bmiimp_cell' & `bmiimp_adult' & inrange(bmi,15,50)
            predict bmi_pred_miss_`m'_`yr'_male`gndr' ///
                if `bmiimp_cell' & missing(bmi)
            replace bmi_pred_miss_`m' = bmi_pred_miss_`m'_`yr'_male`gndr' ///
                if `bmiimp_cell' & `bmiimp_alive'
        }
    }
}

* ----------------------------------------------------------------------------
* 3. Replace missing BMI with the prediction that matches what is observed
* ----------------------------------------------------------------------------
* weight known, height missing
replace bmi = bmi_pred_miss_h ///
    if missing(bmi) & !missing(totalwght) & missing(avg_totalheight) & `bmiimp_adult'
* height known, weight missing
replace bmi = bmi_pred_miss_w ///
    if missing(bmi) & missing(totalwght) & !missing(avg_totalheight) & `bmiimp_adult'
* both missing
replace bmi = bmi_pred_miss_hw ///
    if missing(bmi) & missing(totalwght) & missing(avg_totalheight) & `bmiimp_adult'

* ----------------------------------------------------------------------------
* 4. Weight imputation from (possibly imputed) BMI, for use when both weight
*    and height are missing. Must run after step 3 because bmi is a regressor.
* ----------------------------------------------------------------------------
forvalues yr = $firstyr(2)$lastyr {
    forvalues gndr = 0/1 {
        local bmiimp_cell "year==`yr' & male==`gndr'"
        regress totalwght bmi `bmiimp_covars' ///
            if `bmiimp_cell' & `bmiimp_adult' & inrange(bmi,15,50)
        predict wght_pred_miss_hw_`yr'_male`gndr' ///
            if `bmiimp_cell' & missing(totalwght)
        replace wght_pred_miss_hw = wght_pred_miss_hw_`yr'_male`gndr' ///
            if `bmiimp_cell' & `bmiimp_alive'
    }
}

* ----------------------------------------------------------------------------
* 5. Fill in missing height and weight. The order of these four statements
*    matters: each one only changes rows the previous ones left missing.
* NOTE(review): the formulas below treat bmi as weight/height^2 with no unit
* conversion factor. Confirm this matches how bmi is built upstream.
* ----------------------------------------------------------------------------
* (a) height from known weight and imputed BMI (stays missing if weight is missing)
replace avg_totalheight = sqrt(totalwght/bmi) ///
    if missing(avg_totalheight) & `bmiimp_adult'
* (b) weight from known height and imputed BMI (stays missing if height is missing)
replace totalwght = bmi*avg_totalheight*avg_totalheight ///
    if missing(totalwght) & `bmiimp_adult'
* (c) remaining missing weight: use the weight predicted from imputed BMI
replace totalwght = wght_pred_miss_hw ///
    if missing(totalwght) & `bmiimp_adult'
* (d) remaining missing height: from imputed weight and imputed BMI
replace avg_totalheight = sqrt(totalwght/bmi) ///
    if missing(avg_totalheight) & `bmiimp_adult'

* ----------------------------------------------------------------------------
* 6. Drop intermediate variables (the wildcards also remove the per-cell
*    prediction variables created inside the loops)
* ----------------------------------------------------------------------------
drop age34l age3544 age4554 age5564 age65p bmi_pred* wght_pred* avg_totalheight_inv_sq