/*
Wendy Cheng, Sep-25 2015

Generate NHANES analytic file to estimate the biomarkers and
cholesterol-lowering drug use in NHANES.
Uses the data from 1999 to 2014.

Outline:
  1. Load the demographic file of each two-year cycle and tag it with weights
     and a cohort year.
  2. Build person-level files (one row per seqn) for self-reported cholesterol
     treatment, prescription-based drug flags, health conditions, lab
     biomarkers, blood pressure, smoking and BMI.
  3. Stack the demographic files, merge everything on seqn, derive the
     analytic variables and save $outdata/nhanes_analytic.dta.
*/

clear
set more off
include "../../../fem_env.do"

global nhanes_dir /sch-data-library/public-data/NHANES

#delimit;

***************************************************;
***** Demographic files, one per two-year cycle    ;
***************************************************;
* weight is the interview weight, mec_weight the examination weight;
* NOTE(review): the 2011-2012 and 2013-2014 files are read with upper-case
  weight and race names. Other demographic variables used further down
  (riagendr ridagemn ridageyr dmdeduc2 indhhin2 dmdmartl) are referenced in
  lower case - confirm they are stored that way in those two files, otherwise
  they will be silently missing for cohorts 2011 and 2013 after the append;

* NHANES 1999-2000;
tempfile nhanes99;
use $nhanes_dir/stata/1999-2000/demo, clear;
gen weight = wtint2yr;
gen mec_weight = wtmec2yr;
gen cohort = 1999;
duplicates report seqn;
save `nhanes99';
count;

* NHANES 2001-2002;
tempfile nhanes01;
use $nhanes_dir/stata/2001-2002/demo_b, clear;
gen weight = wtint2yr;
gen mec_weight = wtmec2yr;
gen cohort = 2001;
save `nhanes01';
count;

* NHANES 2003-2004;
tempfile nhanes03;
use $nhanes_dir/stata/2003-2004/demo_c, clear;
gen weight = wtint2yr;
gen mec_weight = wtmec2yr;
gen cohort = 2003;
save `nhanes03';
count;

* NHANES 2005-2006;
tempfile nhanes05;
use $nhanes_dir/stata/2005-2006/demo_d, clear;
gen weight = wtint2yr;
gen mec_weight = wtmec2yr;
gen cohort = 2005;
save `nhanes05';
count;

* NHANES 2007-2008;
tempfile nhanes07;
use $nhanes_dir/stata/2007-2008/demo_e, clear;
gen weight = wtint2yr;
gen mec_weight = wtmec2yr;
gen cohort = 2007;
save `nhanes07';
count;

* NHANES 2009-2010;
tempfile nhanes09;
use $nhanes_dir/stata/2009-2010/demo_f, clear;
gen weight = wtint2yr;
gen mec_weight = wtmec2yr;
gen cohort = 2009;
save `nhanes09';
count;

* NHANES 2011-2012;
tempfile nhanes11;
use $nhanes_dir/stata/2011-2012/demo_g, clear;
gen weight = WTINT2YR;
gen mec_weight = WTMEC2YR;
rename RIDRETH1 ridreth1;
gen cohort = 2011;
save `nhanes11';
count;

* NHANES 2013-2014;
tempfile nhanes13;
use $nhanes_dir/stata/2013-2014/DEMO_H, clear;
gen weight = WTINT2YR;
gen mec_weight = WTMEC2YR;
rename RIDRETH1 ridreth1;
gen cohort = 2013;
save `nhanes13';
count;

***************************************************;
***** Self-reported cholesterol treatment          ;
***************************************************;
*** Based on Blood Pressure & Cholesterol Questionnaire 1999 to 2014;
*** SR_rxchol_dr is 1 when BPQ090D equals 1 and SR_rxchol is 1 when BPQ100D
    equals 1. Any other answer, including missing, is coded 0;
tempfile bpq;
use $nhanes_dir/stata/1999-2000/bpq, clear;
append using
    $nhanes_dir/stata/2001-2002/bpq_b
    $nhanes_dir/stata/2003-2004/bpq_c
    $nhanes_dir/stata/2005-2006/bpq_d
    $nhanes_dir/stata/2007-2008/bpq_e
    $nhanes_dir/stata/2009-2010/bpq_f
    $nhanes_dir/stata/2011-2012/BPQ_G
    $nhanes_dir/stata/2013-2014/BPQ_H;
keep seqn BPQ020 BPQ080 BPQ090D BPQ100D BPQ050A;
gen SR_rxchol_dr = (BPQ090D == 1);
gen SR_rxchol = (BPQ100D == 1);
save `bpq';

***************************************************;
***** Prescription-based drug use indicators       ;
***************************************************;
*** Based on Prescription Drug files 1999 to 2012;
*** statinrx - statins;
*** ezerx    - ezetimibe, alone or in combination;
*** rxchol   - any antilipidemic or cholesterol lowering drug, that is the
               statin and ezetimibe codes plus other lipid-lowering therapy;
*** NOTE(review): the 2013-2014 prescription file is not appended, so these
    three flags stay missing for cohort 2013 after the merge - confirm that
    this is intended;
tempfile rxflags;
use $nhanes_dir/stata/1999-2000/rxq_rx, clear;
append using
    $nhanes_dir/stata/2001-2002/rxq_rx_b
    $nhanes_dir/stata/2003-2004/rxq_rx_c
    $nhanes_dir/stata/2005-2006/rxq_rx_d
    $nhanes_dir/stata/2007-2008/rxq_rx_e
    $nhanes_dir/stata/2009-2010/rxq_rx_f
    $nhanes_dir/stata/2011-2012/rxq_rx_g;

* inlist accepts at most 10 arguments for strings, hence the split calls;
gen statinrx =
      inlist(rxddrgid, "d00280", "d00348", "d00746", "d03183", "d04105", "d04140")
    | inlist(rxddrgid, "d04851", "d07637", "d04787", "d05348", "d07110");
gen ezerx = inlist(rxddrgid, "d04824", "d05348");
gen rxchol = statinrx
    | ezerx
    | inlist(rxddrgid, "c00019", "d00193", "d00196", "d00245", "d00353")
    | inlist(rxddrgid, "d00744", "d04286", "d04695", "d07371");

* One row per person: flag is 1 if any reported drug matches;
collapse (max) rxchol statinrx ezerx, by(seqn);
save `rxflags';

local vrxchol rxchol statinrx ezerx SR_rxchol_dr SR_rxchol;

***************************************************;
***** Create the health indicators                 ;
***************************************************;
tempfile health mcq diq;

* Medical conditions section;
use $nhanes_dir/stata/1999-2000/mcq, clear;
append using
    $nhanes_dir/stata/2001-2002/mcq_b
    $nhanes_dir/stata/2003-2004/mcq_c
    $nhanes_dir/stata/2005-2006/mcq_d
    $nhanes_dir/stata/2007-2008/mcq_e
    $nhanes_dir/stata/2009-2010/mcq_f
    $nhanes_dir/stata/2011-2012/mcq_g
    $nhanes_dir/stata/2013-2014/MCQ_H;
save `mcq';

* Diabetes section;
use $nhanes_dir/stata/1999-2000/diq, clear;
append using
    $nhanes_dir/stata/2001-2002/diq_b
    $nhanes_dir/stata/2003-2004/diq_c
    $nhanes_dir/stata/2005-2006/diq_d
    $nhanes_dir/stata/2007-2008/diq_e
    $nhanes_dir/stata/2009-2010/diq_f
    $nhanes_dir/stata/2011-2012/diq_g
    $nhanes_dir/stata/2013-2014/DIQ_H;
save `diq';

* Blood pressure items come from the bpq file, which was saved earlier;
* MCQ160B - Congestive heart failure, MCQ160C - coronary heart disease,
  MCQ160D - angina, MCQ160E - heart attack;
* ASCVD includes coronary heart disease (CHD) (including angina, MI, heart
  attack), stroke, and peripheral arterial disease;
use `mcq', clear;
merge 1:1 seqn using `diq', keep(master match) keepusing(DIQ010) nogen;
merge 1:1 seqn using `bpq', keep(master match)
    keepusing(BPQ020 BPQ050A SR_rxchol_dr SR_rxchol) nogen;

* Every indicator is 1 for an answer of 1 and 0 otherwise, including missing;
gen cancre = (MCQ220 == 1);
gen diabe = (DIQ010 == 1);
gen hearte = (MCQ160B == 1 | MCQ160C == 1 | MCQ160D == 1 | MCQ160E == 1);
gen hibpe = (BPQ020 == 1);
gen lunge = (MCQ160G == 1);
gen stroke = (MCQ160F == 1);
gen hearta = (MCQ160E == 1);
gen hibperx = (BPQ050A == 1 & hibpe == 1);
save `health';

***************************************************;
***** Lab and examination data                     ;
***************************************************;
* Cholesterol Lab Data - LDL, TG, merge with total cholesterol;
* Note that the LDL and TG has different sample weight;
* NOTE(review): the keep list mixes lower-case lab names with an upper-case
  weight name - confirm the case is consistent across all appended cycles;
tempfile ldl;
use $nhanes_dir/stata/1999-2000/lab13am, clear;
append using
    $nhanes_dir/stata/2001-2002/l13am_b
    $nhanes_dir/stata/2003-2004/l13am_c
    $nhanes_dir/stata/2005-2006/trigly_d
    $nhanes_dir/stata/2007-2008/trigly_e
    $nhanes_dir/stata/2009-2010/trigly_f
    $nhanes_dir/stata/2011-2012/trigly_g
    $nhanes_dir/stata/2013-2014/trigly_h;
keep seqn lbxtr lbdldl lbdldlsi WTSAF2YR;
rename WTSAF2YR ldl_weight;
save `ldl';

* Total cholesterol;
tempfile tchol;
use $nhanes_dir/stata/1999-2000/lab13, clear;
append using
    $nhanes_dir/stata/2001-2002/l13_b
    $nhanes_dir/stata/2003-2004/l13_c
    $nhanes_dir/stata/2005-2006/tchol_d
    $nhanes_dir/stata/2007-2008/tchol_e
    $nhanes_dir/stata/2009-2010/tchol_f
    $nhanes_dir/stata/2011-2012/tchol_g
    $nhanes_dir/stata/2013-2014/TCHOL_H;
keep seqn lbxtc;
save `tchol';

* HDL - stored with total cholesterol through 2003-2004, separately after;
* NOTE(review): only lbxhdd is kept from the 1999-2004 files. Confirm that
  all three of those cycles store HDL under that name, otherwise HDL is
  silently missing for the cycles that use a different name;
tempfile hdl_temp hdl;
use $nhanes_dir/stata/1999-2000/lab13, clear;
append using
    $nhanes_dir/stata/2001-2002/l13_b
    $nhanes_dir/stata/2003-2004/l13_c;
keep seqn lbxhdd;
rename lbxhdd lbdhdd;
save `hdl_temp';

use $nhanes_dir/stata/2005-2006/hdl_d, clear;
append using
    $nhanes_dir/stata/2007-2008/hdl_e
    $nhanes_dir/stata/2009-2010/hdl_f
    $nhanes_dir/stata/2011-2012/hdl_g
    $nhanes_dir/stata/2013-2014/HDL_H;
keep seqn lbdhdd;
append using `hdl_temp';
save `hdl';

* Systolic BP - from Examination Data, mean of the available readings;
tempfile sysbp;
use $nhanes_dir/stata/1999-2000/bpx, clear;
append using
    $nhanes_dir/stata/2001-2002/bpx_b
    $nhanes_dir/stata/2003-2004/bpx_c
    $nhanes_dir/stata/2005-2006/bpx_d
    $nhanes_dir/stata/2007-2008/bpx_e
    $nhanes_dir/stata/2009-2010/bpx_f
    $nhanes_dir/stata/2011-2012/bpx_g
    $nhanes_dir/stata/2013-2014/BPX_H;
egen sysbp = rowmean(BPXSY1 BPXSY2 BPXSY3 BPXSY4);
keep seqn sysbp;
save `sysbp';

* Glycohemoglobin (HbA1c);
tempfile a1c;
use $nhanes_dir/stata/1999-2000/lab10, clear;
append using
    $nhanes_dir/stata/2001-2002/l10_b
    $nhanes_dir/stata/2003-2004/l10_c
    $nhanes_dir/stata/2005-2006/ghb_d
    $nhanes_dir/stata/2007-2008/ghb_e
    $nhanes_dir/stata/2009-2010/ghb_f
    $nhanes_dir/stata/2011-2012/ghb_g
    $nhanes_dir/stata/2013-2014/GHB_H;
rename lbxgh a1c;
keep seqn a1c;
save `a1c';

* C-reactive protein (CRP) - files are appended through 2009-2010 only;
tempfile crp;
use $nhanes_dir/stata/1999-2000/lab11, clear;
append using
    $nhanes_dir/stata/2001-2002/l11_b
    $nhanes_dir/stata/2003-2004/l11_c
    $nhanes_dir/stata/2005-2006/crp_d
    $nhanes_dir/stata/2007-2008/crp_e
    $nhanes_dir/stata/2009-2010/crp_f;
rename lbxcrp crp;
keep seqn crp;
save `crp';

* Smoking status;
* smoken is 1 when SMQ040 is 1 or 2, 0 otherwise, and missing when SMQ020
  is missing;
tempfile smoke;
use $nhanes_dir/stata/1999-2000/smq, clear;
append using
    $nhanes_dir/stata/2001-2002/smq_b
    $nhanes_dir/stata/2003-2004/smq_c
    $nhanes_dir/stata/2005-2006/smq_d
    $nhanes_dir/stata/2007-2008/smq_e
    $nhanes_dir/stata/2009-2010/smq_f
    $nhanes_dir/stata/2011-2012/smq_g
    $nhanes_dir/stata/2013-2014/SMQ_H;
gen smoken = (SMQ040 == 1 | SMQ040 == 2) & !missing(SMQ040);
replace smoken = . if SMQ020 == .;
* Full variable name is used so the script does not depend on variable
  abbreviation being enabled;
keep seqn smoken;
save `smoke';

* BMI;
tempfile bmi;
use $nhanes_dir/stata/1999-2000/bmx, clear;
append using
    $nhanes_dir/stata/2001-2002/bmx_b
    $nhanes_dir/stata/2003-2004/bmx_c
    $nhanes_dir/stata/2005-2006/bmx_d
    $nhanes_dir/stata/2007-2008/bmx_e
    $nhanes_dir/stata/2009-2010/bmx_f
    $nhanes_dir/stata/2011-2012/bmx_g
    $nhanes_dir/stata/2013-2014/BMX_H;
gen bmi = bmxbmi;
keep seqn bmi;
save `bmi';

local vhealth "cancre diabe hearte hibpe lunge stroke hearta bmi wtstate obese overwt smoken";
local vbiomarker "ldl_weight lbdldl lbdldlsi lbxtr lbxtc lbdhdd sysbp a1c crp";

********************************;
*** Bring all the files together;
********************************;
use `nhanes99', clear;
append using `nhanes01' `nhanes03' `nhanes05' `nhanes07' `nhanes09' `nhanes11' `nhanes13';
drop wtmrep* wtirep*;
count;

* Every merge keeps all demographic rows and never adds rows;
merge 1:1 seqn using `rxflags', keep(master match) nogen;
merge 1:1 seqn using `health', keep(master match) nogen;
merge 1:1 seqn using `ldl', keep(master match) nogen;
merge 1:1 seqn using `hdl', keep(master match) nogen;
merge 1:1 seqn using `tchol', keep(master match) nogen;
merge 1:1 seqn using `bmi', keep(master match) nogen;
merge 1:1 seqn using `smoke', keep(master match) nogen;
merge 1:1 seqn using `sysbp', keep(master match) nogen;
merge 1:1 seqn using `a1c', keep(master match) nogen;
merge 1:1 seqn using `crp', keep(master match) nogen;

tab cohort;

* Recode variables - Corresponds to HRS data;

* Male;
gen male = (riagendr == 1);

* Race/ethnicity;
* The recode maps ridreth1 3 to 1, 4 to 2, 1 and 2 to 3, and 5 to 4, so race
  is 1 white, 2 black, 3 hispanic, 4 other. The dummies below follow the
  recoded values and are mutually exclusive;
recode ridreth1 (3=1) (4=2) (1/2=3) (5=4);
gen race = ridreth1;
gen white = (race == 1);
gen black = (race == 2);
gen hispan = (race == 3);
gen other = (race == 4);

* Age in years - from age in months when available, otherwise age in years.
  No additional top-coding is applied here;
gen age = ridagemn / 12;
replace age = ridageyr if missing(age);
gen lage = age - 2;
gen age_year = ridageyr;

* Education;
recode dmdeduc2 (1/2=1) (3=2) (4/5=3) (else=.), gen(educ);
gen hsless = (educ == 1);
gen college = (educ == 3);

* Income;
* hhin uses indhhinc, falling back to indhhin2. Codes 14 and 15 of indhhin2
  are folded into category 11 and any remaining code above 11 is set to
  missing;
gen hhin = indhhinc;
replace hhin = indhhin2 if missing(hhin);
replace hhin = 11 if indhhin2 == 14 | indhhin2 == 15;
replace hhin = . if hhin > 11;
tab hhin, missing;
* Missing hhin is coded 0 here because missing is not <= 7;
gen hhinlt45k = 0;
replace hhinlt45k = 1 if hhin <= 7;

* Marital Status;
gen widowed = (dmdmartl == 2);
gen single = inlist(dmdmartl, 3, 4, 5);

* Selection criteria;
drop if age < 18;

* Some interactions;
gen male_college = male * college;
gen male_hsless = male * hsless;
gen male_black = male * black;
gen male_hispan = male * hispan;

* Obesity;
* wtstate 1 - bmi below 25, 2 - bmi from 25 to below 30, 3 - bmi 30 or more.
  All three weight variables are missing when bmi is missing;
gen wtstate = .;
replace wtstate = 1 if bmi < 25 & !missing(bmi);
replace wtstate = 2 if bmi >= 25 & bmi < 30 & !missing(bmi);
replace wtstate = 3 if bmi >= 30 & !missing(bmi);
label var wtstate "bmi status";
label define wtlb 1 "1 normal or underwt" 2 "2 overweight" 3 "3 obese", modify;
label values wtstate wtlb;

gen obese = (wtstate == 3) if !missing(wtstate);
gen overwt = (wtstate == 2) if !missing(wtstate);

gen logbmi = log(bmi);

* Treated and Untreated systolic BP - For risk calculation;
gen treat_sysbp = sysbp if hibperx == 1 & sysbp != .;
gen untreat_sysbp = sysbp if hibperx == 0 & sysbp != .;

local vdemo "age_year age lage male race white black hispan other single widowed educ hsless college male_black male_hispan male_hsless male_college hhin*";

* treat_sysbp and untreat_sysbp are kept so the risk-calculation inputs
  built above reach the saved file;
keep seqn cohort `vrxchol' `vhealth' `vbiomarker' `vdemo'
    hibperx treat_sysbp untreat_sysbp weight mec_weight logbmi;
order seqn cohort `vrxchol' `vhealth' `vdemo' hibperx weight mec_weight logbmi;

**gsort cohort - SR_rxchol_dr - SR_rxchol - rxchol - statinrx - ezerx ;

gen yr = cohort;
count;
tab cohort;

**replace age = floor(age);

* Rename biomarker variables lbxtc lbxtr lbdldl lbdldlsi lbdhdd;
rename lbxtc tchol;
rename lbxtr trigly;
rename lbdldl ldl;
rename lbdldlsi ldl_mmol;
rename lbdhdd hdl;

*** Recategorize ldl - indicators for ldl above 70 and above 100;
gen ldl70 = (ldl > 70);
replace ldl70 = . if ldl == .;
gen ldl100 = (ldl > 100);
replace ldl100 = . if ldl == .;

* NOTE(review): hearte also includes MCQ160B (congestive heart failure), so
  ASCVD as built here does too - confirm this matches the intended definition;
gen ASCVD = (hearte == 1 | stroke == 1);

save $outdata/nhanes_analytic.dta, replace;

#delimit cr