*==========================================================================*
* Randomly split the stock_hrs_2006 dataset into 5 folds for 5-fold CV
*
* Reads   : ../../../input_data/stock_hrs_2006.dta
* Writes  : ../../../input_data/5fcrossvalidation_w8.dta      (hhidpn -> fold)
*           ../../../input_data/stock_hrs_2006_5fcv_<k>.dta   (k = 1..5)
*
* Yifan Wei
* 09/01/2020
*==========================================================================*

include "common.do"

* Close any unnamed log left open (e.g. by an interrupted earlier run);
* otherwise -log using- below would abort.
capture log close
log using ID_selection_stock_hrs_2006_5fcv.log, replace

*--------------------------------------------------------------------------*
* Step 1: assign every hhidpn to one of five folds
*--------------------------------------------------------------------------*
use "../../../input_data/stock_hrs_2006.dta", clear
keep hhidpn count

* Fixed seed so the random draws (and therefore the folds) are reproducible
* as long as the row order of the input file does not change.
set seed 2020
gen rand = runiform()
sort rand

* Bucket the uniform draw into five intervals of width 0.2.
* The first and last buckets are left open-ended on the outside: the original
* conditions (rand>0 and rand<1) would leave fold missing for a draw that is
* exactly 0 or that rounds to exactly 1 when stored, and such an observation
* would then be silently excluded from every fold file.
gen fold = .
replace fold = 1 if rand <= 0.2
replace fold = 2 if rand >  0.2 & rand <= 0.4
replace fold = 3 if rand >  0.4 & rand <= 0.6
replace fold = 4 if rand >  0.6 & rand <= 0.8
replace fold = 5 if rand >  0.8 & !missing(rand)

* Every observation must have landed in exactly one fold.
assert inrange(fold, 1, 5)
tab fold

keep hhidpn fold
save "../../../input_data/5fcrossvalidation_w8.dta", replace

*--------------------------------------------------------------------------*
* Step 2: write one dataset per fold
*--------------------------------------------------------------------------*
* For each fold k: reload the full data, attach the fold id by hhidpn, keep
* only fold k, and save in a format readable by older Stata releases.
* _merge and fold are left in the saved files, as before.
forvalues k = 1/5 {
    use "../../../input_data/stock_hrs_2006.dta", clear

    * The fold key was built from this same file, so every row must match.
    merge 1:1 hhidpn using "../../../input_data/5fcrossvalidation_w8.dta", assert(match)

    keep if fold == `k'
    saveold "../../../input_data/stock_hrs_2006_5fcv_`k'.dta", version(11) replace
}

log close