**Purpose: link two IPUMS full-count extracts through the MLP crosswalk(s).
**  1. Suffix every variable in each extract with its census year and save a temporary copy.
**  2. Locate the MLP crosswalk for the two years, or build it by chaining the
**     consecutive-year crosswalks when the years are not adjacent.
**  3. Join extract 1 -> crosswalk -> extract 2, label the link type, save, and report counts.
**
**You will need to define the file path where the IPUMS extracts and MLP crosswalk(s) are saved
**If linking IPUMS extracts from non-consecutive census years, download all MLP Crosswalks.
**Define the name of the two IPUMS extract Stata files you wish to link to the MLP crosswalk
**The first extract should be the first year of the crosswalk (e.g. 1900-1910 crosswalk, extract_1 should be the 1900 data)
**Do not include the file extension (.dta) in the extract_1 or extract_2 local macros

local path ""
local extract_1 ""
local extract_2 ""

*********************************************************************************************

*****Open the IPUMS extracts, rename variables*****
local extract_num = 0
foreach extract in `extract_1' `extract_2' {
    local ++extract_num
    display "Open `extract' extract, rename variables"
    clear
    use "`path'/`extract'.dta"
    quietly summarize year
    local year = r(min)

    * Record the census year of each extract here, while the variable is still
    * called "year"; after the rename below it only resolves via abbreviation.
    if `extract_num' == 1 {
        local year_a = `year'
    }
    else {
        local year_b = `year'
    }

    rename * *_`year'

    *****Sort file by HISTID, check for HISTID consistency for 1860/1870*****
    * Every variable now carries the _`year' suffix, so use the full name.
    sort histid_`year'
    if `year' == 1860 | `year' == 1870 {
        * Keep only the first record of each run of identical HISTIDs.
        quietly gen dupe = 1 if histid_`year' == histid_`year'[_n-1]
        quietly drop if dupe == 1
        quietly drop dupe
    }

    quietly save "`path'/`extract'_to_merge.dta", replace
}

*****Determine if MLP Crosswalk exists. If non-sequential years, build non-sequential MLP crosswalk*****
local crosswalk "`path'/mlp_`year_a'_`year_b'_v1.0.dta"
capture confirm file "`crosswalk'"
* Read _rc immediately, before any other command can overwrite it.
local crosswalk_rc = _rc

if `crosswalk_rc' == 0 {
    *****MLP Crosswalk exists, move to next step*****
    display "mlp_`year_a'_`year_b'_v1.0.dta exists"
}
else if `year_a' >= `year_b' {
    * A crosswalk can only be chained forward in time; stop before writing a bogus file.
    display as error "extract_1 (`year_a') must be an earlier census year than extract_2 (`year_b')"
    exit 198
}
else {
    *****MLP Crosswalk does not exist, create new MLP Crosswalk*****
    display "Create the Crosswalk file mlp_`year_a'_`year_b'_v1.0.dta"

    *****Define MLP Crosswalks to combine*****
    * The 1880 crosswalk links straight to 1900 (the 1890 step is skipped below).
    if `year_a' == 1880 {
        local year_seq = `year_a' + 20
    }
    else {
        local year_seq = `year_a' + 10
    }
    local year_final_link = `year_b' - 10

    *****Open First MLP Crosswalk*****
    clear
    quietly use "`path'/mlp_`year_a'_`year_seq'_v1.0.dta"

    *****Combine MLP Crosswalks together, keep only records that linked through entire time period*****
    forvalues year_a_temp = `year_seq' (10) `year_final_link' {
        if `year_a_temp' == 1890 {
            display "Skip 1890 Link: Data Missing"
        }
        else {
            if `year_a_temp' == 1880 {
                local year_b_temp = `year_a_temp' + 20
            }
            else {
                local year_b_temp = `year_a_temp' + 10
            }
            display "Link `year_a' MLP cases to `year_b_temp' MLP cases"
            confirm file "`path'/mlp_`year_a_temp'_`year_b_temp'_v1.0.dta"
            * Read the crosswalk from `path' (the same location confirm file checked),
            * and let a failed merge stop the script instead of saving a partial crosswalk.
            quietly merge 1:1 histid_`year_a_temp' using "`path'/mlp_`year_a_temp'_`year_b_temp'_v1.0.dta", assert(master using match) keep(match) nogenerate
        }
    }

    *****Clean up file*****
    * Done before the save so the stored crosswalk holds only the two end-point IDs.
    quietly keep histid_`year_a' histid_`year_b'
    quietly sort histid_`year_a'

    *****Save new MLP Crosswalk*****
    quietly save "`crosswalk'", replace
}

*****Open the first IPUMS extract*****
clear
quietly use "`path'/`extract_1'_to_merge.dta"

*****Merge the first IPUMS extract to the MLP crosswalk*****
quietly joinby histid_`year_a' using "`path'/mlp_`year_a'_`year_b'_v1.0.dta", unmatched(both) _merge(link_`year_a'_mlp)

*****Merge the second IPUMS extract to the MLP crosswalk and the first IPUMS extract*****
quietly joinby histid_`year_b' using "`path'/`extract_2'_to_merge.dta", unmatched(both) _merge(link_`year_a'_`year_b')

*****Create label indicating linked records and diagnostic for select cases that are not linked*****
* 4: crosswalk row had no record in extract 1 but its year_b record is in extract 2.
* 5: extract 1 record is in the crosswalk but its year_b record is not in extract 2.
quietly replace link_`year_a'_`year_b' = 4 if link_`year_a'_mlp == 2 & link_`year_a'_`year_b' == 3
quietly replace link_`year_a'_`year_b' = 5 if link_`year_a'_mlp == 3 & link_`year_a'_`year_b' == 1
quietly drop link_`year_a'_mlp
* Crosswalk rows that matched neither extract carry no data; drop them.
quietly drop if year_`year_a' == . & year_`year_b' == .
label drop __MERGE
label define linktype_lbl 1 "Record only in `year_a'"
label define linktype_lbl 2 "Record only in `year_b'", add
label define linktype_lbl 3 "Linked record `year_a'-`year_b'", add
label define linktype_lbl 4 "Linked record `year_a' not found", add
label define linktype_lbl 5 "Linked record `year_b' not found", add
label values link_`year_a'_`year_b' linktype_lbl
label var link_`year_a'_`year_b' "Link type"

*****Save the combined file*****
quietly save "`path'/`extract_1'_`year_a'_`year_b'.dta", replace

*****Erase Extract to merge files*****
erase "`path'/`extract_1'_to_merge.dta"
erase "`path'/`extract_2'_to_merge.dta"

*****Indicate records that have a linked record, but because of case selection, were not linked to crosswalk*****
* Count each link type by its code. A tabulate matcell() matrix has one row per
* observed value, so row 3 is not code 3 whenever some link type is absent.
quietly count if link_`year_a'_`year_b' == 3
local n_linked = r(N)
quietly count if link_`year_a'_`year_b' == 4
local n_missing_a = r(N)
quietly count if link_`year_a'_`year_b' == 5
local n_missing_b = r(N)
local n_total = `n_linked' + `n_missing_a' + `n_missing_b'

display _newline(2) as text %10.0fc `n_linked' " cases (" %3.2f (`n_linked' / `n_total')*100 "% of linked records )" ///
    _newline as result "in MLP `year_a'-`year_b' crosswalk linked" ///
    _newline(2) as error %10.0fc `n_missing_a' " cases from `year_a' in `extract_1' (" %3.2f (`n_missing_a' / `n_total')*100 "% of linked records )" ///
    _newline(1) %10.0fc `n_missing_b' " cases from `year_b' in `extract_2' (" %3.2f (`n_missing_b' / `n_total')*100 "% of linked records )" ///
    _newline(1) as result "in MLP `year_a'-`year_b' crosswalk not linked due to case selection"

*********************************************************************************************