**You will need to define the file path where the IPUMS extracts and MLP crosswalk(s) are saved

**If linking IPUMS extracts from non-consecutive census years, download every MLP crosswalk between those years and save them all in the same folder.

**Define the names of the two IPUMS extract Stata files you wish to link to the MLP crosswalk

**The first extract should be the first year of the crosswalk (e.g. for the 1900-1910 crosswalk, extract_1 should be the 1900 data)

**Do not include the file extension (.dta) in the extract_1 or extract_2 local macros

* Folder holding the IPUMS extracts and the MLP crosswalk files; every file
* name below is appended to it with a "/" separator
local path ""

* File names, without the .dta extension, of the two extracts to link:
* extract_1 is the earlier census year, extract_2 the later one
local extract_1 ""
local extract_2 ""

*********************************************************************************************

*****Open the IPUMS extracts, rename variables*****
* For each extract: load it, take the census year from the year variable,
* suffix every variable name with that year, sort by the year-suffixed
* histid and save a "_to_merge" working copy next to the original.
* The local macro year keeps the census year of the last extract processed
* (extract_2) once the loop has finished; later code reads it.

foreach extract in `extract_1' `extract_2' {
	display "Open `extract' extract, rename variables"
	clear
	use "`path'/`extract'.dta"

	* The smallest value of year is taken as the extract's census year
	quietly summarize year
	local year = r(min)

	rename * *_`year'

	*****Sort file by HISTID, check for HISTID consistency for 1860/1870*****
	* histid is referenced by its full post-rename name so the script does not
	* depend on variable-name abbreviation (which fails under
	* "set varabbrev off" or when another variable name begins with histid)
	sort histid_`year'

	if `year' == 1860 | `year' == 1870 {
		* Keep a single record per histid: drop every observation whose
		* histid equals that of the observation sorted directly before it
		quietly drop if histid_`year' == histid_`year'[_n-1]
	}

	quietly save "`path'/`extract'_to_merge.dta", replace
}

*****Open the first IPUMS extract*****

quietly use "`path'/`extract_1'_to_merge.dta", clear

*****Assign which crosswalk to merge the IPUMS data to*****
* year_a is read from the first extract (year resolves to the renamed
* year variable through variable-name abbreviation); year_b is the census
* year left in the local macro year by the extract preparation loop,
* i.e. the year of the second extract.

summarize year, meanonly

local year_a = r(min)
local year_b = `year'

*****Determine if MLP Crosswalk exists. If non-sequential years, build non-sequential MLP crosswalk*****
* Reads the locals path, extract_1, year_a and year_b.
* If a direct year_a-year_b crosswalk file is present it is used as is and the
* data in memory is left untouched. Otherwise the crosswalk is built by
* chaining the crosswalks between consecutive available census years (1880
* links straight to 1900 because there is no 1890 file), keeping only records
* linked through every step, and saved under the direct crosswalk's file name.
* After a build, the first extract's _to_merge file is reloaded into memory.
capture confirm file "`path'/mlp_`year_a'_`year_b'_v1.0.dta"

if _rc == 0 {
	*****MLP Crosswalk exists, move to next step*****
	display "mlp_`year_a'_`year_b'_v1.0.dta exists"
}
else {
	*****MLP Crosswalk does not exist, create new MLP Crosswalk*****
	* A chain can only be built forward in time; stop with a clear message
	* instead of saving a crosswalk that lacks the year_b identifiers
	if `year_a' >= `year_b' {
		display as error "Cannot build mlp_`year_a'_`year_b'_v1.0.dta: extract_1 (`year_a') must be an earlier census year than extract_2 (`year_b')"
		exit 198
	}

	display "Create the Crosswalk file mlp_`year_a'_`year_b'_v1.0.dta"

	*****Define MLP Crosswalks to combine*****
	* The first crosswalk spans 20 years when it starts in 1880, 10 otherwise
	local year_seq = `year_a' + cond(`year_a' == 1880, 20, 10)
	local year_final_link = `year_b' - 10

	*****Open First MLP Crosswalk*****
	clear
	quietly use "`path'/mlp_`year_a'_`year_seq'_v1.0.dta"

	*****Combine MLP Crosswalks together, keep only records that linked through entire time period*****
	forvalues year_a_temp = `year_seq'(10)`year_final_link' {
		if `year_a_temp' == 1890 {
			display "Skip 1890 Link: Data Missing"
			continue
		}

		local year_b_temp = `year_a_temp' + cond(`year_a_temp' == 1880, 20, 10)

		display "Link `year_a' MLP cases to `year_b_temp' MLP cases"
		confirm file "`path'/mlp_`year_a_temp'_`year_b_temp'_v1.0.dta"

		* The using file is read from the same folder that confirm file just
		* checked, and the merge is not wrapped in capture, so a failed link
		* stops the script instead of silently producing an incomplete file
		quietly merge 1:1 histid_`year_a_temp' using "`path'/mlp_`year_a_temp'_`year_b_temp'_v1.0.dta", keep(match) nogenerate
	}

	*****Clean up file*****
	* Done before saving so the stored crosswalk holds only the two endpoint
	* identifiers, sorted by the first one
	keep histid_`year_a' histid_`year_b'
	sort histid_`year_a'

	*****Save new MLP Crosswalk*****
	quietly save "`path'/mlp_`year_a'_`year_b'_v1.0.dta", replace

	*****Reload first IPUMS extract*****
	clear
	quietly use "`path'/`extract_1'_to_merge.dta"
}
*****Merge the first IPUMS extract to the MLP crosswalk*****
* joinby match indicators are coded 1 = master only, 2 = using only, 3 = both.
* mlp_flag marks extract 1 versus the crosswalk; link_flag marks that joined
* data versus extract 2 and becomes the final link-type variable.

local mlp_flag  link_`year_a'_mlp
local link_flag link_`year_a'_`year_b'

quietly joinby histid_`year_a' using "`path'/mlp_`year_a'_`year_b'_v1.0.dta", unmatched(both) _merge(`mlp_flag')

*****Merge the second IPUMS extract to the MLP crosswalk and the first IPUMS extract*****

quietly joinby histid_`year_b' using "`path'/`extract_2'_to_merge.dta", unmatched(both) _merge(`link_flag')

*****Create label indicating linked records and diagnostic for select cases that are not linked*****

* 4: crosswalk row absent from extract 1 but matched in extract 2
quietly replace `link_flag' = 4 if `mlp_flag' == 2 & `link_flag' == 3
* 5: extract 1 row present in the crosswalk but unmatched in extract 2
quietly replace `link_flag' = 5 if `mlp_flag' == 3 & `link_flag' == 1

quietly drop `mlp_flag'

* Remove crosswalk rows that matched neither extract
quietly drop if year_`year_a' == . & year_`year_b' == .

* Discard the value label attached by joinby before applying the new one
label drop __MERGE

label define linktype_lbl ///
	1 "Record only in `year_a'" ///
	2 "Record only in `year_b'" ///
	3 "Linked record `year_a'-`year_b'" ///
	4 "Linked record `year_a' not found" ///
	5 "Linked record `year_b' not found"
label values `link_flag' linktype_lbl
label variable `link_flag' "Link type"
	
*****Save the combined file*****
* The linked data set is written next to the extracts, named after the first
* extract and the two census years.

quietly save "`path'/`extract_1'_`year_a'_`year_b'.dta", replace

*****Erase Extract to merge files*****
* Remove the temporary working copy of each of the two extracts.

forvalues which = 1/2 {
	erase "`path'/`extract_`which''_to_merge.dta"
}

*****Indicate records that have a linked record, but because of case selection, were not linked to crosswalk*****
* Row k of the matrix links holds the number of observations whose link type
* equals k. Each code is counted explicitly: tabulate's matcell() returns one
* row per value that actually occurs, so its row numbers stop matching the
* codes as soon as any link type is absent from the data.

matrix links = J(5, 1, 0)
forvalues link_type = 1/5 {
	quietly count if link_`year_a'_`year_b' == `link_type'
	matrix links[`link_type', 1] = r(N)
}

* Denominator of the percentages: every record that has a link in the
* crosswalk (types 3, 4 and 5); the percentages are missing when it is zero
local linked_total = links[3,1] + links[4,1] + links[5,1]

display _newline(2) as text %10.0fc links[3,1] " cases (" %3.2f (links[3,1] / `linked_total')*100 "% of linked records  )" ///
	_newline as result "in MLP `year_a'-`year_b' crosswalk linked" ///
	_newline(2) as error %10.0fc links[4,1] " cases from `year_a' in `extract_1' (" %3.2f (links[4,1] / `linked_total')*100 "% of linked records  )" ///
	_newline(1) %10.0fc links[5,1] " cases from `year_b' in `extract_2' (" %3.2f (links[5,1] / `linked_total')*100 "% of linked records  )" ///
	_newline(1) as result "in MLP `year_a'-`year_b' crosswalk not linked due to case selection"

*********************************************************************************************