macro drop _all

* ---------------------------------------------------------------------------
* REQUIRED SETTINGS
* ---------------------------------------------------------------------------

* Folder that holds the IPUMS extracts and the MLP crosswalk file(s).
local path ""

* Names of the IPUMS extract Stata files to link through the MLP crosswalk.
*   - Give the file name WITHOUT the .dta extension.
*   - extract_1 must be the earliest census year of the set
*     (for a 1900-1910 crosswalk, extract_1 is the 1900 data).
*   - More extracts can be added as extract_3, extract_4, and so on,
*     up to extract_10.
local extract_1 ""
local extract_2 ""

* ---------------------------------------------------------------------------
* OPTIONAL SETTINGS
* ---------------------------------------------------------------------------

* Layout of the final dataset.
*   blank  = wide format, one record per person (the default)
*   "long" = long format, one record per person-year
local format ""

* Household selection.
*   blank = default behaviour
*   "yes" = select households in which at least one individual is linked
local hh_select ""

*********************************************************************************************
*********************************DO NOT EDIT BELOW THIS LINE*********************************
*********************************************************************************************

*****Open the IPUMS extracts, rename variables*****
* For every extract named in extract_1 through extract_10 this section
*   1. checks, on the first record only, that YEAR and HISTID are present,
*   2. loads the full extract and takes the census year from min(year),
*   3. appends _<year> to every variable name,
*   4. removes duplicate HISTID records from the 1860 and 1870 data,
*   5. saves the result as <extract>_to_merge.dta in the path folder.
* The census year of the k-th extract is stored in local macro year_k.
local version "2_0"
local extract_list `extract_1' `extract_2' `extract_3' `extract_4' `extract_5' `extract_6' `extract_7' `extract_8' `extract_9' `extract_10'
local year_list = ""
local a = 1

foreach extract in `extract_list' {
    display "Open `extract' extract, rename variables"

    * Read one record first so that a missing key variable is reported
    * before the full extract is loaded.
    use "`path'/`extract'.dta" in 1, clear
    capture confirm variable year
    if _rc == 111 {
        display as error _newline(1) "Missing YEAR variable in `extract', cannot link datasets"
        * Repeat the check without capture so the do-file stops with r(111).
        quietly confirm variable year
    }
    capture confirm variable histid
    if _rc == 111 {
        display as error _newline(1) "Missing HISTID variable in `extract', cannot link datasets"
        quietly confirm variable histid
    }

    use "`path'/`extract'.dta", clear
    quietly summarize year

    local year = r(min)

    local year_`a' = `year'
    local a = `a' + 1

    rename * *_`year'

    * Every variable now carries the _<year> suffix. The full names are used
    * from here on so that nothing depends on variable-name abbreviation
    * (which fails under "set varabbrev off" or when two variables share a
    * prefix).
    local histid_v histid_`year'
    local serial_v serial_`year'
    local pernum_v pernum_`year'

    *****Sort file by HISTID, check for HISTID consistency for 1860/1870*****
    if `year' == 1860 {
        * Keep a single record per HISTID.
        sort `histid_v'
        quietly drop if `histid_v' == `histid_v'[_n-1]
    }
    else if `year' == 1870 {
        * Household size, taken as the highest PERNUM in the household.
        quietly bysort `serial_v': egen hh_count = max(`pernum_v')

        * dupe_hh = 1 when every record of the household shares its HISTID
        * with some other record, 0 when the household holds at least one
        * non-duplicated record.
        * NOTE(review): dupe_hh was referenced below but never created, so the
        * 1870 branch stopped with r(111). This definition is inferred from
        * the comments on the rules below - confirm against the MLP
        * documentation.
        quietly bysort `histid_v': gen byte is_dupe = _N > 1
        quietly bysort `serial_v': egen dupe_hh = min(is_dupe)

        sort `histid_v' `serial_v' `pernum_v'
        quietly gen dupe_drop = 0

        ***Drop latter duplicate within same household
        quietly replace dupe_drop = 1 if `histid_v' == `histid_v'[_n-1] & `serial_v' == `serial_v'[_n-1] & `pernum_v' > `pernum_v'[_n-1]
        quietly replace dupe_drop = 1 if `histid_v' == `histid_v'[_n+1] & `serial_v' == `serial_v'[_n+1] & `pernum_v' > `pernum_v'[_n+1]

        ***Drop duplicates in different households where one household contains non-duplicates
        quietly replace dupe_drop = 1 if `histid_v' == `histid_v'[_n-1] & `serial_v' ~= `serial_v'[_n-1] & `pernum_v' == `pernum_v'[_n-1] & dupe_hh == 1 & dupe_hh[_n-1] == 0
        quietly replace dupe_drop = 1 if `histid_v' == `histid_v'[_n+1] & `serial_v' ~= `serial_v'[_n+1] & `pernum_v' == `pernum_v'[_n+1] & dupe_hh == 1 & dupe_hh[_n+1] == 0

        ***Drop duplicates in different households where one contains non-duplicates but one household is smaller (smaller household incorrect)
        quietly replace dupe_drop = 1 if `histid_v' == `histid_v'[_n-1] & `serial_v' ~= `serial_v'[_n-1] & `pernum_v' == `pernum_v'[_n-1] & dupe_hh == 1 & dupe_hh[_n-1] == 1 & hh_count < hh_count[_n-1]
        quietly replace dupe_drop = 1 if `histid_v' == `histid_v'[_n+1] & `serial_v' ~= `serial_v'[_n+1] & `pernum_v' == `pernum_v'[_n+1] & dupe_hh == 1 & dupe_hh[_n+1] == 1 & hh_count < hh_count[_n+1]

        ***Drop duplicates of single-person households
        quietly replace dupe_drop = 1 if `histid_v' == `histid_v'[_n-1] & `serial_v' ~= `serial_v'[_n-1] & `pernum_v' == `pernum_v'[_n-1] & dupe_hh == 1 & dupe_hh[_n-1] == 1 & hh_count == 1
        quietly drop if dupe_drop == 1

        * Remove the helper variables so only suffixed extract variables remain.
        drop dupe_drop hh_count is_dupe dupe_hh
    }
    else {
        sort `histid_v'
    }

    quietly save "`path'/`extract'_to_merge.dta", replace
}

* Census years of the prepared extracts, in extract order. Unused slots
* expand to nothing, so the word count equals the number of extracts.
local year_list `year_1' `year_2' `year_3' `year_4' `year_5' `year_6' `year_7' `year_8' `year_9' `year_10' 

*****Load MLP Crosswalk*****
* Loads the HISTID columns for the requested years plus HIK.
*   2 extracts   : keep rows linked in BOTH years
*   3-9 extracts : keep rows present in at least two of the years
*   10 extracts  : load the whole crosswalk unfiltered
local w: word count `year_list'
if `w' == 0 {
    display as error "No IPUMS extracts are defined in extract_1 ... extract_10, nothing to link"
    exit 198
}

* The crosswalk is expected in the path folder. Fall back to the working
* directory, where earlier versions of this script looked for it.
local crosswalk "`path'/mlp_census_crosswalk_v`version'.dta"
capture confirm file "`crosswalk'"
if _rc {
    local crosswalk "mlp_census_crosswalk_v`version'.dta"
}

display "Load MLP Crosswalk"
if `w' == 10 {
    use "`crosswalk'", clear
}
else {
    * With exactly two years both HISTIDs must be present, with more years
    * any one is enough at load time. The operator is unused for one year.
    local operator "|"
    if `w' == 2 {
        local operator "&"
    }

    * Build the variable list and the if-qualifier by macro copy, which is
    * not subject to string-expression length limits. For a string variable
    * !missing(x) is the same test as x ~= "".
    local mlp_varlist
    local mlp_varlist_if
    foreach year_mlp of local year_list {
        local mlp_varlist `mlp_varlist' histid_`year_mlp'
        if "`mlp_varlist_if'" == "" {
            local mlp_varlist_if "if !missing(histid_`year_mlp')"
        }
        else {
            local mlp_varlist_if "`mlp_varlist_if' `operator' !missing(histid_`year_mlp')"
        }
    }
    use `mlp_varlist' hik `mlp_varlist_if' using "`crosswalk'", clear

    if `w' > 2 {
        * Count the years in which each row has a HISTID and keep rows that
        * appear in two or more of them.
        quietly gen universe = 0
        foreach year_mlp of local year_list {
            quietly replace universe = universe + 1 if !missing(histid_`year_mlp')
        }
        quietly keep if universe >= 2
        drop universe
    }
}

*****Merge the IPUMS extracts to the MLP crosswalk*****
* The prepared extracts are joined to the crosswalk one census year at a
* time on that year's HISTID. joinby records the match status in
* link_<year>: 1 = crosswalk row only, 2 = extract row only, 3 = matched.
forvalues k = 1 / `w' {
    local yr `year_`k''
    local link link_`yr'
    local j = `k' + 1

    display "Merge `yr' IPUMS Extract to MLP Crosswalk"
    quietly joinby histid_`yr' using "`path'/`extract_`k''_to_merge.dta", unmatched(both) _merge(`link')

    if "`hh_select'" != "yes" {
        * Default: discard extract records that have no crosswalk row.
        quietly drop if `link' == 2
    }
    else {
        * Rank each record (matched = 3, crosswalk only = 2, extract only = 1)
        * and keep every record of a household whose best rank exceeds 1.
        quietly gen flag = cond(`link' == 3, 3, cond(`link' == 1, 2, cond(`link' == 2, 1, .)))
        quietly bysort serial_`yr': egen universe = max(flag)
        quietly keep if universe > 1
        drop universe flag
    }

    * Order the data by the HISTID of the next year to be joined.
    if `k' < `w' {
        sort histid_`year_`j''
    }
    drop `link'
}

*****Convert data to long format*****
* Runs only when the format macro is "long". The reshape stubs are taken
* from the variables of the first extract: every name ending in _<year_1>
* is cut back to its trailing underscore, which gives stubs such as histid_.
if "`format'" == "long" {
    display "Convert data to a long format"

    * Work on a one-record copy so the variable names can be edited freely.
    preserve
    quietly keep if _n == 1
    capture drop year*
    capture keep *_`year_1'
    rename *_`year_1' *_
    quietly describe, varlist
    * Copy the list rather than evaluating it with "=": an expression result
    * is subject to the string-expression length limit, which older Stata
    * releases set low enough to truncate a long variable list.
    local varlist `r(varlist)'
    restore

    * The suffixed YEAR variables are dropped because reshape creates a new
    * variable named year from the suffixes.
    quietly capture drop year*

    * NOTE(review): reshape needs hik to identify rows uniquely. With
    * hh_select set to "yes", extract-only household members are kept and
    * presumably carry no hik - verify that this combination is supported.
    reshape long `varlist', i(hik) j(year)

    * Strip the trailing underscore left over from the stub names.
    rename *_ *
}

*****Save the combined file*****
* The output file is named <extract_1>_<year_1>-<year_2>-... and is written
* to the path folder. Any year equal to year_1 is not repeated in the name.
local year_save "`year_1'"
foreach year_mlp of local year_list {
    if `year_mlp' == `year_1' continue
    local year_save "`year_save'-`year_mlp'"
}

quietly save "`path'/`extract_1'_`year_save'.dta", replace

*****Erase Extract to merge files*****
* Delete the intermediate <extract>_to_merge.dta file written for each extract.
foreach tmp_extract of local extract_list {
    erase "`path'/`tmp_extract'_to_merge.dta"
}
*********************************************************************************************