In [1]:
import json
import glob
import requests
import openpyxl
import pandas as pd

country_map = { 
    "Eswatini (formerly Swaziland)": "Eswatini",
    "Swaziland": "Eswatini",
    "Macedonia": "North Macedonia",
    "North Macedonia (formerly Macedonia)": "North Macedonia",
    "Curacao": "Curaçao",
    "Congo, Democratic Republic": "Congo (Kinshasa)",
    "Bahamas": "Bahamas, The",
    "China, People's Republic": "China",
    "Congo, Republic": "Congo (Brazzaville)",
    "Cote d'Ivoire": "Côte d'Ivoire",
    "Gambia": "Gambia, The",
    "Micronesia, Federated States": "Micronesia, Federated States of",
}

skip_countries = { 
    "Serbia and Montenegro (former)",
    "Czechoslovakia (former)",
    "Netherlands Antilles (former)",
    "Soviet Union (former)",
    "United States",
}

def uniformize(name):
    try:
        return country_map[name]
    except KeyError:
        return name

(A) Determination of preference immigration¶

The Attorney General shall determine for the most recent previous 5-fiscal-year period for which data are available, the total number of aliens who are natives of each foreign state and who (i) were admitted or otherwise provided lawful permanent resident status (other than under this subsection) and (ii) were subject to the numerical limitations of section 1151(a) of this title (other than paragraph (3) thereof) or who were admitted or otherwise provided lawful permanent resident status as an immediate relative or other alien described in section 1151(b)(2) of this title.

In [2]:
immi_totals = {}

for fname in sorted(glob.glob("data/fy20*_table10d.xlsx")):
    if fname == "data/fy2017_table10d.xlsx":
        continue
    print(fname)
    dataframe = openpyxl.load_workbook(fname)
    dataframe1 = dataframe.active

    for row in range(0, dataframe1.max_row):
        for col in dataframe1.iter_cols(1, 1): 
            if value := col[row].value:
                if value.lower().strip() == "zimbabwe":
                    last_row_countries = row + 1 
                elif value.lower().strip() == "afghanistan":
                    first_row_countries = row + 1 

    for row in dataframe1.iter_rows(
        min_row=first_row_countries, max_row=last_row_countries, max_col=5
    ):
        country = uniformize(row[0].value)  # uniform mapping
        if country not in immi_totals:
            immi_totals[country] = 0
        for col in row[2:]:
            try:
                immi_totals[country] += col.value
            except TypeError:
                pass  # "D" "-"
data/fy2018_table10d.xlsx
data/fy2019_table10d.xlsx
data/fy2020_table10d.xlsx
data/fy2021_table10d.xlsx
data/fy2022_table10d.xlsx

(B) Identification of high-admission and low-admission regions and high-admission and low-admission states¶

The Attorney General-

  • (i) shall identify-
    • (I) each region (each in this paragraph referred to as a "high-admission region") for which the total of the numbers determined under subparagraph (A) for states in the region is greater than 1/6 of the total of all such numbers, and
    • (II) each other region (each in this paragraph referred to as a "low-admission region"); and
In [3]:
# Based on DV-24 instructions create a map of "region -> countries"

countries = {"AF": [], "AS": [], "EU": [], "NA": [], "OC": [], "SA": []}
for region in countries.keys():
    with open(region, "r") as fp:
        countries[region] = list(set([_.strip() for _ in fp.readlines()]))
        
# countries["NA"].append("United States")
# countries["SA"].pop(countries["SA"].index("Mexico"))
In [4]:
# Calculate immigration total and immigration for each region

region_immi_totals = {}
for region in countries.keys():
    region_immi_totals[region] = 0
    for country in countries[region]:
        region_immi_totals[region] += immi_totals.get(country, 0)
        
total_immigrants = sum(region_immi_totals.values())
In [5]:
hi_adm_regions = {
    region: region_immi_totals[region] > total_immigrants / 6.0
    for region in region_immi_totals.keys()
}
import pprint
pprint.pprint(hi_adm_regions)
{'AF': False, 'AS': True, 'EU': False, 'NA': False, 'OC': False, 'SA': True}
  • (ii) shall identify-
    • (I) each foreign state for which the number determined under subparagraph (A) is greater than 50,000 (each such state in this paragraph referred to as a "high-admission state"), and
    • (II) each other foreign state (each such state in this paragraph referred to as a "low-admission state").
In [6]:
for region in countries.keys():
    for country in countries[region]:
        if immi_totals.get(country, 0) > 50000:
            print(f"High-admission state: {country} ({region})")
High-admission state: Nigeria (AF)
High-admission state: Korea, South (AS)
High-admission state: Philippines (AS)
High-admission state: China (AS)
High-admission state: Bangladesh (AS)
High-admission state: Pakistan (AS)
High-admission state: Vietnam (AS)
High-admission state: India (AS)
High-admission state: Canada (NA)
High-admission state: Dominican Republic (SA)
High-admission state: Brazil (SA)
High-admission state: El Salvador (SA)
High-admission state: Haiti (SA)
High-admission state: Jamaica (SA)
High-admission state: Honduras (SA)
High-admission state: Mexico (SA)
High-admission state: Colombia (SA)
High-admission state: Venezuela (SA)
In [7]:
# Let's put it all together for convenience and add population data

# First get population from census
r = requests.get(
    "https://api.census.gov/data/timeseries/idb/5year?get=NAME,POP&YR=2022"
)
data = r.json()
cols = data.pop(0)
populations = {row[0]: int(row[1]) for row in data}

# There are some countries that census doesn't include, so let's grab 
# random data from wiki...
with open("extra_populations.json", "r") as fp:
    populations.update(json.load(fp))

data = []
for region in countries.keys():
    for country in countries[region]:
        data.append([country, region, immi_totals.get(country, 0), populations[country], hi_adm_regions[region], immi_totals.get(country, 0) > 50000])
        
df = pd.DataFrame(data, columns=["country", "region", "immi5y", "population", "hi_adm_reg", "hi_adm_state"])
df.head()
df.to_csv("quota_data.csv")

(C) Determination of percentage of worldwide immigration attributable to high-admission regions¶

The Attorney General shall determine the percentage of the total of the numbers determined under subparagraph (A) that are numbers for foreign states in high-admission regions.

In [8]:
total_immigration_hi_adm_region = df[df.hi_adm_reg].immi5y.sum() / df.immi5y.sum()

(D) Determination of regional populations excluding high-admission states and ratios of populations of regions within low-admission regions and high-admission regions¶

The Attorney General shall determine-

  • (i) based on available estimates for each region, the total population of each region not including the population of any high-admission state;
In [9]:
regions_pop_excl_hi_adm_states = {}
for region in df.region.unique():
    regions_pop_excl_hi_adm_states[region] = df[(~df.hi_adm_state) & (df.region == region)].population.sum()
print("The total population of each region not including the population of any high-admission state")
pprint.pprint(regions_pop_excl_hi_adm_states)
The total population of each region not including the population of any high-admission state
{'AF': 1191601402,
 'AS': 1007570959,
 'EU': 929039243,
 'NA': 402272,
 'OC': 43407916,
 'SA': 186654532}
  • (ii) for each low-admission region, the ratio of the population of the region determined under clause (i) to the total of the populations determined under such clause for all the low-admission regions; and
  • (iii) for each high-admission region, the ratio of the population of the region determined under clause (i) to the total of the populations determined under such clause for all the high-admission regions.
In [10]:
total_pop_low_adm_reg = 0
total_pop_hi_adm_reg = 0
population_ratio = {}
for region, hi_admission in hi_adm_regions.items():
    if hi_admission:
        total_pop_hi_adm_reg += regions_pop_excl_hi_adm_states[region]
    else:
        total_pop_low_adm_reg += regions_pop_excl_hi_adm_states[region]

for region, population in regions_pop_excl_hi_adm_states.items():
    if hi_adm_regions[region]:
        pop_ratio = population / total_pop_hi_adm_reg
    else:
        pop_ratio = population / total_pop_low_adm_reg
    
    print(region, pop_ratio, hi_adm_regions[region])
    population_ratio[region] = pop_ratio
AF 0.5505329036965932 False
AS 0.8437024386042016 True
EU 0.42922630943402906 False
NA 0.00018585407155792853 False
OC 0.02005493279781976 False
SA 0.1562975613957984 True

(E) Distribution of visas¶

  • (i) No visas for natives of high-admission states The percentage of visas made available under this paragraph to natives of a high-admission state is 0.
  • (ii) For low-admission states in low-admission regions Subject to clauses (iv) and (v), the percentage of visas made available under this paragraph to natives (other than natives of a high-admission state) in a low-admission region is the product of-
    • (I) the percentage determined under subparagraph (C), and
    • (II) the population ratio for that region determined under subparagraph (D)(ii).
  • (iii) For low-admission states in high-admission regions Subject to clauses (iv) and (v), the percentage of visas made available under this paragraph to natives (other than natives of a high-admission state) in a high-admission region is the product of-
    • (I) 100 percent minus the percentage determined under subparagraph (C), and
    • (II) the population ratio for that region determined under subparagraph (D)(iii).
In [11]:
total_visas = 55000
final_quota = {}

for region in hi_adm_regions.keys():
    if hi_adm_regions[region]:
        quota = (1 - total_immigration_hi_adm_region) * population_ratio[region]
    else:
        quota = total_immigration_hi_adm_region * population_ratio[region]
    print(region, round(quota * total_visas), round(quota * 100.0, 2))
    final_quota[region] = quota
AF 24544 44.62
AS 8790 15.98
EU 19135 34.79
NA 8 0.02
OC 894 1.63
SA 1628 2.96
In [ ]: