import json
import glob
import requests
import openpyxl
import pandas as pd
country_map = {
"Eswatini (formerly Swaziland)": "Eswatini",
"Swaziland": "Eswatini",
"Macedonia": "North Macedonia",
"North Macedonia (formerly Macedonia)": "North Macedonia",
"Curacao": "Curaçao",
"Congo, Democratic Republic": "Congo (Kinshasa)",
"Bahamas": "Bahamas, The",
"China, People's Republic": "China",
"Congo, Republic": "Congo (Brazzaville)",
"Cote d'Ivoire": "Côte d'Ivoire",
"Gambia": "Gambia, The",
"Micronesia, Federated States": "Micronesia, Federated States of",
}
skip_countries = {
"Serbia and Montenegro (former)",
"Czechoslovakia (former)",
"Netherlands Antilles (former)",
"Soviet Union (former)",
"United States",
}
def uniformize(name):
try:
return country_map[name]
except KeyError:
return name
The Attorney General shall determine for the most recent previous 5-fiscal-year period for which data are available, the total number of aliens who are natives of each foreign state and who (i) were admitted or otherwise provided lawful permanent resident status (other than under this subsection) and (ii) were subject to the numerical limitations of section 1151(a) of this title (other than paragraph (3) thereof) or who were admitted or otherwise provided lawful permanent resident status as an immediate relative or other alien described in section 1151(b)(2) of this title.
immi_totals = {}
for fname in sorted(glob.glob("data/fy20*_table10d.xlsx")):
if fname == "data/fy2017_table10d.xlsx":
continue
print(fname)
dataframe = openpyxl.load_workbook(fname)
dataframe1 = dataframe.active
for row in range(0, dataframe1.max_row):
for col in dataframe1.iter_cols(1, 1):
if value := col[row].value:
if value.lower().strip() == "zimbabwe":
last_row_countries = row + 1
elif value.lower().strip() == "afghanistan":
first_row_countries = row + 1
for row in dataframe1.iter_rows(
min_row=first_row_countries, max_row=last_row_countries, max_col=5
):
country = uniformize(row[0].value) # uniform mapping
if country not in immi_totals:
immi_totals[country] = 0
for col in row[2:]:
try:
immi_totals[country] += col.value
except TypeError:
pass # "D" "-"
data/fy2018_table10d.xlsx data/fy2019_table10d.xlsx data/fy2020_table10d.xlsx data/fy2021_table10d.xlsx data/fy2022_table10d.xlsx
The Attorney General-
# Based on DV-24 instructions create a map of "region -> countries"
countries = {"AF": [], "AS": [], "EU": [], "NA": [], "OC": [], "SA": []}
for region in countries.keys():
with open(region, "r") as fp:
countries[region] = list(set([_.strip() for _ in fp.readlines()]))
# countries["NA"].append("United States")
# countries["SA"].pop(countries["SA"].index("Mexico"))
# Calculate immigration total and immigration for each region
region_immi_totals = {}
for region in countries.keys():
region_immi_totals[region] = 0
for country in countries[region]:
region_immi_totals[region] += immi_totals.get(country, 0)
total_immigrants = sum(region_immi_totals.values())
hi_adm_regions = {
region: region_immi_totals[region] > total_immigrants / 6.0
for region in region_immi_totals.keys()
}
import pprint
pprint.pprint(hi_adm_regions)
{'AF': False, 'AS': True, 'EU': False, 'NA': False, 'OC': False, 'SA': True}
for region in countries.keys():
for country in countries[region]:
if immi_totals.get(country, 0) > 50000:
print(f"High-admission state: {country} ({region})")
High-admission state: Nigeria (AF) High-admission state: Korea, South (AS) High-admission state: Philippines (AS) High-admission state: China (AS) High-admission state: Bangladesh (AS) High-admission state: Pakistan (AS) High-admission state: Vietnam (AS) High-admission state: India (AS) High-admission state: Canada (NA) High-admission state: Dominican Republic (SA) High-admission state: Brazil (SA) High-admission state: El Salvador (SA) High-admission state: Haiti (SA) High-admission state: Jamaica (SA) High-admission state: Honduras (SA) High-admission state: Mexico (SA) High-admission state: Colombia (SA) High-admission state: Venezuela (SA)
# Let's put it all together for convenience and add population data
# First get population from census
r = requests.get(
"https://api.census.gov/data/timeseries/idb/5year?get=NAME,POP&YR=2022"
)
data = r.json()
cols = data.pop(0)
populations = {row[0]: int(row[1]) for row in data}
# There are some countries that census doesn't include, so let's grab
# random data from wiki...
with open("extra_populations.json", "r") as fp:
populations.update(json.load(fp))
data = []
for region in countries.keys():
for country in countries[region]:
data.append([country, region, immi_totals.get(country, 0), populations[country], hi_adm_regions[region], immi_totals.get(country, 0) > 50000])
df = pd.DataFrame(data, columns=["country", "region", "immi5y", "population", "hi_adm_reg", "hi_adm_state"])
df.head()
df.to_csv("quota_data.csv")
The Attorney General shall determine the percentage of the total of the numbers determined under subparagraph (A) that are numbers for foreign states in high-admission regions.
total_immigration_hi_adm_region = df[df.hi_adm_reg].immi5y.sum() / df.immi5y.sum()
The Attorney General shall determine-
regions_pop_excl_hi_adm_states = {}
for region in df.region.unique():
regions_pop_excl_hi_adm_states[region] = df[(~df.hi_adm_state) & (df.region == region)].population.sum()
print("The total population of each region not including the population of any high-admission state")
pprint.pprint(regions_pop_excl_hi_adm_states)
The total population of each region not including the population of any high-admission state {'AF': 1191601402, 'AS': 1007570959, 'EU': 929039243, 'NA': 402272, 'OC': 43407916, 'SA': 186654532}
total_pop_low_adm_reg = 0
total_pop_hi_adm_reg = 0
population_ratio = {}
for region, hi_admission in hi_adm_regions.items():
if hi_admission:
total_pop_hi_adm_reg += regions_pop_excl_hi_adm_states[region]
else:
total_pop_low_adm_reg += regions_pop_excl_hi_adm_states[region]
for region, population in regions_pop_excl_hi_adm_states.items():
if hi_adm_regions[region]:
pop_ratio = population / total_pop_hi_adm_reg
else:
pop_ratio = population / total_pop_low_adm_reg
print(region, pop_ratio, hi_adm_regions[region])
population_ratio[region] = pop_ratio
AF 0.5505329036965932 False AS 0.8437024386042016 True EU 0.42922630943402906 False NA 0.00018585407155792853 False OC 0.02005493279781976 False SA 0.1562975613957984 True
total_visas = 55000
final_quota = {}
for region in hi_adm_regions.keys():
if hi_adm_regions[region]:
quota = (1 - total_immigration_hi_adm_region) * population_ratio[region]
else:
quota = total_immigration_hi_adm_region * population_ratio[region]
print(region, round(quota * total_visas), round(quota * 100.0, 2))
final_quota[region] = quota
AF 24544 44.62 AS 8790 15.98 EU 19135 34.79 NA 8 0.02 OC 894 1.63 SA 1628 2.96