In [1]:
# import our map function
from interactive_maps import choropleth_map
# others
import pandas as pd
import geopandas as gpd
from glob import glob

Data: "Recorded alcohol per capita consumption" from the Global Health Observatory data repository

In [2]:
# Read every CSV matching "data-verbose*.csv" in the working directory and
# stack them into one frame. The per-file frames are collected first and
# concatenated once: DataFrame.append was removed in pandas 2.0, and appending
# inside the loop re-copied the accumulated frame on every iteration.
frames = [pd.read_csv(filename) for filename in glob("data-verbose*.csv")]
# pd.concat raises on an empty list, so fall back to an empty frame when no
# file matches the pattern (the same result the append loop produced).
data = pd.concat(frames) if frames else pd.DataFrame()
data.columns
Out[2]:
Index(['GHO (CODE)', 'GHO (DISPLAY)', 'GHO (URL)', 'DATASOURCE (CODE)',
       'DATASOURCE (DISPLAY)', 'DATASOURCE (URL)', 'PUBLISHSTATE (CODE)',
       'PUBLISHSTATE (DISPLAY)', 'PUBLISHSTATE (URL)', 'YEAR (CODE)',
       'YEAR (DISPLAY)', 'YEAR (URL)', 'REGION (CODE)', 'REGION (DISPLAY)',
       'REGION (URL)', 'COUNTRY (CODE)', 'COUNTRY (DISPLAY)', 'COUNTRY (URL)',
       'ALCOHOLTYPE (CODE)', 'ALCOHOLTYPE (DISPLAY)', 'ALCOHOLTYPE (URL)',
       'Display Value', 'Numeric', 'Low', 'High', 'StdErr', 'StdDev',
       'Comments'],
      dtype='object')
In [3]:
# Distinct beverage categories present in the data
alcohol_type = data["ALCOHOLTYPE (DISPLAY)"]
alcohol_type.unique()
Out[3]:
array(['Spirits', 'Wine', 'Other alcoholic beverages', 'All types',
       'Beer'], dtype=object)
In [4]:
# Keep only the rows for all types of alcohol combined, reduce the frame to
# the columns of interest and give them short names. Written as one chain that
# builds a new frame instead of calling inplace=True on a filtered slice.
data = (
    data.loc[
        data["ALCOHOLTYPE (DISPLAY)"] == 'All types',
        ['YEAR (CODE)', 'COUNTRY (CODE)', 'COUNTRY (DISPLAY)', 'Numeric'],
    ]
    # the filter leaves gaps in the index; renumber the rows from 0
    .reset_index(drop=True)
    .rename(columns={
        'YEAR (CODE)': 'Year',
        'COUNTRY (CODE)': 'Code',
        'COUNTRY (DISPLAY)': 'Country',
        'Numeric': 'Value',
    })
)
data.head()
Out[4]:
Year Code Country Value
0 1966 COL Colombia 3.96
1 1965 SGP Singapore 2.14
2 1972 GBR United Kingdom of Great Britain and Northern I... 9.14
3 1967 MUS Mauritius 2.23
4 1976 CZE Czechia 13.62
In [5]:
# Countries that have at least one row with a missing value
missing_value = data["Value"].isna()
data.loc[missing_value, "Country"].unique()
Out[5]:
array([], dtype=object)
In [6]:
# read shapes
geo = gpd.read_file("../ne_110m_admin_0_countries.shp")[['ADMIN', 'ADM0_A3', 'geometry']]
geo.columns = ['Country', 'Code', 'geometry']
geo.head()
Out[6]:
Country Code geometry
0 Fiji FJI (POLYGON ((180 -16.06713266364245, 180 -16.555...
1 United Republic of Tanzania TZA POLYGON ((33.90371119710453 -0.950000000000000...
2 Western Sahara SAH POLYGON ((-8.665589565454809 27.65642588959236...
3 Canada CAN (POLYGON ((-122.84 49.00000000000011, -122.974...
4 United States of America USA (POLYGON ((-122.84 49.00000000000011, -120 49....
In [7]:
# Remove the Antarctica shape from the map
geo = geo[geo['Country'] != 'Antarctica']

Preparing the plot

In [8]:
# Summary statistics of the consumption values
value = data["Value"]
value.describe()
Out[8]:
count    9465.000000
mean        4.849320
std         4.353969
min         0.000000
25%         1.200000
50%         3.700000
75%         7.640000
max        26.040000
Name: Value, dtype: float64
In [9]:
# Bins used to colour each country. The last value (100) is far above the
# observed maximum of 26.04, so every value falls below it.
bins = [0, 1, 3, 6, 10, 15, 20, 100.0]
In [10]:
# Draw the interactive map from the consumption table and the country shapes.
# NOTE(review): the "{}" in map_title is presumably filled in with the selected
# year by choropleth_map — confirm against interactive_maps.
choropleth_map(
    data,
    geo,
    bins=bins,
    bin_labels="L",
    notebook=True,
    size=(900, 450),
    value_title="Litres of pure alcohol per capita",
    value_axis_label="Litres of pure alcohol per capita",
    map_title="Alcohol consumption per capita in {}",
    chart_title="Evolution of alcohol consumption per capita",
    map_palette="YlGnBu",
    default_map_year=2016,
)