Aplicación 1.4: Datos georreferenciados

Datos espacio-temporales: Emisiones de CO2 al nivel mundial

En esta aplicación se analizarán los datos de la publicación de 2022 del Global Carbon Project sobre emisiones de gases de efecto invernadero a nivel mundial y nacional, con el objetivo de identificar los países más contaminantes, tanto en términos de emisiones totales de CO2 como en términos per cápita. Los datos y la información sobre los mismos pueden encontrarse en la siguiente dirección: https://zenodo.org/record/7215364.

Igual que en la aplicación anterior, previamente al análisis estadístico deseado se usarán varias técnicas de data wrangling para “preparar” los datos para dicho estudio antes de su representación gráfica.

Code

# Lectura de librerías
library(tidyverse)
library(plotly)
library(sf)
library(geojsonsf)
library(leaflet)
library(viridis)
library(RColorBrewer)
# Read the Global Carbon Project emissions table
df <- read_csv("data/GCPdbase.csv")
# Spell out the "USA" label and drop the two aggregate records ("Global" and
# "International Transport") so that Country only holds individual countries
df <- df %>%
  mutate(
    Country = if_else(Country == "USA", "United States of America", Country)
  ) %>%
  filter(!(Country %in% c("Global", "International Transport")))
# Restrict the study period to 1970-2021 (both ends included)
df1 <- df %>%
  filter(between(Year, 1970, 2021))
# Top 10 countries by mean total emissions over 1970-2021.
# slice_max() replaces the superseded top_n() and returns the rows already
# ordered from largest to smallest, so no separate arrange() is needed to
# print the ranking.
top_10_emisiones_total <- df1 %>%
  group_by(Country) %>%
  summarise(mean_total = mean(Total)) %>%
  slice_max(mean_total, n = 10)
top_10_emisiones_total

# A tibble: 10 × 2
   Country                  mean_total
   <chr>                         <dbl>
 1 United States of America      5229.
 2 China                         4602.
 3 Russia                        1806.
 4 Japan                         1100.
 5 India                         1018.
 6 Germany                        933.
 7 United Kingdom                 544.
 8 Canada                         492.
 9 Ukraine                        448.
10 France                         411.

Code

# Area chart: yearly totals of the ten selected countries, one filled band
# per country
datos_top_10_emisiones_total <- filter(
  df1,
  Country %in% top_10_emisiones_total$Country
)
ggplot(
  data = datos_top_10_emisiones_total,
  mapping = aes(x = Year, y = Total, fill = Country)
) +
  geom_area()

Code

# Restrict the data to the year 2021
df2 <- df %>% filter(Year == 2021)
# Top 5 emitters in 2021 and their share of the sum over all listed countries.
# The share is computed over every country BEFORE slicing. The ranking is done
# on Total rather than on the rounded Percent, because rounding to three
# decimals can create artificial ties and return more than five rows.
# slice_max() (which replaces the superseded top_n()) returns the rows sorted
# from largest to smallest.
top5_emisiones_2021 <- df2 %>%
  select(Country, Total) %>%
  mutate(Percent = round(Total / sum(Total), digits = 3)) %>%
  slice_max(Total, n = 5) %>%
  mutate(Lab_percent = scales::percent(Percent))
top5_emisiones_2021

# A tibble: 5 × 4
  Country                   Total Percent Lab_percent
  <chr>                     <dbl>   <dbl> <chr>      
1 China                    11472.   0.318 31.8%      
2 United States of America  5007.   0.139 13.9%      
3 India                     2710.   0.075 7.5%       
4 Russia                    1756.   0.049 4.9%       
5 Japan                     1067.   0.03  3.0%

Code

# Pie chart: a single stacked bar wrapped onto polar coordinates, with each
# slice labelled at the middle of its segment
ggplot(
  data = top5_emisiones_2021,
  mapping = aes(x = "", y = Percent, fill = Country)
) +
  geom_col() +
  geom_text(
    mapping = aes(label = Lab_percent),
    position = position_stack(vjust = 0.5)
  ) +
  coord_polar(theta = "y")

Code

# Top 10 countries by mean per-capita emissions over 1970-2021.
# slice_max() replaces the superseded top_n() and returns the rows already
# ordered from largest to smallest, so the ranking can be printed directly.
top_10_emisiones_per_capita <- df1 %>%
  group_by(Country) %>%
  summarise(mean_per_capita = mean(`Per Capita`)) %>%
  slice_max(mean_per_capita, n = 10)
top_10_emisiones_per_capita

# A tibble: 10 × 2
   Country                   mean_per_capita
   <chr>                               <dbl>
 1 Sint Maarten (Dutch part)            52.3
 2 Qatar                                47.9
 3 Curaçao                              40.4
 4 United Arab Emirates                 32.9
 5 Luxembourg                           25.2
 6 Brunei Darussalam                    24.3
 7 Kuwait                               23.5
 8 Bahrain                              22.8
 9 United States of America             19.8
10 Trinidad and Tobago                  19.5

Code

# Line chart: yearly per-capita emissions of the ten selected countries,
# one coloured line per country
datos_top_10_emisiones_per_capita <- filter(
  df1,
  Country %in% top_10_emisiones_per_capita$Country
)
ggplot(
  data = datos_top_10_emisiones_per_capita,
  mapping = aes(
    x = Year,
    y = `Per Capita`,
    group = Country,
    color = Country
  )
) +
  geom_line()

Code

# World map of mean per-capita emissions: compute one mean per country over
# the 1970-2021 subset, then read the country polygons as an sf object
datos_emisiones_per_capita_medias <- summarise(
  group_by(df1, Country),
  CO2pc = mean(`Per Capita`)
)
map <- geojson_sf("data/GCPmap.geojson")
class(map)

[1] "sf"         "data.frame"

Code

# Inspect the structure of the sf object (columns and geometry type)
str(map)

Classes 'sf' and 'data.frame':  177 obs. of  2 variables:
 $ name    : chr  "Afghanistan" "Angola" "Albania" "United Arab Emirates" ...
 $ geometry:sfc_GEOMETRY of length 177; first list element: List of 1
  ..$ : num [1:69, 1:2] 61.2 60.8 60.5 61 60.5 ...
  ..- attr(*, "class")= chr [1:3] "XY" "POLYGON" "sfg"
 - attr(*, "sf_column")= chr "geometry"

Code

# Attach the mean per-capita emissions to the country polygons. The emissions
# table is keyed by Country while the map uses name, so the key is renamed
# first; inner_join() keeps only the names present in both tables.
datos_map <- rename(datos_emisiones_per_capita_medias, name = Country)
datos_geo <- map %>%
  inner_join(datos_map, by = "name")
# Static choropleth of the joined data
ggplot(data = datos_geo) +
  geom_sf(mapping = aes(fill = CO2pc)) +
  theme_bw() +
  labs(title = "Emisiones de CO2 per capita medias") +
  scale_fill_viridis(option = "magma")

Code

# Interactive map
# leaflet package for R:
# https://rstudio.github.io/leaflet/
# Colour palettes by variable type:
# palNumeric <- colorNumeric("viridis", domain = shape$vcontinua)
# palBin <- colorBin("magma", domain = shape$vdiscreta, bins = 4)
# palQuantile <- colorQuantile("Spectral", domain = shape$numerica, n=4)
# palFactor <- colorFactor("RdBu", domain = shape$vcategorica)
palNumeric <- colorNumeric("YlGnBu", domain = datos_geo$CO2pc)
# Only one base layer is requested: adding the default OpenStreetMap tiles
# with addTiles() underneath CartoDB.Positron would just load a second tile
# set that the provider tiles cover.
# The formulas refer to the columns of the data passed to leaflet(), so the
# data frame does not have to be named again inside them.
leaflet(datos_geo) %>%
  setView(0, 0, zoom = 2) %>%
  addProviderTiles("CartoDB.Positron") %>%
  addPolygons(
    color = "#444444",
    weight = 1,
    smoothFactor = 0.5,
    opacity = 1.0,
    fillOpacity = 0.7,
    fillColor = ~palNumeric(CO2pc),
    layerId = ~name,
    highlightOptions = highlightOptions(
      color = "white",
      weight = 1,
      bringToFront = TRUE
    )
  )

Code

# Lectura de librerías
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.style.use('ggplot')
import plotly.graph_objects as go
import geopandas as gpd
import folium
# Load the Global Carbon Project emissions table
df = pd.read_csv("data/GCPdbase.csv")
# Spell out the "USA" label as "United States of America"
df["Country"] = df["Country"].replace({"USA": "United States of America"})
# Drop the "Global" and "International Transport" aggregates so that the
# Country column only holds individual countries
df = df.loc[~df["Country"].isin(["Global", "International Transport"])]
# Restrict the study period to 1970-2021 (both ends included)
df1 = df.loc[df["Year"].between(1970, 2021)]
# Names of the 10 countries with the largest mean total emissions
top_10_emisiones_total = (
    df1.groupby("Country")["Total"]
    .mean()
    .nlargest(10)
    .index
)
top_10_emisiones_total

Index(['United States of America', 'China', 'Russia', 'Japan', 'India',
       'Germany', 'United Kingdom', 'Canada', 'Ukraine', 'France'],
      dtype='object', name='Country')

Code

# Rows of the ten selected countries, reshaped to one row per year and one
# column per country (combinations without data are filled with 0)
datos_top_10_emisiones_total = df1.loc[
    df1["Country"].isin(top_10_emisiones_total)
]
pivot_data_total = pd.pivot_table(
    datos_top_10_emisiones_total,
    index="Year",
    columns="Country",
    values="Total",
    aggfunc="sum",
    fill_value=0,
)
# Area chart: start from an empty figure that only carries title and axis labels
fig_area = go.Figure(
    layout=go.Layout(
        title="Top 10 por emisiones de CO2 totales (1970-2021)",
        xaxis_title="Años",
        yaxis_title="CO2",
    )
)
# Add one trace per country; sharing the same stackgroup stacks the areas
for country in pivot_data_total.columns:
    fig_area.add_trace(
        go.Scatter(
            x=pivot_data_total.index,
            y=pivot_data_total[country],
            name=country,
            mode="none",
            stackgroup="one",
        )
    )

Code

# Restrict the data to the year 2021, keeping only country and total emissions.
# The explicit copy makes df2 an independent frame, so adding a column below
# does not trigger pandas' SettingWithCopyWarning.
df2 = df.loc[df["Year"] == 2021, ["Country", "Total"]].copy()
# Share of each country in the sum over all listed countries
df2["Percent"] = df2["Total"] / df2["Total"].sum()
# Top 5 emitters in 2021
top5_emisiones_2021 = df2.nlargest(5, "Percent")
top5_emisiones_2021

                        Country         Total   Percent
10879                     China  11472.369171  0.317776
60111  United States of America   5007.335889  0.138699
25567                     India   2709.683625  0.075056
47327                    Russia   1755.547390  0.048627
27743                     Japan   1067.398435  0.029566

Code

# Pie chart of the five shares. normalize=False keeps each wedge at its raw
# fraction instead of rescaling the five values to fill the whole circle.
fig, ax = plt.subplots()
ax.pie(
    top5_emisiones_2021["Percent"],
    labels=top5_emisiones_2021["Country"],
    autopct="%1.1f%%",
    normalize=False,
)

([<matplotlib.patches.Wedge object at 0x12f659a50>, <matplotlib.patches.Wedge object at 0x12f6591e0>, <matplotlib.patches.Wedge object at 0x12f659750>, <matplotlib.patches.Wedge object at 0x12f659990>, <matplotlib.patches.Wedge object at 0x12f65a8c0>], [Text(0.5958851634949567, 0.9246193118935971, 'China'), Text(-0.8347624034749628, 0.716360055938844, 'United States of America'), Text(-1.099219206463291, 0.04143834145103926, 'India'), Text(-1.0329766146381076, -0.37809960805427256, 'Russia'), Text(-0.9100161313664236, -0.61795682749921, 'Japan')], [Text(0.32502827099724907, 0.5043378064874166, '31.8%'), Text(-0.45532494734997964, 0.3907418486939149, '13.9%'), Text(-0.5995741126163405, 0.022602731700566866, '7.5%'), Text(-0.5634417898026042, -0.20623614984778502, '4.9%'), Text(-0.49637243529077646, -0.3370673604541145, '3.0%')])

Code

# Render the pie chart built in the previous chunk
plt.show()

Code

# Mean per-capita emissions by country over 1970-2021, then the names of the
# ten countries with the largest mean
emisiones_per_capita_medias = df1.groupby("Country")[["Per Capita"]].mean()
top_10_emisiones_per_capita = emisiones_per_capita_medias.nlargest(
    n=10, columns="Per Capita"
).index
top_10_emisiones_per_capita

Index(['Sint Maarten (Dutch part)', 'Qatar', 'Curaçao', 'United Arab Emirates',
       'Luxembourg', 'Brunei Darussalam', 'Kuwait', 'Bahrain',
       'United States of America', 'Trinidad and Tobago'],
      dtype='object', name='Country')

Code

# Line chart of per-capita emissions for the ten selected countries
datos_top_10_emisiones_per_capita = df1[
    df1["Country"].isin(top_10_emisiones_per_capita)
]
# One row per year and one column per country. Country-years without data are
# left as NaN on purpose: filling them with 0 would draw them as "zero
# emissions", whereas plotly leaves a gap in the line for missing values.
pivot_data_per_capita = datos_top_10_emisiones_per_capita.pivot_table(
    values="Per Capita",
    index="Year",
    columns="Country",
    aggfunc="mean",
)
# Empty figure that only carries the title and the axis labels
fig_line = go.Figure(
    layout=go.Layout(
        title="Top 10 por emisiones de CO2 per capita (1970-2021)",
        xaxis_title="Años",
        yaxis_title="CO2pc",
    )
)
# Add one line per country
for country in pivot_data_per_capita.columns:
    fig_line.add_trace(
        go.Scatter(
            x=pivot_data_per_capita.index,
            y=pivot_data_per_capita[country],
            name=country,
            mode="lines",
        )
    )

Code

# World map of mean per-capita emissions: one mean per country, kept as a
# regular column layout (Country, Per Capita)
datos_map = df1.groupby("Country", as_index=False)["Per Capita"].mean()
# Country polygons (note: this name shadows Python's built-in map)
map = gpd.read_file("data/GCPmap.geojson")
map.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 177 entries, 0 to 176
Data columns (total 3 columns):
 #   Column    Non-Null Count  Dtype   
---  ------    --------------  -----   
 0   id        177 non-null    object  
 1   name      177 non-null    object  
 2   geometry  177 non-null    geometry
dtypes: geometry(1), object(2)
memory usage: 4.3+ KB

Code

# Rename the emissions columns so the key matches the map's "name" column
# (rename returns a new frame, so datos_map itself is left untouched)
datos_map_2 = datos_map.rename(
    columns={"Country": "name", "Per Capita": "CO2pc"}
)
# Left merge: every polygon is kept, with CO2pc missing where no name matches
datos_geo = map.merge(datos_map_2, how="left", on="name")
# Static choropleth with a colour legend
fig, ax = plt.subplots(1, 1)
datos_geo.plot(
    column="CO2pc",
    cmap="magma",
    legend=True,
    ax=ax,
)
plt.title("Emisiones de CO2 per capita medias")
plt.tight_layout()
plt.show()

Code

# Interactive map
# folium library for Python:
# https://python-visualization.github.io/folium/
world_geo = "data/GCPmap.geojson"
# Mean per-capita emissions keyed by ISO alpha-3 code; the choropleth below
# matches this key against the id of each GeoJSON feature
datos_emisiones_per_capita_medias = df1.groupby(
    "ISO 3166-1 alpha-3", as_index=False
)["Per Capita"].mean()
mapa_emisiones = folium.Map(location=[0, 0], zoom_start=2)
folium.Choropleth(
    name="choropleth",
    geo_data=world_geo,
    key_on="feature.id",
    data=datos_emisiones_per_capita_medias,
    columns=["ISO 3166-1 alpha-3", "Per Capita"],
    legend_name="Emisiones per capita medias",
    fill_color="YlGnBu",
    fill_opacity=0.7,
    line_opacity=0.2,
).add_to(mapa_emisiones)

<folium.features.Choropleth object at 0x132d121d0>

Code

# Add a layer-control widget to the map
folium.LayerControl().add_to(mapa_emisiones)

<folium.map.LayerControl object at 0x132d126b0>

Code

# Display the interactive map as the chunk's output
mapa_emisiones

Make this Notebook Trusted to load map: File -> Trust Notebook

Datos espaciales: Desarrollo humano al nivel mundial

En esta aplicación se analizarán los datos de la publicación del Global Data Lab sobre las variaciones habidas en el desarrollo humano, la salud, la educación y la renta dentro de más de 160 países (‘within’) y entre ellos (‘between’) desde 1990 hasta 2021.

Puede encontrarse toda la información detallada sobre el índice de desarrollo humano al nivel subnacional (SHDI en sus siglas en inglés) en la siguiente dirección: https://www.nature.com/articles/sdata201938; también se puede acceder a los datos originales desde esta otra página: https://globaldatalab.org/shdi/download_files/.

Respecto a las librerías especializadas en gráficas dinámicas (plotly) y en análisis exploratorio de datos espaciales (sf y geopandas), su información se encuentra en las siguientes páginas web:

Librerías gráficas plotly: https://plotly.com/graphing-libraries/
Librerías para el análisis gráfico de datos espaciales:
- R -> sf: https://r-spatial.github.io/sf/
- Python -> geopandas: https://geopandas.org/

De nuevo se usarán varias técnicas de data wrangling para “preparar” los datos antes del análisis gráfico y estadístico.

Code

# Lectura de librerías
library(tidyverse)
library(plotly)
library(viridis)
library(sf)
library(geojsonsf)
# Read the Global Data Lab indicator table and the region polygons;
# geojson_sf() returns the polygons as an sf object, which class() confirms
df <- read_csv("data/GDLdbase.csv")
map <- geojson_sf("data/GDLmap.geojson")
class(map)

[1] "sf"         "data.frame"

Code

# Inspect the structure of the sf object (columns and geometry type)
str(map)

Classes 'sf' and 'data.frame':  1745 obs. of  6 variables:
 $ GDLcode : chr  "AFGr101" "AFGr102" "AFGr103" "AFGr104" ...
 $ constant: chr  "World" "World" "World" "World" ...
 $ iso_code: chr  "AFG" "AFG" "AFG" "AFG" ...
 $ country : chr  "Afghanistan" "Afghanistan" "Afghanistan" "Afghanistan" ...
 $ region  : chr  "Central (Kabul Wardak Kapisa Logar Parwan Panjsher)" "Central Highlands (Bamyan Daikundi)" "East (Nangarhar Kunar Laghman Nooristan)" "North (Samangan Sar-e-Pul Balkh Jawzjan Faryab)" ...
 $ geometry:sfc_MULTIPOLYGON of length 1745; first list element: List of 1
  ..$ :List of 1
  .. ..$ : num [1:289, 1:2] 67.6 67.6 67.6 67.5 67.5 ...
  ..- attr(*, "class")= chr [1:3] "XY" "MULTIPOLYGON" "sfg"
 - attr(*, "sf_column")= chr "geometry"

Code

# Print the sf summary (bounding box, CRS) and the first features
print(map)

Simple feature collection with 1745 features and 5 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -180 ymin: -55.98403 xmax: 180 ymax: 83.10833
Geodetic CRS:  WGS 84
First 10 features:
   GDLcode constant iso_code     country
1  AFGr101    World      AFG Afghanistan
2  AFGr102    World      AFG Afghanistan
3  AFGr103    World      AFG Afghanistan
4  AFGr104    World      AFG Afghanistan
5  AFGr105    World      AFG Afghanistan
6  AFGr106    World      AFG Afghanistan
7  AFGr107    World      AFG Afghanistan
8  AFGr108    World      AFG Afghanistan
9  AGOr201    World      AGO      Angola
10 AGOr202    World      AGO      Angola
                                                region
1  Central (Kabul Wardak Kapisa Logar Parwan Panjsher)
2                  Central Highlands (Bamyan Daikundi)
3             East (Nangarhar Kunar Laghman Nooristan)
4      North (Samangan Sar-e-Pul Balkh Jawzjan Faryab)
5        North East (Baghlan Takhar Badakhshan Kunduz)
6        South (Uruzgan Helmand Zabul Nimroz Kandahar)
7             South East (Ghazni Paktya Paktika Khost)
8                      West (Ghor Herat Badghis Farah)
9                                              Cabinda
10                                               Zaire
                         geometry
1  MULTIPOLYGON (((67.61506 34...
2  MULTIPOLYGON (((65.23611 33...
3  MULTIPOLYGON (((69.92137 34...
4  MULTIPOLYGON (((66.38873 34...
5  MULTIPOLYGON (((67.39591 35...
6  MULTIPOLYGON (((60.89944 29...
7  MULTIPOLYGON (((68.10873 31...
8  MULTIPOLYGON (((61.12394 31...
9  MULTIPOLYGON (((12.21127 -5...
10 MULTIPOLYGON (((13.08792 -7...

Code

# Data preparation
# Drop the national-level observations (level == "National") and store the
# year as text
df1 <- df %>%
  filter(level != "National") %>%
  mutate(year = as.character(year))
# Keep the analysis year, 2021
df2021 <- df1 %>%
  filter(year == "2021")
print(df2021)

# A tibble: 1,786 × 37
    ...1 iso_code country year  GDLCODE level region continent  sgdi  shdi shdif
   <dbl> <chr>    <chr>   <chr> <chr>   <chr> <chr>  <chr>     <dbl> <dbl> <dbl>
 1   280 AFG      Afghan… 2021  AFGr101 Subn… Centr… Asia/Pac… 0.734 0.55  0.444
 2   281 AFG      Afghan… 2021  AFGr102 Subn… Centr… Asia/Pac… 0.704 0.472 0.368
 3   282 AFG      Afghan… 2021  AFGr103 Subn… East … Asia/Pac… 0.583 0.459 0.31 
 4   283 AFG      Afghan… 2021  AFGr104 Subn… North… Asia/Pac… 0.749 0.497 0.405
 5   284 AFG      Afghan… 2021  AFGr105 Subn… North… Asia/Pac… 0.667 0.444 0.332
 6   285 AFG      Afghan… 2021  AFGr106 Subn… South… Asia/Pac… 0.563 0.407 0.269
 7   286 AFG      Afghan… 2021  AFGr107 Subn… South… Asia/Pac… 0.567 0.476 0.315
 8   287 AFG      Afghan… 2021  AFGr108 Subn… West … Asia/Pac… 0.692 0.447 0.344
 9   707 AGO      Angola  2021  AGOr201 Subn… Cabin… Africa    0.938 0.681 0.66 
10   708 AGO      Angola  2021  AGOr202 Subn… Zaire  Africa    0.892 0.615 0.581
# ℹ 1,776 more rows
# ℹ 26 more variables: shdim <dbl>, healthindex <dbl>, healthindexf <dbl>,
#   healthindexm <dbl>, incindex <dbl>, incindexf <dbl>, incindexm <dbl>,
#   edindex <dbl>, edindexf <dbl>, edindexm <dbl>, esch <dbl>, eschf <dbl>,
#   eschm <dbl>, msch <dbl>, mschf <dbl>, mschm <dbl>, lifexp <dbl>,
#   lifexpf <dbl>, lifexpm <dbl>, gnic <dbl>, gnicf <dbl>, gnicm <dbl>,
#   lgnic <dbl>, lgnicf <dbl>, lgnicm <dbl>, pop <dbl>

Code

# List the distinct continent labels present in the 2021 subset
unique(df2021$continent)

[1] "Asia/Pacific" "Africa"       "Europe"       "America"

Code

# List the distinct country names present in the 2021 subset
unique(df2021$country)

  [1] "Afghanistan"                  "Angola"                      
  [3] "Albania"                      "Argentina urban"             
  [5] "Armenia"                      "Australia"                   
  [7] "Austria"                      "Azerbaijan"                  
  [9] "Burundi"                      "Belgium"                     
 [11] "Benin"                        "Burkina Faso"                
 [13] "Bangladesh"                   "Bulgaria"                    
 [15] "Bosnia and Herzegovina"       "Belarus"                     
 [17] "Belize"                       "Bolivia"                     
 [19] "Brazil"                       "Barbados"                    
 [21] "Bhutan"                       "Botswana"                    
 [23] "Central African Republic CAR" "Canada"                      
 [25] "Switzerland"                  "Chili"                       
 [27] "China"                        "Cote d'Ivoire"               
 [29] "Cameroon"                     "Congo Democratic Republic"   
 [31] "Congo Brazzaville"            "Colombia"                    
 [33] "Comoros"                      "Cape Verde"                  
 [35] "Costa Rica"                   "Cuba"                        
 [37] "Czech Republic"               "Germany"                     
 [39] "Djibouti"                     "Denmark"                     
 [41] "Dominican Republic"           "Algeria"                     
 [43] "Ecuador"                      "Egypt"                       
 [45] "Eritrea"                      "Spain"                       
 [47] "Estonia"                      "Ethiopia"                    
 [49] "Finland"                      "Fiji"                        
 [51] "France"                       "Gabon"                       
 [53] "United Kingdom"               "Georgia"                     
 [55] "Ghana"                        "Guinea"                      
 [57] "Gambia"                       "Guinea Bissau"               
 [59] "Equatorial Guinea"            "Greece"                      
 [61] "Guatemala"                    "Guyana"                      
 [63] "Honduras"                     "Croatia"                     
 [65] "Haiti"                        "Hungary"                     
 [67] "Indonesia"                    "India"                       
 [69] "Ireland"                      "Iran"                        
 [71] "Iraq"                         "Italy"                       
 [73] "Jamaica"                      "Jordan"                      
 [75] "Japan"                        "Kazakhstan"                  
 [77] "Kenya"                        "Kyrgyzstan"                  
 [79] "Cambodia"                     "Kiribati"                    
 [81] "South Korea"                  "Kuwait"                      
 [83] "Lao"                          "Lebanon"                     
 [85] "Liberia"                      "Libya"                       
 [87] "Saint Lucia"                  "Lesotho"                     
 [89] "Lithuania"                    "Latvia"                      
 [91] "Morocco"                      "Moldova"                     
 [93] "Madagascar"                   "Maldives"                    
 [95] "Mexico"                       "North Macedonia"             
 [97] "Mali"                         "Myanmar"                     
 [99] "Monte Negro"                  "Mongolia"                    
[101] "Mozambique"                   "Mauritania"                  
[103] "Mauritius"                    "Malawi"                      
[105] "Malaysia"                     "Namibia"                     
[107] "Niger"                        "Nigeria"                     
[109] "Nicaragua"                    "Netherlands"                 
[111] "Norway"                       "Nepal"                       
[113] "New Zealand"                  "Pakistan"                    
[115] "Panama"                       "Peru"                        
[117] "Philippines"                  "Papua New Guinea"            
[119] "Poland"                       "Portugal"                    
[121] "Paraguay"                     "Palestine"                   
[123] "Romania"                      "Russian Federation"          
[125] "Rwanda"                       "Saudi Arabia"                
[127] "Sudan"                        "Senegal"                     
[129] "Sierra Leone"                 "El Salvador"                 
[131] "Somalia"                      "Serbia"                      
[133] "South Sudan"                  "Sao Tome & Principe"         
[135] "Suriname"                     "Slovakia"                    
[137] "Slovenia"                     "Sweden"                      
[139] "Eswatini"                     "Syria"                       
[141] "Chad"                         "Togo"                        
[143] "Thailand"                     "Tajikistan"                  
[145] "Turkmenistan"                 "Timor Leste"                 
[147] "Tonga"                        "Trinidad & Tobago"           
[149] "Tunisia"                      "Turkey"                      
[151] "Tuvalu"                       "Tanzania"                    
[153] "Uganda"                       "Ukraine"                     
[155] "Uruguay"                      "United States"               
[157] "Uzbekistan"                   "Venezuela"                   
[159] "Vietnam"                      "Vanuatu"                     
[161] "Samoa"                        "Kosovo"                      
[163] "Yemen"                        "South Africa"                
[165] "Zambia"                       "Zimbabwe"

Code

# Basic exploratory data analysis (EDA)
# Summary statistics for 2021: drop the row counter and the identifier/label
# columns, then summarise the remaining indicator columns
df2021 %>%
  select(
    !c(`...1`, iso_code, year, country, GDLCODE, level, region, continent)
  ) %>%
  summary()

      sgdi             shdi            shdif            shdim       
 Min.   :0.3690   Min.   :0.2320   Min.   :0.1740   Min.   :0.2930  
 1st Qu.:0.9045   1st Qu.:0.5780   1st Qu.:0.5550   1st Qu.:0.6030  
 Median :0.9620   Median :0.7070   Median :0.6820   Median :0.7240  
 Mean   :0.9406   Mean   :0.6935   Mean   :0.6758   Mean   :0.7125  
 3rd Qu.:0.9900   3rd Qu.:0.8110   3rd Qu.:0.8080   3rd Qu.:0.8190  
 Max.   :1.0600   Max.   :0.9890   Max.   :0.9740   Max.   :0.9960  
 NA's   :63       NA's   :2        NA's   :63       NA's   :63      
  healthindex      healthindexf     healthindexm       incindex     
 Min.   :0.4000   Min.   :0.3120   Min.   :0.3900   Min.   :0.2790  
 1st Qu.:0.6920   1st Qu.:0.6920   1st Qu.:0.6945   1st Qu.:0.5560  
 Median :0.7840   Median :0.7980   Median :0.7770   Median :0.6980  
 Mean   :0.7761   Mean   :0.7799   Mean   :0.7743   Mean   :0.6811  
 3rd Qu.:0.8600   3rd Qu.:0.8720   3rd Qu.:0.8540   3rd Qu.:0.8110  
 Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000  
 NA's   :2        NA's   :11       NA's   :11                       
   incindexf        incindexm         edindex          edindexf     
 Min.   :0.0380   Min.   :0.2910   Min.   :0.0610   Min.   :0.0630  
 1st Qu.:0.4945   1st Qu.:0.5890   1st Qu.:0.4973   1st Qu.:0.4808  
 Median :0.6130   Median :0.7440   Median :0.6620   Median :0.6730  
 Mean   :0.6260   Mean   :0.7164   Mean   :0.6423   Mean   :0.6406  
 3rd Qu.:0.7740   3rd Qu.:0.8445   3rd Qu.:0.7897   3rd Qu.:0.7983  
 Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :0.9940  
 NA's   :15       NA's   :15                        NA's   :58      
    edindexm           esch            eschf            eschm       
 Min.   :0.1100   Min.   : 0.369   Min.   : 1.395   Min.   : 3.332  
 1st Qu.:0.5270   1st Qu.:11.145   1st Qu.:11.085   1st Qu.:11.363  
 Median :0.6630   Median :13.303   Median :13.642   Median :13.187  
 Mean   :0.6555   Mean   :13.052   Mean   :13.261   Mean   :13.065  
 3rd Qu.:0.7873   3rd Qu.:15.343   3rd Qu.:15.706   3rd Qu.:14.998  
 Max.   :0.9930   Max.   :18.000   Max.   :18.000   Max.   :18.000  
 NA's   :58                        NA's   :36       NA's   :36      
      msch            mschf            mschm            lifexp     
 Min.   : 0.330   Min.   : 0.136   Min.   : 0.524   Min.   :45.98  
 1st Qu.: 5.627   1st Qu.: 5.136   1st Qu.: 6.250   1st Qu.:64.98  
 Median : 8.558   Median : 8.521   Median : 8.870   Median :70.99  
 Mean   : 8.393   Mean   : 8.124   Mean   : 8.745   Mean   :70.45  
 3rd Qu.:11.227   3rd Qu.:11.146   3rd Qu.:11.354   3rd Qu.:75.88  
 Max.   :15.000   Max.   :14.981   Max.   :14.843   Max.   :85.61  
                  NA's   :53       NA's   :53       NA's   :2      
    lifexpf         lifexpm           gnic              gnicf         
 Min.   :42.78   Min.   :42.83   Min.   :   632.5   Min.   :   128.2  
 1st Qu.:67.49   1st Qu.:62.64   1st Qu.:  3960.0   1st Qu.:  2637.6  
 Median :74.38   Median :67.99   Median : 10156.4   Median :  5802.6  
 Mean   :73.19   Mean   :67.83   Mean   : 16411.9   Mean   : 12440.0  
 3rd Qu.:79.17   3rd Qu.:73.00   3rd Qu.: 21422.5   3rd Qu.: 16763.3  
 Max.   :88.29   Max.   :84.53   Max.   :209791.9   Max.   :162951.2  
 NA's   :11      NA's   :11                         NA's   :15        
     gnicm              lgnic            lgnicf           lgnicm      
 Min.   :   687.5   Min.   : 6.450   Min.   : 4.853   Min.   : 6.533  
 1st Qu.:  4939.9   1st Qu.: 8.284   1st Qu.: 7.878   1st Qu.: 8.505  
 Median : 13755.2   Median : 9.226   Median : 8.666   Median : 9.529  
 Mean   : 20564.4   Mean   : 9.116   Mean   : 8.750   Mean   : 9.353  
 3rd Qu.: 26757.8   3rd Qu.: 9.973   3rd Qu.: 9.727   3rd Qu.:10.194  
 Max.   :258567.3   Max.   :12.254   Max.   :12.001   Max.   :12.463  
 NA's   :15                          NA's   :15       NA's   :15      
      pop       
 Min.   :161.1  
 1st Qu.:172.5  
 Median :189.8  
 Mean   :247.1  
 3rd Qu.:242.4  
 Max.   :549.3  
 NA's   :1779

Code

# Summary statistics for Spain in 2021: keep the Spanish rows, drop the
# identifier columns (country is retained) and summarise the rest
df2021 %>%
  filter(country == "Spain") %>%
  select(!c(`...1`, iso_code, year, GDLCODE, level, region, continent)) %>%
  summary()

   country               sgdi             shdi            shdif       
 Length:19          Min.   :0.9750   Min.   :0.8530   Min.   :0.8490  
 Class :character   1st Qu.:0.9835   1st Qu.:0.8725   1st Qu.:0.8655  
 Mode  :character   Median :0.9850   Median :0.9000   Median :0.8900  
                    Mean   :0.9854   Mean   :0.8943   Mean   :0.8863  
                    3rd Qu.:0.9870   3rd Qu.:0.9110   3rd Qu.:0.9030  
                    Max.   :0.9960   Max.   :0.9400   Max.   :0.9270  
                                                                      
     shdim         healthindex      healthindexf     healthindexm   
 Min.   :0.8560   Min.   :0.9300   Min.   :0.9310   Min.   :0.9290  
 1st Qu.:0.8775   1st Qu.:0.9570   1st Qu.:0.9660   1st Qu.:0.9510  
 Median :0.9050   Median :0.9680   Median :0.9720   Median :0.9630  
 Mean   :0.8996   Mean   :0.9653   Mean   :0.9696   Mean   :0.9614  
 3rd Qu.:0.9175   3rd Qu.:0.9750   3rd Qu.:0.9800   3rd Qu.:0.9700  
 Max.   :0.9510   Max.   :0.9970   Max.   :0.9960   Max.   :0.9930  
                                                                    
    incindex        incindexf        incindexm         edindex      
 Min.   :0.8490   Min.   :0.8190   Min.   :0.8750   Min.   :0.7610  
 1st Qu.:0.8635   1st Qu.:0.8335   1st Qu.:0.8900   1st Qu.:0.7975  
 Median :0.8830   Median :0.8520   Median :0.9100   Median :0.8520  
 Mean   :0.8892   Mean   :0.8584   Mean   :0.9159   Mean   :0.8336  
 3rd Qu.:0.9105   3rd Qu.:0.8790   3rd Qu.:0.9375   3rd Qu.:0.8585  
 Max.   :0.9470   Max.   :0.9150   Max.   :0.9740   Max.   :0.8810  
                                                                    
    edindexf         edindexm           esch           eschf      
 Min.   :0.7660   Min.   :0.7570   Min.   :15.13   Min.   :15.25  
 1st Qu.:0.8085   1st Qu.:0.7870   1st Qu.:17.04   1st Qu.:17.40  
 Median :0.8500   Median :0.8400   Median :17.77   Median :18.00  
 Mean   :0.8371   Mean   :0.8271   Mean   :17.42   Mean   :17.60  
 3rd Qu.:0.8600   3rd Qu.:0.8535   3rd Qu.:18.00   3rd Qu.:18.00  
 Max.   :0.8760   Max.   :0.8880   Max.   :18.00   Max.   :18.00  
                                                                  
     eschm            msch            mschf            mschm       
 Min.   :14.99   Min.   : 9.462   Min.   : 9.399   Min.   : 9.548  
 1st Qu.:16.61   1st Qu.:10.029   1st Qu.: 9.986   1st Qu.:10.159  
 Median :17.34   Median :10.552   Median :10.513   Median :10.548  
 Mean   :17.10   Mean   :10.498   Mean   :10.442   Mean   :10.562  
 3rd Qu.:17.82   3rd Qu.:10.898   3rd Qu.:10.805   3rd Qu.:10.955  
 Max.   :18.00   Max.   :11.444   Max.   :11.290   Max.   :11.634  
                                                                   
     lifexp         lifexpf         lifexpm           gnic      
 Min.   :80.46   Min.   :83.00   Min.   :77.89   Min.   :27583  
 1st Qu.:82.19   1st Qu.:85.28   1st Qu.:79.32   1st Qu.:30418  
 Median :82.93   Median :85.67   Median :80.06   Median :34612  
 Mean   :82.74   Mean   :85.52   Mean   :79.99   Mean   :36767  
 3rd Qu.:83.38   3rd Qu.:86.21   3rd Qu.:80.56   3rd Qu.:41466  
 Max.   :84.81   Max.   :87.25   Max.   :82.04   Max.   :52774  
                                                                
     gnicf           gnicm           lgnic           lgnicf     
 Min.   :22592   Min.   :32745   Min.   :10.22   Min.   :10.03  
 1st Qu.:24867   1st Qu.:36170   1st Qu.:10.32   1st Qu.:10.12  
 Median :28224   Median :41247   Median :10.45   Median :10.25  
 Mean   :29934   Mean   :43875   Mean   :10.49   Mean   :10.29  
 3rd Qu.:33694   3rd Qu.:49565   3rd Qu.:10.63   3rd Qu.:10.43  
 Max.   :42681   Max.   :63336   Max.   :10.87   Max.   :10.66  
                                                                
     lgnicm           pop     
 Min.   :10.40   Min.   : NA  
 1st Qu.:10.50   1st Qu.: NA  
 Median :10.63   Median : NA  
 Mean   :10.67   Mean   :NaN  
 3rd Qu.:10.81   3rd Qu.: NA  
 Max.   :11.06   Max.   : NA  
                 NA's   :19

Code

# Regional differences over 1990-2021 (box plots)
# Comparison between continents: one horizontal box per continent, animated
# with one frame per year
plot_ly(
  data = df1,
  x = ~shdi,
  y = ~continent,
  frame = ~year,
  color = ~continent,
  type = "box"
) %>%
  layout(
    xaxis = list(title = "SHDI"),
    yaxis = list(title = "Continente")
  )

Code

# Comparison between several countries, 1990-2021: same animated box plot,
# restricted to three countries
df1 %>%
  filter(country %in% c("Spain", "Portugal", "France")) %>%
  plot_ly(
    x = ~shdi,
    y = ~country,
    frame = ~year,
    color = ~country,
    type = "box"
  ) %>%
  layout(
    xaxis = list(title = "SHDI"),
    yaxis = list(title = "País")
  )

Code

# Regional evolution within one country, 1990-2021 (time series): one line
# per Spanish region, with SHDI on a logarithmic axis
df1[df1$country == "Spain", ] %>%
  plot_ly(
    x = ~year,
    y = ~shdi,
    color = ~region,
    type = "scatter",
    mode = "lines"
  ) %>%
  layout(
    xaxis = list(title = "Año"),
    yaxis = list(title = "SHDI", type = "log")
  )

Code

# Human development vs. income per capita, 1990-2021 (scatter plots): one
# marker per region, coloured by continent, animated with one frame per year
df1 %>%
  plot_ly(
    x = ~lgnic,
    y = ~shdi,
    frame = ~year,
    color = ~continent,
    colors = "Set1",
    type = "scatter",
    mode = "markers"
  ) %>%
  layout(
    xaxis = list(title = "Renta nacional bruta per capita (log)"),
    yaxis = list(title = "SHDI")
  )

Code

# Exploratory spatial data analysis (ESDA)
# Spatial distribution of the SHDI in 2021: the indicator table names its key
# GDLCODE while the map uses GDLcode, so the key is renamed before joining.
# Columns present in both tables come out with .x (map) / .y (data) suffixes.
df2021_2 <- rename(df2021, GDLcode = GDLCODE)
gdf2021 <- map %>%
  inner_join(df2021_2, by = "GDLcode")
colnames(gdf2021)

 [1] "GDLcode"      "constant"     "iso_code.x"   "country.x"    "region.x"    
 [6] "...1"         "iso_code.y"   "country.y"    "year"         "level"       
[11] "region.y"     "continent"    "sgdi"         "shdi"         "shdif"       
[16] "shdim"        "healthindex"  "healthindexf" "healthindexm" "incindex"    
[21] "incindexf"    "incindexm"    "edindex"      "edindexf"     "edindexm"    
[26] "esch"         "eschf"        "eschm"        "msch"         "mschf"       
[31] "mschm"        "lifexp"       "lifexpf"      "lifexpm"      "gnic"        
[36] "gnicf"        "gnicm"        "lgnic"        "lgnicf"       "lgnicm"      
[41] "pop"          "geometry"

Code

# World choropleth of the 2021 SHDI.
# "OrRd" is a ColorBrewer palette, not a viridis option, so it is applied
# with scale_fill_distiller(); direction = 1 maps low values to the light end
# of the palette and high values to the dark end.
ggplot(gdf2021) +
  geom_sf(aes(fill = shdi)) +
  theme_bw() +
  labs(title = "Distribución espacial del SHDI en 2021") +
  scale_fill_distiller(palette = "OrRd", direction = 1)

Code

# Spanish SHDI in 2021: keep the features whose map-side country is Spain
gdf2021Spain <- gdf2021[gdf2021$country.x == "Spain", ]
# "OrRd" is a ColorBrewer palette, not a viridis option, so it is applied
# with scale_fill_distiller(); direction = 1 maps low values to the light end
# of the palette and high values to the dark end.
ggplot(gdf2021Spain) +
  geom_sf(aes(fill = shdi)) +
  theme_bw() +
  labs(title = "Distribución espacial del SHDI español en 2021") +
  scale_fill_distiller(palette = "OrRd", direction = 1)

Code

# Map with the mainland regions only.
# NOTE(review): rows 14 and 17 are dropped by position; presumably these are
# the island regions - confirm against gdf2021Spain$region.x, since the
# positions depend on the row order of the join.
# The subset is assigned on its own line instead of inside the ggplot() call.
gdf2021Spain_2 <- gdf2021Spain[-c(14, 17), ]
# "OrRd" is a ColorBrewer palette, not a viridis option, so it is applied
# with scale_fill_distiller(); direction = 1 maps low values to the light end
# of the palette and high values to the dark end.
ggplot(gdf2021Spain_2) +
  geom_sf(aes(fill = shdi)) +
  theme_bw() +
  labs(title = "Distribución espacial del SHDI peninsular español en 2021") +
  scale_fill_distiller(palette = "OrRd", direction = 1)

Code

# Load libraries
import numpy as np
import pandas as pd
# Print every column of a DataFrame instead of truncating wide tables
pd.set_option('display.max_columns', None)
import matplotlib.pyplot as plt
plt.style.use('ggplot')
from matplotlib.ticker import FormatStrFormatter
from matplotlib_scalebar.scalebar import ScaleBar
from pylab import rcParams
import matplotlib as mpl
mpl.rcParams['figure.dpi'] = 72
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
import chart_studio
import chart_studio.plotly as save2cs
import geopandas as gpd
import warnings
# NOTE(review): this silences every warning for the rest of the session
warnings.filterwarnings('ignore')
# Load data: the indicator table (first CSV column used as the index) and the
# GeoJSON layer with the region geometries
df = pd.read_csv('data/GDLdbase.csv', index_col=0)
# NOTE(review): `map` shadows Python's built-in map(); the name is kept because
# the merge cell further down refers to it
map = gpd.read_file("data/GDLmap.geojson")
# Print column names, non-null counts and dtypes of the map layer
map.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 1745 entries, 0 to 1744
Data columns (total 6 columns):
 #   Column    Non-Null Count  Dtype   
---  ------    --------------  -----   
 0   GDLcode   1745 non-null   object  
 1   constant  1745 non-null   object  
 2   iso_code  1742 non-null   object  
 3   country   1742 non-null   object  
 4   region    1742 non-null   object  
 5   geometry  1745 non-null   geometry
dtypes: geometry(1), object(5)
memory usage: 81.9+ KB

Code

# Display the map GeoDataFrame (region codes, names and geometries)
map

      GDLcode constant iso_code      country  \
0     AFGr101    World      AFG  Afghanistan   
1     AFGr102    World      AFG  Afghanistan   
2     AFGr103    World      AFG  Afghanistan   
3     AFGr104    World      AFG  Afghanistan   
4     AFGr105    World      AFG  Afghanistan   
...       ...      ...      ...          ...   
1740  ZWEr106    World      ZWE     Zimbabwe   
1741  ZWEr107    World      ZWE     Zimbabwe   
1742  ZWEr108    World      ZWE     Zimbabwe   
1743  ZWEr109    World      ZWE     Zimbabwe   
1744  ZWEr110    World      ZWE     Zimbabwe   

                                                 region  \
0     Central (Kabul Wardak Kapisa Logar Parwan Panj...   
1                   Central Highlands (Bamyan Daikundi)   
2              East (Nangarhar Kunar Laghman Nooristan)   
3       North (Samangan Sar-e-Pul Balkh Jawzjan Faryab)   
4         North East (Baghlan Takhar Badakhshan Kunduz)   
...                                                 ...   
1740                                 Matebeleland South   
1741                                           Midlands   
1742                                           Masvingo   
1743                                             Harare   
1744                                           Bulawayo   

                                               geometry  
0     MULTIPOLYGON (((67.61506 34.23957, 67.59259 34...  
1     MULTIPOLYGON (((65.23611 33.37125, 65.25839 33...  
2     MULTIPOLYGON (((69.92137 34.02534, 69.90879 34...  
3     MULTIPOLYGON (((66.38873 34.93133, 66.33110 34...  
4     MULTIPOLYGON (((67.39591 35.43840, 67.45535 35...  
...                                                 ...  
1740  MULTIPOLYGON (((29.36831 -22.19781, 29.33401 -...  
1741  MULTIPOLYGON (((29.22650 -19.48012, 29.21404 -...  
1742  MULTIPOLYGON (((31.06733 -22.34189, 31.08549 -...  
1743  MULTIPOLYGON (((31.08852 -17.66625, 31.11562 -...  
1744  MULTIPOLYGON (((28.61305 -20.23587, 28.60440 -...  

[1745 rows x 6 columns]

Code

# Data preparation
# Drop the country-level observations (level == 'National').
# .copy() gives df1 its own data: without it df1 is a filtered slice of df and
# the column assignment below is a chained assignment (SettingWithCopyWarning,
# hidden here because warnings are globally ignored).
df1 = df[df.level != 'National'].copy()
# Store the year as text; the filter below and the later cells compare it as a string
df1["year"] = df1["year"].astype(str)
# Select the analysis year, 2021
df2021 = df1[df1['year'] == '2021']
df2021

      iso_code      country  year  GDLCODE   level  \
280        AFG  Afghanistan  2021  AFGr101  Subnat   
281        AFG  Afghanistan  2021  AFGr102  Subnat   
282        AFG  Afghanistan  2021  AFGr103  Subnat   
283        AFG  Afghanistan  2021  AFGr104  Subnat   
284        AFG  Afghanistan  2021  AFGr105  Subnat   
...        ...          ...   ...      ...     ...   
58879      ZWE     Zimbabwe  2021  ZWEr106  Subnat   
58880      ZWE     Zimbabwe  2021  ZWEr107  Subnat   
58881      ZWE     Zimbabwe  2021  ZWEr108  Subnat   
58882      ZWE     Zimbabwe  2021  ZWEr109  Subnat   
58883      ZWE     Zimbabwe  2021  ZWEr110  Subnat   

                                                  region     continent   sgdi  \
280    Central (Kabul Wardak Kapisa Logar Parwan Panj...  Asia/Pacific  0.734   
281                  Central Highlands (Bamyan Daikundi)  Asia/Pacific  0.704   
282             East (Nangarhar Kunar Laghman Nooristan)  Asia/Pacific  0.583   
283      North (Samangan Sar-e-Pul Balkh Jawzjan Faryab)  Asia/Pacific  0.749   
284        North East (Baghlan Takhar Badakhshan Kunduz)  Asia/Pacific  0.667   
...                                                  ...           ...    ...   
58879                                 Matebeleland South        Africa  0.985   
58880                                           Midlands        Africa  0.958   
58881                                           Masvingo        Africa  0.962   
58882                                             Harare        Africa  0.962   
58883                                           Bulawayo        Africa  0.985   

        shdi  shdif  shdim  healthindex  healthindexf  healthindexm  incindex  \
280    0.550  0.444  0.605        0.675         0.694         0.664     0.471   
281    0.472  0.368  0.522        0.653         0.667         0.643     0.396   
282    0.459  0.310  0.532        0.638         0.648         0.630     0.394   
283    0.497  0.405  0.541        0.628         0.636         0.620     0.464   
284    0.444  0.332  0.498        0.623         0.629         0.615     0.416   
...      ...    ...    ...          ...           ...           ...       ...   
58879  0.585  0.581  0.590        0.665         0.681         0.652     0.530   
58880  0.590  0.577  0.602        0.593         0.595         0.586     0.544   
58881  0.588  0.578  0.601        0.650         0.664         0.639     0.520   
58882  0.665  0.652  0.678        0.619         0.627         0.610     0.648   
58883  0.693  0.689  0.699        0.656         0.670         0.644     0.688   

       incindexf  incindexm  edindex  edindexf  edindexm    esch   eschf  \
280        0.280      0.553    0.524     0.451     0.604  12.898  10.399   
281        0.217      0.472    0.408     0.343     0.468  12.447  10.958   
282        0.215      0.470    0.385     0.214     0.510   9.858   6.043   
283        0.274      0.546    0.420     0.382     0.467  11.456  10.004   
284        0.234      0.494    0.338     0.249     0.407   9.288   7.202   
...          ...        ...      ...       ...       ...     ...     ...   
58879      0.508      0.551    0.566     0.566     0.570  10.989  11.145   
58880      0.522      0.566    0.635     0.618     0.659  12.668  12.575   
58881      0.498      0.541    0.602     0.584     0.628  12.058  12.013   
58882      0.624      0.672    0.732     0.707     0.759  13.156  12.470   
58883      0.663      0.712    0.739     0.735     0.746  14.115  14.173   

        eschm    msch   mschf   mschm  lifexp  lifexpf  lifexpm      gnic  \
280    15.407   4.975   4.851   5.281  63.843   67.633   60.672  2264.832   
281    14.155   1.857   1.168   2.256  62.415   65.829   59.326  1374.650   
282    13.270   3.325   1.397   4.232  61.471   64.627   58.426  1355.298   
283    12.857   3.058   3.124   3.305  60.843   63.825   57.822  2159.844   
284    11.235   2.390   1.473   2.842  60.500   63.384   57.490  1571.877   
...       ...     ...     ...     ...     ...      ...      ...       ...   
58879  10.863   7.834   7.697   8.044  63.246   66.784   59.907  3339.052   
58880  12.858   8.492   8.073   9.045  58.544   61.193   55.566  3670.709   
58881  12.184   8.013   7.504   8.685  62.276   65.641   59.025  3127.485   
58882  13.981  10.993  10.825  11.114  60.261   63.250   57.171  7314.452   
58883  14.218  10.405  10.246  10.541  62.628   66.056   59.346  9516.362   

          gnicf      gnicm  lgnic  lgnicf  lgnicm  pop  
280     638.644   3894.162  7.725   6.459   8.267  NaN  
281     420.676   2282.217  7.226   6.042   7.733  NaN  
282     415.719   2247.852  7.212   6.030   7.718  NaN  
283     613.795   3701.300  7.678   6.420   8.216  NaN  
284     470.580   2634.320  7.360   6.154   7.876  NaN  
...         ...        ...    ...     ...     ...  ...  
58879  2886.675   3844.591  8.113   7.968   8.254  NaN  
58880  3168.011   4233.422  8.208   8.061   8.351  NaN  
58881  2706.948   3596.899  8.048   7.904   8.188  NaN  
58882  6235.132   8537.414  8.898   8.738   9.052  NaN  
58883  8073.914  11158.386  9.161   8.996   9.320  NaN  

[1786 rows x 36 columns]

Code

# Distinct continent labels present in the 2021 data
df2021['continent'].unique()

array(['Asia/Pacific', 'Africa', 'Europe', 'America'], dtype=object)

Code

# Distinct country names present in the 2021 data
df2021['country'].unique()

array(['Afghanistan', 'Angola', 'Albania', 'Argentina urban', 'Armenia',
       'Australia', 'Austria', 'Azerbaijan', 'Burundi', 'Belgium',
       'Benin', 'Burkina Faso', 'Bangladesh', 'Bulgaria',
       'Bosnia and Herzegovina', 'Belarus', 'Belize', 'Bolivia', 'Brazil',
       'Barbados', 'Bhutan', 'Botswana', 'Central African Republic CAR',
       'Canada', 'Switzerland', 'Chili', 'China', "Cote d'Ivoire",
       'Cameroon', 'Congo Democratic Republic', 'Congo Brazzaville',
       'Colombia', 'Comoros', 'Cape Verde', 'Costa Rica', 'Cuba',
       'Czech Republic', 'Germany', 'Djibouti', 'Denmark',
       'Dominican Republic', 'Algeria', 'Ecuador', 'Egypt', 'Eritrea',
       'Spain', 'Estonia', 'Ethiopia', 'Finland', 'Fiji', 'France',
       'Gabon', 'United Kingdom', 'Georgia', 'Ghana', 'Guinea', 'Gambia',
       'Guinea Bissau', 'Equatorial Guinea', 'Greece', 'Guatemala',
       'Guyana', 'Honduras', 'Croatia', 'Haiti', 'Hungary', 'Indonesia',
       'India', 'Ireland', 'Iran', 'Iraq', 'Italy', 'Jamaica', 'Jordan',
       'Japan', 'Kazakhstan', 'Kenya', 'Kyrgyzstan', 'Cambodia',
       'Kiribati', 'South Korea', 'Kuwait', 'Lao', 'Lebanon', 'Liberia',
       'Libya', 'Saint Lucia', 'Lesotho', 'Lithuania', 'Latvia',
       'Morocco', 'Moldova', 'Madagascar', 'Maldives', 'Mexico',
       'North Macedonia', 'Mali', 'Myanmar', 'Monte Negro', 'Mongolia',
       'Mozambique', 'Mauritania', 'Mauritius', 'Malawi', 'Malaysia',
       'Namibia', 'Niger', 'Nigeria', 'Nicaragua', 'Netherlands',
       'Norway', 'Nepal', 'New Zealand', 'Pakistan', 'Panama', 'Peru',
       'Philippines', 'Papua New Guinea', 'Poland', 'Portugal',
       'Paraguay', 'Palestine', 'Romania', 'Russian Federation', 'Rwanda',
       'Saudi Arabia', 'Sudan', 'Senegal', 'Sierra Leone', 'El Salvador',
       'Somalia', 'Serbia', 'South Sudan', 'Sao Tome & Principe',
       'Suriname', 'Slovakia', 'Slovenia', 'Sweden', 'Eswatini', 'Syria',
       'Chad', 'Togo', 'Thailand', 'Tajikistan', 'Turkmenistan',
       'Timor Leste', 'Tonga', 'Trinidad & Tobago', 'Tunisia', 'Turkey',
       'Tuvalu', 'Tanzania', 'Uganda', 'Ukraine', 'Uruguay',
       'United States', 'Uzbekistan', 'Venezuela', 'Vietnam', 'Vanuatu',
       'Samoa', 'Kosovo', 'Yemen', 'South Africa', 'Zambia', 'Zimbabwe'],
      dtype=object)

Code

# Basic exploratory data analysis (EDA)
# Summary statistics of the numeric columns for 2021, rounded to 3 decimals
stats_2021 = df2021.describe()
stats_2021.round(decimals=3)

           sgdi      shdi     shdif     shdim  healthindex  healthindexf  \
count  1723.000  1784.000  1723.000  1723.000     1784.000      1775.000   
mean      0.941     0.693     0.676     0.712        0.776         0.780   
std       0.071     0.158     0.167     0.147        0.119         0.125   
min       0.369     0.232     0.174     0.293        0.400         0.312   
25%       0.905     0.578     0.555     0.603        0.692         0.692   
50%       0.962     0.707     0.682     0.724        0.784         0.798   
75%       0.990     0.811     0.808     0.819        0.860         0.872   
max       1.060     0.989     0.974     0.996        1.000         1.000   

       healthindexm  incindex  incindexf  incindexm   edindex  edindexf  \
count      1775.000  1786.000   1771.000   1771.000  1786.000  1728.000   
mean          0.774     0.681      0.626      0.716     0.642     0.641   
std           0.117     0.174      0.185      0.175     0.191     0.204   
min           0.390     0.279      0.038      0.291     0.061     0.063   
25%           0.694     0.556      0.494      0.589     0.497     0.481   
50%           0.777     0.698      0.613      0.744     0.662     0.673   
75%           0.854     0.811      0.774      0.844     0.790     0.798   
max           1.000     1.000      1.000      1.000     1.000     0.994   

       edindexm      esch     eschf     eschm      msch     mschf     mschm  \
count  1728.000  1786.000  1750.000  1750.000  1786.000  1733.000  1733.000   
mean      0.656    13.052    13.261    13.065     8.393     8.124     8.745   
std       0.170     3.163     3.251     2.736     3.447     3.725     3.172   
min       0.110     0.369     1.395     3.332     0.330     0.136     0.524   
25%       0.527    11.145    11.086    11.363     5.626     5.136     6.250   
50%       0.663    13.303    13.642    13.187     8.558     8.521     8.870   
75%       0.787    15.343    15.706    14.998    11.227    11.146    11.354   
max       0.993    18.000    18.000    18.000    15.000    14.981    14.843   

         lifexp   lifexpf   lifexpm        gnic       gnicf       gnicm  \
count  1784.000  1775.000  1775.000    1786.000    1771.000    1771.000   
mean     70.449    73.194    67.835   16411.868   12440.010   20564.405   
std       7.751     8.146     7.593   18232.444   14887.574   21984.616   
min      45.978    42.782    42.830     632.458     128.186     687.484   
25%      64.985    67.494    62.640    3960.012    2637.606    4939.909   
50%      70.986    74.375    67.990   10156.434    5802.553   13755.222   
75%      75.882    79.173    72.997   21422.519   16763.350   26757.780   
max      85.608    88.290    84.525  209791.914  162951.184  258567.313   

          lgnic    lgnicf    lgnicm      pop  
count  1786.000  1771.000  1771.000    7.000  
mean      9.116     8.750     9.353  247.140  
std       1.154     1.226     1.166  137.766  
min       6.450     4.853     6.533  161.098  
25%       8.284     7.877     8.505  172.455  
50%       9.226     8.666     9.529  189.798  
75%       9.972     9.727    10.194  242.433  
max      12.254    12.001    12.463  549.310

Code

# Summary statistics for the Spanish regions in 2021, rounded to 3 decimals
spain_2021 = df2021[df2021["country"] == "Spain"]
spain_2021.describe().round(decimals=3)

         sgdi    shdi   shdif   shdim  healthindex  healthindexf  \
count  19.000  19.000  19.000  19.000       19.000        19.000   
mean    0.985   0.894   0.886   0.900        0.965         0.970   
std     0.004   0.026   0.024   0.027        0.017         0.017   
min     0.975   0.853   0.849   0.856        0.930         0.931   
25%     0.984   0.872   0.865   0.878        0.957         0.966   
50%     0.985   0.900   0.890   0.905        0.968         0.972   
75%     0.987   0.911   0.903   0.918        0.975         0.980   
max     0.996   0.940   0.927   0.951        0.997         0.996   

       healthindexm  incindex  incindexf  incindexm  edindex  edindexf  \
count        19.000    19.000     19.000     19.000   19.000    19.000   
mean          0.961     0.889      0.858      0.916    0.834     0.837   
std           0.016     0.031      0.030      0.031    0.037     0.033   
min           0.929     0.849      0.819      0.875    0.761     0.766   
25%           0.951     0.863      0.833      0.890    0.798     0.808   
50%           0.963     0.883      0.852      0.910    0.852     0.850   
75%           0.970     0.911      0.879      0.938    0.858     0.860   
max           0.993     0.947      0.915      0.974    0.881     0.876   

       edindexm    esch   eschf   eschm    msch   mschf   mschm  lifexp  \
count    19.000  19.000  19.000  19.000  19.000  19.000  19.000  19.000   
mean      0.827  17.415  17.605  17.097  10.498  10.442  10.562  82.744   
std       0.040   0.823   0.733   0.896   0.570   0.540   0.604   1.102   
min       0.757  15.127  15.253  14.988   9.462   9.399   9.548  80.461   
25%       0.787  17.038  17.402  16.614  10.029   9.986  10.160  82.190   
50%       0.840  17.773  18.000  17.341  10.552  10.513  10.548  82.932   
75%       0.853  18.000  18.000  17.820  10.898  10.805  10.955  83.376   
max       0.888  18.000  18.000  18.000  11.444  11.290  11.634  84.810   

       lifexpf  lifexpm       gnic      gnicf      gnicm   lgnic  lgnicf  \
count   19.000   19.000     19.000     19.000     19.000  19.000  19.000   
mean    85.525   79.993  36767.182  29934.452  43874.659  10.492  10.287   
std      1.120    1.051   7792.902   6216.995   9460.374   0.204   0.201   
min     82.999   77.891  27582.996  22592.294  32745.203  10.225  10.025   
25%     85.275   79.324  30417.830  24866.672  36169.910  10.323  10.121   
50%     85.670   80.065  34612.370  28224.467  41246.942  10.452  10.248   
75%     86.214   80.560  41466.415  33694.492  49564.842  10.632  10.425   
max     87.253   82.042  52773.822  42681.253  63336.277  10.874  10.662   

       lgnicm  pop  
count  19.000  0.0  
mean   10.668  NaN  
std     0.208  NaN  
min    10.397  NaN  
25%    10.496  NaN  
50%    10.627  NaN  
75%    10.811  NaN  
max    11.056  NaN

Code

# Regional differences over 1990-2021 (box plots)
# Comparison across continents: one horizontal box per continent,
# animated with one frame per year.
continent_labels = {"continent": "Continente", "shdi": "SHDI"}
px.box(
    df1,
    x="shdi",
    y="continent",
    color="continent",
    range_x=[0, 1],
    animation_frame="year",
    hover_name="region",
    hover_data=["country"],
    labels=continent_labels,
)

Code

# Comparison between countries: Spain, Portugal and France,
# one horizontal box per country and one animation frame per year.
selected_countries = ["Spain", "Portugal", "France"]
country_subset = df1[df1["country"].isin(selected_countries)]
country_labels = {"country": "País", "shdi": "SHDI"}
px.box(
    country_subset,
    x="shdi",
    y="country",
    color="country",
    range_x=[0.70, 1],
    animation_frame="year",
    hover_name="region",
    hover_data=["country"],
    labels=country_labels,
)

Code

# Regional evolution within one country, 1990-2021 (time series)
# One line per Spanish region, SHDI on a logarithmic axis.
spain_regions = df1[df1["country"] == "Spain"]
axis_labels = {"shdi": "SHDI", "year": "Año"}
fig = px.line(
    spain_regions,
    x="year",
    y="shdi",
    color="region",
    log_y=True,
    hover_name="country",
    hover_data=["country"],
    labels=axis_labels,
    facet_col="country",
    facet_col_wrap=2,
    facet_row_spacing=0.01,
    height=500,
)
# Hide the legend (one entry per region)
fig.update_layout(showlegend=False)

Code

# Human development vs. per-capita income, 1990-2021 (scatter plots)
# One marker per region, coloured by continent, with an OLS trend line per
# continent and one animation frame per year.
scatter_labels = {
    "country": "País",
    "continent": "Continente",
    "lgnic": "Renta nacional bruta per capita (log)",
    "shdi": "SHDI",
}
px.scatter(
    df1,
    x="lgnic",
    y="shdi",
    color="continent",
    range_x=[5, 13],
    range_y=[0, 1],
    # size="pop", size_max=60,  # marker sizing by population left disabled
    trendline="ols",
    animation_frame="year",
    hover_name="region",
    hover_data=["country"],
    labels=scatter_labels,
)

Code

# Exploratory spatial data analysis (ESDA)
# Map of the spatial distribution of the SHDI in 2021
# rename() returns a new DataFrame, so df2021 itself is left untouched while
# the key column is aligned with the map layer (GDLCODE -> GDLcode).
df2021_2 = df2021.rename(columns={"GDLCODE": "GDLcode"})
# Left merge: every map region is kept, with or without 2021 indicators.
gdf2021 = map.merge(df2021_2, how="left", on="GDLcode")
gdf2021.columns

Index(['GDLcode', 'constant', 'iso_code_x', 'country_x', 'region_x',
       'geometry', 'iso_code_y', 'country_y', 'year', 'level', 'region_y',
       'continent', 'sgdi', 'shdi', 'shdif', 'shdim', 'healthindex',
       'healthindexf', 'healthindexm', 'incindex', 'incindexf', 'incindexm',
       'edindex', 'edindexf', 'edindexm', 'esch', 'eschf', 'eschm', 'msch',
       'mschf', 'mschm', 'lifexp', 'lifexpf', 'lifexpm', 'gnic', 'gnicf',
       'gnicm', 'lgnic', 'lgnicf', 'lgnicm', 'pop'],
      dtype='object')

Code

# Display the merged GeoDataFrame (map geometries plus 2021 indicators)
gdf2021

      GDLcode constant iso_code_x    country_x  \
0     AFGr101    World        AFG  Afghanistan   
1     AFGr102    World        AFG  Afghanistan   
2     AFGr103    World        AFG  Afghanistan   
3     AFGr104    World        AFG  Afghanistan   
4     AFGr105    World        AFG  Afghanistan   
...       ...      ...        ...          ...   
1740  ZWEr106    World        ZWE     Zimbabwe   
1741  ZWEr107    World        ZWE     Zimbabwe   
1742  ZWEr108    World        ZWE     Zimbabwe   
1743  ZWEr109    World        ZWE     Zimbabwe   
1744  ZWEr110    World        ZWE     Zimbabwe   

                                               region_x  \
0     Central (Kabul Wardak Kapisa Logar Parwan Panj...   
1                   Central Highlands (Bamyan Daikundi)   
2              East (Nangarhar Kunar Laghman Nooristan)   
3       North (Samangan Sar-e-Pul Balkh Jawzjan Faryab)   
4         North East (Baghlan Takhar Badakhshan Kunduz)   
...                                                 ...   
1740                                 Matebeleland South   
1741                                           Midlands   
1742                                           Masvingo   
1743                                             Harare   
1744                                           Bulawayo   

                                               geometry iso_code_y  \
0     MULTIPOLYGON (((67.61506 34.23957, 67.59259 34...        AFG   
1     MULTIPOLYGON (((65.23611 33.37125, 65.25839 33...        AFG   
2     MULTIPOLYGON (((69.92137 34.02534, 69.90879 34...        AFG   
3     MULTIPOLYGON (((66.38873 34.93133, 66.33110 34...        AFG   
4     MULTIPOLYGON (((67.39591 35.43840, 67.45535 35...        AFG   
...                                                 ...        ...   
1740  MULTIPOLYGON (((29.36831 -22.19781, 29.33401 -...        ZWE   
1741  MULTIPOLYGON (((29.22650 -19.48012, 29.21404 -...        ZWE   
1742  MULTIPOLYGON (((31.06733 -22.34189, 31.08549 -...        ZWE   
1743  MULTIPOLYGON (((31.08852 -17.66625, 31.11562 -...        ZWE   
1744  MULTIPOLYGON (((28.61305 -20.23587, 28.60440 -...        ZWE   

        country_y  year   level  \
0     Afghanistan  2021  Subnat   
1     Afghanistan  2021  Subnat   
2     Afghanistan  2021  Subnat   
3     Afghanistan  2021  Subnat   
4     Afghanistan  2021  Subnat   
...           ...   ...     ...   
1740     Zimbabwe  2021  Subnat   
1741     Zimbabwe  2021  Subnat   
1742     Zimbabwe  2021  Subnat   
1743     Zimbabwe  2021  Subnat   
1744     Zimbabwe  2021  Subnat   

                                               region_y     continent   sgdi  \
0     Central (Kabul Wardak Kapisa Logar Parwan Panj...  Asia/Pacific  0.734   
1                   Central Highlands (Bamyan Daikundi)  Asia/Pacific  0.704   
2              East (Nangarhar Kunar Laghman Nooristan)  Asia/Pacific  0.583   
3       North (Samangan Sar-e-Pul Balkh Jawzjan Faryab)  Asia/Pacific  0.749   
4         North East (Baghlan Takhar Badakhshan Kunduz)  Asia/Pacific  0.667   
...                                                 ...           ...    ...   
1740                                 Matebeleland South        Africa  0.985   
1741                                           Midlands        Africa  0.958   
1742                                           Masvingo        Africa  0.962   
1743                                             Harare        Africa  0.962   
1744                                           Bulawayo        Africa  0.985   

       shdi  shdif  shdim  healthindex  healthindexf  healthindexm  incindex  \
0     0.550  0.444  0.605        0.675         0.694         0.664     0.471   
1     0.472  0.368  0.522        0.653         0.667         0.643     0.396   
2     0.459  0.310  0.532        0.638         0.648         0.630     0.394   
3     0.497  0.405  0.541        0.628         0.636         0.620     0.464   
4     0.444  0.332  0.498        0.623         0.629         0.615     0.416   
...     ...    ...    ...          ...           ...           ...       ...   
1740  0.585  0.581  0.590        0.665         0.681         0.652     0.530   
1741  0.590  0.577  0.602        0.593         0.595         0.586     0.544   
1742  0.588  0.578  0.601        0.650         0.664         0.639     0.520   
1743  0.665  0.652  0.678        0.619         0.627         0.610     0.648   
1744  0.693  0.689  0.699        0.656         0.670         0.644     0.688   

      incindexf  incindexm  edindex  edindexf  edindexm    esch   eschf  \
0         0.280      0.553    0.524     0.451     0.604  12.898  10.399   
1         0.217      0.472    0.408     0.343     0.468  12.447  10.958   
2         0.215      0.470    0.385     0.214     0.510   9.858   6.043   
3         0.274      0.546    0.420     0.382     0.467  11.456  10.004   
4         0.234      0.494    0.338     0.249     0.407   9.288   7.202   
...         ...        ...      ...       ...       ...     ...     ...   
1740      0.508      0.551    0.566     0.566     0.570  10.989  11.145   
1741      0.522      0.566    0.635     0.618     0.659  12.668  12.575   
1742      0.498      0.541    0.602     0.584     0.628  12.058  12.013   
1743      0.624      0.672    0.732     0.707     0.759  13.156  12.470   
1744      0.663      0.712    0.739     0.735     0.746  14.115  14.173   

       eschm    msch   mschf   mschm  lifexp  lifexpf  lifexpm      gnic  \
0     15.407   4.975   4.851   5.281  63.843   67.633   60.672  2264.832   
1     14.155   1.857   1.168   2.256  62.415   65.829   59.326  1374.650   
2     13.270   3.325   1.397   4.232  61.471   64.627   58.426  1355.298   
3     12.857   3.058   3.124   3.305  60.843   63.825   57.822  2159.844   
4     11.235   2.390   1.473   2.842  60.500   63.384   57.490  1571.877   
...      ...     ...     ...     ...     ...      ...      ...       ...   
1740  10.863   7.834   7.697   8.044  63.246   66.784   59.907  3339.052   
1741  12.858   8.492   8.073   9.045  58.544   61.193   55.566  3670.709   
1742  12.184   8.013   7.504   8.685  62.276   65.641   59.025  3127.485   
1743  13.981  10.993  10.825  11.114  60.261   63.250   57.171  7314.452   
1744  14.218  10.405  10.246  10.541  62.628   66.056   59.346  9516.362   

         gnicf      gnicm  lgnic  lgnicf  lgnicm  pop  
0      638.644   3894.162  7.725   6.459   8.267  NaN  
1      420.676   2282.217  7.226   6.042   7.733  NaN  
2      415.719   2247.852  7.212   6.030   7.718  NaN  
3      613.795   3701.300  7.678   6.420   8.216  NaN  
4      470.580   2634.320  7.360   6.154   7.876  NaN  
...        ...        ...    ...     ...     ...  ...  
1740  2886.675   3844.591  8.113   7.968   8.254  NaN  
1741  3168.011   4233.422  8.208   8.061   8.351  NaN  
1742  2706.948   3596.899  8.048   7.904   8.188  NaN  
1743  6235.132   8537.414  8.898   8.738   9.052  NaN  
1744  8073.914  11158.386  9.161   8.996   9.320  NaN  

[1745 rows x 41 columns]

Code

# Choropleth of the 2021 SHDI for every region in the merged layer
fig, ax = plt.subplots()
gdf2021.plot(ax=ax, column="shdi", legend=True)
ax.set_title('Distribución espacial del SHDI en 2021')
fig.tight_layout()
plt.show()

Code

# Spain's SHDI in 2021
# Keep the Spanish regions and renumber them 0..n-1 so they can be addressed
# by position afterwards.
is_spain = gdf2021["country_x"] == "Spain"
gdf2021Spain = gdf2021[is_spain].reset_index(drop=True)
fig, ax = plt.subplots()
gdf2021Spain.plot(ax=ax, column="shdi", legend=True)
ax.set_title('Distribución del SHDI español en 2021')
fig.tight_layout()
plt.show()

Code

# Plot with the peninsular regions only
# Rows labelled 13 and 16 are removed.
# NOTE(review): presumably the island regions; the labels rely on the
# reset index of gdf2021Spain — confirm against the region names.
gdf2021Spain_2 = gdf2021Spain.drop(index=[13, 16])
fig, ax = plt.subplots()
gdf2021Spain_2.plot(ax=ax, column="shdi", legend=True)
ax.set_title('Distribución del SHDI peninsular español en 2021')
fig.tight_layout()
plt.show()