Aplicación 1.4: Datos georreferenciados
Datos espacio-temporales: Emisiones de CO2 al nivel mundial
En esta aplicación se analizarán los datos de la publicación de 2022 del Global Carbon Project sobre emisiones de gases de efecto invernadero a nivel mundial y nacional, con el objetivo de identificar los países más contaminantes, tanto en términos de emisiones totales de CO2 como en términos per cápita. Los datos y la información sobre los mismos pueden encontrarse en la siguiente dirección: https://zenodo.org/record/7215364.
Igual que en la aplicación anterior, previamente al análisis estadístico deseado se usarán varias técnicas de data wrangling para “preparar” los datos para dicho estudio antes de su representación gráfica.
Code
# Lectura de librerías
library(tidyverse)
library(plotly)
library(sf)
library(geojsonsf)
library(leaflet)
library(viridis)
library(RColorBrewer)
# Read the emissions table
df <- read_csv("data/GCPdbase.csv")

# Replace "USA" with "United States of America"
df$Country <- ifelse(
  df$Country == "USA",
  "United States of America",
  df$Country
)

# Drop the "Global" and "International Transport" records from the list of
# countries (variable Country)
df <- df %>%
  filter(!Country %in% c("Global", "International Transport"))

# Restrict the time span to 1970-2021
df1 <- df %>%
  filter(Year >= 1970, Year <= 2021)

# Top 10 countries by mean total emissions over 1970-2021.
# slice_max() replaces the superseded top_n(); ties are kept by default.
top_10_emisiones_total <- df1 %>%
  group_by(Country) %>%
  summarise(mean_total = mean(Total)) %>%
  slice_max(mean_total, n = 10)

# Print the ranking in descending order
arrange(top_10_emisiones_total, desc(mean_total))
# A tibble: 10 × 2
Country mean_total
<chr> <dbl>
1 United States of America 5229.
2 China 4602.
3 Russia 1806.
4 Japan 1100.
5 India 1018.
6 Germany 933.
7 United Kingdom 544.
8 Canada 492.
9 Ukraine 448.
10 France 411.
Code
# Stacked area chart: yearly total emissions of the top-10 countries
datos_top_10_emisiones_total <- df1 %>%
  filter(Country %in% top_10_emisiones_total$Country)

ggplot(datos_top_10_emisiones_total,
       aes(x = Year, y = Total, fill = Country)) +
  geom_area()
Code
# Restrict the data to the year 2021
df2 <- df %>%
  filter(Year == 2021)

# Top 5 emitters in 2021 and their share of the 2021 total.
# Percent is the rounded share; Lab_percent is its printable label.
# slice_max() replaces the superseded top_n(); ties are kept by default.
top5_emisiones_2021 <- df2 %>%
  select(Country, Total) %>%
  mutate(Percent = round(Total / sum(Total), digits = 3)) %>%
  slice_max(Percent, n = 5) %>%
  arrange(desc(Percent)) %>%
  mutate(Lab_percent = scales::percent(Percent))

top5_emisiones_2021
# A tibble: 5 × 4
Country Total Percent Lab_percent
<chr> <dbl> <dbl> <chr>
1 China 11472. 0.318 31.8%
2 United States of America 5007. 0.139 13.9%
3 India 2710. 0.075 7.5%
4 Russia 1756. 0.049 4.9%
5 Japan 1067. 0.03 3.0%
Code
# Pie chart: a single stacked bar of the shares, wrapped into polar
# coordinates, with each slice labelled at its midpoint.
top5_emisiones_2021 %>%
  ggplot(mapping = aes(x = "", y = Percent, fill = Country)) +
  geom_col() +
  geom_text(
    mapping = aes(label = Lab_percent),
    position = position_stack(vjust = 0.5)
  ) +
  coord_polar(theta = "y")
Code
# Top 10 countries by mean per-capita emissions over 1970-2021.
# slice_max() replaces the superseded top_n(); ties are kept by default.
top_10_emisiones_per_capita <- df1 %>%
  group_by(Country) %>%
  summarise(mean_per_capita = mean(`Per Capita`)) %>%
  slice_max(mean_per_capita, n = 10)

# Print the ranking in descending order
arrange(top_10_emisiones_per_capita, desc(mean_per_capita))
# A tibble: 10 × 2
Country mean_per_capita
<chr> <dbl>
1 Sint Maarten (Dutch part) 52.3
2 Qatar 47.9
3 Curaçao 40.4
4 United Arab Emirates 32.9
5 Luxembourg 25.2
6 Brunei Darussalam 24.3
7 Kuwait 23.5
8 Bahrain 22.8
9 United States of America 19.8
10 Trinidad and Tobago 19.5
Code
# Line chart: yearly per-capita emissions of the top-10 countries
datos_top_10_emisiones_per_capita <- df1 %>%
  filter(Country %in% top_10_emisiones_per_capita$Country)

ggplot(datos_top_10_emisiones_per_capita,
       aes(x = Year, y = `Per Capita`, group = Country, color = Country)) +
  geom_line()
Code
# World map of mean per-capita emissions: compute one mean per country
datos_emisiones_per_capita_medias <- df1 %>%
  group_by(Country) %>%
  summarise(CO2pc = mean(`Per Capita`))

# Read the country polygons as an sf object.
# NOTE(review): the name `map` masks purrr::map() once tidyverse is attached;
# it is kept because later chunks refer to it.
map <- geojson_sf("data/GCPmap.geojson")
class(map)
[1] "sf" "data.frame"
Code
# Show the compact structure of the spatial object (columns and geometry)
str(map)
Classes 'sf' and 'data.frame': 177 obs. of 2 variables:
$ name : chr "Afghanistan" "Angola" "Albania" "United Arab Emirates" ...
$ geometry:sfc_GEOMETRY of length 177; first list element: List of 1
..$ : num [1:69, 1:2] 61.2 60.8 60.5 61 60.5 ...
..- attr(*, "class")= chr [1:3] "XY" "POLYGON" "sfg"
- attr(*, "sf_column")= chr "geometry"
Code
# Rename the key so it matches the `name` column of the polygons
datos_map <- datos_emisiones_per_capita_medias %>%
  rename(name = Country)

# Keep only the countries present in both the polygons and the data
datos_geo <- inner_join(map, datos_map, by = "name")

# Static choropleth of mean per-capita emissions
ggplot(datos_geo) +
  geom_sf(aes(fill = CO2pc)) +
  theme_bw() +
  labs(title = "Emisiones de CO2 per capita medias") +
  scale_fill_viridis(option = "magma")
Code
# Interactive map
# R leaflet library:
# https://rstudio.github.io/leaflet/
# Colour palettes by variable type:
# palNumeric <- colorNumeric("viridis", domain = shape$vcontinua)
# palBin <- colorBin("magma", domain = shape$vdiscreta, bins = 4)
# palQuantile <- colorQuantile("Spectral", domain = shape$numerica, n=4)
# palFactor <- colorFactor("RdBu", domain = shape$vcategorica)
palNumeric <- colorNumeric("YlGnBu", domain = datos_geo$CO2pc)

# The formulas below are evaluated against the data passed to leaflet(), so
# columns are referenced directly instead of through `datos_geo$`.
leaflet(datos_geo) %>%
  setView(0, 0, zoom = 2) %>%
  addTiles() %>%
  addProviderTiles("CartoDB.Positron") %>%
  addPolygons(
    color = "#444444",
    weight = 1,
    smoothFactor = 0.5,
    opacity = 1.0,
    fillOpacity = 0.7,
    fillColor = ~palNumeric(CO2pc),
    layerId = ~name,
    highlightOptions = highlightOptions(
      color = "white",
      weight = 1,
      bringToFront = TRUE
    )
  )
Code
# Load libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.style.use('ggplot')
import plotly.graph_objects as go
import geopandas as gpd
import folium

# Read the emissions table
df = pd.read_csv("data/GCPdbase.csv")

# Replace "USA" with "United States of America"
df["Country"] = df["Country"].replace("USA", "United States of America")

# Drop the "Global" and "International Transport" records from the list of
# countries (variable Country)
df = df[~df["Country"].isin(["Global", "International Transport"])]

# Restrict the time span to 1970-2021
df1 = df[(df["Year"] >= 1970) & (df["Year"] <= 2021)]

# Top 10 countries by mean total emissions (index of country names)
top_10_emisiones_total = df1.groupby("Country")["Total"].mean().nlargest(10).index
top_10_emisiones_total
Index(['United States of America', 'China', 'Russia', 'Japan', 'India',
'Germany', 'United Kingdom', 'Canada', 'Ukraine', 'France'],
dtype='object', name='Country')
Code
# Keep only the rows of the top-10 countries
datos_top_10_emisiones_total = df1[df1["Country"].isin(top_10_emisiones_total)]

# Wide table: one row per year, one column per country
pivot_data_total = datos_top_10_emisiones_total.pivot_table(
    values="Total",
    index="Year",
    columns="Country",
    aggfunc="sum",
    fill_value=0)

# Area chart
fig_area = go.Figure(layout=go.Layout(
    title="Top 10 por emisiones de CO2 totales (1970-2021)",
    xaxis_title="Años",
    yaxis_title="CO2"))

# Add one stacked trace per country; stackgroup="one" puts them all in the
# same stack and mode="none" draws the filled area without lines or markers.
# NOTE(review): the figure is built but not displayed in this chunk.
for country in pivot_data_total.columns:
    fig_area.add_trace(go.Scatter(x=pivot_data_total.index,
                                  y=pivot_data_total[country],
                                  name=country,
                                  mode="none",
                                  stackgroup="one"))
Code
# Restrict the data to 2021, keeping only the columns needed for the ranking.
# .copy() yields an independent frame, so adding "Percent" below does not
# write into a slice of df (avoids pandas' SettingWithCopyWarning).
df2 = df.loc[df["Year"] == 2021, ["Country", "Total"]].copy()

# Share of each country in the 2021 total
df2["Percent"] = df2["Total"] / df2["Total"].sum()

# Top 5 emitters in 2021
top5_emisiones_2021 = df2.nlargest(5, "Percent")
top5_emisiones_2021
Country Total Percent
10879 China 11472.369171 0.317776
60111 United States of America 5007.335889 0.138699
25567 India 2709.683625 0.075056
47327 Russia 1755.547390 0.048627
27743 Japan 1067.398435 0.029566
Code
# Pie chart of the top-5 shares.
# normalize=False draws the fractions as given instead of rescaling them to
# sum to 1, so the five wedges do not fill the whole circle.
fig, ax = plt.subplots()
ax.pie(top5_emisiones_2021["Percent"],
       labels=top5_emisiones_2021["Country"],
       autopct='%1.1f%%',
       normalize=False)
([<matplotlib.patches.Wedge object at 0x12f659a50>, <matplotlib.patches.Wedge object at 0x12f6591e0>, <matplotlib.patches.Wedge object at 0x12f659750>, <matplotlib.patches.Wedge object at 0x12f659990>, <matplotlib.patches.Wedge object at 0x12f65a8c0>], [Text(0.5958851634949567, 0.9246193118935971, 'China'), Text(-0.8347624034749628, 0.716360055938844, 'United States of America'), Text(-1.099219206463291, 0.04143834145103926, 'India'), Text(-1.0329766146381076, -0.37809960805427256, 'Russia'), Text(-0.9100161313664236, -0.61795682749921, 'Japan')], [Text(0.32502827099724907, 0.5043378064874166, '31.8%'), Text(-0.45532494734997964, 0.3907418486939149, '13.9%'), Text(-0.5995741126163405, 0.022602731700566866, '7.5%'), Text(-0.5634417898026042, -0.20623614984778502, '4.9%'), Text(-0.49637243529077646, -0.3370673604541145, '3.0%')])
Code
# Render the current matplotlib figure
plt.show()
Code
# Mean per-capita emissions by country
emisiones_per_capita_medias = df1.groupby("Country").agg({"Per Capita": "mean"})

# Top 10 countries by mean per-capita emissions (index of country names)
top_10_emisiones_per_capita = emisiones_per_capita_medias.nlargest(
    10, "Per Capita").index
top_10_emisiones_per_capita
Index(['Sint Maarten (Dutch part)', 'Qatar', 'Curaçao', 'United Arab Emirates',
'Luxembourg', 'Brunei Darussalam', 'Kuwait', 'Bahrain',
'United States of America', 'Trinidad and Tobago'],
dtype='object', name='Country')
Code
# Line chart: keep only the rows of the top-10 per-capita countries
datos_top_10_emisiones_per_capita = df1[df1["Country"].isin(top_10_emisiones_per_capita)]

# Wide table: one row per year, one column per country
pivot_data_per_capita = datos_top_10_emisiones_per_capita.pivot_table(
    values="Per Capita",
    index="Year",
    columns="Country",
    aggfunc="mean",
    fill_value=0)

fig_line = go.Figure(
    layout=go.Layout(
        title="Top 10 por emisiones de CO2 per capita (1970-2021)",
        xaxis_title="Años",
        yaxis_title="CO2pc"))

# One line trace per country.
# NOTE(review): the figure is built but not displayed in this chunk.
for country in pivot_data_per_capita.columns:
    fig_line.add_trace(
        go.Scatter(x=pivot_data_per_capita.index,
                   y=pivot_data_per_capita[country],
                   name=country, mode="lines"))
Code
# World map of mean per-capita emissions: one mean per country
datos_map = df1.groupby("Country")["Per Capita"].mean().reset_index()

# Read the country polygons.
# NOTE(review): the name `map` shadows the Python builtin; it is kept because
# later chunks refer to it.
map = gpd.read_file("data/GCPmap.geojson")
map.info()
<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 177 entries, 0 to 176
Data columns (total 3 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 id 177 non-null object
1 name 177 non-null object
2 geometry 177 non-null geometry
dtypes: geometry(1), object(2)
memory usage: 4.3+ KB
Code
# Work on a copy and rename the columns: "Country" becomes the join key
# "name", and "Per Capita" becomes the plotted variable "CO2pc".
datos_map_2 = datos_map.copy()
datos_map_2 = datos_map_2.rename(columns={"Country": "name",
                                          "Per Capita": "CO2pc"})

# Left join keeps every polygon; countries without data get NaN in CO2pc
datos_geo = map.merge(datos_map_2, on='name', how='left')

# Static choropleth
fig, ax = plt.subplots(1, 1)
datos_geo.plot(column="CO2pc",
               legend=True,
               cmap='magma',
               ax=ax)
plt.title('Emisiones de CO2 per capita medias')
plt.tight_layout()
plt.show()
Code
# Interactive map
# Python folium library:
# https://python-visualization.github.io/folium/
world_geo = r'data/GCPmap.geojson'

# Mean per-capita emissions keyed by ISO alpha-3 code, which is matched
# against the `id` of each GeoJSON feature below
datos_emisiones_per_capita_medias = df1.groupby(
    "ISO 3166-1 alpha-3")["Per Capita"].mean().reset_index()

mapa_emisiones = folium.Map(location=[0, 0], zoom_start=2)

folium.Choropleth(
    geo_data=world_geo,
    name="choropleth",
    data=datos_emisiones_per_capita_medias,
    columns=["ISO 3166-1 alpha-3", "Per Capita"],
    key_on="feature.id",
    fill_color="YlGnBu",
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name="Emisiones per capita medias"
).add_to(mapa_emisiones)
<folium.features.Choropleth object at 0x132d121d0>
Code
# Add a layer-control widget to the map
folium.LayerControl().add_to(mapa_emisiones)
<folium.map.LayerControl object at 0x132d126b0>
Code
# Display the map (last expression of the chunk)
mapa_emisiones
Datos espaciales: Desarrollo humano al nivel mundial
En esta aplicación se analizarán los datos de la publicación del Global Data Lab sobre las variaciones habidas en el desarrollo humano, la salud, la educación y la renta dentro de más de 160 países (‘within’) y entre ellos (‘between’) desde 1990 hasta 2021.
Puede encontrarse toda la información detallada sobre el índice de desarrollo humano a nivel subnacional (SHDI, por sus siglas en inglés) en la siguiente dirección: https://www.nature.com/articles/sdata201938; también se puede acceder a los datos originales desde esta otra página: https://globaldatalab.org/shdi/download_files/.
Respecto a las librerías especializadas en gráficas dinámicas (plotly) y en análisis exploratorio de datos espaciales (sf y geopandas), su información se encuentra en las siguientes páginas web:
Librerías gráficas:
- plotly: https://plotly.com/graphing-libraries/
Librerías para el análisis gráfico de datos espaciales:
- R -> sf: https://r-spatial.github.io/sf/
- Python -> geopandas: https://geopandas.org/
De nuevo se usarán varias técnicas de data wrangling para “preparar” los datos antes del análisis gráfico y estadístico.
Code
# Load libraries
library(tidyverse)
library(plotly)
library(viridis)
library(sf)
library(geojsonsf)

# Read the SHDI table and the subnational polygons.
# NOTE(review): the name `map` masks purrr::map() once tidyverse is attached;
# it is kept because later chunks refer to it.
df <- read_csv("data/GDLdbase.csv")
map <- geojson_sf("data/GDLmap.geojson")
class(map)
[1] "sf" "data.frame"
Code
# Show the compact structure of the spatial object (columns and geometry)
str(map)
Classes 'sf' and 'data.frame': 1745 obs. of 6 variables:
$ GDLcode : chr "AFGr101" "AFGr102" "AFGr103" "AFGr104" ...
$ constant: chr "World" "World" "World" "World" ...
$ iso_code: chr "AFG" "AFG" "AFG" "AFG" ...
$ country : chr "Afghanistan" "Afghanistan" "Afghanistan" "Afghanistan" ...
$ region : chr "Central (Kabul Wardak Kapisa Logar Parwan Panjsher)" "Central Highlands (Bamyan Daikundi)" "East (Nangarhar Kunar Laghman Nooristan)" "North (Samangan Sar-e-Pul Balkh Jawzjan Faryab)" ...
$ geometry:sfc_MULTIPOLYGON of length 1745; first list element: List of 1
..$ :List of 1
.. ..$ : num [1:289, 1:2] 67.6 67.6 67.6 67.5 67.5 ...
..- attr(*, "class")= chr [1:3] "XY" "MULTIPOLYGON" "sfg"
- attr(*, "sf_column")= chr "geometry"
Code
# Print the sf summary (geometry type, bounding box, CRS) and first features
print(map)
Simple feature collection with 1745 features and 5 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -180 ymin: -55.98403 xmax: 180 ymax: 83.10833
Geodetic CRS: WGS 84
First 10 features:
GDLcode constant iso_code country
1 AFGr101 World AFG Afghanistan
2 AFGr102 World AFG Afghanistan
3 AFGr103 World AFG Afghanistan
4 AFGr104 World AFG Afghanistan
5 AFGr105 World AFG Afghanistan
6 AFGr106 World AFG Afghanistan
7 AFGr107 World AFG Afghanistan
8 AFGr108 World AFG Afghanistan
9 AGOr201 World AGO Angola
10 AGOr202 World AGO Angola
region
1 Central (Kabul Wardak Kapisa Logar Parwan Panjsher)
2 Central Highlands (Bamyan Daikundi)
3 East (Nangarhar Kunar Laghman Nooristan)
4 North (Samangan Sar-e-Pul Balkh Jawzjan Faryab)
5 North East (Baghlan Takhar Badakhshan Kunduz)
6 South (Uruzgan Helmand Zabul Nimroz Kandahar)
7 South East (Ghazni Paktya Paktika Khost)
8 West (Ghor Herat Badghis Farah)
9 Cabinda
10 Zaire
geometry
1 MULTIPOLYGON (((67.61506 34...
2 MULTIPOLYGON (((65.23611 33...
3 MULTIPOLYGON (((69.92137 34...
4 MULTIPOLYGON (((66.38873 34...
5 MULTIPOLYGON (((67.39591 35...
6 MULTIPOLYGON (((60.89944 29...
7 MULTIPOLYGON (((68.10873 31...
8 MULTIPOLYGON (((61.12394 31...
9 MULTIPOLYGON (((12.21127 -5...
10 MULTIPOLYGON (((13.08792 -7...
Code
# Data preparation
# Drop the national-level observations (variable level -> National)
df1 <- filter(df, level != "National")

# Select the year under analysis, 2021.
# year is converted to character first, so the comparison below is on strings
# and the column stays character in both df1 and df2021.
df1$year <- as.character(df1$year)
df2021 <- filter(df1, year == "2021")
print(df2021)
# A tibble: 1,786 × 37
...1 iso_code country year GDLCODE level region continent sgdi shdi shdif
<dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
1 280 AFG Afghan… 2021 AFGr101 Subn… Centr… Asia/Pac… 0.734 0.55 0.444
2 281 AFG Afghan… 2021 AFGr102 Subn… Centr… Asia/Pac… 0.704 0.472 0.368
3 282 AFG Afghan… 2021 AFGr103 Subn… East … Asia/Pac… 0.583 0.459 0.31
4 283 AFG Afghan… 2021 AFGr104 Subn… North… Asia/Pac… 0.749 0.497 0.405
5 284 AFG Afghan… 2021 AFGr105 Subn… North… Asia/Pac… 0.667 0.444 0.332
6 285 AFG Afghan… 2021 AFGr106 Subn… South… Asia/Pac… 0.563 0.407 0.269
7 286 AFG Afghan… 2021 AFGr107 Subn… South… Asia/Pac… 0.567 0.476 0.315
8 287 AFG Afghan… 2021 AFGr108 Subn… West … Asia/Pac… 0.692 0.447 0.344
9 707 AGO Angola 2021 AGOr201 Subn… Cabin… Africa 0.938 0.681 0.66
10 708 AGO Angola 2021 AGOr202 Subn… Zaire Africa 0.892 0.615 0.581
# ℹ 1,776 more rows
# ℹ 26 more variables: shdim <dbl>, healthindex <dbl>, healthindexf <dbl>,
# healthindexm <dbl>, incindex <dbl>, incindexf <dbl>, incindexm <dbl>,
# edindex <dbl>, edindexf <dbl>, edindexm <dbl>, esch <dbl>, eschf <dbl>,
# eschm <dbl>, msch <dbl>, mschf <dbl>, mschm <dbl>, lifexp <dbl>,
# lifexpf <dbl>, lifexpm <dbl>, gnic <dbl>, gnicf <dbl>, gnicm <dbl>,
# lgnic <dbl>, lgnicf <dbl>, lgnicm <dbl>, pop <dbl>
Code
# List the distinct continent labels present in the 2021 data
unique(df2021$continent)
[1] "Asia/Pacific" "Africa" "Europe" "America"
Code
# List the distinct country names present in the 2021 data
unique(df2021$country)
[1] "Afghanistan" "Angola"
[3] "Albania" "Argentina urban"
[5] "Armenia" "Australia"
[7] "Austria" "Azerbaijan"
[9] "Burundi" "Belgium"
[11] "Benin" "Burkina Faso"
[13] "Bangladesh" "Bulgaria"
[15] "Bosnia and Herzegovina" "Belarus"
[17] "Belize" "Bolivia"
[19] "Brazil" "Barbados"
[21] "Bhutan" "Botswana"
[23] "Central African Republic CAR" "Canada"
[25] "Switzerland" "Chili"
[27] "China" "Cote d'Ivoire"
[29] "Cameroon" "Congo Democratic Republic"
[31] "Congo Brazzaville" "Colombia"
[33] "Comoros" "Cape Verde"
[35] "Costa Rica" "Cuba"
[37] "Czech Republic" "Germany"
[39] "Djibouti" "Denmark"
[41] "Dominican Republic" "Algeria"
[43] "Ecuador" "Egypt"
[45] "Eritrea" "Spain"
[47] "Estonia" "Ethiopia"
[49] "Finland" "Fiji"
[51] "France" "Gabon"
[53] "United Kingdom" "Georgia"
[55] "Ghana" "Guinea"
[57] "Gambia" "Guinea Bissau"
[59] "Equatorial Guinea" "Greece"
[61] "Guatemala" "Guyana"
[63] "Honduras" "Croatia"
[65] "Haiti" "Hungary"
[67] "Indonesia" "India"
[69] "Ireland" "Iran"
[71] "Iraq" "Italy"
[73] "Jamaica" "Jordan"
[75] "Japan" "Kazakhstan"
[77] "Kenya" "Kyrgyzstan"
[79] "Cambodia" "Kiribati"
[81] "South Korea" "Kuwait"
[83] "Lao" "Lebanon"
[85] "Liberia" "Libya"
[87] "Saint Lucia" "Lesotho"
[89] "Lithuania" "Latvia"
[91] "Morocco" "Moldova"
[93] "Madagascar" "Maldives"
[95] "Mexico" "North Macedonia"
[97] "Mali" "Myanmar"
[99] "Monte Negro" "Mongolia"
[101] "Mozambique" "Mauritania"
[103] "Mauritius" "Malawi"
[105] "Malaysia" "Namibia"
[107] "Niger" "Nigeria"
[109] "Nicaragua" "Netherlands"
[111] "Norway" "Nepal"
[113] "New Zealand" "Pakistan"
[115] "Panama" "Peru"
[117] "Philippines" "Papua New Guinea"
[119] "Poland" "Portugal"
[121] "Paraguay" "Palestine"
[123] "Romania" "Russian Federation"
[125] "Rwanda" "Saudi Arabia"
[127] "Sudan" "Senegal"
[129] "Sierra Leone" "El Salvador"
[131] "Somalia" "Serbia"
[133] "South Sudan" "Sao Tome & Principe"
[135] "Suriname" "Slovakia"
[137] "Slovenia" "Sweden"
[139] "Eswatini" "Syria"
[141] "Chad" "Togo"
[143] "Thailand" "Tajikistan"
[145] "Turkmenistan" "Timor Leste"
[147] "Tonga" "Trinidad & Tobago"
[149] "Tunisia" "Turkey"
[151] "Tuvalu" "Tanzania"
[153] "Uganda" "Ukraine"
[155] "Uruguay" "United States"
[157] "Uzbekistan" "Venezuela"
[159] "Vietnam" "Vanuatu"
[161] "Samoa" "Kosovo"
[163] "Yemen" "South Africa"
[165] "Zambia" "Zimbabwe"
Code
# Basic exploratory data analysis (EDA)
# Summary statistics for 2021, after dropping the identifier and label columns
df2021 %>%
  select(-c(...1, iso_code, year, country, GDLCODE, level, region,
            continent)) %>%
  summary()
sgdi shdi shdif shdim
Min. :0.3690 Min. :0.2320 Min. :0.1740 Min. :0.2930
1st Qu.:0.9045 1st Qu.:0.5780 1st Qu.:0.5550 1st Qu.:0.6030
Median :0.9620 Median :0.7070 Median :0.6820 Median :0.7240
Mean :0.9406 Mean :0.6935 Mean :0.6758 Mean :0.7125
3rd Qu.:0.9900 3rd Qu.:0.8110 3rd Qu.:0.8080 3rd Qu.:0.8190
Max. :1.0600 Max. :0.9890 Max. :0.9740 Max. :0.9960
NA's :63 NA's :2 NA's :63 NA's :63
healthindex healthindexf healthindexm incindex
Min. :0.4000 Min. :0.3120 Min. :0.3900 Min. :0.2790
1st Qu.:0.6920 1st Qu.:0.6920 1st Qu.:0.6945 1st Qu.:0.5560
Median :0.7840 Median :0.7980 Median :0.7770 Median :0.6980
Mean :0.7761 Mean :0.7799 Mean :0.7743 Mean :0.6811
3rd Qu.:0.8600 3rd Qu.:0.8720 3rd Qu.:0.8540 3rd Qu.:0.8110
Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
NA's :2 NA's :11 NA's :11
incindexf incindexm edindex edindexf
Min. :0.0380 Min. :0.2910 Min. :0.0610 Min. :0.0630
1st Qu.:0.4945 1st Qu.:0.5890 1st Qu.:0.4973 1st Qu.:0.4808
Median :0.6130 Median :0.7440 Median :0.6620 Median :0.6730
Mean :0.6260 Mean :0.7164 Mean :0.6423 Mean :0.6406
3rd Qu.:0.7740 3rd Qu.:0.8445 3rd Qu.:0.7897 3rd Qu.:0.7983
Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :0.9940
NA's :15 NA's :15 NA's :58
edindexm esch eschf eschm
Min. :0.1100 Min. : 0.369 Min. : 1.395 Min. : 3.332
1st Qu.:0.5270 1st Qu.:11.145 1st Qu.:11.085 1st Qu.:11.363
Median :0.6630 Median :13.303 Median :13.642 Median :13.187
Mean :0.6555 Mean :13.052 Mean :13.261 Mean :13.065
3rd Qu.:0.7873 3rd Qu.:15.343 3rd Qu.:15.706 3rd Qu.:14.998
Max. :0.9930 Max. :18.000 Max. :18.000 Max. :18.000
NA's :58 NA's :36 NA's :36
msch mschf mschm lifexp
Min. : 0.330 Min. : 0.136 Min. : 0.524 Min. :45.98
1st Qu.: 5.627 1st Qu.: 5.136 1st Qu.: 6.250 1st Qu.:64.98
Median : 8.558 Median : 8.521 Median : 8.870 Median :70.99
Mean : 8.393 Mean : 8.124 Mean : 8.745 Mean :70.45
3rd Qu.:11.227 3rd Qu.:11.146 3rd Qu.:11.354 3rd Qu.:75.88
Max. :15.000 Max. :14.981 Max. :14.843 Max. :85.61
NA's :53 NA's :53 NA's :2
lifexpf lifexpm gnic gnicf
Min. :42.78 Min. :42.83 Min. : 632.5 Min. : 128.2
1st Qu.:67.49 1st Qu.:62.64 1st Qu.: 3960.0 1st Qu.: 2637.6
Median :74.38 Median :67.99 Median : 10156.4 Median : 5802.6
Mean :73.19 Mean :67.83 Mean : 16411.9 Mean : 12440.0
3rd Qu.:79.17 3rd Qu.:73.00 3rd Qu.: 21422.5 3rd Qu.: 16763.3
Max. :88.29 Max. :84.53 Max. :209791.9 Max. :162951.2
NA's :11 NA's :11 NA's :15
gnicm lgnic lgnicf lgnicm
Min. : 687.5 Min. : 6.450 Min. : 4.853 Min. : 6.533
1st Qu.: 4939.9 1st Qu.: 8.284 1st Qu.: 7.878 1st Qu.: 8.505
Median : 13755.2 Median : 9.226 Median : 8.666 Median : 9.529
Mean : 20564.4 Mean : 9.116 Mean : 8.750 Mean : 9.353
3rd Qu.: 26757.8 3rd Qu.: 9.973 3rd Qu.: 9.727 3rd Qu.:10.194
Max. :258567.3 Max. :12.254 Max. :12.001 Max. :12.463
NA's :15 NA's :15 NA's :15
pop
Min. :161.1
1st Qu.:172.5
Median :189.8
Mean :247.1
3rd Qu.:242.4
Max. :549.3
NA's :1779
Code
# Summary statistics for Spain in 2021.
# `country` is kept by select() so that it can be filtered on afterwards.
df2021 %>%
  select(-c(...1, iso_code, year, GDLCODE, level, region, continent)) %>%
  filter(country == "Spain") %>%
  summary()
country sgdi shdi shdif
Length:19 Min. :0.9750 Min. :0.8530 Min. :0.8490
Class :character 1st Qu.:0.9835 1st Qu.:0.8725 1st Qu.:0.8655
Mode :character Median :0.9850 Median :0.9000 Median :0.8900
Mean :0.9854 Mean :0.8943 Mean :0.8863
3rd Qu.:0.9870 3rd Qu.:0.9110 3rd Qu.:0.9030
Max. :0.9960 Max. :0.9400 Max. :0.9270
shdim healthindex healthindexf healthindexm
Min. :0.8560 Min. :0.9300 Min. :0.9310 Min. :0.9290
1st Qu.:0.8775 1st Qu.:0.9570 1st Qu.:0.9660 1st Qu.:0.9510
Median :0.9050 Median :0.9680 Median :0.9720 Median :0.9630
Mean :0.8996 Mean :0.9653 Mean :0.9696 Mean :0.9614
3rd Qu.:0.9175 3rd Qu.:0.9750 3rd Qu.:0.9800 3rd Qu.:0.9700
Max. :0.9510 Max. :0.9970 Max. :0.9960 Max. :0.9930
incindex incindexf incindexm edindex
Min. :0.8490 Min. :0.8190 Min. :0.8750 Min. :0.7610
1st Qu.:0.8635 1st Qu.:0.8335 1st Qu.:0.8900 1st Qu.:0.7975
Median :0.8830 Median :0.8520 Median :0.9100 Median :0.8520
Mean :0.8892 Mean :0.8584 Mean :0.9159 Mean :0.8336
3rd Qu.:0.9105 3rd Qu.:0.8790 3rd Qu.:0.9375 3rd Qu.:0.8585
Max. :0.9470 Max. :0.9150 Max. :0.9740 Max. :0.8810
edindexf edindexm esch eschf
Min. :0.7660 Min. :0.7570 Min. :15.13 Min. :15.25
1st Qu.:0.8085 1st Qu.:0.7870 1st Qu.:17.04 1st Qu.:17.40
Median :0.8500 Median :0.8400 Median :17.77 Median :18.00
Mean :0.8371 Mean :0.8271 Mean :17.42 Mean :17.60
3rd Qu.:0.8600 3rd Qu.:0.8535 3rd Qu.:18.00 3rd Qu.:18.00
Max. :0.8760 Max. :0.8880 Max. :18.00 Max. :18.00
eschm msch mschf mschm
Min. :14.99 Min. : 9.462 Min. : 9.399 Min. : 9.548
1st Qu.:16.61 1st Qu.:10.029 1st Qu.: 9.986 1st Qu.:10.159
Median :17.34 Median :10.552 Median :10.513 Median :10.548
Mean :17.10 Mean :10.498 Mean :10.442 Mean :10.562
3rd Qu.:17.82 3rd Qu.:10.898 3rd Qu.:10.805 3rd Qu.:10.955
Max. :18.00 Max. :11.444 Max. :11.290 Max. :11.634
lifexp lifexpf lifexpm gnic
Min. :80.46 Min. :83.00 Min. :77.89 Min. :27583
1st Qu.:82.19 1st Qu.:85.28 1st Qu.:79.32 1st Qu.:30418
Median :82.93 Median :85.67 Median :80.06 Median :34612
Mean :82.74 Mean :85.52 Mean :79.99 Mean :36767
3rd Qu.:83.38 3rd Qu.:86.21 3rd Qu.:80.56 3rd Qu.:41466
Max. :84.81 Max. :87.25 Max. :82.04 Max. :52774
gnicf gnicm lgnic lgnicf
Min. :22592 Min. :32745 Min. :10.22 Min. :10.03
1st Qu.:24867 1st Qu.:36170 1st Qu.:10.32 1st Qu.:10.12
Median :28224 Median :41247 Median :10.45 Median :10.25
Mean :29934 Mean :43875 Mean :10.49 Mean :10.29
3rd Qu.:33694 3rd Qu.:49565 3rd Qu.:10.63 3rd Qu.:10.43
Max. :42681 Max. :63336 Max. :10.87 Max. :10.66
lgnicm pop
Min. :10.40 Min. : NA
1st Qu.:10.50 1st Qu.: NA
Median :10.63 Median : NA
Mean :10.67 Mean :NaN
3rd Qu.:10.81 3rd Qu.: NA
Max. :11.06 Max. : NA
NA's :19
Code
# Regional differences over 1990-2021 (box plots)
# Comparison between continents: one box per continent, animated by year
df1 %>%
  plot_ly(
    x = ~shdi,
    y = ~continent,
    frame = ~year,
    color = ~continent,
    type = "box"
  ) %>%
  layout(
    xaxis = list(title = "SHDI"),
    yaxis = list(title = "Continente")
  )
Code
# Comparison between selected countries, 1990-2021: one box per country,
# animated by year
df1 %>%
  filter(country %in% c("Spain", "Portugal", "France")) %>%
  plot_ly(
    x = ~shdi,
    y = ~country,
    frame = ~year,
    color = ~country,
    type = "box"
  ) %>%
  layout(
    xaxis = list(title = "SHDI"),
    yaxis = list(title = "País")
  )
Code
# Regional evolution within one country, 1990-2021 (time series).
# filter() is used instead of df1[df1$country == "Spain", ] because a base
# logical mask containing NA yields all-NA rows, whereas filter() drops them.
df1 %>%
  filter(country == "Spain") %>%
  plot_ly(
    x = ~year,
    y = ~shdi,
    color = ~region,
    type = "scatter",
    mode = "lines"
  ) %>%
  layout(
    xaxis = list(title = "Año"),
    yaxis = list(title = "SHDI", type = "log")
  )
Code
# Human development vs. per-capita income, 1990-2021 (scatter plots),
# coloured by continent and animated by year
df1 %>%
  plot_ly(
    x = ~lgnic,
    y = ~shdi,
    frame = ~year,
    color = ~continent,
    colors = "Set1",
    type = "scatter",
    mode = "markers"
  ) %>%
  layout(
    xaxis = list(title = "Renta nacional bruta per capita (log)"),
    yaxis = list(title = "SHDI")
  )
Code
# Exploratory spatial data analysis (ESDA)
# Map of the spatial distribution of the SHDI in 2021

# Rename the key so it matches the `GDLcode` column of the polygons
df2021_2 <- df2021 %>%
  rename(GDLcode = GDLCODE)

# Keep only the regions present in both the polygons and the 2021 data.
# Columns that exist on both sides get .x (polygons) / .y (data) suffixes.
gdf2021 <- inner_join(map, df2021_2, by = "GDLcode")
colnames(gdf2021)
[1] "GDLcode" "constant" "iso_code.x" "country.x" "region.x"
[6] "...1" "iso_code.y" "country.y" "year" "level"
[11] "region.y" "continent" "sgdi" "shdi" "shdif"
[16] "shdim" "healthindex" "healthindexf" "healthindexm" "incindex"
[21] "incindexf" "incindexm" "edindex" "edindexf" "edindexm"
[26] "esch" "eschf" "eschm" "msch" "mschf"
[31] "mschm" "lifexp" "lifexpf" "lifexpm" "gnic"
[36] "gnicf" "gnicm" "lgnic" "lgnicf" "lgnicm"
[41] "pop" "geometry"
Code
# World choropleth of the 2021 SHDI.
# "OrRd" is a ColorBrewer palette, not a viridis option, so it is applied
# with scale_fill_distiller(); direction = 1 maps higher SHDI to darker tones.
ggplot(gdf2021) +
  geom_sf(aes(fill = shdi)) +
  theme_bw() +
  labs(title = "Distribución espacial del SHDI en 2021") +
  scale_fill_distiller(palette = "OrRd", direction = 1)
Code
# The Spanish SHDI in 2021.
# country.x is the country name coming from the polygon layer; filter() drops
# rows where it is NA instead of propagating them as base `[` would.
gdf2021Spain <- gdf2021 %>%
  filter(country.x == "Spain")

# "OrRd" is a ColorBrewer palette, not a viridis option, so it is applied
# with scale_fill_distiller(); direction = 1 maps higher SHDI to darker tones.
ggplot(gdf2021Spain) +
  geom_sf(aes(fill = shdi)) +
  theme_bw() +
  labs(title = "Distribución espacial del SHDI español en 2021") +
  scale_fill_distiller(palette = "OrRd", direction = 1)
Code
# Plot with the peninsular regions only.
# Rows 14 and 17 are dropped by position.
# NOTE(review): presumably these are the non-peninsular regions; the indices
# depend on row order, so confirm them against gdf2021Spain$region.x.
gdf2021Spain_2 <- gdf2021Spain[-c(14, 17), ]

# "OrRd" is a ColorBrewer palette, not a viridis option, so it is applied
# with scale_fill_distiller(); direction = 1 maps higher SHDI to darker tones.
ggplot(gdf2021Spain_2) +
  geom_sf(aes(fill = shdi)) +
  theme_bw() +
  labs(title = "Distribución espacial del SHDI peninsular español en 2021") +
  scale_fill_distiller(palette = "OrRd", direction = 1)
Code
# Load libraries
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
import matplotlib.pyplot as plt
plt.style.use('ggplot')
from matplotlib.ticker import FormatStrFormatter
from matplotlib_scalebar.scalebar import ScaleBar
from pylab import rcParams
import matplotlib as mpl
mpl.rcParams['figure.dpi'] = 72
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
import chart_studio
import chart_studio.plotly as save2cs
import geopandas as gpd
import warnings
# Silence all warnings for the rest of the session
warnings.filterwarnings('ignore')

# Read the SHDI table (first CSV column used as the index) and the
# subnational polygons.
# NOTE(review): the name `map` shadows the Python builtin; it is kept because
# later chunks refer to it.
df = pd.read_csv('data/GDLdbase.csv', index_col=0)
map = gpd.read_file("data/GDLmap.geojson")
map.info()
<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 1745 entries, 0 to 1744
Data columns (total 6 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 GDLcode 1745 non-null object
1 constant 1745 non-null object
2 iso_code 1742 non-null object
3 country 1742 non-null object
4 region 1742 non-null object
5 geometry 1745 non-null geometry
dtypes: geometry(1), object(5)
memory usage: 81.9+ KB
Code
# Display the GeoDataFrame read from the GeoJSON file.
map
GDLcode constant iso_code country \
0 AFGr101 World AFG Afghanistan
1 AFGr102 World AFG Afghanistan
2 AFGr103 World AFG Afghanistan
3 AFGr104 World AFG Afghanistan
4 AFGr105 World AFG Afghanistan
... ... ... ... ...
1740 ZWEr106 World ZWE Zimbabwe
1741 ZWEr107 World ZWE Zimbabwe
1742 ZWEr108 World ZWE Zimbabwe
1743 ZWEr109 World ZWE Zimbabwe
1744 ZWEr110 World ZWE Zimbabwe
region \
0 Central (Kabul Wardak Kapisa Logar Parwan Panj...
1 Central Highlands (Bamyan Daikundi)
2 East (Nangarhar Kunar Laghman Nooristan)
3 North (Samangan Sar-e-Pul Balkh Jawzjan Faryab)
4 North East (Baghlan Takhar Badakhshan Kunduz)
... ...
1740 Matebeleland South
1741 Midlands
1742 Masvingo
1743 Harare
1744 Bulawayo
geometry
0 MULTIPOLYGON (((67.61506 34.23957, 67.59259 34...
1 MULTIPOLYGON (((65.23611 33.37125, 65.25839 33...
2 MULTIPOLYGON (((69.92137 34.02534, 69.90879 34...
3 MULTIPOLYGON (((66.38873 34.93133, 66.33110 34...
4 MULTIPOLYGON (((67.39591 35.43840, 67.45535 35...
... ...
1740 MULTIPOLYGON (((29.36831 -22.19781, 29.33401 -...
1741 MULTIPOLYGON (((29.22650 -19.48012, 29.21404 -...
1742 MULTIPOLYGON (((31.06733 -22.34189, 31.08549 -...
1743 MULTIPOLYGON (((31.08852 -17.66625, 31.11562 -...
1744 MULTIPOLYGON (((28.61305 -20.23587, 28.60440 -...
[1745 rows x 6 columns]
Code
# Data preparation
# Drop the national-level observations (level == 'National').
# .copy() makes df1 an independent frame, so the column assignment below does
# not write into a slice of df.
df1 = df[df.level != 'National'].copy()
# Select the analysis year, 2021 (year is stored as a string from here on).
df1["year"] = df1["year"].astype(str)
df2021 = df1[df1['year'] == '2021']
df2021
iso_code country year GDLCODE level \
280 AFG Afghanistan 2021 AFGr101 Subnat
281 AFG Afghanistan 2021 AFGr102 Subnat
282 AFG Afghanistan 2021 AFGr103 Subnat
283 AFG Afghanistan 2021 AFGr104 Subnat
284 AFG Afghanistan 2021 AFGr105 Subnat
... ... ... ... ... ...
58879 ZWE Zimbabwe 2021 ZWEr106 Subnat
58880 ZWE Zimbabwe 2021 ZWEr107 Subnat
58881 ZWE Zimbabwe 2021 ZWEr108 Subnat
58882 ZWE Zimbabwe 2021 ZWEr109 Subnat
58883 ZWE Zimbabwe 2021 ZWEr110 Subnat
region continent sgdi \
280 Central (Kabul Wardak Kapisa Logar Parwan Panj... Asia/Pacific 0.734
281 Central Highlands (Bamyan Daikundi) Asia/Pacific 0.704
282 East (Nangarhar Kunar Laghman Nooristan) Asia/Pacific 0.583
283 North (Samangan Sar-e-Pul Balkh Jawzjan Faryab) Asia/Pacific 0.749
284 North East (Baghlan Takhar Badakhshan Kunduz) Asia/Pacific 0.667
... ... ... ...
58879 Matebeleland South Africa 0.985
58880 Midlands Africa 0.958
58881 Masvingo Africa 0.962
58882 Harare Africa 0.962
58883 Bulawayo Africa 0.985
shdi shdif shdim healthindex healthindexf healthindexm incindex \
280 0.550 0.444 0.605 0.675 0.694 0.664 0.471
281 0.472 0.368 0.522 0.653 0.667 0.643 0.396
282 0.459 0.310 0.532 0.638 0.648 0.630 0.394
283 0.497 0.405 0.541 0.628 0.636 0.620 0.464
284 0.444 0.332 0.498 0.623 0.629 0.615 0.416
... ... ... ... ... ... ... ...
58879 0.585 0.581 0.590 0.665 0.681 0.652 0.530
58880 0.590 0.577 0.602 0.593 0.595 0.586 0.544
58881 0.588 0.578 0.601 0.650 0.664 0.639 0.520
58882 0.665 0.652 0.678 0.619 0.627 0.610 0.648
58883 0.693 0.689 0.699 0.656 0.670 0.644 0.688
incindexf incindexm edindex edindexf edindexm esch eschf \
280 0.280 0.553 0.524 0.451 0.604 12.898 10.399
281 0.217 0.472 0.408 0.343 0.468 12.447 10.958
282 0.215 0.470 0.385 0.214 0.510 9.858 6.043
283 0.274 0.546 0.420 0.382 0.467 11.456 10.004
284 0.234 0.494 0.338 0.249 0.407 9.288 7.202
... ... ... ... ... ... ... ...
58879 0.508 0.551 0.566 0.566 0.570 10.989 11.145
58880 0.522 0.566 0.635 0.618 0.659 12.668 12.575
58881 0.498 0.541 0.602 0.584 0.628 12.058 12.013
58882 0.624 0.672 0.732 0.707 0.759 13.156 12.470
58883 0.663 0.712 0.739 0.735 0.746 14.115 14.173
eschm msch mschf mschm lifexp lifexpf lifexpm gnic \
280 15.407 4.975 4.851 5.281 63.843 67.633 60.672 2264.832
281 14.155 1.857 1.168 2.256 62.415 65.829 59.326 1374.650
282 13.270 3.325 1.397 4.232 61.471 64.627 58.426 1355.298
283 12.857 3.058 3.124 3.305 60.843 63.825 57.822 2159.844
284 11.235 2.390 1.473 2.842 60.500 63.384 57.490 1571.877
... ... ... ... ... ... ... ... ...
58879 10.863 7.834 7.697 8.044 63.246 66.784 59.907 3339.052
58880 12.858 8.492 8.073 9.045 58.544 61.193 55.566 3670.709
58881 12.184 8.013 7.504 8.685 62.276 65.641 59.025 3127.485
58882 13.981 10.993 10.825 11.114 60.261 63.250 57.171 7314.452
58883 14.218 10.405 10.246 10.541 62.628 66.056 59.346 9516.362
gnicf gnicm lgnic lgnicf lgnicm pop
280 638.644 3894.162 7.725 6.459 8.267 NaN
281 420.676 2282.217 7.226 6.042 7.733 NaN
282 415.719 2247.852 7.212 6.030 7.718 NaN
283 613.795 3701.300 7.678 6.420 8.216 NaN
284 470.580 2634.320 7.360 6.154 7.876 NaN
... ... ... ... ... ... ...
58879 2886.675 3844.591 8.113 7.968 8.254 NaN
58880 3168.011 4233.422 8.208 8.061 8.351 NaN
58881 2706.948 3596.899 8.048 7.904 8.188 NaN
58882 6235.132 8537.414 8.898 8.738 9.052 NaN
58883 8073.914 11158.386 9.161 8.996 9.320 NaN
[1786 rows x 36 columns]
Code
# Distinct continent labels present in the 2021 data.
df2021['continent'].unique()
array(['Asia/Pacific', 'Africa', 'Europe', 'America'], dtype=object)
Code
# Distinct country names present in the 2021 data.
df2021['country'].unique()
array(['Afghanistan', 'Angola', 'Albania', 'Argentina urban', 'Armenia',
'Australia', 'Austria', 'Azerbaijan', 'Burundi', 'Belgium',
'Benin', 'Burkina Faso', 'Bangladesh', 'Bulgaria',
'Bosnia and Herzegovina', 'Belarus', 'Belize', 'Bolivia', 'Brazil',
'Barbados', 'Bhutan', 'Botswana', 'Central African Republic CAR',
'Canada', 'Switzerland', 'Chili', 'China', "Cote d'Ivoire",
'Cameroon', 'Congo Democratic Republic', 'Congo Brazzaville',
'Colombia', 'Comoros', 'Cape Verde', 'Costa Rica', 'Cuba',
'Czech Republic', 'Germany', 'Djibouti', 'Denmark',
'Dominican Republic', 'Algeria', 'Ecuador', 'Egypt', 'Eritrea',
'Spain', 'Estonia', 'Ethiopia', 'Finland', 'Fiji', 'France',
'Gabon', 'United Kingdom', 'Georgia', 'Ghana', 'Guinea', 'Gambia',
'Guinea Bissau', 'Equatorial Guinea', 'Greece', 'Guatemala',
'Guyana', 'Honduras', 'Croatia', 'Haiti', 'Hungary', 'Indonesia',
'India', 'Ireland', 'Iran', 'Iraq', 'Italy', 'Jamaica', 'Jordan',
'Japan', 'Kazakhstan', 'Kenya', 'Kyrgyzstan', 'Cambodia',
'Kiribati', 'South Korea', 'Kuwait', 'Lao', 'Lebanon', 'Liberia',
'Libya', 'Saint Lucia', 'Lesotho', 'Lithuania', 'Latvia',
'Morocco', 'Moldova', 'Madagascar', 'Maldives', 'Mexico',
'North Macedonia', 'Mali', 'Myanmar', 'Monte Negro', 'Mongolia',
'Mozambique', 'Mauritania', 'Mauritius', 'Malawi', 'Malaysia',
'Namibia', 'Niger', 'Nigeria', 'Nicaragua', 'Netherlands',
'Norway', 'Nepal', 'New Zealand', 'Pakistan', 'Panama', 'Peru',
'Philippines', 'Papua New Guinea', 'Poland', 'Portugal',
'Paraguay', 'Palestine', 'Romania', 'Russian Federation', 'Rwanda',
'Saudi Arabia', 'Sudan', 'Senegal', 'Sierra Leone', 'El Salvador',
'Somalia', 'Serbia', 'South Sudan', 'Sao Tome & Principe',
'Suriname', 'Slovakia', 'Slovenia', 'Sweden', 'Eswatini', 'Syria',
'Chad', 'Togo', 'Thailand', 'Tajikistan', 'Turkmenistan',
'Timor Leste', 'Tonga', 'Trinidad & Tobago', 'Tunisia', 'Turkey',
'Tuvalu', 'Tanzania', 'Uganda', 'Ukraine', 'Uruguay',
'United States', 'Uzbekistan', 'Venezuela', 'Vietnam', 'Vanuatu',
'Samoa', 'Kosovo', 'Yemen', 'South Africa', 'Zambia', 'Zimbabwe'],
dtype=object)
Code
# Basic exploratory data analysis (EDA)
# Summary statistics for 2021, rounded to three decimals.
df2021.describe().round(3)
sgdi shdi shdif shdim healthindex healthindexf \
count 1723.000 1784.000 1723.000 1723.000 1784.000 1775.000
mean 0.941 0.693 0.676 0.712 0.776 0.780
std 0.071 0.158 0.167 0.147 0.119 0.125
min 0.369 0.232 0.174 0.293 0.400 0.312
25% 0.905 0.578 0.555 0.603 0.692 0.692
50% 0.962 0.707 0.682 0.724 0.784 0.798
75% 0.990 0.811 0.808 0.819 0.860 0.872
max 1.060 0.989 0.974 0.996 1.000 1.000
healthindexm incindex incindexf incindexm edindex edindexf \
count 1775.000 1786.000 1771.000 1771.000 1786.000 1728.000
mean 0.774 0.681 0.626 0.716 0.642 0.641
std 0.117 0.174 0.185 0.175 0.191 0.204
min 0.390 0.279 0.038 0.291 0.061 0.063
25% 0.694 0.556 0.494 0.589 0.497 0.481
50% 0.777 0.698 0.613 0.744 0.662 0.673
75% 0.854 0.811 0.774 0.844 0.790 0.798
max 1.000 1.000 1.000 1.000 1.000 0.994
edindexm esch eschf eschm msch mschf mschm \
count 1728.000 1786.000 1750.000 1750.000 1786.000 1733.000 1733.000
mean 0.656 13.052 13.261 13.065 8.393 8.124 8.745
std 0.170 3.163 3.251 2.736 3.447 3.725 3.172
min 0.110 0.369 1.395 3.332 0.330 0.136 0.524
25% 0.527 11.145 11.086 11.363 5.626 5.136 6.250
50% 0.663 13.303 13.642 13.187 8.558 8.521 8.870
75% 0.787 15.343 15.706 14.998 11.227 11.146 11.354
max 0.993 18.000 18.000 18.000 15.000 14.981 14.843
lifexp lifexpf lifexpm gnic gnicf gnicm \
count 1784.000 1775.000 1775.000 1786.000 1771.000 1771.000
mean 70.449 73.194 67.835 16411.868 12440.010 20564.405
std 7.751 8.146 7.593 18232.444 14887.574 21984.616
min 45.978 42.782 42.830 632.458 128.186 687.484
25% 64.985 67.494 62.640 3960.012 2637.606 4939.909
50% 70.986 74.375 67.990 10156.434 5802.553 13755.222
75% 75.882 79.173 72.997 21422.519 16763.350 26757.780
max 85.608 88.290 84.525 209791.914 162951.184 258567.313
lgnic lgnicf lgnicm pop
count 1786.000 1771.000 1771.000 7.000
mean 9.116 8.750 9.353 247.140
std 1.154 1.226 1.166 137.766
min 6.450 4.853 6.533 161.098
25% 8.284 7.877 8.505 172.455
50% 9.226 8.666 9.529 189.798
75% 9.972 9.727 10.194 242.433
max 12.254 12.001 12.463 549.310
Code
# Summary statistics for Spain in 2021, rounded to three decimals.
df2021.query("country == 'Spain'").describe().round(3)
sgdi shdi shdif shdim healthindex healthindexf \
count 19.000 19.000 19.000 19.000 19.000 19.000
mean 0.985 0.894 0.886 0.900 0.965 0.970
std 0.004 0.026 0.024 0.027 0.017 0.017
min 0.975 0.853 0.849 0.856 0.930 0.931
25% 0.984 0.872 0.865 0.878 0.957 0.966
50% 0.985 0.900 0.890 0.905 0.968 0.972
75% 0.987 0.911 0.903 0.918 0.975 0.980
max 0.996 0.940 0.927 0.951 0.997 0.996
healthindexm incindex incindexf incindexm edindex edindexf \
count 19.000 19.000 19.000 19.000 19.000 19.000
mean 0.961 0.889 0.858 0.916 0.834 0.837
std 0.016 0.031 0.030 0.031 0.037 0.033
min 0.929 0.849 0.819 0.875 0.761 0.766
25% 0.951 0.863 0.833 0.890 0.798 0.808
50% 0.963 0.883 0.852 0.910 0.852 0.850
75% 0.970 0.911 0.879 0.938 0.858 0.860
max 0.993 0.947 0.915 0.974 0.881 0.876
edindexm esch eschf eschm msch mschf mschm lifexp \
count 19.000 19.000 19.000 19.000 19.000 19.000 19.000 19.000
mean 0.827 17.415 17.605 17.097 10.498 10.442 10.562 82.744
std 0.040 0.823 0.733 0.896 0.570 0.540 0.604 1.102
min 0.757 15.127 15.253 14.988 9.462 9.399 9.548 80.461
25% 0.787 17.038 17.402 16.614 10.029 9.986 10.160 82.190
50% 0.840 17.773 18.000 17.341 10.552 10.513 10.548 82.932
75% 0.853 18.000 18.000 17.820 10.898 10.805 10.955 83.376
max 0.888 18.000 18.000 18.000 11.444 11.290 11.634 84.810
lifexpf lifexpm gnic gnicf gnicm lgnic lgnicf \
count 19.000 19.000 19.000 19.000 19.000 19.000 19.000
mean 85.525 79.993 36767.182 29934.452 43874.659 10.492 10.287
std 1.120 1.051 7792.902 6216.995 9460.374 0.204 0.201
min 82.999 77.891 27582.996 22592.294 32745.203 10.225 10.025
25% 85.275 79.324 30417.830 24866.672 36169.910 10.323 10.121
50% 85.670 80.065 34612.370 28224.467 41246.942 10.452 10.248
75% 86.214 80.560 41466.415 33694.492 49564.842 10.632 10.425
max 87.253 82.042 52773.822 42681.253 63336.277 10.874 10.662
lgnicm pop
count 19.000 0.0
mean 10.668 NaN
std 0.208 NaN
min 10.397 NaN
25% 10.496 NaN
50% 10.627 NaN
75% 10.811 NaN
max 11.056 NaN
Code
# Regional differences over 1990-2021 (box plots)
# Comparison across continents: one horizontal box per continent, animated
# by year.
px.box(df1,
       x = 'shdi',
       y = 'continent',
       range_x = [0, 1],
       color = 'continent',
       hover_name = 'region',
       hover_data = ['country'],
       animation_frame = 'year',
       labels = dict(continent = "Continente",
                     shdi = "SHDI")
)
Code
# Comparison across countries: Spain, Portugal and France, animated by year.
px.box(df1[df1['country'].isin(['Spain', 'Portugal', 'France'])],
       x = 'shdi',
       y = 'country',
       range_x = [0.70, 1],
       color = 'country',
       hover_name = 'region',
       hover_data = ['country'],
       animation_frame = 'year',
       labels = dict(country = "País",
                     shdi = "SHDI")
)
Code
# Regional evolution within one country, 1990-2021 (time series):
# one line per Spanish region, SHDI on a log-scaled y axis.
fig = px.line(
    df1[df1['country'].isin(['Spain'])],
    x = "year",
    y = "shdi",
    log_y = True,
    color = "region",
    hover_name = "country",
    hover_data = ['country'],
    labels = dict(shdi = "SHDI",
                  year = "Año"),
    facet_col = "country",
    facet_col_wrap = 2,
    facet_row_spacing = 0.01,
    height = 500
)
# Hide the legend.
fig.update_layout(showlegend=False)
Code
# Human development vs. per-capita income, 1990-2021 (scatter plots):
# SHDI against log GNI per capita, coloured by continent, with an OLS
# trendline, animated by year.
px.scatter(df1,
           y = "shdi",
           x = "lgnic",
           range_y = [0, 1],
           range_x = [5, 13],
           hover_name = "region",
           hover_data = ['country'],
           color = "continent",
           #size = "pop", size_max = 60,
           trendline = 'ols',
           animation_frame = 'year',
           labels = dict(country = "País",
                         continent = "Continente",
                         lgnic = "Renta nacional bruta per capita (log)",
                         shdi = "SHDI")
)
Code
# Exploratory spatial data analysis (ESDA)
# Spatial distribution map of the SHDI in 2021
df2021_2 = df2021.copy()
# Rename the key column so it matches the map's 'GDLcode'.
df2021_2 = df2021_2.rename(columns = {"GDLCODE": "GDLcode"})
# Left merge: every map geometry is kept, with NaN attributes where no 2021
# record matches.
gdf2021 = map.merge(df2021_2, on = 'GDLcode', how = 'left')
gdf2021.columns
Index(['GDLcode', 'constant', 'iso_code_x', 'country_x', 'region_x',
'geometry', 'iso_code_y', 'country_y', 'year', 'level', 'region_y',
'continent', 'sgdi', 'shdi', 'shdif', 'shdim', 'healthindex',
'healthindexf', 'healthindexm', 'incindex', 'incindexf', 'incindexm',
'edindex', 'edindexf', 'edindexm', 'esch', 'eschf', 'eschm', 'msch',
'mschf', 'mschm', 'lifexp', 'lifexpf', 'lifexpm', 'gnic', 'gnicf',
'gnicm', 'lgnic', 'lgnicf', 'lgnicm', 'pop'],
dtype='object')
Code
# Display the merged GeoDataFrame.
gdf2021
GDLcode constant iso_code_x country_x \
0 AFGr101 World AFG Afghanistan
1 AFGr102 World AFG Afghanistan
2 AFGr103 World AFG Afghanistan
3 AFGr104 World AFG Afghanistan
4 AFGr105 World AFG Afghanistan
... ... ... ... ...
1740 ZWEr106 World ZWE Zimbabwe
1741 ZWEr107 World ZWE Zimbabwe
1742 ZWEr108 World ZWE Zimbabwe
1743 ZWEr109 World ZWE Zimbabwe
1744 ZWEr110 World ZWE Zimbabwe
region_x \
0 Central (Kabul Wardak Kapisa Logar Parwan Panj...
1 Central Highlands (Bamyan Daikundi)
2 East (Nangarhar Kunar Laghman Nooristan)
3 North (Samangan Sar-e-Pul Balkh Jawzjan Faryab)
4 North East (Baghlan Takhar Badakhshan Kunduz)
... ...
1740 Matebeleland South
1741 Midlands
1742 Masvingo
1743 Harare
1744 Bulawayo
geometry iso_code_y \
0 MULTIPOLYGON (((67.61506 34.23957, 67.59259 34... AFG
1 MULTIPOLYGON (((65.23611 33.37125, 65.25839 33... AFG
2 MULTIPOLYGON (((69.92137 34.02534, 69.90879 34... AFG
3 MULTIPOLYGON (((66.38873 34.93133, 66.33110 34... AFG
4 MULTIPOLYGON (((67.39591 35.43840, 67.45535 35... AFG
... ... ...
1740 MULTIPOLYGON (((29.36831 -22.19781, 29.33401 -... ZWE
1741 MULTIPOLYGON (((29.22650 -19.48012, 29.21404 -... ZWE
1742 MULTIPOLYGON (((31.06733 -22.34189, 31.08549 -... ZWE
1743 MULTIPOLYGON (((31.08852 -17.66625, 31.11562 -... ZWE
1744 MULTIPOLYGON (((28.61305 -20.23587, 28.60440 -... ZWE
country_y year level \
0 Afghanistan 2021 Subnat
1 Afghanistan 2021 Subnat
2 Afghanistan 2021 Subnat
3 Afghanistan 2021 Subnat
4 Afghanistan 2021 Subnat
... ... ... ...
1740 Zimbabwe 2021 Subnat
1741 Zimbabwe 2021 Subnat
1742 Zimbabwe 2021 Subnat
1743 Zimbabwe 2021 Subnat
1744 Zimbabwe 2021 Subnat
region_y continent sgdi \
0 Central (Kabul Wardak Kapisa Logar Parwan Panj... Asia/Pacific 0.734
1 Central Highlands (Bamyan Daikundi) Asia/Pacific 0.704
2 East (Nangarhar Kunar Laghman Nooristan) Asia/Pacific 0.583
3 North (Samangan Sar-e-Pul Balkh Jawzjan Faryab) Asia/Pacific 0.749
4 North East (Baghlan Takhar Badakhshan Kunduz) Asia/Pacific 0.667
... ... ... ...
1740 Matebeleland South Africa 0.985
1741 Midlands Africa 0.958
1742 Masvingo Africa 0.962
1743 Harare Africa 0.962
1744 Bulawayo Africa 0.985
shdi shdif shdim healthindex healthindexf healthindexm incindex \
0 0.550 0.444 0.605 0.675 0.694 0.664 0.471
1 0.472 0.368 0.522 0.653 0.667 0.643 0.396
2 0.459 0.310 0.532 0.638 0.648 0.630 0.394
3 0.497 0.405 0.541 0.628 0.636 0.620 0.464
4 0.444 0.332 0.498 0.623 0.629 0.615 0.416
... ... ... ... ... ... ... ...
1740 0.585 0.581 0.590 0.665 0.681 0.652 0.530
1741 0.590 0.577 0.602 0.593 0.595 0.586 0.544
1742 0.588 0.578 0.601 0.650 0.664 0.639 0.520
1743 0.665 0.652 0.678 0.619 0.627 0.610 0.648
1744 0.693 0.689 0.699 0.656 0.670 0.644 0.688
incindexf incindexm edindex edindexf edindexm esch eschf \
0 0.280 0.553 0.524 0.451 0.604 12.898 10.399
1 0.217 0.472 0.408 0.343 0.468 12.447 10.958
2 0.215 0.470 0.385 0.214 0.510 9.858 6.043
3 0.274 0.546 0.420 0.382 0.467 11.456 10.004
4 0.234 0.494 0.338 0.249 0.407 9.288 7.202
... ... ... ... ... ... ... ...
1740 0.508 0.551 0.566 0.566 0.570 10.989 11.145
1741 0.522 0.566 0.635 0.618 0.659 12.668 12.575
1742 0.498 0.541 0.602 0.584 0.628 12.058 12.013
1743 0.624 0.672 0.732 0.707 0.759 13.156 12.470
1744 0.663 0.712 0.739 0.735 0.746 14.115 14.173
eschm msch mschf mschm lifexp lifexpf lifexpm gnic \
0 15.407 4.975 4.851 5.281 63.843 67.633 60.672 2264.832
1 14.155 1.857 1.168 2.256 62.415 65.829 59.326 1374.650
2 13.270 3.325 1.397 4.232 61.471 64.627 58.426 1355.298
3 12.857 3.058 3.124 3.305 60.843 63.825 57.822 2159.844
4 11.235 2.390 1.473 2.842 60.500 63.384 57.490 1571.877
... ... ... ... ... ... ... ... ...
1740 10.863 7.834 7.697 8.044 63.246 66.784 59.907 3339.052
1741 12.858 8.492 8.073 9.045 58.544 61.193 55.566 3670.709
1742 12.184 8.013 7.504 8.685 62.276 65.641 59.025 3127.485
1743 13.981 10.993 10.825 11.114 60.261 63.250 57.171 7314.452
1744 14.218 10.405 10.246 10.541 62.628 66.056 59.346 9516.362
gnicf gnicm lgnic lgnicf lgnicm pop
0 638.644 3894.162 7.725 6.459 8.267 NaN
1 420.676 2282.217 7.226 6.042 7.733 NaN
2 415.719 2247.852 7.212 6.030 7.718 NaN
3 613.795 3701.300 7.678 6.420 8.216 NaN
4 470.580 2634.320 7.360 6.154 7.876 NaN
... ... ... ... ... ... ...
1740 2886.675 3844.591 8.113 7.968 8.254 NaN
1741 3168.011 4233.422 8.208 8.061 8.351 NaN
1742 2706.948 3596.899 8.048 7.904 8.188 NaN
1743 6235.132 8537.414 8.898 8.738 9.052 NaN
1744 8073.914 11158.386 9.161 8.996 9.320 NaN
[1745 rows x 41 columns]
Code
# Choropleth of the 2021 SHDI for every region in gdf2021.
fig, ax = plt.subplots(1, 1)
gdf2021.plot(column="shdi",
             legend=True,
             ax=ax)
plt.title('Distribución espacial del SHDI en 2021')
plt.tight_layout()
plt.show()
Code
# SHDI for Spain in 2021
# reset_index(drop=True) renumbers the Spanish rows from 0.
gdf2021Spain = gdf2021.query("country_x =='Spain'").reset_index(drop=True)
fig, ax = plt.subplots(1, 1)
gdf2021Spain.plot(column="shdi",
                  legend=True,
                  ax=ax)
plt.title('Distribución del SHDI español en 2021')
plt.tight_layout()
plt.show()
Code
# Plot restricted to the peninsular regions
# Rows labelled 13 and 16 of the Spanish subset are dropped
# NOTE(review): presumably the island regions; confirm against gdf2021Spain.
gdf2021Spain_2 = gdf2021Spain.drop(labels=[13, 16])
fig, ax = plt.subplots(1, 1)
gdf2021Spain_2.plot(column="shdi",
                    legend=True,
                    ax=ax)
plt.title('Distribución del SHDI peninsular español en 2021')
plt.tight_layout()
plt.show()