Excerpt from the Gapminder data. The main object in this package is the gapminder data frame or “tibble”.

Installation

Install gapminder from CRAN:

# Install the gapminder package from CRAN.
install.packages(pkgs = "gapminder")

Or you can install gapminder from GitHub:

# Install gapminder from the jennybc/gapminder GitHub repository.
devtools::install_github(repo = "jennybc/gapminder")

DATA

# Read the raw gapminder CSV; here::here() assembles the path from its
# components ("data", "raw", "gapminder.csv").
gapminder_df <- readr::read_csv(
  file = here::here("data", "raw", "gapminder.csv")
)
## Parsed with column specification:
## cols(
##   country = col_character(),
##   continent = col_character(),
##   year = col_integer(),
##   lifeExp = col_double(),
##   pop = col_integer(),
##   gdpPercap = col_double()
## )
# Display the full data set as a captioned table.
DT::datatable(
  data = gapminder_df,
  caption = "gapminder dataset sourced from the gapminder r package"
)

Visual summary of the data

library(skimr)
# Print summary statistics for every column of the data set.
skimr::skim(gapminder_df)
## Skim summary statistics
##  n obs: 1704 
##  n variables: 6 
## 
## Variable type: character 
##   variable missing complete    n min max empty n_unique
##  continent       0     1704 1704   4   8     0        5
##    country       0     1704 1704   4  24     0      142
## 
## Variable type: integer 
##  variable missing complete    n    mean       sd    p0        p25  median
##       pop       0     1704 1704 3e+07    1.1e+08 60011 2793664    7e+06  
##      year       0     1704 1704  1979.5 17.27     1952    1965.75  1979.5
##       p75       p100     hist
##  2e+07       1.3e+09 ▇▁▁▁▁▁▁▁
##   1993.25 2007       ▇▃▇▃▃▇▃▇
## 
## Variable type: numeric 
##   variable missing complete    n    mean      sd     p0     p25  median
##  gdpPercap       0     1704 1704 7215.33 9857.45 241.17 1202.06 3531.85
##    lifeExp       0     1704 1704   59.47   12.92  23.6    48.2    60.71
##      p75      p100     hist
##  9325.46 113523.13 ▇▁▁▁▁▁▁▁
##    70.85     82.6  ▁▂▅▅▅▅▇▃

Relationship between GDP per capita and Life Expectancy

Linear model of Life expectancy as a function of GDP per capita

library(broom) # great for extracting model parameter summaries
# Fit life expectancy against untransformed GDP per capita and report the
# model-level fit statistics.
broom::glance(lm(lifeExp ~ gdpPercap, data = gapminder_df))
##   r.squared adj.r.squared    sigma statistic       p.value df    logLik
## 1  0.340713     0.3403256 10.49132  879.5766 3.565724e-156  2 -6422.205
##        AIC      BIC deviance df.residual
## 1 12850.41 12866.73 187335.3        1702
library(ggplot2)

# Scatter plot of life expectancy against GDP per capita, coloured by
# continent, with a log10-scaled x axis.
ggplot(
  data = gapminder_df,
  mapping = aes(x = gdpPercap, y = lifeExp, color = continent)
) +
  geom_point() +
  scale_x_log10()

Linear model of Life expectancy as a function of log10(GDP per capita)

library(broom) # great for extracting model parameter summaries
# Same model as before, but with GDP per capita on a log10 scale.
broom::glance(lm(lifeExp ~ log10(gdpPercap), data = gapminder_df))
##   r.squared adj.r.squared    sigma statistic p.value df   logLik      AIC
## 1 0.6522466     0.6520423 7.619535  3192.273       0  2 -5877.21 11760.42
##        BIC deviance df.residual
## 1 11776.74 98813.56        1702
# Read the gapminder metadata CSV; here::here() assembles the path from its
# components ("data", "metadata", "gapminder_meta.csv").
meta_tbl <- readr::read_csv(
  file = here::here("data", "metadata", "gapminder_meta.csv")
)

Add smooth lines

library(ggplot2)

# Same scatter plot as above, with a smoothed trend line added by
# geom_smooth(); colour is mapped to continent in the plot-level aesthetics.
ggplot(
  data = gapminder_df,
  mapping = aes(x = gdpPercap, y = lifeExp, color = continent)
) +
  geom_point() +
  scale_x_log10() +
  geom_smooth()
## `geom_smooth()` using method = 'loess'