An excerpt of the Gapminder data. The main object in the gapminder package is the gapminder
data frame (a “tibble”).
Install gapminder
from CRAN:
# One-time setup: fetch the gapminder package from CRAN.
install.packages(pkgs = "gapminder")
Or you can install gapminder
from GitHub:
# One-time setup: fetch the gapminder package from its GitHub repository.
devtools::install_github(repo = "jennybc/gapminder")
# Load the raw gapminder CSV; the path is built with here::here() from the
# components "data", "raw", "gapminder.csv".
gapminder_df <- readr::read_csv(
  file = here::here("data", "raw", "gapminder.csv")
)
## Parsed with column specification:
## cols(
## country = col_character(),
## continent = col_character(),
## year = col_integer(),
## lifeExp = col_double(),
## pop = col_integer(),
## gdpPercap = col_double()
## )
# Render the full data set as a captioned DT table.
DT::datatable(
  data = gapminder_df,
  caption = "gapminder dataset sourced from the gapminder r package"
)
library(skimr)
# Per-variable summary statistics, grouped by column type.
skimr::skim(gapminder_df)
## Skim summary statistics
## n obs: 1704
## n variables: 6
##
## Variable type: character
## variable missing complete n min max empty n_unique
## continent 0 1704 1704 4 8 0 5
## country 0 1704 1704 4 24 0 142
##
## Variable type: integer
## variable missing complete n mean sd p0 p25 median
## pop 0 1704 1704 3e+07 1.1e+08 60011 2793664 7e+06
## year 0 1704 1704 1979.5 17.27 1952 1965.75 1979.5
## p75 p100 hist
## 2e+07 1.3e+09 ▇▁▁▁▁▁▁▁
## 1993.25 2007 ▇▃▇▃▃▇▃▇
##
## Variable type: numeric
## variable missing complete n mean sd p0 p25 median
## gdpPercap 0 1704 1704 7215.33 9857.45 241.17 1202.06 3531.85
## lifeExp 0 1704 1704 59.47 12.92 23.6 48.2 60.71
## p75 p100 hist
## 9325.46 113523.13 ▇▁▁▁▁▁▁▁
## 70.85 82.6 ▁▂▅▅▅▅▇▃
library(broom) # turns fitted model objects into tidy data frames
# Fit life expectancy on raw GDP per capita and report the one-row,
# model-level summary (r.squared, AIC, BIC, ...).
broom::glance(lm(lifeExp ~ gdpPercap, data = gapminder_df))
## r.squared adj.r.squared sigma statistic p.value df logLik
## 1 0.340713 0.3403256 10.49132 879.5766 3.565724e-156 2 -6422.205
## AIC BIC deviance df.residual
## 1 12850.41 12866.73 187335.3 1702
library(ggplot2)
# Life expectancy against GDP per capita on a log10 x-axis, coloured by
# continent.
ggplot(gapminder_df, aes(x = gdpPercap, y = lifeExp, color = continent)) +
  geom_point() +
  scale_x_log10()
library(broom) # turns fitted model objects into tidy data frames
# Same model as before, but with GDP per capita on a log10 scale; report the
# one-row, model-level summary.
broom::glance(lm(lifeExp ~ log10(gdpPercap), data = gapminder_df))
## r.squared adj.r.squared sigma statistic p.value df logLik AIC
## 1 0.6522466 0.6520423 7.619535 3192.273 0 2 -5877.21 11760.42
## BIC deviance df.residual
## 1 11776.74 98813.56 1702
# Load the metadata CSV; the path is built with here::here() from the
# components "data", "metadata", "gapminder_meta.csv".
meta_tbl <- readr::read_csv(
  file = here::here("data", "metadata", "gapminder_meta.csv")
)
library(ggplot2)
# Same scatter plot as before (log10 x-axis, coloured by continent) with a
# smoother layer added; the smoothing method is left to geom_smooth()'s
# default choice.
ggplot(gapminder_df, aes(x = gdpPercap, y = lifeExp, color = continent)) +
  geom_point() +
  scale_x_log10() +
  geom_smooth()
## `geom_smooth()` using method = 'loess'