Code from Week 1

Due by 11:59 PM on Sunday, December 31, 0000

Course Packet

# One-time setup: make sure the CRAN packages used in this course are present.
my_packages <- c("tidyverse", "fs", "devtools")

# Install only what is missing, so that re-running this script does not
# download and reinstall packages that are already available.
# system.file(package = ) returns "" for a package that cannot be found.
missing_packages <- my_packages[
  !vapply(my_packages, function(pkg) nzchar(system.file(package = pkg)),
          logical(1))
]
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}

# socviz is installed from its GitHub repository using devtools.
devtools::install_github("kjhealy/socviz")
library(tidyverse)
## ── Attaching packages ────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.0           ✔ purrr   0.3.0      
## ✔ tibble  2.0.99.9000     ✔ dplyr   0.8.0.9000 
## ✔ tidyr   0.8.3           ✔ stringr 1.4.0      
## ✔ readr   1.3.1           ✔ forcats 0.4.0
## Warning: package 'purrr' was built under R version 3.5.2
## Warning: package 'stringr' was built under R version 3.5.2
## Warning: package 'forcats' was built under R version 3.5.2
## ── Conflicts ───────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter()  masks stats::filter()
## ✖ purrr::is_null() masks testthat::is_null()
## ✖ dplyr::lag()     masks stats::lag()
## ✖ dplyr::matches() masks testthat::matches()
library(socviz)
## 
## Attaching package: 'socviz'
## The following object is masked from 'package:kjhutils':
## 
##     %nin%

To extract the course packet to your desktop:

# Helper that unpacks the course packet (see the sentence above);
# presumably provided by socviz, which is loaded earlier -- TODO confirm.
setup_course_notes()

First steps

# Load the gapminder package, which supplies the `gapminder` data set
# used in the plots that follow.
library(gapminder)

# Typing the object's name at the top level prints it.
gapminder
## # A tibble: 1,704 x 6
##    country     continent  year lifeExp      pop gdpPercap
##    <fct>       <fct>     <int>   <dbl>    <int>     <dbl>
##  1 Afghanistan Asia       1952    28.8  8425333      779.
##  2 Afghanistan Asia       1957    30.3  9240934      821.
##  3 Afghanistan Asia       1962    32.0 10267083      853.
##  4 Afghanistan Asia       1967    34.0 11537966      836.
##  5 Afghanistan Asia       1972    36.1 13079460      740.
##  6 Afghanistan Asia       1977    38.4 14880372      786.
##  7 Afghanistan Asia       1982    39.9 12881816      978.
##  8 Afghanistan Asia       1987    40.8 13867957      852.
##  9 Afghanistan Asia       1992    41.7 16317921      649.
## 10 Afghanistan Asia       1997    41.8 22227415      635.
## # … with 1,694 more rows
# Base plot object: the gapminder data with GDP per capita mapped to x
# and life expectancy mapped to y. No geom has been added yet.
p <- ggplot(
  data = gapminder,
  mapping = aes(x = gdpPercap, y = lifeExp)
)

# Print the plot object on its own, before any layer is added.
p

# Add a point layer to draw a scatterplot.
p + geom_point()

# GDP per capita (x) against life expectancy (y), drawn with a smoother only.
p <- ggplot(
  data = gapminder,
  mapping = aes(x = gdpPercap, y = lifeExp)
)

# No method is given here, so geom_smooth() picks one and reports its choice.
p + geom_smooth()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
Life Expectancy vs GDP, using a smoother.

Figure 1: Life Expectancy vs GDP, using a smoother.

# GDP per capita (x) against life expectancy (y).
p <- ggplot(
  data = gapminder,
  mapping = aes(x = gdpPercap, y = lifeExp)
)

# Points plus a straight-line ("lm") fit layered on top.
p +
  geom_point() +
  geom_smooth(method = "lm")
Life Expectancy vs GDP, points and an ill-advised linear fit.

Figure 2: Life Expectancy vs GDP, points and an ill-advised linear fit.

# GDP per capita (x) against life expectancy (y).
p <- ggplot(
  data = gapminder,
  mapping = aes(x = gdpPercap, y = lifeExp)
)

# Points, a GAM smoother, and a log10-transformed x-axis.
p +
  geom_point() +
  geom_smooth(method = "gam") +
  scale_x_log10()
Life Expectancy vs GDP scatterplot, with a GAM smoother and a log scale on the x-axis.

Figure 3: Life Expectancy vs GDP scatterplot, with a GAM smoother and a log scale on the x-axis.

Mapping aesthetics vs setting them

# NOTE: color = "purple" sits inside aes(), so it is treated as a mapping
# rather than as a fixed color setting. It is kept exactly as written,
# because the caption below asks what has gone wrong.
p <- ggplot(
  data = gapminder,
  mapping = aes(
    x = gdpPercap,
    y = lifeExp,
    color = "purple"
  )
)

# Points, a loess smoother, and a log10-transformed x-axis.
p +
  geom_point() +
  geom_smooth(method = "loess") +
  scale_x_log10()
What has gone wrong here?

Figure 4: What has gone wrong here?

# Only x and y are mapped here; no color appears in aes().
p <- ggplot(
  data = gapminder,
  mapping = aes(x = gdpPercap, y = lifeExp)
)

# The color is set directly on the point layer, outside of aes().
p +
  geom_point(color = "purple") +
  geom_smooth(method = "loess") +
  scale_x_log10()
Setting the color attribute of the points directly.

Figure 5: Setting the color attribute of the points directly.

# GDP per capita (x) against life expectancy (y).
p <- ggplot(
  data = gapminder,
  mapping = aes(x = gdpPercap, y = lifeExp)
)

# Semi-transparent points, a GAM smoother, dollar-formatted labels on a
# log10 x-axis, and explicit axis labels, title, subtitle and caption.
p +
  geom_point(alpha = 0.3) +
  geom_smooth(method = "gam") +
  scale_x_log10(labels = scales::dollar) +
  labs(
    x = "GDP Per Capita",
    y = "Life Expectancy in Years",
    title = "Economic Growth and Life Expectancy",
    subtitle = "Data points are country-years",
    caption = "Source: Gapminder."
  )
A more polished plot of Life Expectancy vs GDP.

Figure 6: A more polished plot of Life Expectancy vs GDP.

# The continent variable is mapped to color in the plot-level aes(),
# alongside the x and y mappings.
p <- ggplot(
  data = gapminder,
  mapping = aes(
    x = gdpPercap,
    y = lifeExp,
    color = continent
  )
)

# Points, a loess smoother, and a log10-transformed x-axis.
p +
  geom_point() +
  geom_smooth(method = "loess") +
  scale_x_log10()
Mapping the continent variable to the color aesthetic.

Figure 7: Mapping the continent variable to the color aesthetic.

# Both color and fill are mapped to continent in the plot-level aes().
p <- ggplot(
  data = gapminder,
  mapping = aes(
    x = gdpPercap,
    y = lifeExp,
    color = continent,
    fill = continent
  )
)

# Points, a loess smoother, and a log10-transformed x-axis.
p +
  geom_point() +
  geom_smooth(method = "loess") +
  scale_x_log10()
Mapping the continent variable to the color aesthetic, and using the fill aesthetic so that each smoother's shaded standard-error ribbon matches the color of its line.

Figure 8: Mapping the continent variable to the color aesthetic, and using the fill aesthetic so that each smoother's shaded standard-error ribbon matches the color of its line.

Aesthetics can be mapped per geom

# Only x and y are mapped at the plot level.
p <- ggplot(
  data = gapminder,
  mapping = aes(x = gdpPercap, y = lifeExp)
)

# Color is mapped to continent inside geom_point() only; the smoother
# layer is given no color mapping of its own.
p +
  geom_point(mapping = aes(color = continent)) +
  geom_smooth(method = "loess") +
  scale_x_log10()
Mapping aesthetics on a per-geom basis. Here color is mapped to continent for the points but not the smoother.

Figure 9: Mapping aesthetics on a per-geom basis. Here color is mapped to continent for the points but not the smoother.