Upgrade to Pro — share decks privately, control downloads, hide ads and more …

90thTokyo.R

Sponsored · Your Podcast. Everywhere. Effortlessly. Share. Educate. Inspire. Entertain. You do you. We'll handle the rest.
Avatar for kilometer kilometer
March 06, 2021

 90thTokyo.R

Introduction of nested data handling using purrr package (90th Tokyo.R #TokyoR).

Avatar for kilometer

kilometer

March 06, 2021
Tweet

More Decks by kilometer

Other Decks in Programming

Transcript

  1. BeginneR Advanced Hoxo_m If I have seen further it is

    by standing on the shoulders of Giants. -- Sir Isaac Newton, 1676
  2. 1JQFBMHFCSB X %>% f X %>% f(y) X %>% f

    %>% g X %>% f(y, .) f(X) f(X, y) g(f(X)) f(y, X) %>% {magrittr} 「dplyr再⼊⾨(基本編)」yutanihilation https://speakerdeck.com/yutannihilation/dplyrzai-ru-men-ji-ben-bian
  3. data.frame Long Wide Nested plot Figures Data table read_csv write_csv

    pivot_longer pivot_wider group_nest unnest ggplot ggsave wrap_plots map
  4. ?data.frame data.frame( x = c(1:3), y = letters[1:3], z =

    seq(3, 5, by = 1)) ## x y z ## 1 1 a 3 ## 2 2 b 4 ## 3 3 c 5
  5. ?data.frame data.frame( x = c(1:3), y = letters[1:3], z =

    seq(3, 5, by = 1)) ## x y z ## 1 1 a 3 ## 2 2 b 4 ## 3 3 c 5 observation variable
  6. ?data.frame a <- data.frame( x = c(1:3), y = letters[1:3],

    z = seq(3, 5, by = 1)) a$x ## [1] 1 2 3
  7. ?data.frame a %>% mutate(new = x + 1) a %>%

    mutate(new = x + y) ## x y z new ## 1 1 a 3 2 ## 2 2 b 4 3 ## 3 3 c 5 4 ## x y z new ## 1 1 a 3 4 ## 2 2 b 4 6 ## 3 3 c 5 8
  8. data.frame Long Wide Nested pivot_longer pivot_wider group_nest unnest map mutate

    filter select rename summarize Verbs “Data Manipula,on in R with dplyr” Griesemer J. 2019 library(tidyverse)
  9. library(palmerpenguins) penguins %>% head() # A tibble: 6 x 8

    species island bill_length_mm bill_depth_mm flipper_length_… <fct> <fct> <dbl> <dbl> <int> 1 Adelie Torge… 39.1 18.7 181 2 Adelie Torge… 39.5 17.4 186 3 Adelie Torge… 40.3 18 195 4 Adelie Torge… NA NA NA 5 Adelie Torge… 36.7 19.3 193 6 Adelie Torge… 39.3 20.6 190 # … with 3 more variables: body_mass_g <int>, sex <fct>, # year <int> Artwork by @allison_horst
  10. ggplot(data = penguins) + aes(x = body_mass_g, y = bill_length_mm,

    color = species) + geom_point() + geom_smooth(method = “lm”, se = F)
  11. penguins_xy <- penguins %>% mutate(x = body_mass_g, y = bill_length_mm)

    penguins_xy %>% filter(species == “Adelie”) %>% lm(y ~ x, data = .) Call: lm(formula = y ~ x, data = .) Coefficients: (Intercept) x 26.994139 0.003188
  12. penguins_xy %>% filter(species == “Adelie”) %>% lm(y ~ x, data

    = .) %>% summary() Call: lm(formula = y ~ x, data = .) Residuals: Min 1Q Median 3Q Max -6.4208 -1.3690 0.1874 1.4825 5.6168 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 2.699e+01 1.483e+00 18.201 < 2e-16 *** x 3.188e-03 3.977e-04 8.015 2.95e-13 *** --- Residual standard error: 2.234 on 149 degrees of freedom (1 observation deleted due to missingness) Multiple R-squared: 0.3013, Adjusted R-squared: 0.2966 F-statistic: 64.24 on 1 and 149 DF, p-value: 2.955e-13
  13. penguins_xy %>% filter(species == “Adelie”) %>% lm(y ~ x, data

    = .) %>% summary() penguins_xy %>% filter(species == “Chinstrap”) %>% lm(y ~ x, data = .) %>% summary() penguins_xy %>% filter(species == “Gentoo”) %>% lm(y ~ x, data = .) %>% summary()
  14. penguins_xy %>% group_nest(species) # A tibble: 3 x 2 species

    data <fct> <list<tbl_df[,9]>> 1 Adelie [152 × 9] 2 Chinstrap [68 × 9] 3 Gentoo [124 × 9]
  15. penguins_xy %>% group_nest(species) # A tibble: 3 x 2 species

    data <fct> <list<tbl_df[,9]>> 1 Adelie [152 × 9] 2 Chinstrap [68 × 9] 3 Gentoo [124 × 9] penguins_xy %>% group_nest(species, island) # A tibble: 5 x 3 species island data <fct> <fct> <list<tbl_df[,8]>> 1 Adelie Biscoe [44 × 8] 2 Adelie Dream [56 × 8] 3 Adelie Torgersen [52 × 8] 4 Chinstrap Dream [68 × 8] 5 Gentoo Biscoe [124 × 8]
  16. penguins_xy %>% group_nest(species) # A tibble: 3 x 2 species

    data <fct> <list<tbl_df[,9]>> 1 Adelie [152 × 9] 2 Chinstrap [68 × 9] 3 Gentoo [124 × 9] penguins_xy %>% group_nest(species) %>% .$data %>% .[[1]] # A tibble: 152 x 9 island bill_length_mm bill_depth_mm flipper_length_… <fct> <dbl> <dbl> <int> 1 Torge… 39.1 18.7 181 2 Torge… 39.5 17.4 186 3 Torge… 40.3 18 195 4 Torge… NA NA NA 5 Torge… 36.7 19.3 193 6 Torge… 39.3 20.6 190
  17. penguins_lm <- penguins_xy %>% group_nest(species) %>% mutate(fit = map(data, ~

    lm(y ~ x, data = .)), summary = map(fit, summary)) # A tibble: 3 x 4 species data fit summary <fct> <list<tbl_df[,9]>> <list> <list> 1 Adelie [152 × 9] <lm> <smmry.lm> 2 Chinstrap [68 × 9] <lm> <smmry.lm> 3 Gentoo [124 × 9] <lm> <smmry.lm>
  18. penguins_xy %>% filter(species == “Adelie”) %>% lm(y ~ x, data

    = .) %>% summary() penguins_lm <- penguins_xy %>% group_nest(species) %>% mutate(fit = map(data, ~ lm(y ~ x, data = .)), summary = map(fit, summary)) # A tibble: 3 x 4 species data fit summary <fct> <list<tbl_df[,9]>> <list> <list> 1 Adelie [152 × 9] <lm> <smmry.lm> 2 Chinstrap [68 × 9] <lm> <smmry.lm> 3 Gentoo [124 × 9] <lm> <smmry.lm>
  19. penguins_lm <- penguins_xy %>% group_nest(species) %>% mutate(fit = map(data, ~

    lm(y ~ x, data = .)), summary = map(fit, summary)) penguins_lm$summary[[1]] Call: lm(formula = y ~ x, data = .) Residuals: Min 1Q Median 3Q Max -6.4208 -1.3690 0.1874 1.4825 5.6168 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 2.699e+01 1.483e+00 18.201 < 2e-16 *** x 3.188e-03 3.977e-04 8.015 2.95e-13 ***
  20. penguins_lm <- penguins_xy %>% group_nest(species) %>% mutate(fit = map(data, ~

    lm(y ~ x, data = .)), summary = map(fit, summary), a = map_dbl(fit, ~ .$coefficients[2]), R2 = map_dbl(summary, ~ .$r.squared)) # A tibble: 3 x 6 species data fit summary a R2 <fct> <list<tbl_df[,9]>> <list> <list> <dbl> <dbl> 1 Adelie [152 × 9] <lm> <smmry.lm> 0.00319 0.301 2 Chinstrap [68 × 9] <lm> <smmry.lm> 0.00446 0.264 3 Gentoo [124 × 9] <lm> <smmry.lm> 0.00409 0.448 map_dbl(), map_chr() map_dfc(), map_dfr() Wrapper functions
  21. ?map dat <- 1:4 f(num = dat) f <- function(num){

    num * 4 } [1] 4 8 12 16 dat <- list(1:4, 7:4) f(num = dat) Error in num*4 : non-numeric argument to binary operator
  22. ?map dat <- list(1:4, 7:4) f <- function(num){ num *

    4 } f(num = dat) map(.x = dat, .f = f) [[1]] [1] 4 8 12 16 [[2]] [1] 28 24 20 16
  23. ?map f <- function(num){ num * 4 } result <-

    NULL for(i in 1:length(dat)){ result[[i]] <- f(dat[[i]]) } by using for dat <- list(1:4, 7:4) map(.x = dat, .f = f)
  24. ?map dat <- list(1:4, 7:4) map(.x = dat, .f =

    f) f <- function(num){ num * 4 } map(dat, f) map(.x = dat, ~ f(num = .x)) map(.x = dat, function(num){num * 4}) map(dat, ~ {.x * 4}) map(dat, ~ {. * 4})
  25. group_nest -> mutate -> map penguins_xy %>% group_nest(species) %>% mutate(fit

    = map(data, ~ lm(y ~ x, data = .)) # A tibble: 3 x 4 species data fit <fct> <list<tbl_df[,9]>> <list> 1 Adelie [152 × 9] -> lm(y ~ x) -> <lm> 2 Chinstrap [68 × 9] -> lm(y ~ x) -> <lm> 3 Gentoo [124 × 9] -> lm(y ~ x) -> <lm>
  26. group_nest -> mutate -> map penguins_xy %>% group_nest(species) %>% mutate(fit

    = map(data, ~ lm(y ~ x, data = .)) penguins_xy %>% group_nest(species) %>% mutate(fit = map(data, function(dat){ lm(y ~ x, data = dat)}) f <- function(dat){ lm(y ~ x, data = dat } penguins_xy %>% group_nest(species) %>% mutate(fit = map(data, f))
  27. data.frame Long Wide Nested plot Figures Data table read_csv write_csv

    pivot_longer pivot_wider group_nest unnest ggplot ggsave wrap_plots map