## ---- include = FALSE---------------------------------------------------------
# Chunk-wide knitr options used when rendering this document.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----setup, message = FALSE---------------------------------------------------
library(dtplyr)
library(data.table)
library(dplyr)

## -----------------------------------------------------------------------------
# A small four-column data frame and its lazy_dt() wrapper. `dt` is the input
# to the examples that follow.
df <- data.frame(a = 1:5, b = 1:5, c = 1:5, d = 1:5)
dt <- lazy_dt(df)

## -----------------------------------------------------------------------------
# Print the lazy table itself.
dt

## -----------------------------------------------------------------------------
# show_query() on the untouched table.
dt %>%
  show_query()

## -----------------------------------------------------------------------------
# Row-wise verbs: arrange() and filter() with one and two conditions.
dt %>%
  arrange(a, b, c) %>%
  show_query()

dt %>%
  filter(b == c) %>%
  show_query()

dt %>%
  filter(b == c, c == d) %>%
  show_query()

## -----------------------------------------------------------------------------
# Column-wise verbs: select(), summarise() and transmute().
dt %>%
  select(a:b) %>%
  show_query()

dt %>%
  summarise(a = mean(a)) %>%
  show_query()

dt %>%
  transmute(a2 = a * 2) %>%
  show_query()

## -----------------------------------------------------------------------------
# mutate() creating two independent columns.
dt %>%
  mutate(a2 = a * 2, b2 = b * 2) %>%
  show_query()

## -----------------------------------------------------------------------------
# mutate() where a later column (a4) is computed from an earlier new one (a2).
dt %>%
  mutate(a2 = a * 2, b2 = b * 2, a4 = a2 * 2) %>%
  show_query()

## -----------------------------------------------------------------------------
# The same dependent expressions, this time through transmute().
dt %>%
  transmute(a2 = a * 2, b2 = b * 2, a4 = a2 * 2) %>%
  show_query()

## -----------------------------------------------------------------------------
# rename() two columns.
dt %>%
  rename(x = a, y = b) %>%
  show_query()

## -----------------------------------------------------------------------------
# distinct(): all columns, a subset of columns, and a subset with
# .keep_all = TRUE.
dt %>%
  distinct() %>%
  show_query()

dt %>%
  distinct(a, b) %>%
  show_query()

dt %>%
  distinct(a, b, .keep_all = TRUE) %>%
  show_query()

## -----------------------------------------------------------------------------
# distinct() on a computed expression, without and with .keep_all = TRUE.
dt %>%
  distinct(c = a + b) %>%
  show_query()

dt %>%
  distinct(c = a + b, .keep_all = TRUE) %>%
  show_query()
## -----------------------------------------------------------------------------
# Two-table verbs. `dt2` is a one-row lazy table that shares column `a` with
# `dt`; every join names its key explicitly through `by`.
dt2 <- lazy_dt(data.frame(a = 1))

dt %>%
  inner_join(dt2, by = "a") %>%
  show_query()

dt %>%
  right_join(dt2, by = "a") %>%
  show_query()

dt %>%
  left_join(dt2, by = "a") %>%
  show_query()

dt %>%
  anti_join(dt2, by = "a") %>%
  show_query()

## -----------------------------------------------------------------------------
dt %>%
  full_join(dt2, by = "a") %>%
  show_query()

## -----------------------------------------------------------------------------
# `dt3` shares both `a` and `b` with `dt`, so joining on only one of them
# leaves the other as a non-key column present on both sides.
dt3 <- lazy_dt(data.frame(b = 1, a = 1))

dt %>%
  left_join(dt3, by = "a") %>%
  show_query()

dt %>%
  full_join(dt3, by = "b") %>%
  show_query()

## -----------------------------------------------------------------------------
dt %>%
  semi_join(dt2, by = "a") %>%
  show_query()

## -----------------------------------------------------------------------------
# Set operations between `dt` and `dt2`.
dt %>%
  intersect(dt2) %>%
  show_query()

dt %>%
  setdiff(dt2) %>%
  show_query()

dt %>%
  union(dt2) %>%
  show_query()

## -----------------------------------------------------------------------------
# Grouped summary.
dt %>%
  group_by(a) %>%
  summarise(b = mean(b)) %>%
  show_query()

## -----------------------------------------------------------------------------
# The same grouped summary with `arrange = FALSE` passed to group_by().
dt %>%
  group_by(a, arrange = FALSE) %>%
  summarise(b = mean(b)) %>%
  show_query()

## -----------------------------------------------------------------------------
# Grouped filter against a per-group aggregate.
dt %>%
  group_by(a) %>%
  filter(b < mean(b)) %>%
  show_query()

## -----------------------------------------------------------------------------
# Combining verbs: filter() followed by select().
dt %>%
  filter(a == 1) %>%
  select(-a) %>%
  show_query()

## -----------------------------------------------------------------------------
# Grouped filter followed by a grouped summarise().
dt %>%
  group_by(a) %>%
  filter(b < mean(b)) %>%
  summarise(c = max(c)) %>%
  show_query()

## -----------------------------------------------------------------------------
# Join followed by select(). Note that `dt3` is rebound here to a different
# table than the one defined earlier in this script.
dt3 <- lazy_dt(data.frame(x = 1, y = 2))
dt4 <- lazy_dt(data.frame(x = 1, a = 2, b = 3, c = 4, d = 5, e = 7))

# `x` is the only column the two tables share. Naming it in `by` matches the
# other joins in this script and avoids the implicit-key message that dplyr
# emits when `by` is omitted.
dt3 %>%
  left_join(dt4, by = "x") %>%
  select(x, a:c) %>%
  show_query()
## -----------------------------------------------------------------------------
# select() that renames columns, followed by a filter() on a renamed column.
dt %>%
  select(X = a, Y = b) %>%
  filter(X == 1) %>%
  show_query()

## -----------------------------------------------------------------------------
# filter() followed by mutate().
dt %>%
  filter(a == 1) %>%
  mutate(b2 = b * 2) %>%
  show_query()

## -----------------------------------------------------------------------------
# mutate() applied directly to the source table.
dt %>%
  mutate(a2 = a * 2, b2 = b * 2) %>%
  show_query()

## -----------------------------------------------------------------------------
# mutate() applied after a filter(). The filter uses column `a`, which exists
# in `dt`; the previous `x == 1` referred to a column `dt` does not have.
dt %>%
  filter(a == 1) %>%
  mutate(a2 = a * 2, b2 = b * 2) %>%
  show_query()

## -----------------------------------------------------------------------------
# mutate() on a lazy table created with `immutable = FALSE`. The backing
# data.table carries both `a` and `b` so that the expressions below refer to
# real columns. Note that `dt2` is rebound here from a lazy table to a plain
# data.table.
dt2 <- data.table(a = 1:10, b = 1:10)
dt_inplace <- lazy_dt(dt2, immutable = FALSE)

dt_inplace %>%
  mutate(a2 = a * 2, b2 = b * 2) %>%
  show_query()

## -----------------------------------------------------------------------------
# Time three representative pipelines. All three now end in show_query() so
# they are timed on the same kind of work, and the mutate case defines `a2`
# before using it. `check = FALSE` because the three expressions are not
# expected to return equal values; `[1:6]` keeps only the leading columns of
# the benchmark result.
bench::mark(
  filter = dt %>%
    filter(a == b, c == d) %>%
    show_query(),
  mutate = dt %>%
    mutate(a2 = a * 2, a4 = a2 * 2, a8 = a4 * 2) %>%
    show_query(),
  summarise = dt %>%
    group_by(a) %>%
    summarise(b = mean(b)) %>%
    show_query(),
  check = FALSE
)[1:6]