## ----include = FALSE---------------------------------------------------------- clean_output <- function(x, options) { x <- gsub("0x[0-9a-f]+", "0xdeadbeef", x) x <- gsub("dataframe_[0-9]*_[0-9]*", " dataframe_42_42 ", x) x <- gsub("[0-9]*\\.___row_number ASC", "42.___row_number ASC", x) x <- gsub("─", "-", x) x } local({ hook_source <- knitr::knit_hooks$get("document") knitr::knit_hooks$set(document = clean_output) }) knitr::opts_chunk$set( collapse = TRUE, eval = identical(Sys.getenv("IN_PKGDOWN"), "true") || (getRversion() >= "4.1" && rlang::is_installed(c("conflicted", "nycflights13", "lubridate"))), comment = "#>" ) Sys.setenv(DUCKPLYR_FALLBACK_COLLECT = 0) ## ----attach------------------------------------------------------------------- library(conflicted) library(dplyr) conflict_prefer("filter", "dplyr") conflict_prefer("lag", "dplyr") ## ----------------------------------------------------------------------------- duckplyr::duckdb_tibble( logical = TRUE, integer = 1L, numeric = 1.1, character = "a", Date = as.Date("2025-01-11"), POSIXct = as.POSIXct("2025-01-11 19:23:00", tz = "UTC"), difftime = as.difftime(1, units = "secs"), ) |> compute() ## ----error = TRUE------------------------------------------------------------- try({ duckplyr::duckdb_tibble() duckplyr::duckdb_tibble(a = 1, .prudence = "stingy") |> select(-a) }) ## ----------------------------------------------------------------------------- duckplyr::duckdb_tibble(a = 1, b = 2, c = 3, .prudence = "stingy") |> mutate((a + b) * c) ## ----------------------------------------------------------------------------- duckplyr::duckdb_tibble( a = c(1, 2, NA), b = c(2, NA, 3), c = c(NA, 3, 4), .prudence = "stingy" ) |> mutate(a > b, b != c, c < a, a >= b, b <= c) ## ----------------------------------------------------------------------------- duckplyr::duckdb_tibble(a = 1, b = 2, c = 3, .prudence = "stingy") |> mutate(a + b, a / b, a - b, a * b) ## ----------------------------------------------------------------------------- duckplyr::duckdb_tibble(a = 1, b = 2, c = -3, .prudence = "stingy") |> mutate(log10(a), log(b), abs(c)) ## ----------------------------------------------------------------------------- duckplyr::duckdb_tibble(a = FALSE, b = TRUE, c = NA, .prudence = "stingy") |> mutate(!a, a & b, b | c) ## ----------------------------------------------------------------------------- duckplyr::duckdb_tibble(a = 1, b = NA, .prudence = "stingy") |> mutate(is.na(b), if_else(is.na(b), 0, 1), as.integer(b)) duckplyr::duckdb_tibble( a = as.POSIXct("2025-01-11 19:23:46", tz = "UTC"), .prudence = "stingy") |> mutate(strftime(a, "%H:%M:%S")) ## ----------------------------------------------------------------------------- duckplyr::duckdb_tibble(a = "abbc", .prudence = "stingy") |> mutate(grepl("b", a), substr(a, 2L, 3L), sub("b", "B", a), gsub("b", "B", a)) ## ----------------------------------------------------------------------------- duckplyr::duckdb_tibble( a = as.POSIXct("2025-01-11 19:23:46", tz = "UTC"), .prudence = "stingy" ) |> mutate( hour = lubridate::hour(a), minute = lubridate::minute(a), second = lubridate::second(a), wday = lubridate::wday(a) ) ## ----------------------------------------------------------------------------- duckplyr::duckdb_tibble(a = 1:3, b = c(1, 2, 2), .prudence = "stingy") |> summarize( sum(a), n(), n_distinct(b), ) duckplyr::duckdb_tibble(a = 1:3, b = c(1, 2, NA), .prudence = "stingy") |> summarize( mean(b, na.rm = TRUE), median(a), sd(b), ) duckplyr::duckdb_tibble(a = 1:3, .prudence = "stingy") |> summarize( min(a), max(a), any(a > 1), all(a > 1), ) ## ----------------------------------------------------------------------------- duckplyr::duckdb_tibble(a = 1:3, .prudence = "stingy") |> mutate(lag(a), lead(a)) duckplyr::duckdb_tibble(a = 1:3, .prudence = "stingy") |> mutate(lag(a, 2), lead(a, n = 2)) duckplyr::duckdb_tibble(a = 1:3, .prudence = "stingy") |> mutate(lag(a, default = 0), lead(a, default = 4)) duckplyr::duckdb_tibble(a = 1:3, b = c(2, 3, 1), .prudence = "stingy") |> mutate(lag(a, order_by = b), lead(a, order_by = b)) ## ----------------------------------------------------------------------------- duckplyr::duckdb_tibble(a = c(1, 2, 2, 3), .prudence = "stingy") |> mutate(row_number()) ## ----------------------------------------------------------------------------- b <- 4 duckplyr::duckdb_tibble(a = 1, b = 2, .prudence = "stingy") |> mutate(.data$a + .data$b, .env$b) ## ----------------------------------------------------------------------------- duckplyr::duckdb_tibble(a = 1:3, .prudence = "stingy") |> mutate(a %in% c(1, 3)) |> collect() duckplyr::last_rel() ## ----------------------------------------------------------------------------- duckplyr::duckdb_tibble(a = 1:3, .prudence = "stingy") |> arrange(desc(a)) |> explain() ## ----------------------------------------------------------------------------- duckplyr::duckdb_tibble(a = 1, .prudence = "stingy") |> mutate(suppressWarnings(a + 1)) ## ----------------------------------------------------------------------------- duckplyr::flights_df() |> duckplyr::as_duckdb_tibble() |> distinct(day) |> summarize(paste(day, collapse = " ")) # fallback duckplyr::flights_df() |> distinct(day) |> summarize(paste(day, collapse = " ")) ## ----------------------------------------------------------------------------- duckplyr::flights_df() |> duckplyr::as_duckdb_tibble() |> distinct(day) |> explain() withr::with_envvar( c(DUCKPLYR_OUTPUT_ORDER = "TRUE"), duckplyr::flights_df() |> duckplyr::as_duckdb_tibble() |> distinct(day) |> explain() ) ## ----------------------------------------------------------------------------- duckplyr::duckdb_tibble(a = 1:100) |> summarize(sum(a)) duckplyr::duckdb_tibble(a = 1:1000000) |> summarize(sum(a)) tibble(a = 1:100) |> summarize(sum(a)) tibble(a = 1:1000000) |> summarize(sum(a)) ## ----------------------------------------------------------------------------- duckplyr::duckdb_tibble(a = integer(), b = logical()) |> summarize(sum(a), any(b), all(b), min(a), max(a)) tibble(a = integer(), b = logical()) |> summarize(sum(a), any(b), all(b), min(a), max(a)) ## ----------------------------------------------------------------------------- duckplyr::duckdb_tibble(a = c(TRUE, FALSE)) |> summarize(min(a), max(a)) tibble(a = c(TRUE, FALSE)) |> summarize(min(a), max(a)) ## ----error = TRUE------------------------------------------------------------- try({ duckplyr::duckdb_tibble(a = c(1, 2, NA, 1)) |> summarize(n_distinct(a)) duckplyr::duckdb_tibble(a = c(1, 2, NA, 1), .prudence = "stingy") |> summarize(n_distinct(a, na.rm = TRUE)) tibble(a = c(1, 2, NA, 1)) |> summarize(n_distinct(a)) tibble(a = c(1, 2, NA, 1)) |> summarize(n_distinct(a, na.rm = TRUE)) }) ## ----------------------------------------------------------------------------- duckplyr::duckdb_tibble(a = c(NA, NaN)) |> mutate(is.na(a)) tibble(a = c(NA, NaN)) |> mutate(is.na(a))