## ----include = FALSE----------------------------------------------------------
# Rewrite volatile fragments of the knitted document so that the rendered
# output does not change from one run to the next.
#
# x:       character vector with the document text.
# options: accepted for hook compatibility; not used.
# Returns `x` with all substitutions applied.
clean_output <- function(x, options) {
  # Hexadecimal literals (presumably memory addresses) get a fixed value.
  x <- gsub("0x[0-9a-f]+", "0xdeadbeef", x)
  # Numbered data frame identifiers get a fixed suffix.
  x <- gsub("dataframe_[0-9]*_[0-9]*", " dataframe_42_42 ", x)
  # Numeric prefix of the row-number ordering clause gets a fixed value.
  x <- gsub("[0-9]*\\.___row_number ASC", "42.___row_number ASC", x)
  # Box-drawing dashes become plain ASCII hyphens.
  x <- gsub("─", "-", x)
  x
}

# Register the cleaner as knitr's "document" hook.
knitr::knit_hooks$set(document = clean_output)

# Chunks are evaluated when IN_PKGDOWN is "true", or when R is at least 4.1,
# all listed packages are installed and the httpfs extension can be loaded.
knitr::opts_chunk$set(
  collapse = TRUE,
  eval = identical(Sys.getenv("IN_PKGDOWN"), "true") ||
    (getRversion() >= "4.1" &&
      rlang::is_installed(
        c("conflicted", "dbplyr", "nycflights13", "callr")
      ) &&
      duckplyr:::can_load_extension("httpfs")),
  comment = "#>"
)

options(conflicts.policy = list(warn = FALSE))
Sys.setenv(DUCKPLYR_FALLBACK_COLLECT = 0)

## ----attach-------------------------------------------------------------------
library(conflicted)
library(duckplyr)
conflict_prefer("filter", "dplyr")

## -----------------------------------------------------------------------------
# Build a duckdb tibble directly from column vectors.
df <- duckdb_tibble(x = 1:3, y = letters[1:3])
df

## -----------------------------------------------------------------------------
# Convert an existing data frame.
flights_df() |>
  as_duckdb_tibble()

## -----------------------------------------------------------------------------
# Write a small table into a file-backed DuckDB database and reference it
# through `tbl()`.
path_duckdb <- tempfile(fileext = ".duckdb")
con <- DBI::dbConnect(duckdb::duckdb(path_duckdb))
DBI::dbWriteTable(con, "data", data.frame(x = 1:3, y = letters[1:3]))

dbplyr_data <- tbl(con, "data")
dbplyr_data

dbplyr_data |>
  explain()

## -----------------------------------------------------------------------------
# Convert the `tbl()` reference to a duckdb tibble and compare the plan.
dbplyr_data |>
  as_duckdb_tibble()

dbplyr_data |>
  as_duckdb_tibble() |>
  explain()

## -----------------------------------------------------------------------------
# Close the connection; the database file is attached again further below.
DBI::dbDisconnect(con)

## ----error = TRUE-------------------------------------------------------------
# The chunk is labelled `error = TRUE`, so these calls are expected to fail.
# Note that try() abandons the braced block at the first error, so statements
# after a failing one are not evaluated.
try({
  duckdb_tibble(a = 1) |>
    group_by(a) |>
    as_duckdb_tibble()

  duckdb_tibble(a = 1) |>
    rowwise() |>
    as_duckdb_tibble()

  readr::read_csv("a\n1", show_col_types = FALSE) |>
    as_duckdb_tibble()
})

## -----------------------------------------------------------------------------
# Read a single CSV file.
path_csv_1 <- tempfile(fileext = ".csv")
writeLines("x,y\n1,a\n2,b\n3,c", path_csv_1)
read_csv_duckdb(path_csv_1)

## -----------------------------------------------------------------------------
# Read two CSV files with the same columns in one call.
path_csv_2 <- tempfile(fileext = ".csv")
writeLines("x,y\n4,d\n5,e\n6,f", path_csv_2)
read_csv_duckdb(c(path_csv_1, path_csv_2))

## -----------------------------------------------------------------------------
# Install and load the httpfs extension before reading from a URL.
db_exec("INSTALL httpfs")
db_exec("LOAD httpfs")

## -----------------------------------------------------------------------------
url <- "https://blobs.duckdb.org/flight-data-partitioned/Year=2024/data_0.parquet"
flights_parquet <- read_parquet_duckdb(url)
flights_parquet

## -----------------------------------------------------------------------------
# Attach the database file created above, read-only, under the name
# "external".
sql_attach <- paste0(
  "ATTACH DATABASE '",
  path_duckdb,
  "' AS external (READ_ONLY)"
)
db_exec(sql_attach)

## -----------------------------------------------------------------------------
read_sql_duckdb("SELECT * FROM external.data")

## -----------------------------------------------------------------------------
# Show the plan of a pipeline, then keep the result of compute().
simple_data <- duckdb_tibble(a = 1) |>
  mutate(b = 2)

simple_data |>
  explain()

simple_data_computed <- simple_data |>
  compute()

## -----------------------------------------------------------------------------
simple_data_computed |>
  explain()

## -----------------------------------------------------------------------------
duckdb_tibble(a = 1) |>
  mutate(b = 2) |>
  collect()

## -----------------------------------------------------------------------------
# Write the result to a CSV file and print the file's contents.
path_csv_out <- tempfile(fileext = ".csv")
duckdb_tibble(a = 1) |>
  mutate(b = 2) |>
  compute_csv(path_csv_out)
writeLines(readLines(path_csv_out))

## -----------------------------------------------------------------------------
# Write the result to a Parquet file and show the plan of what is returned.
path_parquet_out <- tempfile(fileext = ".parquet")
duckdb_tibble(a = 1) |>
  mutate(b = 2) |>
  compute_parquet(path_parquet_out) |>
  explain()

## -----------------------------------------------------------------------------
# Show the memory limit setting, change it to 1GB, and show it again.
read_sql_duckdb("SELECT current_setting('memory_limit') AS memlimit")
db_exec("PRAGMA memory_limit = '1GB'")
read_sql_duckdb("SELECT current_setting('memory_limit') AS memlimit")

## ----error = TRUE-------------------------------------------------------------
# The chunk is labelled `error = TRUE`, so this call is expected to fail.
try({
  flights_parquet |>
    group_by(Month)
})

## -----------------------------------------------------------------------------
flights_parquet |>
  count(Month, DayofMonth) |>
  group_by(Month)