database best practices · Issue #46 · ropensci-books/drake (original) (raw)
Thanks so much for this reprex, Amanda. I think your example ran smoothly because make()
does not actually attempt to reload the connection from the cache. Below, I force it to happen (note the "load 2 items: this_con, seeded" message).
library(drake) library(DBI) library(tidyverse) pkgconfig::set_config("drake::strings_in_dots" = "literals")
Funs
connect_to_db <- function() { dbConnect(RSQLite::SQLite(), "db") }
seed_db <- function(conn) { dbWriteTable(conn, "mtcars", mtcars, overwrite = TRUE) }
get_res <- function(conn) { dbGetQuery(conn, "SELECT * FROM mtcars") %>% as_tibble() }
update_db_records <- function(tbl, conn) { new <- tbl %>% map_dfr(log)
dbWriteTable(conn, "mtcars", new, append = FALSE, overwrite = TRUE ) }
test_is_original <- function() { testthat::expect_equal( mtcars %>% as_tibble(), get_res(con) ) }
test_is_log <- function() { testthat::expect_equal( mtcars %>% map_dfr(log), get_res(con) ) }
Create db and write mtcars table
con <- connect_to_db() seed_db(con) test_is_original()
Connection for updating is a target in the plan
plan <- drake_plan( this_con = connect_to_db(), seeded = get_res(this_con), processed_data = update_db_records(seeded, this_con) )
Do the planned database work.
make(plan, verbose = 4) #> cache /tmp/RtmpCzTQKd/reprex2c0718c63f5/.drake #> analyze environment #> analyze 8 imports: seed_db, test_is_original, update_db_records, test_is... #> analyze 3 targets: this_con, seeded, processed_data #> construct graph edges #> construct vertex attributes #> construct graph #> import get_res #> import update_db_records #> import RSQLite::SQLite #> import connect_to_db #> target this_con #> store this_con #> target seeded #> store seeded #> target processed_data #> store processed_data
Just remove process_data
so the next make() loads the connection from the cache.
clean(processed_data)
make(plan, verbose = 4)
#> cache /tmp/RtmpCzTQKd/reprex2c0718c63f5/.drake
#> analyze environment
#> analyze 8 imports: seed_db, test_is_original, update_db_records, test_is...
#> import get_res
#> import update_db_records
#> import RSQLite::SQLite
#> import connect_to_db
#> skip this_con
#> skip seeded
#> load 2 items: this_con, seeded
#> target processed_data
#> fail processed_data
#> Error: Target processed_data
failed. Call diagnose(processed_data)
for details. Error message:
#> external pointer is not valid
Created on 2018-10-17 by the reprex package (v0.2.1)
Interestingly, this is not a problem if you use an in-memory cache.
library(drake) library(DBI) library(tidyverse) pkgconfig::set_config("drake::strings_in_dots" = "literals")
Funs
connect_to_db <- function() { dbConnect(RSQLite::SQLite(), "db") }
seed_db <- function(conn) { dbWriteTable(conn, "mtcars", mtcars, overwrite = TRUE) }
get_res <- function(conn) { dbGetQuery(conn, "SELECT * FROM mtcars") %>% as_tibble() }
update_db_records <- function(tbl, conn) { new <- tbl %>% map_dfr(log)
dbWriteTable(conn, "mtcars", new, append = FALSE, overwrite = TRUE ) }
test_is_original <- function() { testthat::expect_equal( mtcars %>% as_tibble(), get_res(con) ) }
test_is_log <- function() { testthat::expect_equal( mtcars %>% map_dfr(log), get_res(con) ) }
Create db and write mtcars table
con <- connect_to_db() seed_db(con) test_is_original()
Connection for updating is a target in the plan
plan <- drake_plan( this_con = connect_to_db(), seeded = get_res(this_con), processed_data = update_db_records(seeded, this_con) )
Do the planned database work.
cache <- storr::storr_environment() make(plan, verbose = 4, cache = cache) #> analyze environment #> analyze 9 imports: seed_db, test_is_original, update_db_records, cache, ... #> analyze 3 targets: this_con, seeded, processed_data #> construct graph edges #> construct vertex attributes #> construct graph #> import get_res #> import update_db_records #> import RSQLite::SQLite #> import connect_to_db #> target this_con #> store this_con #> target seeded #> store seeded #> target processed_data #> store processed_data
Just remove process_data
so the next make() loads the connection from the cache.
clean(processed_data, cache = cache) make(plan, verbose = 4, cache = cache) #> analyze environment #> analyze 9 imports: seed_db, test_is_original, update_db_records, cache, ... #> import get_res #> import update_db_records #> import RSQLite::SQLite #> import connect_to_db #> skip this_con #> skip seeded #> load 2 items: this_con, seeded #> target processed_data #> store processed_data
Created on 2018-10-17 by the reprex package (v0.2.1)