From d5103dccd2a9e9dd259cd8c2852ae34aa13dd626 Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Sat, 10 Aug 2024 20:26:20 -0700 Subject: [PATCH 01/11] Add extended tests + CI job to run those --- dev/tasks/r/github.linux.extra.packages.yml | 51 +++++++++++++++ dev/tasks/tasks.yml | 4 ++ .../testthat/test-extra-package-roundtrip.R | 62 +++++++++++++++++++ 3 files changed, 117 insertions(+) create mode 100644 dev/tasks/r/github.linux.extra.packages.yml create mode 100644 r/tests/testthat/test-extra-package-roundtrip.R diff --git a/dev/tasks/r/github.linux.extra.packages.yml b/dev/tasks/r/github.linux.extra.packages.yml new file mode 100644 index 0000000000000..fa1e856d5b61d --- /dev/null +++ b/dev/tasks/r/github.linux.extra.packages.yml @@ -0,0 +1,51 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +{% import 'macros.jinja' as macros with context %} + +{{ macros.github_header() }} + +jobs: + extra-packages: + name: "extra package roundtrip tests" + runs-on: ubuntu-24.04 + strategy: + fail-fast: false + env: + ARROW_R_DEV: "FALSE" + RSPM: "https://packagemanager.posit.co/cran/__linux__/noble/latest" + ARROW_R_FORCE_EXTRA_PACKAGE_TESTS: TRUE + steps: + {{ macros.github_checkout_arrow()|indent }} + + - uses: r-lib/actions/setup-r@v2 + - uses: r-lib/actions/setup-pandoc@v2 + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + working-directory: 'arrow/r' + extra-packages: | + any::rcmdcheck + any::readr + any::data.table + - name: Build arrow package + run: | + R CMD build --no-build-vignettes arrow/r + R CMD INSTALL --install-tests --no-test-load --no-byte-compile arrow_*.tar.gz + - name: run tests + run: | + testthat::test_package("arrow", filter = "extra-package-roundtrip") + shell: Rscript {0} diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index 6e1f7609a980f..a9da7eb2889a0 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -1309,6 +1309,10 @@ tasks: ci: github template: r/github.linux.rchk.yml + test-r-extra-packages: + ci: github + template: r/github.linux.extra.packages.yml + test-r-linux-as-cran: ci: github template: r/github.linux.cran.yml diff --git a/r/tests/testthat/test-extra-package-roundtrip.R b/r/tests/testthat/test-extra-package-roundtrip.R new file mode 100644 index 0000000000000..f8d62b7d9ed83 --- /dev/null +++ b/r/tests/testthat/test-extra-package-roundtrip.R @@ -0,0 +1,62 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +skip_on_cran() + +# So that we can force these in CI +load_or_skip <- function(pkg) { + if (identical(tolower(Sys.getenv("ARROW_R_FORCE_EXTRA_PACKAGE_TESTS")), "true")) { + # because of this indirection on the package name we also avoid a CHECK note and + # we don't otherwise need to Suggest this + requireNamespace(pkg, quietly = TRUE) + } else { + skip_if(!requireNamespace(pkg, quietly = TRUE)) + } + attachNamespace(pkg) +} + +library(tibble) + +test_that("readr read csvs roundtrip", { + load_or_skip("readr") + + tbl <- example_data[, c("dbl", "lgl", "false", "chr")] + + tf <- tempfile() + on.exit(unlink(tf)) + write.csv(tbl, tf, row.names = FALSE) + + # we should still be able to turn this into a table + new_df <- read_csv(tf, show_col_types = FALSE) + expect_equal(tbl, as_tibble(arrow_table(new_df))) + + # we should still be able to turn this into a table + new_df <- read_csv(tf, show_col_types = FALSE, lazy = TRUE) + expect_equal(tbl, as_tibble(arrow_table(new_df))) +}) + +test_that("data.table objects roundtrip", { + # indirection to avoid CHECK note and we don't otherwise need to Suggest this + load_or_skip("data.table") + + DT <- as.data.table(example_data) + + # we should still be able to turn this into a table + expect_equal(example_data, as_tibble(arrow_table(DT))) + + # TODO: parquet write? +}) From 2f34dbacf0e37e5e80d5a5b7d2fbf7b43ec55693 Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Sat, 10 Aug 2024 20:43:04 -0700 Subject: [PATCH 02/11] data.table to parquet --- .../testthat/test-extra-package-roundtrip.R | 23 +++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/r/tests/testthat/test-extra-package-roundtrip.R b/r/tests/testthat/test-extra-package-roundtrip.R index f8d62b7d9ed83..28f0cbe9e5802 100644 --- a/r/tests/testthat/test-extra-package-roundtrip.R +++ b/r/tests/testthat/test-extra-package-roundtrip.R @@ -42,11 +42,20 @@ test_that("readr read csvs roundtrip", { # we should still be able to turn this into a table new_df <- read_csv(tf, show_col_types = FALSE) - expect_equal(tbl, as_tibble(arrow_table(new_df))) + expect_equal(new_df, as_tibble(arrow_table(new_df))) # we should still be able to turn this into a table new_df <- read_csv(tf, show_col_types = FALSE, lazy = TRUE) - expect_equal(tbl, as_tibble(arrow_table(new_df))) + expect_equal(new_df, as_tibble(arrow_table(new_df))) + + # and can roundtrip to a parquet file + pq_tmp_file <- tempfile() + write_parquet(new_df, pq_tmp_file) + new_df_read <- read_parquet(pq_tmp_file) + + # we should still be able to turn this into a table + expect_equal(new_df, new_df_read) + }) test_that("data.table objects roundtrip", { @@ -55,8 +64,14 @@ test_that("data.table objects roundtrip", { DT <- as.data.table(example_data) + # write to parquet + pq_tmp_file <- tempfile() + write_parquet(DT, pq_tmp_file) + DT_read <- read_parquet(pq_tmp_file) + # we should still be able to turn this into a table - expect_equal(example_data, as_tibble(arrow_table(DT))) + expect_equal(DT, DT_read) - # TODO: parquet write? + # and the attributes are the same, aside from the internal selfref pointer + expect_mapequal(attributes(DT_read), attributes(DT)[names(attributes(DT)) != ".internal.selfref"]) }) From c35abd43d3e81ab988c0b713ab9ff3f3f501c6bb Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Sat, 10 Aug 2024 20:46:12 -0700 Subject: [PATCH 03/11] remove a comment --- r/tests/testthat/test-extra-package-roundtrip.R | 1 - 1 file changed, 1 deletion(-) diff --git a/r/tests/testthat/test-extra-package-roundtrip.R b/r/tests/testthat/test-extra-package-roundtrip.R index 28f0cbe9e5802..ee5e10fb2b1ed 100644 --- a/r/tests/testthat/test-extra-package-roundtrip.R +++ b/r/tests/testthat/test-extra-package-roundtrip.R @@ -59,7 +59,6 @@ test_that("readr read csvs roundtrip", { }) test_that("data.table objects roundtrip", { - # indirection to avoid CHECK note and we don't otherwise need to Suggest this load_or_skip("data.table") DT <- as.data.table(example_data) From f28d4d8ffb70679eae30a0bf2f02f34b837a459a Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Sat, 10 Aug 2024 21:04:26 -0700 Subject: [PATCH 04/11] Also set index + keys --- r/tests/testthat/test-extra-package-roundtrip.R | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/r/tests/testthat/test-extra-package-roundtrip.R b/r/tests/testthat/test-extra-package-roundtrip.R index ee5e10fb2b1ed..12dbfdb6583de 100644 --- a/r/tests/testthat/test-extra-package-roundtrip.R +++ b/r/tests/testthat/test-extra-package-roundtrip.R @@ -71,6 +71,21 @@ test_that("data.table objects roundtrip", { # we should still be able to turn this into a table expect_equal(DT, DT_read) + # attributes are the same, aside from the internal selfref pointer + expect_mapequal(attributes(DT_read), attributes(DT)[names(attributes(DT)) != ".internal.selfref"]) + + # and we can set keys + indices + setkey(DT, chr) + setindex(DT, dbl) + + # write to parquet + pq_tmp_file <- tempfile() + write_parquet(DT, pq_tmp_file) + DT_read <- read_parquet(pq_tmp_file) + + # we should still be able to turn this into a table + expect_equal(DT, DT_read) + # and the attributes are the same, aside from the internal selfref pointer expect_mapequal(attributes(DT_read), attributes(DT)[names(attributes(DT)) != ".internal.selfref"]) }) From 29a88b22cacb363bba5ad61aacd3690e0f5b3481 Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Sat, 10 Aug 2024 21:13:25 -0700 Subject: [PATCH 05/11] Add an explanatory comment about how to add new packages --- r/tests/testthat/test-extra-package-roundtrip.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/r/tests/testthat/test-extra-package-roundtrip.R b/r/tests/testthat/test-extra-package-roundtrip.R index 12dbfdb6583de..ce921f1086b3c 100644 --- a/r/tests/testthat/test-extra-package-roundtrip.R +++ b/r/tests/testthat/test-extra-package-roundtrip.R @@ -17,6 +17,10 @@ skip_on_cran() +# Any additional pacakge that we test here that is not already in DESCRIPTION should be +# added to dev/tasks/r/github.linux.extra.packages.yml in the r-lib/actions/setup-r-dependencies@v2 +# step so that they are installed + available in that CI job. + # So that we can force these in CI load_or_skip <- function(pkg) { if (identical(tolower(Sys.getenv("ARROW_R_FORCE_EXTRA_PACKAGE_TESTS")), "true")) { From 2fe6301e324ba3b6f03f0da36af6d5b40b63216e Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Sun, 11 Aug 2024 13:36:04 -0700 Subject: [PATCH 06/11] Also create new columns in with data.table --- r/tests/testthat/test-extra-package-roundtrip.R | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/r/tests/testthat/test-extra-package-roundtrip.R b/r/tests/testthat/test-extra-package-roundtrip.R index ce921f1086b3c..f3def7f9e2c6c 100644 --- a/r/tests/testthat/test-extra-package-roundtrip.R +++ b/r/tests/testthat/test-extra-package-roundtrip.R @@ -65,6 +65,9 @@ test_that("readr read csvs roundtrip", { test_that("data.table objects roundtrip", { load_or_skip("data.table") + # https://rdatatable.gitlab.io/data.table/articles/datatable-importing.html#testing-using-testthat + .datatable.aware=TRUE + DT <- as.data.table(example_data) # write to parquet @@ -78,9 +81,10 @@ test_that("data.table objects roundtrip", { # attributes are the same, aside from the internal selfref pointer expect_mapequal(attributes(DT_read), attributes(DT)[names(attributes(DT)) != ".internal.selfref"]) - # and we can set keys + indices + # and we can set keys + indices + create new columns setkey(DT, chr) setindex(DT, dbl) + DT[, dblshift := data.table::shift(dbl, 1)] # write to parquet pq_tmp_file <- tempfile() From e2952c0b731b789388b593338c1d2574698f8bd6 Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Wed, 14 Aug 2024 09:06:09 -0700 Subject: [PATCH 07/11] refine tests --- .../testthat/test-extra-package-roundtrip.R | 25 +++++++++++++------ 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/r/tests/testthat/test-extra-package-roundtrip.R b/r/tests/testthat/test-extra-package-roundtrip.R index f3def7f9e2c6c..8f04a229747a0 100644 --- a/r/tests/testthat/test-extra-package-roundtrip.R +++ b/r/tests/testthat/test-extra-package-roundtrip.R @@ -17,7 +17,7 @@ skip_on_cran() -# Any additional pacakge that we test here that is not already in DESCRIPTION should be +# Any additional package that we test here that is not already in DESCRIPTION should be # added to dev/tasks/r/github.linux.extra.packages.yml in the r-lib/actions/setup-r-dependencies@v2 # step so that they are installed + available in that CI job. @@ -59,13 +59,12 @@ test_that("readr read csvs roundtrip", { # we should still be able to turn this into a table expect_equal(new_df, new_df_read) - }) test_that("data.table objects roundtrip", { load_or_skip("data.table") - # https://rdatatable.gitlab.io/data.table/articles/datatable-importing.html#testing-using-testthat + # https://github.com/Rdatatable/data.table/blob/83fd2c05ce2d8555ceb8ba417833956b1b574f7e/R/cedta.R#L25-L27 .datatable.aware=TRUE DT <- as.data.table(example_data) @@ -76,11 +75,9 @@ test_that("data.table objects roundtrip", { DT_read <- read_parquet(pq_tmp_file) # we should still be able to turn this into a table + # the .internal.selfref attribute is automatically ignored by testthat/waldo expect_equal(DT, DT_read) - # attributes are the same, aside from the internal selfref pointer - expect_mapequal(attributes(DT_read), attributes(DT)[names(attributes(DT)) != ".internal.selfref"]) - # and we can set keys + indices + create new columns setkey(DT, chr) setindex(DT, dbl) @@ -93,7 +90,19 @@ test_that("data.table objects roundtrip", { # we should still be able to turn this into a table expect_equal(DT, DT_read) +}) + +test_that("units roundtrip", { + load_or_skip("units") + + tbl <- example_data + units(tbl$dbl) <- "s" - # and the attributes are the same, aside from the internal selfref pointer - expect_mapequal(attributes(DT_read), attributes(DT)[names(attributes(DT)) != ".internal.selfref"]) + # and can roundtrip to a parquet file + pq_tmp_file <- tempfile() + write_parquet(tbl, pq_tmp_file) + tbl_read <- read_parquet(pq_tmp_file) + + # we should still be able to turn this into a table + expect_equal(tbl, tbl_read) }) From 62ced17f2676b9c3abcf19c2054db48c557e41a1 Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Wed, 14 Aug 2024 09:06:52 -0700 Subject: [PATCH 08/11] Add units package --- dev/tasks/r/github.linux.extra.packages.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dev/tasks/r/github.linux.extra.packages.yml b/dev/tasks/r/github.linux.extra.packages.yml index fa1e856d5b61d..991c65dd0eb80 100644 --- a/dev/tasks/r/github.linux.extra.packages.yml +++ b/dev/tasks/r/github.linux.extra.packages.yml @@ -38,9 +38,10 @@ jobs: with: working-directory: 'arrow/r' extra-packages: | + any::data.table any::rcmdcheck any::readr - any::data.table + any::unites - name: Build arrow package run: | R CMD build --no-build-vignettes arrow/r From 4403b59536b8d8c9ca506c42d9ebb7cd4f853d76 Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Wed, 14 Aug 2024 22:12:13 -0700 Subject: [PATCH 09/11] units not unites --- dev/tasks/r/github.linux.extra.packages.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/tasks/r/github.linux.extra.packages.yml b/dev/tasks/r/github.linux.extra.packages.yml index 991c65dd0eb80..b8ba691178ddc 100644 --- a/dev/tasks/r/github.linux.extra.packages.yml +++ b/dev/tasks/r/github.linux.extra.packages.yml @@ -41,7 +41,7 @@ jobs: any::data.table any::rcmdcheck any::readr - any::unites + any::units - name: Build arrow package run: | R CMD build --no-build-vignettes arrow/r From 014e4be9115befed6aa729b8ebdb15a726e09895 Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Fri, 16 Aug 2024 14:12:54 -0700 Subject: [PATCH 10/11] PR comments --- dev/tasks/r/github.linux.extra.packages.yml | 3 +-- .../testthat/test-extra-package-roundtrip.R | 23 ++++++++----------- 2 files changed, 11 insertions(+), 15 deletions(-) diff --git a/dev/tasks/r/github.linux.extra.packages.yml b/dev/tasks/r/github.linux.extra.packages.yml index b8ba691178ddc..6e87fba73baed 100644 --- a/dev/tasks/r/github.linux.extra.packages.yml +++ b/dev/tasks/r/github.linux.extra.packages.yml @@ -22,12 +22,11 @@ jobs: extra-packages: name: "extra package roundtrip tests" - runs-on: ubuntu-24.04 + runs-on: ubuntu-latest strategy: fail-fast: false env: ARROW_R_DEV: "FALSE" - RSPM: "https://packagemanager.posit.co/cran/__linux__/noble/latest" ARROW_R_FORCE_EXTRA_PACKAGE_TESTS: TRUE steps: {{ macros.github_checkout_arrow()|indent }} diff --git a/r/tests/testthat/test-extra-package-roundtrip.R b/r/tests/testthat/test-extra-package-roundtrip.R index 8f04a229747a0..09a87ef19d561 100644 --- a/r/tests/testthat/test-extra-package-roundtrip.R +++ b/r/tests/testthat/test-extra-package-roundtrip.R @@ -33,7 +33,7 @@ load_or_skip <- function(pkg) { attachNamespace(pkg) } -library(tibble) +library(dplyr) test_that("readr read csvs roundtrip", { load_or_skip("readr") @@ -69,10 +69,9 @@ test_that("data.table objects roundtrip", { DT <- as.data.table(example_data) - # write to parquet - pq_tmp_file <- tempfile() - write_parquet(DT, pq_tmp_file) - DT_read <- read_parquet(pq_tmp_file) + # Table -> collect which is what writing + reading to parquet uses under the hood to roundtrip + tab <- as_arrow_table(DT) + DT_read <- collect(tab) # we should still be able to turn this into a table # the .internal.selfref attribute is automatically ignored by testthat/waldo @@ -83,10 +82,9 @@ test_that("data.table objects roundtrip", { setindex(DT, dbl) DT[, dblshift := data.table::shift(dbl, 1)] - # write to parquet - pq_tmp_file <- tempfile() - write_parquet(DT, pq_tmp_file) - DT_read <- read_parquet(pq_tmp_file) + # Table -> collect + tab <- as_arrow_table(DT) + DT_read <- collect(tab) # we should still be able to turn this into a table expect_equal(DT, DT_read) @@ -98,10 +96,9 @@ test_that("units roundtrip", { tbl <- example_data units(tbl$dbl) <- "s" - # and can roundtrip to a parquet file - pq_tmp_file <- tempfile() - write_parquet(tbl, pq_tmp_file) - tbl_read <- read_parquet(pq_tmp_file) + # Table -> collect which is what writing + reading to parquet uses under the hood to roundtrip + tab <- as_arrow_table(tbl) + tbl_read <- collect(tab) # we should still be able to turn this into a table expect_equal(tbl, tbl_read) From ba3c1bfe298631cf0a6a426842ac869b7c53a193 Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Fri, 16 Aug 2024 14:30:59 -0700 Subject: [PATCH 11/11] options --- dev/tasks/r/github.linux.extra.packages.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dev/tasks/r/github.linux.extra.packages.yml b/dev/tasks/r/github.linux.extra.packages.yml index 6e87fba73baed..bb486c72a06a9 100644 --- a/dev/tasks/r/github.linux.extra.packages.yml +++ b/dev/tasks/r/github.linux.extra.packages.yml @@ -32,6 +32,8 @@ jobs: {{ macros.github_checkout_arrow()|indent }} - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true - uses: r-lib/actions/setup-pandoc@v2 - uses: r-lib/actions/setup-r-dependencies@v2 with: