Skip to content

Commit

Permalink
Merge pull request #3563 from krlmlr/h-ubsan
Browse files Browse the repository at this point in the history
Fix UBSAN and Valgrind errors
  • Loading branch information
krlmlr authored May 9, 2018
2 parents fbbf157 + 03862ec commit e47f9c9
Show file tree
Hide file tree
Showing 6 changed files with 53 additions and 15 deletions.
5 changes: 3 additions & 2 deletions R/rowwise.r
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,9 @@
#' @export
#' @examples
#' df <- expand.grid(x = 1:3, y = 3:1)
#' df %>% rowwise() %>% do(i = seq(.$x, .$y))
#' .Last.value %>% summarise(n = length(i))
#' df_done <- df %>% rowwise() %>% do(i = seq(.$x, .$y))
#' df_done
#' df_done %>% summarise(n = length(i))
rowwise <- function(data) {
stopifnot(is.data.frame(data))

Expand Down
4 changes: 2 additions & 2 deletions cran-comments.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
## Release summary

Bug fixes, minor changes, now importing the tidyselect package.
Same-version update to fix UBSAN and valgrind errors, first release attempt on April 19. Changes compared to the last release attempt: https://github.com/tidyverse/dplyr/pull/3563.

Double-checked with UBSAN, unable to replicate errors seen on CRAN.
Bug fixes, minor changes, now importing the tidyselect package.

## Test environments

Expand Down
37 changes: 28 additions & 9 deletions inst/include/dplyr/Result/DelayedProcessor.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,15 +75,29 @@ class DelayedProcessor : public IDelayedProcessor {
DelayedProcessor(const RObject& first_result, int ngroups_, const SymbolString& name_) :
res(no_init(ngroups_)), pos(0), seen_na_only(true), name(name_)
{
if (!try_handle(first_result))
LOG_VERBOSE;

if (!try_handle(first_result)) {
stop("cannot handle result of type %i for column '%s'", first_result.sexp_type(), name.get_utf8_cstring());
}
copy_most_attributes(res, first_result);
}

DelayedProcessor(int pos_, const RObject& chunk, SEXP res_, const SymbolString& name_) :
res(as<Vec>(res_)), pos(pos_), seen_na_only(false), name(name_)
pos(pos_), seen_na_only(false), name(name_)
{
LOG_VERBOSE;

copy_most_attributes(res, chunk);

// We need to copy carefully here to avoid accessing uninitialized
// parts of res_, which triggers valgrind failures and is inefficient
R_xlen_t orig_length = Rf_xlength(res_);
SEXP short_res_ = Rf_xlengthgets(res_, pos);
res = Rf_xlengthgets(as<Vec>(short_res_), orig_length);

// try_handle() changes pos as a side effect, needs to be done after copying
// (we don't care about the unnecessary copy in the failure case)
if (!try_handle(chunk)) {
stop("cannot handle result of type %i in promotion for column '%s'",
chunk.sexp_type(), name.get_utf8_cstring()
Expand All @@ -92,22 +106,27 @@ class DelayedProcessor : public IDelayedProcessor {
}

virtual bool try_handle(const RObject& chunk) {
LOG_VERBOSE;

check_supported_type(chunk, name);
check_length(Rf_length(chunk), 1, "a summary value", name);

int rtype = TYPEOF(chunk);
if (valid_conversion<RTYPE>(rtype)) {
// copy, and memoize the copied value
const typename Vec::stored_type& converted_chunk = (res[pos++] = as<STORAGE>(chunk));
if (!Vec::is_na(converted_chunk))
seen_na_only = false;
return true;
} else {
if (!valid_conversion<RTYPE>(rtype)) {
return false;
}

// copy, and memoize the copied value
const typename Vec::stored_type& converted_chunk = (res[pos++] = as<STORAGE>(chunk));
if (!Vec::is_na(converted_chunk))
seen_na_only = false;

return true;
}

virtual IDelayedProcessor* promote(const RObject& chunk) {
LOG_VERBOSE;

if (!can_promote(chunk)) {
LOG_VERBOSE << "can't promote";
return 0;
Expand Down
4 changes: 4 additions & 0 deletions inst/include/tools/pointer_vector.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ class pointer_vector {
inline ~pointer_vector() {
typedef typename Vector::size_type size_type;
size_type n = data.size();

// shortcut to avoid decreasing iterator past begin()
if (n == 0) return;

iterator it = data.end();
--it;
for (size_type i = 0; i < n; --it, i++) delete *it;
Expand Down
5 changes: 3 additions & 2 deletions man/rowwise.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 13 additions & 0 deletions src/bind.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,9 @@ SEXP flatten_bindable(SEXP x) {

List rbind__impl(List dots, const SymbolString& id) {
int ndata = dots.size();

LOG_VERBOSE << "binding at most " << ndata << " chunks";

R_xlen_t n = 0;
std::vector<SEXP> chunks;
std::vector<R_xlen_t> df_nrows;
Expand All @@ -200,6 +203,9 @@ List rbind__impl(List dots, const SymbolString& id) {
ndata = chunks.size();
pointer_vector<Collecter> columns;


LOG_VERBOSE << "binding " << ndata << " chunks";

SymbolVector names;

k = 0;
Expand Down Expand Up @@ -274,6 +280,9 @@ List rbind__impl(List dots, const SymbolString& id) {
}

int nc = columns.size();

LOG_VERBOSE << "result has " << nc << " columns";

int has_id = id.is_empty() ? 0 : 1;

List out(no_init(nc + has_id));
Expand All @@ -298,6 +307,8 @@ List rbind__impl(List dots, const SymbolString& id) {
out.attr("names") = out_names;
set_rownames(out, n);

LOG_VERBOSE << "result has " << n << " rows";

// infer the classes and extra info (groups, etc ) from the first (#1692)
if (ndata) {
SEXP first = chunks[0];
Expand All @@ -319,6 +330,8 @@ List rbind__impl(List dots, const SymbolString& id) {

// [[Rcpp::export]]
List bind_rows_(List dots, SEXP id) {
LOG_VERBOSE;

if (Rf_isNull(id))
return rbind__impl(dots, SymbolString());
else
Expand Down

0 comments on commit e47f9c9

Please sign in to comment.