From f96c3c82bf0e98e30fdd590c83464c9603e914bd Mon Sep 17 00:00:00 2001 From: Derek Burk Date: Wed, 11 Jun 2025 15:16:40 -0500 Subject: [PATCH 1/2] Refactor column resizing code to avoid using non-API functions SETLENGTH and SET_TRUELENGTH --- src/column.cpp | 38 +++++++++---------------------- src/column_utils.h | 56 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+), 28 deletions(-) create mode 100644 src/column_utils.h diff --git a/src/column.cpp b/src/column.cpp index 3f43a9e..4562d0f 100644 --- a/src/column.cpp +++ b/src/column.cpp @@ -4,6 +4,7 @@ #include "column.h" #include "string_utils.h" #include "iconv.h" +#include "column_utils.h" #include using namespace Rcpp; @@ -101,47 +102,28 @@ void ColumnInteger::setValue(int i, const char* x_start, const char* x_end) { } void ColumnCharacter::resize(int n) { - if (n == n_) - return; - - if (n > 0 && n < n_) { - SETLENGTH(values_, n); - SET_TRUELENGTH(values_, n); - } else { - values_ = Rf_lengthgets(values_, n); - } + if (n == n_) return; + values_ = resize_character_vector(values_, n_, n); n_ = n; - } -void ColumnDouble::resize(int n) { - if (n == n_) - return; - if (n > 0 && n < n_) { - SETLENGTH(values_, n); - SET_TRUELENGTH(values_, n); - } else { - values_ = Rf_lengthgets(values_, n); - } +void ColumnDouble::resize(int n) { + if (n == n_) return; + values_ = resize_numeric_vector(values_, n_, n); n_ = n; valuepointer = REAL(values_); } -void ColumnInteger::resize(int n) { - if (n == n_) - return; - if (n > 0 && n < n_) { - SETLENGTH(values_, n); - SET_TRUELENGTH(values_, n); - } else { - values_ = Rf_lengthgets(values_, n); - } +void ColumnInteger::resize(int n) { + if (n == n_) return; + values_ = resize_integer_vector(values_, n_, n); n_ = n; valuepointer = INTEGER(values_); } + std::vector createAllColumns(CharacterVector types, Rcpp::List var_opts, Iconv* pEncoder_) { int num_cols = static_cast(types.size()); std::vector out; diff --git a/src/column_utils.h b/src/column_utils.h new file mode 100644 index 0000000..f4b766e --- /dev/null +++ b/src/column_utils.h @@ -0,0 +1,56 @@ +#ifndef VECTOR_UTILS_H +#define VECTOR_UTILS_H + +#include +#include +#include // for memcpy + +inline SEXP resize_numeric_vector(SEXP original, int old_size, int new_size) { + SEXP new_vec = PROTECT(Rf_allocVector(REALSXP, new_size)); + double* new_data = REAL(new_vec); + + std::fill_n(new_data, new_size, NA_REAL); + + if (original != R_NilValue && old_size > 0) { + int copy_len = std::min(old_size, new_size); + memcpy(new_data, REAL(original), copy_len * sizeof(double)); + } + + UNPROTECT(1); + return new_vec; +} + +inline SEXP resize_integer_vector(SEXP original, int old_size, int new_size) { + SEXP new_vec = PROTECT(Rf_allocVector(INTSXP, new_size)); + int* new_data = INTEGER(new_vec); + + std::fill_n(new_data, new_size, NA_INTEGER); + + if (original != R_NilValue && old_size > 0) { + int copy_len = std::min(old_size, new_size); + memcpy(new_data, INTEGER(original), copy_len * sizeof(int)); + } + + UNPROTECT(1); + return new_vec; +} + +inline SEXP resize_character_vector(SEXP original, int old_size, int new_size) { + SEXP new_vec = PROTECT(Rf_allocVector(STRSXP, new_size)); + + for (int i = 0; i < new_size; ++i) { + SET_STRING_ELT(new_vec, i, NA_STRING); + } + + if (original != R_NilValue && old_size > 0) { + int copy_len = std::min(old_size, new_size); + for (int i = 0; i < copy_len; ++i) { + SET_STRING_ELT(new_vec, i, STRING_ELT(original, i)); + } + } + + UNPROTECT(1); + return new_vec; +} + +#endif // VECTOR_UTILS_H From db850e427624a8101150985080b03f09b491a8b3 Mon Sep 17 00:00:00 2001 From: Greg Freedman Ellis Date: Thu, 12 Jun 2025 21:25:24 -0500 Subject: [PATCH 2/2] another take at resizing --- src/column.cpp | 8 +++---- src/column_utils.h | 56 ---------------------------------------------- 2 files changed, 4 insertions(+), 60 deletions(-) delete mode 100644 src/column_utils.h diff --git a/src/column.cpp b/src/column.cpp index 4562d0f..7492a34 100644 --- a/src/column.cpp +++ b/src/column.cpp @@ -4,7 +4,7 @@ #include "column.h" #include "string_utils.h" #include "iconv.h" -#include "column_utils.h" + #include using namespace Rcpp; @@ -103,14 +103,14 @@ void ColumnInteger::setValue(int i, const char* x_start, const char* x_end) { void ColumnCharacter::resize(int n) { if (n == n_) return; - values_ = resize_character_vector(values_, n_, n); + values_ = Rf_lengthgets(values_, n); n_ = n; } void ColumnDouble::resize(int n) { if (n == n_) return; - values_ = resize_numeric_vector(values_, n_, n); + values_ = Rf_lengthgets(values_, n); n_ = n; valuepointer = REAL(values_); } @@ -118,7 +118,7 @@ void ColumnDouble::resize(int n) { void ColumnInteger::resize(int n) { if (n == n_) return; - values_ = resize_integer_vector(values_, n_, n); + values_ = Rf_lengthgets(values_, n); n_ = n; valuepointer = INTEGER(values_); } diff --git a/src/column_utils.h b/src/column_utils.h deleted file mode 100644 index f4b766e..0000000 --- a/src/column_utils.h +++ /dev/null @@ -1,56 +0,0 @@ -#ifndef VECTOR_UTILS_H -#define VECTOR_UTILS_H - -#include -#include -#include // for memcpy - -inline SEXP resize_numeric_vector(SEXP original, int old_size, int new_size) { - SEXP new_vec = PROTECT(Rf_allocVector(REALSXP, new_size)); - double* new_data = REAL(new_vec); - - std::fill_n(new_data, new_size, NA_REAL); - - if (original != R_NilValue && old_size > 0) { - int copy_len = std::min(old_size, new_size); - memcpy(new_data, REAL(original), copy_len * sizeof(double)); - } - - UNPROTECT(1); - return new_vec; -} - -inline SEXP resize_integer_vector(SEXP original, int old_size, int new_size) { - SEXP new_vec = PROTECT(Rf_allocVector(INTSXP, new_size)); - int* new_data = INTEGER(new_vec); - - std::fill_n(new_data, new_size, NA_INTEGER); - - if (original != R_NilValue && old_size > 0) { - int copy_len = std::min(old_size, new_size); - memcpy(new_data, INTEGER(original), copy_len * sizeof(int)); - } - - UNPROTECT(1); - return new_vec; -} - -inline SEXP resize_character_vector(SEXP original, int old_size, int new_size) { - SEXP new_vec = PROTECT(Rf_allocVector(STRSXP, new_size)); - - for (int i = 0; i < new_size; ++i) { - SET_STRING_ELT(new_vec, i, NA_STRING); - } - - if (original != R_NilValue && old_size > 0) { - int copy_len = std::min(old_size, new_size); - for (int i = 0; i < copy_len; ++i) { - SET_STRING_ELT(new_vec, i, STRING_ELT(original, i)); - } - } - - UNPROTECT(1); - return new_vec; -} - -#endif // VECTOR_UTILS_H