From 39cc1c4bfdda6a44d94f1b9f55db1ac888b28e28 Mon Sep 17 00:00:00 2001 From: Arkadii Kravchuk Date: Thu, 15 Jan 2026 15:25:58 +0200 Subject: [PATCH 1/3] GH-48866: [C++][Gandiva] Truncate subseconds beyond milliseconds in `castTIMESTAMP_utf8` and `castTIME_utf8` --- cpp/src/gandiva/precompiled/time.cc | 19 +++++---- cpp/src/gandiva/precompiled/time_test.cc | 54 +++++++++++++++++------- 2 files changed, 49 insertions(+), 24 deletions(-) diff --git a/cpp/src/gandiva/precompiled/time.cc b/cpp/src/gandiva/precompiled/time.cc index 8bbd0930991..388c0488ccd 100644 --- a/cpp/src/gandiva/precompiled/time.cc +++ b/cpp/src/gandiva/precompiled/time.cc @@ -747,11 +747,12 @@ gdv_timestamp castTIMESTAMP_utf8(int64_t context, const char* input, gdv_int32 l // adjust the milliseconds if (sub_seconds_len > 0) { - if (sub_seconds_len > 3) { - const char* msg = "Invalid millis for timestamp value "; - set_error_for_date(length, input, msg, context); - return 0; + // Truncate to 3 digits (milliseconds precision) if more digits are provided + while (sub_seconds_len > 3) { + ts_fields[TimeFields::kSubSeconds] /= 10; + sub_seconds_len--; } + // Pad with zeros if less than 3 digits while (sub_seconds_len < 3) { ts_fields[TimeFields::kSubSeconds] *= 10; sub_seconds_len++; @@ -867,12 +868,12 @@ gdv_time32 castTIME_utf8(int64_t context, const char* input, int32_t length) { // adjust the milliseconds if (sub_seconds_len > 0) { - if (sub_seconds_len > 3) { - const char* msg = "Invalid millis for time value "; - set_error_for_date(length, input, msg, context); - return 0; + // Truncate to 3 digits (milliseconds precision) if more digits are provided + while (sub_seconds_len > 3) { + time_fields[TimeFields::kSubSeconds - TimeFields::kHours] /= 10; + sub_seconds_len--; } - + // Pad with zeros if less than 3 digits while (sub_seconds_len < 3) { time_fields[TimeFields::kSubSeconds - TimeFields::kHours] *= 10; sub_seconds_len++; diff --git a/cpp/src/gandiva/precompiled/time_test.cc b/cpp/src/gandiva/precompiled/time_test.cc index 0d3b348754a..82b38d1b577 100644 --- a/cpp/src/gandiva/precompiled/time_test.cc +++ b/cpp/src/gandiva/precompiled/time_test.cc @@ -122,15 +122,26 @@ TEST(TestTime, TestCastTimestamp) { "Not a valid time for timestamp value 2000-01-01 00:00:100"); context.Reset(); - EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-01-01 00:00:00.0001", 24), 0); - EXPECT_EQ(context.get_error(), - "Invalid millis for timestamp value 2000-01-01 00:00:00.0001"); - context.Reset(); - - EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-01-01 00:00:00.1000", 24), 0); - EXPECT_EQ(context.get_error(), - "Invalid millis for timestamp value 2000-01-01 00:00:00.1000"); - context.Reset(); + // Test truncation of subseconds to 3 digits (milliseconds) + // "2000-01-01 00:00:00.0001" should truncate to "2000-01-01 00:00:00.000" + EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-01-01 00:00:00.0001", 24), + castTIMESTAMP_utf8(context_ptr, "2000-01-01 00:00:00.000", 23)); + + // "2000-01-01 00:00:00.1000" should truncate to "2000-01-01 00:00:00.100" + EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-01-01 00:00:00.1000", 24), + castTIMESTAMP_utf8(context_ptr, "2000-01-01 00:00:00.100", 23)); + + // "2000-01-01 00:00:00.123456789" should truncate to "2000-01-01 00:00:00.123" + EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-01-01 00:00:00.123456789", 29), + castTIMESTAMP_utf8(context_ptr, "2000-01-01 00:00:00.123", 23)); + + // "2000-01-01 00:00:00.1999" should truncate to "2000-01-01 00:00:00.199" + EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-01-01 00:00:00.1999", 24), + castTIMESTAMP_utf8(context_ptr, "2000-01-01 00:00:00.199", 23)); + + // "2000-01-01 00:00:00.1994" should truncate to "2000-01-01 00:00:00.199" + EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-01-01 00:00:00.1994", 24), + castTIMESTAMP_utf8(context_ptr, "2000-01-01 00:00:00.199", 23)); } TEST(TestTime, TestCastTimeUtf8) { @@ -166,13 +177,26 @@ TEST(TestTime, TestCastTimeUtf8) { EXPECT_EQ(context.get_error(), "Not a valid time value 00:00:100"); context.Reset(); - EXPECT_EQ(castTIME_utf8(context_ptr, "00:00:00.0001", 13), 0); - EXPECT_EQ(context.get_error(), "Invalid millis for time value 00:00:00.0001"); - context.Reset(); + // Test truncation of subseconds to 3 digits (milliseconds) + // "00:00:00.0001" should truncate to "00:00:00.000" + EXPECT_EQ(castTIME_utf8(context_ptr, "00:00:00.0001", 13), + castTIME_utf8(context_ptr, "00:00:00.000", 12)); - EXPECT_EQ(castTIME_utf8(context_ptr, "00:00:00.1000", 13), 0); - EXPECT_EQ(context.get_error(), "Invalid millis for time value 00:00:00.1000"); - context.Reset(); + // "00:00:00.1000" should truncate to "00:00:00.100" + EXPECT_EQ(castTIME_utf8(context_ptr, "00:00:00.1000", 13), + castTIME_utf8(context_ptr, "00:00:00.100", 12)); + + // "9:45:30.123456789" should truncate to "9:45:30.123" + EXPECT_EQ(castTIME_utf8(context_ptr, "9:45:30.123456789", 17), + castTIME_utf8(context_ptr, "9:45:30.123", 11)); + + // "00:00:00.1999" should truncate to "00:00:00.199" + EXPECT_EQ(castTIME_utf8(context_ptr, "00:00:00.1999", 13), + castTIME_utf8(context_ptr, "00:00:00.199", 12)); + + // "00:00:00.1994" should truncate to "00:00:00.199" + EXPECT_EQ(castTIME_utf8(context_ptr, "00:00:00.1994", 13), + castTIME_utf8(context_ptr, "00:00:00.199", 12)); } #ifndef _WIN32 From ff4e773dfcf315e1608fa0817cef631b8f12cc15 Mon Sep 17 00:00:00 2001 From: Arkadii Kravchuk Date: Thu, 22 Jan 2026 19:02:48 +0200 Subject: [PATCH 2/3] GH-48866: Address comment to factor out common code --- cpp/src/gandiva/precompiled/time.cc | 49 +++++++++++++++-------------- 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/cpp/src/gandiva/precompiled/time.cc b/cpp/src/gandiva/precompiled/time.cc index 388c0488ccd..1a0ac0aae6e 100644 --- a/cpp/src/gandiva/precompiled/time.cc +++ b/cpp/src/gandiva/precompiled/time.cc @@ -566,6 +566,27 @@ bool is_valid_time(const int hours, const int minutes, const int seconds) { seconds < 60; } +// Normalize sub-seconds value to milliseconds precision (3 digits). +// Truncates if more than 3 digits are provided, pads with zeros if fewer than 3 digits +FORCE_INLINE +int32_t normalize_subseconds_to_millis(int32_t subseconds, int32_t num_digits) { + if (num_digits <= 0 || num_digits == 3) { + // No need to adjust + return subseconds; + } + // Calculate the power of 10 adjustment needed + int32_t digit_diff = num_digits - 3; + while (digit_diff > 0) { + subseconds /= 10; + digit_diff--; + } + while (digit_diff < 0) { + subseconds *= 10; + digit_diff++; + } + return subseconds; +} + // MONTHS_BETWEEN returns number of months between dates date1 and date2. // If date1 is later than date2, then the result is positive. // If date1 is earlier than date2, then the result is negative. @@ -746,18 +767,8 @@ gdv_timestamp castTIMESTAMP_utf8(int64_t context, const char* input, gdv_int32 l } // adjust the milliseconds - if (sub_seconds_len > 0) { - // Truncate to 3 digits (milliseconds precision) if more digits are provided - while (sub_seconds_len > 3) { - ts_fields[TimeFields::kSubSeconds] /= 10; - sub_seconds_len--; - } - // Pad with zeros if less than 3 digits - while (sub_seconds_len < 3) { - ts_fields[TimeFields::kSubSeconds] *= 10; - sub_seconds_len++; - } - } + ts_fields[TimeFields::kSubSeconds] = + normalize_subseconds_to_millis(ts_fields[TimeFields::kSubSeconds], sub_seconds_len); // handle timezone if (encountered_zone) { int err = 0; @@ -867,18 +878,8 @@ gdv_time32 castTIME_utf8(int64_t context, const char* input, int32_t length) { } // adjust the milliseconds - if (sub_seconds_len > 0) { - // Truncate to 3 digits (milliseconds precision) if more digits are provided - while (sub_seconds_len > 3) { - time_fields[TimeFields::kSubSeconds - TimeFields::kHours] /= 10; - sub_seconds_len--; - } - // Pad with zeros if less than 3 digits - while (sub_seconds_len < 3) { - time_fields[TimeFields::kSubSeconds - TimeFields::kHours] *= 10; - sub_seconds_len++; - } - } + time_fields[TimeFields::kSubSeconds - TimeFields::kHours] = normalize_subseconds_to_millis( + time_fields[TimeFields::kSubSeconds - TimeFields::kHours], sub_seconds_len); int32_t input_hours = time_fields[TimeFields::kHours - TimeFields::kHours]; int32_t input_minutes = time_fields[TimeFields::kMinutes - TimeFields::kHours]; From b818113a5e688130a139e14f573e5fc4169ca906 Mon Sep 17 00:00:00 2001 From: Arkadii Kravchuk Date: Fri, 23 Jan 2026 18:07:41 +0200 Subject: [PATCH 3/3] GH-48866: Fix code formatting --- cpp/src/gandiva/precompiled/time.cc | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/cpp/src/gandiva/precompiled/time.cc b/cpp/src/gandiva/precompiled/time.cc index 1a0ac0aae6e..1fe951a69a1 100644 --- a/cpp/src/gandiva/precompiled/time.cc +++ b/cpp/src/gandiva/precompiled/time.cc @@ -768,7 +768,7 @@ gdv_timestamp castTIMESTAMP_utf8(int64_t context, const char* input, gdv_int32 l // adjust the milliseconds ts_fields[TimeFields::kSubSeconds] = - normalize_subseconds_to_millis(ts_fields[TimeFields::kSubSeconds], sub_seconds_len); + normalize_subseconds_to_millis(ts_fields[TimeFields::kSubSeconds], sub_seconds_len); // handle timezone if (encountered_zone) { int err = 0; @@ -878,8 +878,9 @@ gdv_time32 castTIME_utf8(int64_t context, const char* input, int32_t length) { } // adjust the milliseconds - time_fields[TimeFields::kSubSeconds - TimeFields::kHours] = normalize_subseconds_to_millis( - time_fields[TimeFields::kSubSeconds - TimeFields::kHours], sub_seconds_len); + time_fields[TimeFields::kSubSeconds - TimeFields::kHours] = + normalize_subseconds_to_millis( + time_fields[TimeFields::kSubSeconds - TimeFields::kHours], sub_seconds_len); int32_t input_hours = time_fields[TimeFields::kHours - TimeFields::kHours]; int32_t input_minutes = time_fields[TimeFields::kMinutes - TimeFields::kHours];