Skip to content

Commit 849a584

Browse files
jbrockmendel and claude authored
BUG: raise OverflowError on datetime64 unit conversion overflow (numpy#31085)
Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 6a85991 commit 849a584

5 files changed

Lines changed: 157 additions & 25 deletions

File tree

numpy/_core/src/multiarray/_datetime.h

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -335,4 +335,45 @@ datetime_hash(PyArray_DatetimeMetaData *meta, npy_datetime dt);
335335
NPY_NO_EXPORT npy_hash_t
336336
timedelta_hash(PyArray_DatetimeMetaData *meta, npy_timedelta td);
337337

338+
/*
339+
* Scale a datetime or timedelta value by num/denom, checking for overflow.
340+
*
341+
* Positive values compute *dt * num / denom.
342+
* Negative values compute (*dt * num - (denom - 1)) / denom to round
343+
* toward negative infinity.
344+
*
345+
* NPY_DATETIME_NAT is NPY_MIN_INT64 (i.e. -NPY_MAX_INT64 - 1).
346+
* The asymmetric neg_limit formula ensures that a valid *dt * num never
347+
* produces NPY_MIN_INT64, which would be misinterpreted as NaT.
348+
*
349+
* NaT values pass through unchanged.
350+
*
351+
* Returns 0 on success, -1 on overflow (with PyExc_OverflowError set).
352+
*/
353+
static inline int
354+
_datetime_scale_with_overflow_check(
355+
npy_int64 *dt, npy_int64 num, npy_int64 denom,
356+
const char *type_name)
357+
{
358+
if (*dt == NPY_DATETIME_NAT) {
359+
return 0;
360+
}
361+
npy_int64 pos_limit = NPY_MAX_INT64 / num;
362+
npy_int64 neg_limit = (NPY_MAX_INT64 - denom + 1) / num;
363+
364+
if (*dt > pos_limit || *dt < -neg_limit) {
365+
PyErr_Format(PyExc_OverflowError,
366+
"Overflow when converting between "
367+
"%s units", type_name);
368+
return -1;
369+
}
370+
if (*dt < 0) {
371+
*dt = (*dt * num - (denom - 1)) / denom;
372+
}
373+
else {
374+
*dt = *dt * num / denom;
375+
}
376+
return 0;
377+
}
378+
338379
#endif /* NUMPY_CORE_SRC_MULTIARRAY__DATETIME_H_ */

numpy/_core/src/multiarray/datetime.c

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -444,8 +444,16 @@ NpyDatetime_ConvertDatetime64ToDatetimeStruct(
444444
return -1;
445445
}
446446

447-
/* TODO: Change to a mechanism that avoids the potential overflow */
448-
dt *= meta->num;
447+
/* Check for overflow and apply meta->num scaling */
448+
if (meta->num > 1) {
449+
if (_datetime_scale_with_overflow_check(
450+
&dt, (npy_int64)meta->num, 1, "datetime64") < 0) {
451+
return -1;
452+
}
453+
}
454+
else {
455+
dt *= meta->num;
456+
}
449457

450458
/*
451459
* Note that care must be taken with the / and % operators
@@ -3178,13 +3186,11 @@ cast_timedelta_to_timedelta(PyArray_DatetimeMetaData *src_meta,
31783186
return -1;
31793187
}
31803188

3181-
/* Apply the scaling */
3182-
if (src_dt < 0) {
3183-
*dst_dt = (src_dt * num - (denom - 1)) / denom;
3184-
}
3185-
else {
3186-
*dst_dt = src_dt * num / denom;
3189+
/* Apply the scaling, checking for overflow */
3190+
if (_datetime_scale_with_overflow_check(&src_dt, num, denom, "timedelta64") < 0) {
3191+
return -1;
31873192
}
3193+
*dst_dt = src_dt;
31883194

31893195
return 0;
31903196
}

numpy/_core/src/multiarray/dtype_transfer.c

Lines changed: 4 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -821,14 +821,8 @@ _strided_to_strided_datetime_cast(
821821
while (N > 0) {
822822
memcpy(&dt, src, sizeof(dt));
823823

824-
if (dt != NPY_DATETIME_NAT) {
825-
/* Apply the scaling */
826-
if (dt < 0) {
827-
dt = (dt * num - (denom - 1)) / denom;
828-
}
829-
else {
830-
dt = dt * num / denom;
831-
}
824+
if (_datetime_scale_with_overflow_check(&dt, num, denom, "datetime64") < 0) {
825+
return -1;
832826
}
833827

834828
memcpy(dst, &dt, sizeof(dt));
@@ -857,14 +851,8 @@ _aligned_strided_to_strided_datetime_cast(
857851
while (N > 0) {
858852
dt = *(npy_int64 *)src;
859853

860-
if (dt != NPY_DATETIME_NAT) {
861-
/* Apply the scaling */
862-
if (dt < 0) {
863-
dt = (dt * num - (denom - 1)) / denom;
864-
}
865-
else {
866-
dt = dt * num / denom;
867-
}
854+
if (_datetime_scale_with_overflow_check(&dt, num, denom, "datetime64") < 0) {
855+
return -1;
868856
}
869857

870858
*(npy_int64 *)dst = dt;

numpy/_core/tests/test_casting_unittests.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -481,7 +481,12 @@ def test_time_to_time(self, from_dt, to_dt,
481481
arr, out = self.get_data_variation(
482482
orig_arr, orig_out, aligned, contig)
483483
out[...] = 0
484-
cast._simple_strided_call((arr, out))
484+
try:
485+
cast._simple_strided_call((arr, out))
486+
except OverflowError:
487+
# Extreme values (e.g. INT64_MAX) can overflow when
488+
# scaled by the unit conversion factor. gh-16352
489+
break
485490
assert_array_equal(out.view("int64"), expected_out.view("int64"))
486491

487492
def string_with_modified_length(self, dtype, change_length):

numpy/_core/tests/test_datetime.py

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -993,6 +993,98 @@ def cast2():
993993
numpy.datetime64("2014").astype("<M8[fs]")
994994
assert_raises(OverflowError, cast2)
995995

996+
def test_cast_overflow_safe_unit_conversion(self):
    """Check that linear-unit datetime64/timedelta64 casts raise on overflow.

    Covers gh-16352 and gh-22346: values whose scaled representation does
    not fit in int64 must raise OverflowError, while in-range values and
    NaT must still convert normally.
    """
    # Overflow when converting datetime64 between linear units
    # (the fast-path cast), e.g. seconds -> nanoseconds.
    # INT64_MAX / 1e9 ≈ 9.2e9 seconds ≈ 292 years from epoch,
    # so dates beyond ~2262 overflow when cast to ns.

    # gh-16352: upconversion to finer units overflows
    arr = np.array(["2367-12-31 12:00:00"], dtype="datetime64[h]")
    with pytest.raises(OverflowError, match="Overflow"):
        arr.astype("datetime64[ns]")

    # gh-16352: scalar case
    val = np.datetime64("3000-01-01", "s")
    with pytest.raises(OverflowError, match="Overflow"):
        val.astype("datetime64[ns]")

    # gh-22346: downconversion to coarser units overflows near INT64_MIN
    dt = np.datetime64(np.iinfo(np.int64).min + 1, "s")
    with pytest.raises(OverflowError, match="Overflow"):
        dt.astype("M8[m]")

    # negative overflow (far in the past)
    val_neg = np.datetime64("0001-01-01", "s")
    with pytest.raises(OverflowError, match="Overflow"):
        val_neg.astype("datetime64[ns]")

    # timedelta overflow (strided cast path in dtype_transfer.c)
    td = np.timedelta64(2**62, "s")
    with pytest.raises(OverflowError, match="Overflow"):
        td.astype("timedelta64[ns]")

    # timedelta overflow (scalar cast path in datetime.c via
    # cast_timedelta_to_timedelta)
    td_big = np.timedelta64(2**62, "s")
    with pytest.raises(OverflowError, match="Overflow"):
        np.array(td_big, dtype="timedelta64[ns]")

    # timedelta exact boundary: INT64_MAX // 1e9 = 9223372036
    td_ok = np.timedelta64(9223372036, "s")
    result_td = td_ok.astype("timedelta64[ns]")
    assert result_td == np.timedelta64(9223372036000000000, "ns")

    td_bad = np.timedelta64(9223372037, "s")
    with pytest.raises(OverflowError, match="Overflow"):
        td_bad.astype("timedelta64[ns]")

    # negative timedelta exact boundary: the largest-magnitude negative
    # value whose product still fits must be accepted, not rejected
    td_neg_ok = np.timedelta64(-9223372036, "s")
    result_td_neg = td_neg_ok.astype("timedelta64[ns]")
    assert result_td_neg == np.timedelta64(-9223372036000000000, "ns")

    # negative timedelta overflow
    td_neg = np.timedelta64(-9223372037, "s")
    with pytest.raises(OverflowError, match="Overflow"):
        td_neg.astype("timedelta64[ns]")

    # timedelta NaT passthrough
    td_nat = np.timedelta64("NaT", "s")
    result_td_nat = td_nat.astype("timedelta64[ns]")
    assert np.isnat(result_td_nat)

    # ordinary in-range conversions should still work
    val_ok = np.datetime64("2020-01-01", "s")
    result = val_ok.astype("datetime64[ns]")
    assert result == np.datetime64("2020-01-01", "ns")

    arr_ok = np.array(["2000-01-01", "2020-06-15"], dtype="datetime64[s]")
    result_arr = arr_ok.astype("datetime64[ns]")
    expected = np.array(["2000-01-01", "2020-06-15"], dtype="datetime64[ns]")
    assert_equal(result_arr, expected)

    # NaT should pass through without raising
    arr_nat = np.array(["NaT", "2020-01-01"], dtype="datetime64[s]")
    result_nat = arr_nat.astype("datetime64[ns]")
    assert np.isnat(result_nat[0])
    assert result_nat[1] == np.datetime64("2020-01-01", "ns")

    # Exact boundary: INT64_MAX // 1e9 = 9223372036 seconds is OK,
    # 9223372037 seconds overflows when cast to ns.
    ok_boundary = np.datetime64(9223372036, "s")
    result_boundary = ok_boundary.astype("datetime64[ns]")
    # Compare against an explicitly constructed ns value: comparing with
    # the original "s" value would just repeat the conversion under test.
    assert result_boundary == np.datetime64(9223372036000000000, "ns")

    bad_boundary = np.datetime64(9223372037, "s")
    with pytest.raises(OverflowError, match="Overflow"):
        bad_boundary.astype("datetime64[ns]")

    # Exercise the num != 1 code path (e.g. "2s" metadata)
    arr_2s = np.array([3], dtype="datetime64[2s]")
    result_2s = arr_2s.astype("datetime64[s]")
    assert result_2s[0] == np.datetime64(6, "s")

    # Overflow with num != 1
    arr_2s_big = np.array(
        [np.iinfo(np.int64).max // 2], dtype="datetime64[2s]")
    with pytest.raises(OverflowError, match="Overflow"):
        arr_2s_big.astype("datetime64[ns]")
9961088
def test_pyobject_roundtrip(self):
9971089
# All datetime types should be able to roundtrip through object
9981090
a = np.array([0, 0, 0, 0, 0, 0, 0, 0, 0,

0 commit comments

Comments
 (0)