diff --git a/.gitignore b/.gitignore index bbdc098..b14c2c0 100644 --- a/.gitignore +++ b/.gitignore @@ -139,3 +139,4 @@ dmypy.json # IDE stuff .vscode +.idea diff --git a/Cargo.lock b/Cargo.lock index 0115370..d28fa54 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -504,7 +504,7 @@ dependencies = [ [[package]] name = "python-calamine" -version = "0.3.1" +version = "0.3.2" dependencies = [ "calamine", "chrono", diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..1378af1 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,4 @@ +-e . +pytest~=8.0 +pandas[excel]~=2.0 +numpy~=1.0 diff --git a/src/types/cell.rs b/src/types/cell.rs index df84d87..47fc947 100644 --- a/src/types/cell.rs +++ b/src/types/cell.rs @@ -1,6 +1,7 @@ use std::convert::From; use calamine::DataType; +use chrono::Datelike; use pyo3::prelude::*; #[derive(Debug, Clone)] @@ -28,8 +29,22 @@ impl<'py> IntoPyObject<'py> for CellValue { CellValue::String(v) => Ok(v.into_pyobject(py)?.into_any()), CellValue::Bool(v) => Ok(v.into_pyobject(py)?.to_owned().into_any()), CellValue::Time(v) => Ok(v.into_pyobject(py)?.into_any()), - CellValue::Date(v) => Ok(v.into_pyobject(py)?.into_any()), - CellValue::DateTime(v) => Ok(v.into_pyobject(py)?.into_any()), + CellValue::Date(v) => { + if v.year() > 9999 || v.year() <= 1000 { + let formatted = v.format("%Y-%m-%d").to_string(); + Ok(formatted.into_pyobject(py)?.into_any()) + } else { + Ok(v.into_pyobject(py)?.into_any()) + } + } + CellValue::DateTime(v) => { + if v.year() > 9999 || v.year() <= 1000 { + let formatted = v.format("%Y-%m-%dT%H:%M:%S%.f").to_string(); + Ok(formatted.into_pyobject(py)?.into_any()) + } else { + Ok(v.into_pyobject(py)?.into_any()) + } + } CellValue::Timedelta(v) => Ok(v.into_pyobject(py)?.into_any()), CellValue::Empty => Ok("".into_pyobject(py)?.into_any()), } diff --git a/tests/data/out_of_range_dates.ods b/tests/data/out_of_range_dates.ods new file mode 100644 index 0000000..cba1fc4 Binary files /dev/null and 
b/tests/data/out_of_range_dates.ods differ diff --git a/tests/data/out_of_range_dates.xlsx b/tests/data/out_of_range_dates.xlsx new file mode 100644 index 0000000..9f86143 Binary files /dev/null and b/tests/data/out_of_range_dates.xlsx differ diff --git a/tests/test_out_of_range_dates.py b/tests/test_out_of_range_dates.py new file mode 100644 index 0000000..b46a022 --- /dev/null +++ b/tests/test_out_of_range_dates.py @@ -0,0 +1,91 @@ +""" +Tests verifying that date/datetime cells whose year falls outside the natively +converted range (narrower than Python's own 1..9999) come back as ISO 8601 strings. + +Boundary: year <= 1000 or year > 9999 → str; otherwise → date/datetime. + +Fixtures: + out_of_range_dates.xlsx — two sheets with future date serials (year 10000) + out_of_range_dates.ods — two sheets with past ISO dates (year 500) +""" + +from pathlib import Path + +from python_calamine import CalamineWorkbook + +PATH = Path(__file__).parent / "data" +XLSX = PATH / "out_of_range_dates.xlsx" +ODS = PATH / "out_of_range_dates.ods" + + +# --------------------------------------------------------------------------- +# xlsx — future dates (serial 2958466+ maps to year 10000) +# --------------------------------------------------------------------------- + + +def test_xlsx_future_date_returns_string(): + sheet = CalamineWorkbook.from_object(XLSX).get_sheet_by_name("future_date") + rows = sheet.to_python() + cell = rows[0][0] + assert isinstance(cell, str), f"expected str for out-of-range date, got {type(cell)}: {cell!r}" + + +def test_xlsx_future_date_string_is_iso_format(): + sheet = CalamineWorkbook.from_object(XLSX).get_sheet_by_name("future_date") + cell = sheet.to_python()[0][0] + # chrono's %Y writes an explicit sign for years above 9999, e.g. "+10000-01-01"; int() below accepts the "+" + parts = cell.split("-") + assert len(parts) == 3, f"expected YYYY-MM-DD, got {cell!r}" + assert int(parts[0]) > 9999, f"expected year > 9999, got {cell!r}" + + +def test_xlsx_future_datetime_returns_string(): + sheet = 
CalamineWorkbook.from_object(XLSX).get_sheet_by_name("future_datetime") + rows = sheet.to_python() + cell = rows[0][0] + assert isinstance(cell, str), f"expected str for out-of-range datetime, got {type(cell)}: {cell!r}" + + +def test_xlsx_future_datetime_string_contains_time_component(): + sheet = CalamineWorkbook.from_object(XLSX).get_sheet_by_name("future_datetime") + cell = sheet.to_python()[0][0] + assert "T" in cell, f"expected ISO datetime with 'T' separator, got {cell!r}" + date_part = cell.split("T")[0] + year = int(date_part.split("-")[0]) + assert year > 9999, f"expected year > 9999 in {cell!r}" + + +# --------------------------------------------------------------------------- +# ODS — past dates (ISO value "0500-06-15", year 500) +# --------------------------------------------------------------------------- + + +def test_ods_past_date_returns_string(): + sheet = CalamineWorkbook.from_object(ODS).get_sheet_by_name("past_date") + rows = sheet.to_python() + cell = rows[0][0] + assert isinstance(cell, str), f"expected str for out-of-range date, got {type(cell)}: {cell!r}" + + +def test_ods_past_date_string_is_iso_format(): + sheet = CalamineWorkbook.from_object(ODS).get_sheet_by_name("past_date") + cell = sheet.to_python()[0][0] + parts = cell.split("-") + assert len(parts) == 3, f"expected YYYY-MM-DD, got {cell!r}" + assert int(parts[0]) <= 1000, f"expected year <= 1000, got {cell!r}" + + +def test_ods_past_datetime_returns_string(): + sheet = CalamineWorkbook.from_object(ODS).get_sheet_by_name("past_datetime") + rows = sheet.to_python() + cell = rows[0][0] + assert isinstance(cell, str), f"expected str for out-of-range datetime, got {type(cell)}: {cell!r}" + + +def test_ods_past_datetime_string_contains_time_component(): + sheet = CalamineWorkbook.from_object(ODS).get_sheet_by_name("past_datetime") + cell = sheet.to_python()[0][0] + assert "T" in cell, f"expected ISO datetime with 'T' separator, got {cell!r}" + date_part = cell.split("T")[0] + year = 
int(date_part.split("-")[0]) + assert year <= 1000, f"expected year <= 1000 in {cell!r}"