Skip to content

Commit 3dd2b33

Browse files
authored
Merge pull request #114 from dh-tech/feature/intersection
Interval logic improvements: type/range validation, intersection method, contains/in functionality
2 parents ca9fb7b + cfdef42 commit 3dd2b33

7 files changed

Lines changed: 222 additions & 49 deletions

File tree

.github/workflows/unit_tests.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ jobs:
2020
runs-on: ubuntu-latest
2121
strategy:
2222
matrix:
23-
python: ["3.9", "3.10", "3.11", "3.12", "3.13"]
23+
python: ["3.10", "3.11", "3.12", "3.13"]
2424
defaults:
2525
run:
2626
working-directory: .

pyproject.toml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ name = "undate"
77
description = "library for working with uncertain, fuzzy, or partially unknown dates and date intervals"
88
readme = "README.md"
99
license = { text = "Apache-2" }
10-
requires-python = ">= 3.9"
10+
requires-python = ">= 3.10"
1111
dynamic = ["version"]
1212
dependencies = ["lark[interegular]", "numpy", "convertdate", "strenum; python_version < '3.11'"]
1313
authors = [
@@ -31,7 +31,6 @@ keywords = [
3131
classifiers = [
3232
"Development Status :: 2 - Pre-Alpha",
3333
"Programming Language :: Python :: 3",
34-
"Programming Language :: Python :: 3.9",
3534
"Programming Language :: Python :: 3.10",
3635
"Programming Language :: Python :: 3.11",
3736
"Programming Language :: Python :: 3.12",

src/undate/interval.py

Lines changed: 75 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
import datetime
2-
31
# Pre 3.10 requires Union for multiple types, e.g. Union[int, None] instead of int | None
42
from typing import Optional, Union
53

@@ -25,31 +23,30 @@ class UndateInterval:
2523
latest: Union[Undate, None]
2624
label: Union[str, None]
2725

28-
# TODO: let's think about adding an optional precision / length /size field
29-
# using DatePrecision
26+
# TODO: think about adding an optional precision / length /size field
27+
# using DatePrecision for intervals of any standard duration (decade, century)
3028

3129
def __init__(
3230
self,
3331
earliest: Optional[Undate] = None,
3432
latest: Optional[Undate] = None,
3533
label: Optional[str] = None,
3634
):
37-
# for now, assume takes two undate objects;
38-
# support conversion from datetime
39-
if earliest and not isinstance(earliest, Undate):
40-
# NOTE: some overlap with Undate._comparison_type method
41-
# maybe support conversion from other formats later
42-
if isinstance(earliest, datetime.date):
43-
earliest = Undate.from_datetime_date(earliest)
44-
else:
35+
# takes two undate objects; allows conversion from supported types
36+
if earliest:
37+
try:
38+
earliest = Undate.to_undate(earliest)
39+
except TypeError as err:
4540
raise ValueError(
4641
f"earliest date {earliest} cannot be converted to Undate"
47-
)
48-
if latest and not isinstance(latest, Undate):
49-
if isinstance(latest, datetime.date):
50-
latest = Undate.from_datetime_date(latest)
51-
else:
52-
raise ValueError(f"latest date {latest} cannot be converted to Undate")
42+
) from err
43+
if latest:
44+
try:
45+
latest = Undate.to_undate(latest)
46+
except TypeError as err:
47+
raise ValueError(
48+
f"latest date {latest} cannot be converted to Undate"
49+
) from err
5350

5451
# check that the interval is valid
5552
if latest and earliest and latest <= earliest:
@@ -78,6 +75,9 @@ def __repr__(self) -> str:
7875
return "<UndateInterval %s>" % self
7976

8077
def __eq__(self, other) -> bool:
78+
# currently doesn't support comparison with any other types
79+
if not isinstance(other, UndateInterval):
80+
return NotImplemented
8181
# consider interval equal if both dates are equal
8282
return self.earliest == other.earliest and self.latest == other.latest
8383

@@ -122,3 +122,60 @@ def duration(self) -> Timedelta:
122122
# is there any meaningful way to calculate duration
123123
# if one year is known and the other is not?
124124
raise NotImplementedError
125+
126+
def __contains__(self, other: object) -> bool:
127+
"""Determine if another interval or date falls within this
128+
interval. Supports comparison with :class:`UndateInterval`
129+
or anything that can be converted with :meth:`Undate.to_undate`."""
130+
# support comparison with another interval or anything
131+
# that can be converted to an Undate
132+
if isinstance(other, UndateInterval):
133+
# compare based on earliest/latest bounds
134+
other_earliest = other.earliest
135+
other_latest = other.latest
136+
else:
137+
# otherwise, try to convert to an Undate
138+
try:
139+
other = Undate.to_undate(other)
140+
other_latest = other_earliest = other
141+
except TypeError:
142+
# if conversion fails, then we don't support comparison
143+
raise
144+
145+
# if either bound of the current interval is None,
146+
# then it is an open interval and we don't need to check the other value.
147+
# if the other value is set, then check that it falls within the
148+
# bounds of this interval
149+
return (
150+
self.earliest is None
151+
or other_earliest is not None
152+
and other_earliest >= self.earliest
153+
) and (
154+
self.latest is None
155+
or other_latest is not None
156+
and other_latest <= self.latest
157+
)
158+
159+
def intersection(self, other: "UndateInterval") -> Optional["UndateInterval"]:
160+
"""Determine the intersection or overlap between two :class:`UndateInterval`
161+
objects and return a new interval. Returns None if there is no overlap.
162+
"""
163+
try:
164+
# when both values are defined, return the inner bounds;
165+
# if not, return whichever is not None, or None
166+
earliest = (
167+
max(self.earliest, other.earliest)
168+
if self.earliest and other.earliest
169+
else self.earliest or other.earliest
170+
)
171+
latest = (
172+
min(self.latest, other.latest)
173+
if self.latest and other.latest
174+
else self.latest or other.latest
175+
)
176+
177+
# if this results in an invalid interval, initialization
178+
# will throw an exception
179+
return UndateInterval(earliest, latest)
180+
except ValueError:
181+
return None

src/undate/undate.py

Lines changed: 33 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,13 @@
22

33
import datetime
44
from enum import auto
5+
56
import re
67
from typing import TYPE_CHECKING
78

89
if TYPE_CHECKING:
910
from undate.interval import UndateInterval
11+
1012
try:
1113
# StrEnum was only added in python 3.11
1214
from enum import StrEnum
@@ -72,6 +74,10 @@ def __init__(
7274
label: Optional[str] = None,
7375
calendar: Optional[Union[str, Calendar]] = None,
7476
):
77+
# everything is optional but something is required
78+
if all([val is None for val in [year, month, day]]):
79+
raise ValueError("At least one of year, month, or day must be specified")
80+
7581
# keep track of initial values and which values are known
7682
# TODO: add validation: if str, must be expected length
7783
self.initial_values: Dict[str, Optional[Union[int, str]]] = {
@@ -242,23 +248,19 @@ def format(self, format) -> str:
242248

243249
raise ValueError(f"Unsupported format '{format}'")
244250

245-
def _comparison_type(self, other: object) -> "Undate":
251+
@classmethod
252+
def _comparison_type(cls, other: object) -> "Undate":
246253
"""Common logic for type handling in comparison methods.
247254
Converts to Undate object if possible, otherwise returns
248-
NotImplemented error. Currently only supports conversion
249-
from :class:`datetime.date`
255+
:data:`NotImplemented`. Uses :meth:`to_undate` for conversion.
250256
"""
251-
252-
# support datetime.date by converting to undate
253-
if isinstance(other, datetime.date):
254-
other = Undate.from_datetime_date(other)
255-
256-
# recommended to support comparison with arbitrary objects
257-
if not isinstance(other, Undate):
257+
# convert if possible; return NotImplemented if not
258+
try:
259+
return cls.to_undate(other)
260+
except TypeError:
261+
# recommended to support comparison with arbitrary objects
258262
return NotImplemented
259263

260-
return other
261-
262264
def __eq__(self, other: object) -> bool:
263265
# Note: assumes label differences don't matter for comparing dates
264266

@@ -268,6 +270,8 @@ def __eq__(self, other: object) -> bool:
268270

269271
other = self._comparison_type(other)
270272
if other is NotImplemented:
273+
# return NotImplemented to indicate comparison is not supported
274+
# with this type
271275
return NotImplemented
272276

273277
# if both dates are fully known, then earliest/latest check
@@ -359,10 +363,23 @@ def __contains__(self, other: object) -> bool:
359363
]
360364
)
361365

362-
@staticmethod
363-
def from_datetime_date(dt_date: datetime.date):
364-
"""Initialize an :class:`Undate` object from a :class:`datetime.date`"""
365-
return Undate(dt_date.year, dt_date.month, dt_date.day)
366+
@classmethod
367+
def to_undate(cls, other: object) -> "Undate":
368+
"""Convert an arbitrary object to Undate, if possible. Raises TypeError
369+
if conversion is not possible.
370+
371+
Currently supports:
372+
- :class:`datetime.date` or :class:`datetime.datetime`
373+
374+
"""
375+
match other:
376+
case Undate():
377+
return other
378+
case datetime.date() | datetime.datetime():
379+
return Undate(other.year, other.month, other.day)
380+
381+
case _:
382+
raise TypeError(f"Conversion from {type(other)} is not supported")
366383

367384
@property
368385
def known_year(self) -> bool:

tests/test_converters/test_edtf.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
import pytest
22
from undate.converters.edtf import EDTFDateConverter
3-
from undate.date import DatePrecision
43
from undate import Undate, UndateInterval
54

65

@@ -64,8 +63,8 @@ def test_to_string(self):
6463

6564
# if converter can't generate a string for the date,
6665
# it should return a value error
67-
empty_undate = Undate()
68-
empty_undate.precision = DatePrecision.DECADE
69-
with pytest.raises(ValueError):
70-
EDTFDateConverter().to_string(empty_undate)
66+
# empty_undate = Undate() # undate with no date information no longer supported
67+
# empty_undate.precision = DatePrecision.DECADE
68+
# with pytest.raises(ValueError):
69+
# EDTFDateConverter().to_string(empty_undate)
7170
# TODO: override missing digit and confirm replacement

tests/test_interval.py

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,12 @@ def test_eq(self):
8282
)
8383
assert UndateInterval(Undate(2022, 5)) == UndateInterval(Undate(2022, 5))
8484

85+
def test_eq_type_check(self):
86+
# doesn't currently support comparison with anything else
87+
interval = UndateInterval(Undate(900))
88+
# returns NotImplemented if comparison with this type is not supported
89+
assert interval.__eq__("foo") == NotImplemented
90+
8591
def test_not_eq(self):
8692
assert UndateInterval(Undate(2022), Undate(2023)) != UndateInterval(
8793
Undate(2022), Undate(2024)
@@ -143,3 +149,85 @@ def test_duration(self):
143149
# one year set and the other not currently raises not implemented error
144150
with pytest.raises(NotImplementedError):
145151
UndateInterval(Undate(2000), Undate(month=10)).duration()
152+
153+
def test_intersection(self):
154+
century11th = UndateInterval(Undate(1001), Undate(1100))
155+
century20th = UndateInterval(Undate(1901), Undate(2000))
156+
# no intersection
157+
assert century11th.intersection(century20th) is None
158+
# should work in either direction
159+
assert century20th.intersection(century11th) is None
160+
161+
decade1990s = UndateInterval(Undate(1990), Undate(1999))
162+
# intersection of an interval completely contained in another
163+
# returns an interval equivalent to the smaller one
164+
assert century20th.intersection(decade1990s) == decade1990s
165+
assert decade1990s.intersection(century20th) == decade1990s
166+
167+
# partial overlap
168+
nineties_oughts = UndateInterval(Undate(1990), Undate(2009))
169+
assert century20th.intersection(nineties_oughts) == UndateInterval(
170+
Undate(1990), Undate(2000)
171+
)
172+
173+
# intersections between half open intervals
174+
after_c11th = UndateInterval(Undate(1001), None)
175+
assert after_c11th.intersection(century20th) == century20th
176+
assert after_c11th.intersection(decade1990s) == decade1990s
177+
178+
before_20th = UndateInterval(None, Undate(1901))
179+
assert before_20th.intersection(decade1990s) is None
180+
assert before_20th.intersection(century11th) == century11th
181+
assert before_20th.intersection(after_c11th) == UndateInterval(
182+
Undate(1001), Undate(1901)
183+
)
184+
185+
def test_contains(self):
186+
century11th = UndateInterval(Undate(1001), Undate(1100))
187+
century20th = UndateInterval(Undate(1901), Undate(2000))
188+
decade1990s = UndateInterval(Undate(1990), Undate(1999))
189+
# an interval DOES contain itself
190+
for interval in [century11th, century20th, decade1990s]:
191+
assert interval in interval
192+
193+
# checking if an interval is within another interval
194+
assert decade1990s in century20th
195+
assert decade1990s not in century11th
196+
assert century11th not in decade1990s
197+
assert century20th not in decade1990s
198+
# a specific date can be contained by an interval
199+
y2k = Undate(2000)
200+
assert y2k in century20th
201+
assert y2k not in century11th
202+
# partially known date should work too
203+
april_someyear = Undate("198X", 4)
204+
assert april_someyear in century20th
205+
assert april_someyear not in century11th
206+
# conversion from datetime.date also works
207+
assert datetime.date(1922, 5, 1) in century20th
208+
# unsupported types result in a type error
209+
with pytest.raises(TypeError):
210+
assert "nineteen-eighty-four" in century20th
211+
212+
# contains check with half-open intervals
213+
after_c11th = UndateInterval(Undate(1001), None)
214+
before_20th = UndateInterval(None, Undate(1901))
215+
# neither of them contains the other
216+
assert after_c11th not in before_20th
217+
assert before_20th not in after_c11th
218+
# nor are they contained by a smaller range
219+
assert after_c11th not in decade1990s
220+
assert before_20th not in decade1990s
221+
222+
# all of our previous test dates are in the 1900s,
223+
# so they are after the 11th century and not before the 20th
224+
for period in [decade1990s, y2k, april_someyear]:
225+
assert period in after_c11th
226+
assert period not in before_20th
227+
228+
# fully open interval - is this even meaningful?
229+
whenever = UndateInterval(None, None)
230+
assert decade1990s in whenever
231+
# NOTE: an interval contains itself or an equivalent interval,
232+
# but that may not make sense for open intervals...
233+
assert whenever in whenever

0 commit comments

Comments
 (0)