Skip to content

Commit 5fd7505

Browse files
committed
Add grammar & converter for Christian holidays
- Adapted from Hale/Eliot dataset code - Converted with help from OpenCode and Claude
1 parent 60328de commit 5fd7505

7 files changed

Lines changed: 314 additions & 8 deletions

File tree

src/undate/converters/combined.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
"""
2-
**Experimental** combined parser. Supports EDTF, Gregorian, Hebrew, and Hijri
3-
where dates are unambiguous. Year-only dates are parsed as EDTF in
4-
Gregorian calendar.
2+
Combined parser. Supports EDTF, Gregorian, Hebrew, Hijri, and Christian
3+
liturgical dates where dates are unambiguous. Year-only dates are parsed
4+
as EDTF in Gregorian calendar.
55
"""
66

77
from typing import Union
@@ -16,6 +16,7 @@
1616
from undate.converters.calendars.gregorian.transformer import GregorianDateTransformer
1717
from undate.converters.calendars.hebrew.transformer import HebrewDateTransformer
1818
from undate.converters.calendars.islamic.transformer import IslamicDateTransformer
19+
from undate.converters.holidays import HolidayTransformer
1920

2021

2122
class CombinedDateTransformer(Transformer):
@@ -35,6 +36,7 @@ def start(self, children):
3536
hebrew=HebrewDateTransformer(),
3637
islamic=IslamicDateTransformer(),
3738
gregorian=GregorianDateTransformer(),
39+
holidays=HolidayTransformer(),
3840
)
3941

4042

@@ -47,14 +49,16 @@ def start(self, children):
4749
class OmnibusDateConverter(BaseDateConverter):
4850
"""
4951
Combination parser that aggregates existing parser grammars.
50-
Currently supports EDTF, Gregorian, Hebrew, and Hijri where dates are unambiguous.
51-
(Year-only dates are parsed as EDTF in Gregorian calendar.)
52+
Supports EDTF, Gregorian, Hebrew, Hijri, and Christian liturgical dates
53+
where dates are unambiguous. Year-only dates are parsed as EDTF in
54+
Gregorian calendar.
5255
5356
Does not support serialization.
5457
5558
Example usage::
5659
57-
Undate.parse("Tammuz 4816", "omnibus")
60+
Undate.parse("Tammuz 4812", "omnibus")
61+
Undate.parse("Easter 1916", "omnibus")
5862
5963
"""
6064

src/undate/converters/grammars/combined.lark

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
%import .undate_common.DATE_PUNCTUATION
66
%ignore DATE_PUNCTUATION
77

8-
start: (edtf__start | hebrew__hebrew_date | islamic__islamic_date | gregorian__gregorian_date )
8+
start: (edtf__start | hebrew__hebrew_date | islamic__islamic_date | gregorian__gregorian_date | holidays__holiday_date)
99

1010
// Renaming of the import variables is required, as they receive the namespace of this file.
1111
// See: https://github.com/lark-parser/lark/pull/973#issuecomment-907287565
@@ -30,6 +30,8 @@ start: (edtf__start | hebrew__hebrew_date | islamic__islamic_date | gregorian__
3030
// gregorian calendar, in multiple languages
3131
%import .gregorian.gregorian_date -> gregorian__gregorian_date
3232

33+
// relative import from holidays.lark
34+
%import .holidays.holiday_date -> holidays__holiday_date
3335

3436
// override hebrew date to omit year-only, since year without calendar is ambiguous
3537
// NOTE: potentially support year with calendar label
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
// Grammar for Christian liturgical dates: a holiday name optionally
// followed by a four-digit year.

// whitespace and date punctuation are skipped between tokens
%import common.WS
%ignore WS

%import .undate_common.DATE_PUNCTUATION
%ignore DATE_PUNCTUATION


// a movable feast requires a year; a fixed-date holiday may omit it
holiday_date: movable_feast year | fixed_date year?

// holidays that shift depending on the year
movable_feast: EASTER | EASTER_MONDAY | HOLY_SATURDAY | ASCENSION
    | PENTECOST | WHIT_MONDAY | TRINITY | ASH_WEDNESDAY | SHROVE_TUESDAY

// holidays that are always on the same date
fixed_date: EPIPHANY | CANDLEMASS | ST_PATRICKS | ALL_FOOLS | ST_CYPRIANS

// exactly four digits
year: /\d{4}/

// all terminal patterns below are case-insensitive regular expressions (/i flag)

// Fixed-date holidays
EPIPHANY: /epiphany/i
CANDLEMASS: /candlemass?/i  // matches spelling with either one or two trailing s
ST_PATRICKS: /st\.?\s*patrick'?s?\s*day/i
ALL_FOOLS: /(april|all)\s*fools?\s*day/i
ST_CYPRIANS: /st\.?\s*cyprian'?s?\s*day/i

// Movable feasts
EASTER: /easter/i
EASTER_MONDAY: /easter\s*monday/i
HOLY_SATURDAY: /holy\s*saturday/i
// longer alternative is listed first so the optional "day" suffix is consumed
ASCENSION: /ascension\s*day|ascension/i
PENTECOST: /pentecost/i
WHIT_MONDAY: /whit\s*monday|whitsun\s*monday/i
TRINITY: /trinity\s*sunday|trinity/i
ASH_WEDNESDAY: /ash\s*wednesday/i
SHROVE_TUESDAY: /shrove\s*tuesday/i

src/undate/converters/holidays.py

Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
"""
2+
Holiday date Converter: parse Christian liturgical dates and convert to Gregorian.
3+
"""
4+
5+
import datetime
6+
7+
from lark import Lark, Transformer, Tree, Token
8+
from lark.exceptions import UnexpectedInput
9+
10+
from convertdate import holidays
11+
from undate import Undate, Calendar
12+
from undate.converters.base import BaseDateConverter, GRAMMAR_FILE_PATH
13+
14+
# To add a new holiday:
15+
# 1. Add a name and pattern to holidays.lark grammar file
16+
# 2. Include it in the appropriate grammar rule (fixed_date or movable_feast)
17+
# 3. Add an entry to FIXED_HOLIDAYS or MOVEABLE_FEASTS; must match grammar terminal name
18+
19+
20+
# Holidays that fall on the same date every year.
# Each key must match the grammar terminal name in holidays.lark;
# each value is a tuple of numeric (month, day).
FIXED_HOLIDAYS = {
    "EPIPHANY": (1, 6),  # January 6
    "CANDLEMASS": (2, 2),  # February 2; 40th day & end of epiphany
    "ST_PATRICKS": (3, 17),  # March 17
    "ALL_FOOLS": (4, 1),  # All / April fools day, April 1
    "ST_CYPRIANS": (9, 16),  # St. Cyprian's Feast day: September 16
}

# Holidays that shift depending on the year.
# Each key must match the grammar terminal name in holidays.lark;
# each value is the offset in days relative to Easter (negative = before).
MOVEABLE_FEASTS = {
    "EASTER": 0,  # Easter, no offset
    "HOLY_SATURDAY": -1,  # day before Easter
    "EASTER_MONDAY": 1,  # day after Easter
    "ASCENSION": 39,  # fortieth day of Easter
    "PENTECOST": 49,  # 7 weeks after Easter
    "WHIT_MONDAY": 50,  # Monday after Pentecost
    "TRINITY": 56,  # first Sunday after Pentecost
    "ASH_WEDNESDAY": -46,  # Wednesday of the 7th week before Easter
    "SHROVE_TUESDAY": -47,  # day before Ash Wednesday
}


# Module-level parser for the standalone holiday grammar, using
# holiday_date as the start rule.
parser = Lark.open(
    str(GRAMMAR_FILE_PATH / "holidays.lark"), rel_to=__file__, start="holiday_date"
)
47+
48+
49+
class HolidayTransformer(Transformer):
    """Transform a parsed holiday date tree into an :class:`~undate.Undate`.

    Fixed-date holidays are resolved to a month and day from
    ``FIXED_HOLIDAYS``; moveable feasts are calculated as an offset in days
    from Easter of the parsed year, using ``MOVEABLE_FEASTS``.
    """

    calendar = Calendar.GREGORIAN

    @staticmethod
    def _terminal_name(token_type: str) -> str:
        """Return a terminal name with any ``namespace__`` prefix removed.

        Token types are namespaced (e.g. ``holidays__EASTER``) when this
        transformer is called from the combined grammar.
        """
        return token_type.split("__")[-1]

    def year(self, items):
        """Join the matched year characters into a single ``year`` token."""
        value = "".join([str(i) for i in items])
        return Token("year", value)

    def movable_feast(self, items):
        """Pass the feast terminal through unchanged.

        A moveable feast can't be calculated without the year, so the
        date is resolved later in :meth:`_get_date_parts`.
        """
        return items[0]

    def fixed_date(self, items):
        """Convert a fixed-date holiday terminal to month and day tokens.

        :raises ValueError: if the terminal has no entry in ``FIXED_HOLIDAYS``
        """
        item = items[0]
        # token type is the fixed-date holiday name; use to determine month/day
        holiday_name = self._terminal_name(item.type)
        try:
            month, day = FIXED_HOLIDAYS[holiday_name]
        except KeyError:
            # grammar and lookup table are out of sync; fail with a clear message
            raise ValueError(f"Unknown fixed holiday: {item.type}") from None
        return Tree("fixed_date", [Token("month", month), Token("day", day)])

    def holiday_date(self, items):
        """Build an :class:`~undate.Undate` from the collected date parts."""
        parts = self._get_date_parts(items)
        return Undate(**parts)

    def _get_date_parts(self, items) -> dict[str, int | str]:
        """Collect year, month, and day from parsed tokens and trees.

        Recurses into nested trees (the month/day tree produced for
        fixed-date holidays). A moveable feast terminal is only recorded
        while iterating, and resolved to month and day after all children
        are processed, since the calculation requires the year.

        :param items: tokens and/or trees from the parse tree
        :returns: dictionary of date parts for initializing an undate object
        :raises ValueError: if a moveable feast is present without a year
        """
        parts = {}
        date_parts = ["year", "month", "day"]
        movable_feast = None
        for child in items:
            field = value = None
            if isinstance(child, Token):
                if child.type in date_parts:
                    # year, or month/day from a fixed date holiday
                    field = child.type
                    value = child.value
                else:
                    # check for movable feast terminal, with or without
                    # a grammar namespace prefix
                    terminal = self._terminal_name(child.type)
                    if terminal in MOVEABLE_FEASTS:
                        # collect but don't handle until we know the year
                        movable_feast = terminal

            elif isinstance(child, Tree):
                if child.data in date_parts:
                    # tree is a date field (i.e., year); in this case we
                    # expect one value
                    field = child.data
                    value = child.children[0]
                elif child.children:
                    # otherwise recurse to get date parts from children
                    parts.update(self._get_date_parts(child.children))

            # if date fields were found, add to dictionary
            if field and value:
                # currently all date parts are integer only
                parts[str(field)] = int(value)

        # if date is a movable feast, calculate relative to Easter based on the year
        if movable_feast is not None:
            if "year" not in parts:
                raise ValueError(
                    f"Cannot calculate date for {movable_feast} without a year"
                )
            offset = MOVEABLE_FEASTS[movable_feast]
            holiday_date = datetime.date(
                *holidays.easter(parts["year"])
            ) + datetime.timedelta(days=offset)
            parts.update({"month": holiday_date.month, "day": holiday_date.day})

        return parts
133+
134+
135+
class HolidayDateConverter(BaseDateConverter):
    """
    Parse Christian liturgical dates into :class:`~undate.Undate` objects.

    Handles holidays with a fixed month and day (Epiphany, Candlemass, etc.)
    as well as moveable feasts calculated relative to Easter (Easter,
    Ash Wednesday, Pentecost, etc.).

    Example usage::

        Undate.parse("Easter 1942", "holidays")
        Undate.parse("Ash Wednesday 1942", "holidays")
        Undate.parse("Epiphany", "holidays")

    Serialization is not supported.
    """

    name = "holidays"

    def __init__(self):
        self.transformer = HolidayTransformer()

    def parse(self, value: str) -> Undate:
        """Parse a holiday string and return an :class:`~undate.Undate`.

        The input text is stored as the label of the returned object.

        :raises ValueError: if the string is empty or cannot be parsed
        """
        if not value:
            raise ValueError("Parsing empty string is not supported")

        try:
            # parse, then transform the parse tree into an undate
            undate_obj = self.transformer.transform(parser.parse(value))
        except UnexpectedInput as err:
            raise ValueError(f"Could not parse '{value}' as a holiday date") from err

        # keep the original holiday text as a label on the result
        undate_obj.label = value
        return undate_obj

    def to_string(self, undate: Undate) -> str:
        """Not supported; always raises :class:`ValueError`."""
        raise ValueError("Holiday converter does not support serialization")

src/undate/date.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -261,7 +261,7 @@ def weekday(self) -> Optional[int]:
261261
thursday_week = self.astype("datetime64[W]")
262262
days_from_thursday = (self - thursday_week).astype(int)
263263
# if monday is 0, thursday is 3
264-
return (days_from_thursday + 3) % 7
264+
return int((days_from_thursday + 3) % 7)
265265

266266
return None
267267

@@ -280,6 +280,18 @@ def __sub__(self, other):
280280
# NOTE: add should not be subclassed because we want to return a Date, not a delta
281281

282282

283+
class Weekday(IntEnum):
    """Day of the week as an integer, numbered from Monday (0) through
    Sunday (6); matches the values returned by
    :meth:`datetime.date.weekday`."""

    # start of the week
    MONDAY = 0
    TUESDAY = 1
    WEDNESDAY = 2
    THURSDAY = 3
    FRIDAY = 4
    # weekend
    SATURDAY = 5
    SUNDAY = 6
294+
283295
class DatePrecision(IntEnum):
284296
"""date precision, to indicate date precision independent from how much
285297
of the date is known."""

tests/test_converters/test_combined_parser.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,12 @@
2424
("13 Jan 1602", Undate(1602, 1, 13, calendar="Gregorian")),
2525
("2022 ugu. 4", Undate(2022, 11, 4, calendar="Gregorian")),
2626
("18 avril", Undate(month=4, day=18, calendar="Gregorian")),
27+
# Christian liturgical dates
28+
("Easter 1942", Undate(1942, 4, 5)),
29+
("Epiphany 1921", Undate(1921, 1, 6)),
30+
("Pentecost 2016", Undate(2016, 5, 15)),
31+
("Ash Wednesday 2000", Undate(2000, 3, 8)),
32+
("Whit Monday 2023", Undate(2023, 5, 29)),
2733
]
2834

2935

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
import pytest
2+
3+
from undate import Undate, Calendar
4+
from undate.date import Weekday
5+
from undate.converters.holidays import HolidayDateConverter
6+
7+
8+
class TestHolidayConverter:
    """Tests for parsing Christian liturgical dates with the holiday converter."""

    converter = HolidayDateConverter()

    @pytest.mark.parametrize(
        "input_string,expected",
        [
            ("Epiphany 1921", Undate(1921, 1, 6)),
            ("candlemas 1913", Undate(1913, 2, 2)),
            ("Candlemass 1862", Undate(1862, 2, 2)),
            ("st. patrick's day 1823", Undate(1823, 3, 17)),
            ("st patrick's day 1901", Undate(1901, 3, 17)),
            ("all fools day 1933", Undate(1933, 4, 1)),
            ("st. cyprian's day 1902", Undate(1902, 9, 16)),
        ],
    )
    def test_fixed_holidays(self, input_string, expected):
        """Fixed-date holidays resolve to the same month and day in any year."""
        parsed = self.converter.parse(input_string)
        assert parsed == expected

    @pytest.mark.parametrize(
        "input_string,expected,expected_weekday",
        [
            ("Easter 1900", Undate(1900, 4, 15), Weekday.SUNDAY),
            ("easter monday 1925", Undate(1925, 4, 13), Weekday.MONDAY),
            ("holy saturday 2018", Undate(2018, 3, 31), Weekday.SATURDAY),
            ("Ash Wednesday 2000", Undate(2000, 3, 8), Weekday.WEDNESDAY),
            ("shrove tuesday 1940", Undate(1940, 2, 6), Weekday.TUESDAY),
            ("Ascension 1988", Undate(1988, 5, 12), Weekday.THURSDAY),
            ("Ascension Day 1999", Undate(1999, 5, 13), Weekday.THURSDAY),
            ("Pentecost 2016", Undate(2016, 5, 15), Weekday.SUNDAY),
            ("whit monday 2005", Undate(2005, 5, 16), Weekday.MONDAY),
            ("whitsun monday 2023", Undate(2023, 5, 29), Weekday.MONDAY),
            ("trinity 1978", Undate(1978, 5, 21), Weekday.SUNDAY),
            ("Trinity Sunday 1967", Undate(1967, 5, 21), Weekday.SUNDAY),
        ],
    )
    def test_moveable_feasts(self, input_string, expected, expected_weekday):
        """Moveable feasts resolve to the expected date and day of the week."""
        parsed = self.converter.parse(input_string)
        assert parsed == expected
        # input text is preserved as the label
        assert parsed.label == input_string
        # sanity check: the computed date falls on the weekday the feast implies
        assert parsed.earliest.weekday == expected_weekday

    def test_holiday_without_year(self):
        """A fixed-date holiday can be parsed without a year."""
        parsed = self.converter.parse("Epiphany")
        assert parsed.label == "Epiphany"
        assert parsed.format("EDTF") == "XXXX-01-06"
        assert not parsed.known_year
        assert parsed.calendar == Calendar.GREGORIAN

    def test_undate_parse(self):
        """The converter is accessible through the main undate parse method."""
        parsed = Undate.parse("Epiphany 1942", "holidays")
        assert parsed == Undate(1942, 1, 6)

    def test_parse_empty(self):
        """An empty string is rejected with a ValueError."""
        with pytest.raises(ValueError, match="empty string"):
            self.converter.parse("")

    def test_parse_error(self):
        """Text that is not a known holiday is rejected with a ValueError."""
        with pytest.raises(ValueError, match="Could not parse"):
            self.converter.parse("Not a holiday")

    def test_moveable_without_year(self):
        """A moveable feast without a year cannot be parsed."""
        with pytest.raises(ValueError, match="Could not parse"):
            self.converter.parse("Easter")

    def test_to_string_error(self):
        """Serialization is not supported and raises a ValueError."""
        with pytest.raises(ValueError, match="does not support"):
            self.converter.to_string(Undate(1916))

0 commit comments

Comments
 (0)