Skip to content

Commit 96bafaf

Browse files
authored
Fix deepcopy for And, Or, and Not expressions (#3295)
Closes #3297 # Rationale for this change `copy.deepcopy()` on `And`, `Or`, and `Not` expressions raises `TypeError` because Pydantic v2's default `__deepcopy__` calls `cls.__new__(cls)` with no args, but these classes require positional arguments in `__new__`. The fix adds `__deepcopy__` to `And`, `Or`, and `Not` that deepcopy their fields and call the constructor directly. ## Are these changes tested? Yes. 11 new unit tests covering all expression types, balanced trees, nested expressions, and deepcopy followed by pickle. ## Are there any user-facing changes? Yes, `copy.deepcopy()` on expressions now works instead of raising an exception.
1 parent 144bf72 commit 96bafaf

2 files changed

Lines changed: 83 additions & 0 deletions

File tree

pyiceberg/expressions/__init__.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
from __future__ import annotations
1919

20+
import copy
2021
from abc import ABC, abstractmethod
2122
from collections.abc import Callable, Iterable, Sequence
2223
from functools import cached_property
@@ -339,6 +340,10 @@ def __invert__(self) -> BooleanExpression:
339340
# De Morgan's law: not (A and B) = (not A) or (not B)
340341
return Or(~self.left, ~self.right)
341342

343+
def __deepcopy__(self, memo: dict[int, Any]) -> And:
344+
"""Return a deep copy of the And expression."""
345+
return And(copy.deepcopy(self.left, memo), copy.deepcopy(self.right, memo))
346+
342347
def __getnewargs__(self) -> tuple[BooleanExpression, BooleanExpression]:
343348
"""Pickle the And class."""
344349
return (self.left, self.right)
@@ -386,6 +391,10 @@ def __invert__(self) -> BooleanExpression:
386391
# De Morgan's law: not (A or B) = (not A) and (not B)
387392
return And(~self.left, ~self.right)
388393

394+
def __deepcopy__(self, memo: dict[int, Any]) -> Or:
395+
"""Return a deep copy of the Or expression."""
396+
return Or(copy.deepcopy(self.left, memo), copy.deepcopy(self.right, memo))
397+
389398
def __getnewargs__(self) -> tuple[BooleanExpression, BooleanExpression]:
390399
"""Pickle the Or class."""
391400
return (self.left, self.right)
@@ -428,6 +437,10 @@ def __invert__(self) -> BooleanExpression:
428437
"""Transform the Expression into its negated version."""
429438
return self.child
430439

440+
def __deepcopy__(self, memo: dict[int, Any]) -> Not:
441+
"""Return a deep copy of the Not expression."""
442+
return Not(copy.deepcopy(self.child, memo))
443+
431444
def __getnewargs__(self) -> tuple[BooleanExpression]:
432445
"""Pickle the Not class."""
433446
return (self.child,)

tests/expressions/test_expressions.py

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
# under the License.
1717
# pylint:disable=redefined-outer-name,eval-used
1818

19+
import copy
1920
import pickle
2021
import uuid
2122
from decimal import Decimal
@@ -1292,6 +1293,75 @@ def test_bind_ambiguous_name() -> None:
12921293
assert "Invalid schema, multiple fields for name foo.bar: 2 and 3" in str(exc_info)
12931294

12941295

1296+
def test_deepcopy_and() -> None:
1297+
expr = And(EqualTo("x", 1), EqualTo("y", 2))
1298+
copied = copy.deepcopy(expr)
1299+
assert copied == expr
1300+
assert copied is not expr
1301+
1302+
1303+
def test_deepcopy_or() -> None:
1304+
expr = Or(EqualTo("x", 1), EqualTo("y", 2))
1305+
copied = copy.deepcopy(expr)
1306+
assert copied == expr
1307+
assert copied is not expr
1308+
1309+
1310+
def test_deepcopy_not() -> None:
1311+
expr = Not(EqualTo("x", 1))
1312+
copied = copy.deepcopy(expr)
1313+
assert copied == expr
1314+
assert copied is not expr
1315+
1316+
1317+
def test_deepcopy_equal_to() -> None:
1318+
expr = EqualTo("x", 1)
1319+
copied = copy.deepcopy(expr)
1320+
assert copied == expr
1321+
assert copied is not expr
1322+
1323+
1324+
def test_deepcopy_always_true() -> None:
1325+
copied = copy.deepcopy(AlwaysTrue())
1326+
assert copied is AlwaysTrue()
1327+
1328+
1329+
def test_deepcopy_always_false() -> None:
1330+
copied = copy.deepcopy(AlwaysFalse())
1331+
assert copied is AlwaysFalse()
1332+
1333+
1334+
def test_deepcopy_always_true_then_pickle() -> None:
1335+
copied = copy.deepcopy(AlwaysTrue())
1336+
restored = pickle.loads(pickle.dumps(copied))
1337+
assert restored is AlwaysTrue()
1338+
1339+
1340+
def test_deepcopy_balanced_and() -> None:
1341+
expr = And(EqualTo("a", 1), EqualTo("b", 2), EqualTo("c", 3), EqualTo("d", 4))
1342+
copied = copy.deepcopy(expr)
1343+
assert copied == expr
1344+
1345+
1346+
def test_deepcopy_balanced_or() -> None:
1347+
expr = Or(EqualTo("a", 1), EqualTo("b", 2), EqualTo("c", 3), EqualTo("d", 4))
1348+
copied = copy.deepcopy(expr)
1349+
assert copied == expr
1350+
1351+
1352+
def test_deepcopy_nested_expression() -> None:
1353+
expr = And(Or(EqualTo("a", 1), EqualTo("b", 2)), Not(EqualTo("c", 3)))
1354+
copied = copy.deepcopy(expr)
1355+
assert copied == expr
1356+
1357+
1358+
def test_deepcopy_then_pickle() -> None:
1359+
expr = And(EqualTo("x", 1), EqualTo("y", 2))
1360+
copied = copy.deepcopy(expr)
1361+
restored = pickle.loads(pickle.dumps(copied))
1362+
assert restored == expr
1363+
1364+
12951365
# __ __ ___
12961366
# | \/ |_ _| _ \_ _
12971367
# | |\/| | || | _/ || |

0 commit comments

Comments
 (0)