Skip to content

Commit baadb81

Browse files
committed
Add lower, upper
1 parent 0ef157b commit baadb81

4 files changed

Lines changed: 100 additions & 8 deletions

File tree

src/blosc2/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -649,6 +649,7 @@ def _raise(exc):
649649
logical_not,
650650
logical_or,
651651
logical_xor,
652+
lower,
652653
max,
653654
maximum,
654655
mean,
@@ -681,6 +682,7 @@ def _raise(exc):
681682
tan,
682683
tanh,
683684
trunc,
685+
upper,
684686
var,
685687
where,
686688
)
@@ -852,6 +854,7 @@ def _raise(exc):
852854
"logical_not",
853855
"logical_or",
854856
"logical_xor",
857+
"lower",
855858
"matmul",
856859
"matrix_transpose",
857860
"max",
@@ -923,6 +926,7 @@ def _raise(exc):
923926
"unpack_array",
924927
"unpack_array2",
925928
"unpack_tensor",
929+
"upper",
926930
"validate_expr",
927931
"var",
928932
"vecdot",

src/blosc2/ndarray.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5048,6 +5048,54 @@ def endswith(
50485048
return blosc2.LazyExpr(new_op=(a, "endswith", suffix))
50495049

50505050

5051+
@_incomplete_lazyfunc
5052+
def lower(a: str | blosc2.Array) -> NDArray:
5053+
"""
5054+
Copy-pasted from numpy documentation: https://numpy.org/doc/stable/reference/generated/numpy.char.lower.html
5055+
Return an array with the elements converted to lowercase.
5056+
Call str.lower element-wise.
5057+
For 8-bit strings, this method is locale-dependent.
5058+
5059+
Parameters
5060+
----------
5061+
a : blosc2.Array
5062+
Input array of bytes_ or str_ dtype
5063+
kwargs: Any
5064+
kwargs accepted by the :func:`empty` constructor
5065+
5066+
Returns
5067+
-------
5068+
out: blosc2.Array, of bytes_ or str_ dtype
5069+
Has the same shape as ``a``.
5070+
5071+
"""
5072+
return blosc2.LazyExpr(new_op=(a, "lower", None))
5073+
5074+
5075+
@_incomplete_lazyfunc
5076+
def upper(a: str | blosc2.Array) -> NDArray:
5077+
"""
5078+
Copy-pasted from numpy documentation: https://numpy.org/doc/stable/reference/generated/numpy.char.upper.html
5079+
Return an array with the elements converted to uppercase.
5080+
Call str.upper element-wise.
5081+
For 8-bit strings, this method is locale-dependent.
5082+
5083+
Parameters
5084+
----------
5085+
a : blosc2.Array
5086+
Input array of bytes_ or str_ dtype
5087+
kwargs: Any
5088+
kwargs accepted by the :func:`empty` constructor
5089+
5090+
Returns
5091+
-------
5092+
out: blosc2.Array, of bytes_ or str_ dtype
5093+
Has the same shape as ``a``.
5094+
5095+
"""
5096+
return blosc2.LazyExpr(new_op=(a, "upper", None))
5097+
5098+
50515099
def lazywhere(value1=None, value2=None):
50525100
"""Decorator to apply a where condition to a LazyExpr."""
50535101

src/blosc2/utils.py

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,14 @@ def _string_startswith(a, b):
5454
return np.char.startswith(a, b)
5555

5656

57+
def _string_lower(a):
58+
return np.char.lower(a)
59+
60+
61+
def _string_upper(a):
62+
return np.char.upper(a)
63+
64+
5765
def _string_endswith(a, b):
5866
return np.char.endswith(a, b)
5967

@@ -97,6 +105,8 @@ def _format_expr_scalar(value):
97105
safe_numpy_globals["contains"] = _string_contains
98106
safe_numpy_globals["startswith"] = _string_startswith
99107
safe_numpy_globals["endswith"] = _string_endswith
108+
safe_numpy_globals["upper"] = _string_upper
109+
safe_numpy_globals["lower"] = _string_lower
100110

101111

102112
elementwise_funcs = [
@@ -155,6 +165,7 @@ def _format_expr_scalar(value):
155165
"logical_not",
156166
"logical_or",
157167
"logical_xor",
168+
"lower",
158169
"maximum",
159170
"minimum",
160171
"multiply",
@@ -178,6 +189,7 @@ def _format_expr_scalar(value):
178189
"tan",
179190
"tanh",
180191
"trunc",
192+
"upper",
181193
"where",
182194
]
183195

@@ -931,13 +943,6 @@ def process_key(key, shape):
931943
return key, mask
932944

933945

934-
incomplete_lazyfunc_map = {
935-
"contains": lambda *args: np.char.find(*args) != -1,
936-
"startswith": lambda *args: np.char.startswith(*args),
937-
"endswith": lambda *args: np.char.endswith(*args),
938-
} | safe_numpy_globals # clip and logaddexp available in safe_numpy_globals
939-
940-
941946
def is_inside_ne_evaluate() -> bool:
942947
"""
943948
Whether the current code is being executed from an ne_evaluate call
@@ -968,7 +973,7 @@ def filler(inputs_tuple, output, offset):
968973

969974
def wrapper(*args, **kwargs):
970975
if is_inside_ne_evaluate(): # haven't been able to use miniexpr so use numpy
971-
return incomplete_lazyfunc_map[func.__name__](*args, **kwargs)
976+
return safe_numpy_globals[func.__name__](*args, **kwargs)
972977
return func(*args, **kwargs)
973978

974979
return wrapper

tests/ndarray/test_lazyexpr.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -597,6 +597,41 @@ def test_stringops(values):
597597
np.testing.assert_array_equal(expr_lazy[:], res_numexpr)
598598

599599

600+
def test_stringops2():
601+
# test all supported string ops for bytes and strings
602+
for t in ("bytes", "string"):
603+
if t == "bytes":
604+
a1 = np.array([b"abc", b"def", b"aterr", b"oot", b"zu", b"ab c"])
605+
a2 = a2_blosc = b"a"
606+
else:
607+
a1 = np.array(["abc", "def", "aterr", "oot", "zu", "ab c"])
608+
a2 = a2_blosc = "a"
609+
a1_blosc = blosc2.asarray(a1)
610+
for func, npfunc in zip(
611+
(blosc2.startswith, blosc2.endswith, blosc2.contains),
612+
(np.char.startswith, np.char.endswith, lambda *args: np.char.find(*args) != -1),
613+
strict=True,
614+
):
615+
expr_lazy = func(a1_blosc, a2_blosc)
616+
res_numexpr = npfunc(a1, a2)
617+
assert expr_lazy.shape == res_numexpr.shape
618+
assert expr_lazy.dtype == blosc2.bool_
619+
np.testing.assert_array_equal(expr_lazy[:], res_numexpr)
620+
621+
np.testing.assert_array_equal((a1_blosc < a2_blosc)[:], a1 < a2)
622+
np.testing.assert_array_equal((a1_blosc <= a2_blosc)[:], a1 <= a2)
623+
np.testing.assert_array_equal((a1_blosc == a2_blosc)[:], a1 == a2)
624+
np.testing.assert_array_equal((a1_blosc != a2_blosc)[:], a1 != a2)
625+
np.testing.assert_array_equal((a1_blosc >= a2_blosc)[:], a1 >= a2)
626+
np.testing.assert_array_equal((a1_blosc > a2_blosc)[:], a1 > a2)
627+
628+
for func, npfunc in zip((blosc2.lower, blosc2.upper), (np.char.lower, np.char.upper), strict=True):
629+
expr_lazy = func(a1_blosc)
630+
res_numexpr = npfunc(a1)
631+
assert expr_lazy.shape == res_numexpr.shape
632+
np.testing.assert_array_equal(expr_lazy[:], res_numexpr)
633+
634+
600635
def test_negate(dtype_fixture, shape_fixture):
601636
nelems = np.prod(shape_fixture)
602637
na1 = np.linspace(-1, 1, nelems, dtype=dtype_fixture).reshape(shape_fixture)

0 commit comments

Comments
 (0)