@@ -1010,8 +1010,14 @@ def __str__(self) -> str:
10101010
10111011 sep = " " .join ("─" * (w + 2 ) for w in widths .values ())
10121012
def fmt_cell(value, width: int) -> str:
    """Render *value* left-justified in exactly *width* characters.

    Values whose string form is longer than *width* are truncated and
    terminated with an ellipsis so the table column never overflows.
    """
    s = str(value)
    if len(s) > width:
        # max() guards width <= 0: the original slice s[:width - 1] would
        # become a negative index, slice from the end, and return a string
        # wider than the requested column.
        s = s[: max(width - 1, 0)] + "…"
    return f"{s:<{width}}"
def fmt_row(values: dict) -> str:
    """Format one row: each column rendered by fmt_cell, space-separated."""
    cells = [fmt_cell(values[name], widths[name]) for name in self.col_names]
    return " ".join(cells)
10161022 # -- batch-fetch values (one read per column, not one per cell) --
10171023 def rows_to_dicts (positions ) -> list [dict ]:
@@ -1162,7 +1168,10 @@ def save(self, urlpath: str, *, overwrite: bool = False) -> None:
11621168 # --- columns ---
11631169 for col in self ._schema .columns :
11641170 name = col .name
1165- col_storage = self ._resolve_column_storage (col , default_chunks , default_blocks )
1171+ # Use dtype-aware defaults so large-itemsize columns (e.g. U4096) get
1172+ # sensible chunk/block sizes rather than the uint8-based defaults.
1173+ dtype_chunks , dtype_blocks = compute_chunks_blocks ((capacity ,), dtype = col .dtype )
1174+ col_storage = self ._resolve_column_storage (col , dtype_chunks , dtype_blocks )
11661175 disk_col = file_storage .create_column (
11671176 name ,
11681177 dtype = col .dtype ,
@@ -1212,25 +1221,26 @@ def load(cls, urlpath: str) -> CTable:
12121221 capacity = max (phys_size , 1 )
12131222
12141223 mem_storage = InMemoryTableStorage ()
1215- default_chunks , default_blocks = compute_chunks_blocks ((capacity ,))
1224+ bool_chunks , bool_blocks = compute_chunks_blocks ((capacity ,), dtype = np . dtype ( np . bool_ ))
12161225
12171226 mem_valid = mem_storage .create_valid_rows (
12181227 shape = (capacity ,),
1219- chunks = default_chunks ,
1220- blocks = default_blocks ,
1228+ chunks = bool_chunks ,
1229+ blocks = bool_blocks ,
12211230 )
12221231 if phys_size > 0 :
12231232 mem_valid [:phys_size ] = disk_valid [:]
12241233
12251234 mem_cols : dict [str , blosc2 .NDArray ] = {}
12261235 for col in schema .columns :
12271236 name = col .name
1237+ col_chunks , col_blocks = compute_chunks_blocks ((capacity ,), dtype = col .dtype )
12281238 mem_col = mem_storage .create_column (
12291239 name ,
12301240 dtype = col .dtype ,
12311241 shape = (capacity ,),
1232- chunks = default_chunks ,
1233- blocks = default_blocks ,
1242+ chunks = col_chunks ,
1243+ blocks = col_blocks ,
12341244 cparams = None ,
12351245 dparams = None ,
12361246 )
@@ -1284,6 +1294,8 @@ def _make_view(cls, parent: CTable, new_valid_rows: blosc2.NDArray) -> CTable:
12841294 return obj
12851295
12861296 def view (self , new_valid_rows ):
1297+ if isinstance (new_valid_rows , np .ndarray ) and new_valid_rows .dtype == np .bool_ :
1298+ new_valid_rows = blosc2 .asarray (new_valid_rows )
12871299 if not (
12881300 isinstance (new_valid_rows , (blosc2 .NDArray , blosc2 .LazyExpr ))
12891301 and (getattr (new_valid_rows , "dtype" , None ) == np .bool_ )
@@ -1798,6 +1810,24 @@ def to_csv(self, path: str, *, header: bool = True, sep: str = ",") -> None:
17981810 for row in zip (* arrays , strict = True ):
17991811 writer .writerow (row )
18001812
1813+ @staticmethod
1814+ def _csv_col_to_array (raw : list [str ], col , nv ) -> np .ndarray :
1815+ """Convert a list of raw CSV strings to a numpy array for *col*."""
1816+ if col .dtype == np .bool_ :
1817+
1818+ def _parse (v , _nv = nv ):
1819+ stripped = v .strip ()
1820+ if stripped == "" and _nv is not None :
1821+ return _nv
1822+ return stripped in ("True" , "true" , "1" )
1823+
1824+ return np .array ([_parse (v ) for v in raw ], dtype = np .bool_ )
1825+ if col .dtype .kind == "S" :
1826+ prepared : list = [nv if (v .strip () == "" and nv is not None ) else v .encode () for v in raw ]
1827+ return np .array (prepared , dtype = col .dtype )
1828+ prepared2 = [nv if (v .strip () == "" and nv is not None ) else v for v in raw ]
1829+ return np .array (prepared2 , dtype = col .dtype )
1830+
18011831 @classmethod
18021832 def from_csv (
18031833 cls ,
@@ -1900,25 +1930,7 @@ def from_csv(
19001930 if n > 0 :
19011931 for i , col in enumerate (schema .columns ):
19021932 nv = getattr (col .spec , "null_value" , None )
1903- if col .dtype == np .bool_ :
1904- # np.array(["False"], dtype=bool) treats any non-empty
1905- # string as True. Parse "True"/"False"/"1"/"0" explicitly.
1906- # Empty cells → null_value (or False if no null_value).
1907- def _parse_bool (v , _nv = nv ):
1908- stripped = v .strip ()
1909- if stripped == "" and _nv is not None :
1910- return _nv
1911- return stripped in ("True" , "true" , "1" )
1912-
1913- arr = np .array ([_parse_bool (v ) for v in col_data [i ]], dtype = np .bool_ )
1914- else :
1915- raw_strings = col_data [i ]
1916- if nv is not None :
1917- # Replace empty cells with the null sentinel string representation,
1918- # then cast. For numeric types, store nv directly.
1919- nv_str = str (nv )
1920- raw_strings = [nv_str if v .strip () == "" else v for v in raw_strings ]
1921- arr = np .array (raw_strings , dtype = col .dtype )
1933+ arr = cls ._csv_col_to_array (col_data [i ], col , nv )
19221934 new_cols [col .name ][:n ] = arr
19231935 new_valid [:n ] = True
19241936 obj ._n_rows = n
@@ -2535,6 +2547,8 @@ def extend(self, data: list | CTable | Any, *, validate: bool | None = None) ->
25352547
25362548 @profile
25372549 def where (self , expr_result ) -> CTable :
2550+ if isinstance (expr_result , np .ndarray ) and expr_result .dtype == np .bool_ :
2551+ expr_result = blosc2 .asarray (expr_result )
25382552 if not (
25392553 isinstance (expr_result , (blosc2 .NDArray , blosc2 .LazyExpr ))
25402554 and (getattr (expr_result , "dtype" , None ) == np .bool_ )
0 commit comments