Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,10 @@ Required in the environment: `scikit-build-core`, `cmake>=3.21`, `ninja`, `nanob

Before introducing union-finds, priority queues, edge hashing, stride math, threading helpers, or label relabeling, check `include/bioimage_cpp/detail/`:

- `detail/union_find.hxx` — `UnionFind` (path compression + union by rank). `find`, `merge`, `merge_to`, `unite_roots`.
- `util/union_find.hxx` — `UnionFind` (namespace `bioimage_cpp::util`, not `detail`; path compression + union by rank). `find`, `merge`, `merge_to`, `unite_roots`.
- `detail/indexed_heap.hxx` — addressable max-heap with mutable priorities. `DenseIndexedHeap` (vector-backed locator, integer keys in `[0, N)`) and `SparseIndexedHeap` (hashmap-backed, arbitrary keys).
- `detail/edge_hash.hxx` — `Edge`, `edge_key`, `EdgeHash` for hashing unordered node pairs.
- `detail/grid.hxx` — `c_order_strides`, `valid_offset_target`, `is_valid_grid_edge` for row-major grid offsets.
- `detail/grid.hxx` — `number_of_elements`, `c_order_strides`, `valid_offset_target`, `is_valid_grid_edge` for row-major grid shapes/offsets.
- `detail/relabel.hxx` — `dense_relabel` to map arbitrary labels onto `[0, k)` preserving first-occurrence order.
- `detail/threading.hxx` — `normalize_thread_count`, `parallel_for_chunks(n_threads, n_items, chunk)`.

Expand Down
82 changes: 82 additions & 0 deletions development/graph/benchmark_accumulate_labels.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
"""Benchmark RAG label accumulation (``accumulate_labels``).

The code review replaced the per-thread ``n_threads x n_nodes`` map-of-maps with
a single combined-key ``(node, other)`` histogram per thread, and made the
per-node argmax single-threaded. This script times ``accumulate_labels`` across
thread counts (to confirm the allocation win and rule out a high-thread-count
regression from the now-sequential argmax) and across ``other``-label
cardinalities.

Inputs: a deterministic block-wise over-segmentation (each node is a contiguous
block, as in a real over-segmentation), so the RAG adjacency is local.

Not part of the test suite. Run::

python development/graph/benchmark_accumulate_labels.py --repeats 11
"""
from __future__ import annotations

import argparse
from statistics import median
from time import perf_counter

import numpy as np

import bioimage_cpp as bic


def _timeit(fn, repeats: int, warmup: int = 1) -> dict:
for _ in range(warmup):
fn()
timings = []
for _ in range(repeats):
t0 = perf_counter()
fn()
timings.append(perf_counter() - t0)
return {"median": median(timings), "min": min(timings), "n": repeats}


def _make_labels(shape=(40, 256, 256), coarsen=(2, 4, 4)) -> np.ndarray:
coarse_shape = tuple(s // c for s, c in zip(shape, coarsen))
n_nodes = int(np.prod(coarse_shape))
coarse = np.arange(n_nodes, dtype=np.uint64).reshape(coarse_shape)
block = np.ones(coarsen, dtype=np.uint64)
return np.ascontiguousarray(np.kron(coarse, block))


def run(repeats: int = 11) -> dict:
labels = _make_labels()
rag = bic.graph.region_adjacency_graph(labels)
n_nodes = int(rag.number_of_nodes)
rng = np.random.default_rng(0)

results: dict[str, dict] = {}
for n_other, tag in ((8, "dense8"), (2000, "sparse2000")):
other = rng.integers(0, n_other, size=labels.shape).astype(np.uint64)
other = np.ascontiguousarray(other)
for threads in (1, 4, 8):
def fn(t=threads, o=other):
bic.graph.features.accumulate_labels(rag, labels, o, number_of_threads=t)

key = f"accumulate_labels_{tag}_t{threads}"
results[key] = _timeit(fn, repeats)
results[key]["meta"] = {
"n_nodes": n_nodes,
"n_pixels": int(labels.size),
"n_other": n_other,
"threads": threads,
}
return results


def main() -> None:
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument("--repeats", type=int, default=11)
args = parser.parse_args()
results = run(repeats=args.repeats)
for name, r in results.items():
print(f"{name:<32} median={r['median'] * 1e3:8.3f} ms min={r['min'] * 1e3:8.3f} ms")


if __name__ == "__main__":
main()
70 changes: 70 additions & 0 deletions development/graph/benchmark_bfs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
"""Benchmark the workspace-reused BFS path (``lifted_edges_from_node_labels``).

The code review changed ``BfsWorkspace::reset`` from an O(N) clear of the
visited/distance buffers to an O(1) generation-stamp bump. That only pays off
when one workspace is reset across many sources, which is exactly what
``lifted_edges_from_node_labels`` does (one workspace per chunk, reset per
source). The single-call ``breadth_first_search`` builds a fresh workspace each
call and would NOT show the change, so we benchmark the lifted-edge path.

Single-threaded so the per-source reset cost is not hidden by parallelism. The
node count is swept to expose the previous O(N^2)-of-memset behavior.

Not part of the test suite. Run::

python development/graph/benchmark_bfs.py --repeats 5
"""
from __future__ import annotations

import argparse
from statistics import median
from time import perf_counter

import numpy as np

import bioimage_cpp as bic


def _timeit(fn, repeats: int, warmup: int = 1) -> dict:
for _ in range(warmup):
fn()
timings = []
for _ in range(repeats):
t0 = perf_counter()
fn()
timings.append(perf_counter() - t0)
return {"median": median(timings), "min": min(timings), "n": repeats}


def run(repeats: int = 5, depth: int = 2) -> dict:
shapes = [(100, 100), (160, 160), (220, 220)]
results: dict[str, dict] = {}
for shape in shapes:
n_nodes = int(np.prod(shape))
graph = bic.graph.grid_graph(shape)
rng = np.random.default_rng(0)
node_labels = rng.integers(0, 50, size=(n_nodes,), dtype=np.uint64)

def fn(g=graph, nl=node_labels, d=depth):
bic.graph.lifted_multicut.lifted_edges_from_node_labels(
g, nl, graph_depth=d, number_of_threads=1
)

key = f"bfs_lifted_{n_nodes}nodes_d{depth}"
results[key] = _timeit(fn, repeats)
results[key]["meta"] = {"n_nodes": n_nodes, "shape": shape, "depth": depth}
return results


def main() -> None:
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument("--repeats", type=int, default=5)
parser.add_argument("--depth", type=int, default=2)
args = parser.parse_args()
results = run(repeats=args.repeats, depth=args.depth)
for name, r in results.items():
print(f"{name:<28} median={r['median'] * 1e3:9.3f} ms min={r['min'] * 1e3:9.3f} ms")


if __name__ == "__main__":
main()
89 changes: 89 additions & 0 deletions development/segmentation/benchmark_mutex_watershed.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
"""Benchmark the mutex-watershed grid kernel (bic only).

The code review routed the neighbor computation in ``mutex_watershed_grid``
through ``detail::valid_offset_target`` (per-axis bounds check) instead of a
single precomputed flat-offset add. This script times ``mutex_watershed`` on the
ISBI affinities for 2D and 3D so we can A/B that inner-loop change against a
pre-review build. No external (affogato) dependency — bic only.

Not part of the test suite. Run::

python development/segmentation/benchmark_mutex_watershed.py --repeats 5
"""
from __future__ import annotations

import argparse
from statistics import median
from time import perf_counter

import numpy as np

import bioimage_cpp as bic
from bioimage_cpp._data import load_isbi_affinities


def _timeit(fn, repeats: int, warmup: int = 1) -> dict:
for _ in range(warmup):
fn()
timings = []
for _ in range(repeats):
t0 = perf_counter()
fn()
timings.append(perf_counter() - t0)
return {"median": median(timings), "min": min(timings), "n": repeats}


def _attractive_flip(affs: np.ndarray, n_attractive: int) -> np.ndarray:
# Match the convention in the equivalence checker: attractive channels are
# turned into merge affinities (1 - aff).
out = affs.copy()
out[:n_attractive] *= -1
out[:n_attractive] += 1
return out


def run(repeats: int = 5) -> dict:
affinities, offsets = load_isbi_affinities()
affinities = np.ascontiguousarray(affinities)
offsets = [tuple(o) for o in offsets]
results: dict[str, dict] = {}

# --- 2D: in-plane channels of a single z slice ---
channels_2d = [i for i, o in enumerate(offsets) if o[0] == 0]
aff2d = np.ascontiguousarray(affinities[channels_2d, 0, :256, :256])
offsets_2d = [offsets[i][1:] for i in channels_2d]
aff2d_flipped = _attractive_flip(aff2d, 2)

def mws_2d():
bic.segmentation.mutex_watershed(
aff2d_flipped, offsets_2d, number_of_attractive_channels=2
)

results["mws_2d_256x256"] = _timeit(mws_2d, repeats)
results["mws_2d_256x256"]["meta"] = {"shape": list(aff2d.shape)}

# --- 3D: small crop, all offsets ---
aff3d = np.ascontiguousarray(affinities[:, :6, :256, :256])
aff3d_flipped = _attractive_flip(aff3d, 3)

def mws_3d():
bic.segmentation.mutex_watershed(
aff3d_flipped, offsets, number_of_attractive_channels=3
)

results["mws_3d_6x256x256"] = _timeit(mws_3d, repeats)
results["mws_3d_6x256x256"]["meta"] = {"shape": list(aff3d.shape)}
return results


def main() -> None:
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument("--repeats", type=int, default=5)
args = parser.parse_args()
results = run(repeats=args.repeats)
for name, r in results.items():
print(f"{name:<20} median={r['median'] * 1e3:9.3f} ms min={r['min'] * 1e3:9.3f} ms")


if __name__ == "__main__":
main()
70 changes: 70 additions & 0 deletions development/utils/benchmark_union_find.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
"""Micro-benchmark for the UnionFind Python bindings.

Times the bulk ``merge((N, 2) edges)`` and ``find(node_array)`` entry points,
which the code review changed to validate every node id against ``uf.size``
before touching the C++ structure (an extra O(N) pre-pass). This script lets us
A/B that pre-pass against a pre-review build.

Not part of the test suite. Run::

python development/utils/benchmark_union_find.py --repeats 11
"""
from __future__ import annotations

import argparse
from statistics import median
from time import perf_counter

import numpy as np

import bioimage_cpp as bic


def _timeit(fn, repeats: int, warmup: int = 1) -> dict:
for _ in range(warmup):
fn()
timings = []
for _ in range(repeats):
t0 = perf_counter()
fn()
timings.append(perf_counter() - t0)
return {"median": median(timings), "min": min(timings), "n": repeats}


def run(repeats: int = 11, n_nodes: int = 1_000_000, n_edges: int = 2_000_000) -> dict:
rng = np.random.default_rng(0)
edges = rng.integers(0, n_nodes, size=(n_edges, 2), dtype=np.uint64)
query = rng.integers(0, n_nodes, size=(n_edges,), dtype=np.uint64)

# Pre-merged structure for the find benchmark (find does not mutate the
# partition, so it can be reused across repeats).
merged = bic.utils.UnionFind(n_nodes)
merged.merge(edges)

def bulk_merge():
uf = bic.utils.UnionFind(n_nodes)
uf.merge(edges)

def bulk_find():
merged.find(query)

results = {
"uf_bulk_merge": _timeit(bulk_merge, repeats),
"uf_bulk_find": _timeit(bulk_find, repeats),
}
for r in results.values():
r["meta"] = {"n_nodes": n_nodes, "n_edges": n_edges}
return results


def main() -> None:
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument("--repeats", type=int, default=11)
args = parser.parse_args()
results = run(repeats=args.repeats)
for name, r in results.items():
print(f"{name:<16} median={r['median'] * 1e3:8.3f} ms min={r['min'] * 1e3:8.3f} ms")


if __name__ == "__main__":
main()
10 changes: 10 additions & 0 deletions include/bioimage_cpp/detail/grid.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,16 @@

namespace bioimage_cpp::detail {

// Total number of elements in a row-major array of the given shape (the product
// of the per-axis extents). Shape entries are assumed non-negative.
inline std::size_t number_of_elements(const std::vector<std::ptrdiff_t> &shape) {
std::size_t total = 1;
for (const auto extent : shape) {
total *= static_cast<std::size_t>(extent);
}
return total;
}

// C-order strides for a row-major array of the given shape, in units of array
// elements (not bytes). The innermost (last) axis has stride 1.
inline std::vector<std::ptrdiff_t> c_order_strides(const std::vector<std::ptrdiff_t> &shape) {
Expand Down
7 changes: 6 additions & 1 deletion include/bioimage_cpp/detail/profile.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,12 @@ inline ProfileTimerNull make_profile_timer(NullProfiler &profiler, const char *n
} // namespace bioimage_cpp::detail

#define BIOIMAGE_PROFILE_INIT(var) ::bioimage_cpp::detail::Profiler var;
#define BIOIMAGE_PROFILE_SCOPE(var, name) auto _bp_##__LINE__ = ::bioimage_cpp::detail::make_profile_timer(var, name);
// Two-level indirection so __LINE__ is expanded before the token paste; without
// it every BIOIMAGE_PROFILE_SCOPE in a translation unit would declare the same
// identifier `_bp___LINE__`, breaking two scopes in one block.
#define BIOIMAGE_PROFILE_CONCAT_(a, b) a##b
#define BIOIMAGE_PROFILE_CONCAT(a, b) BIOIMAGE_PROFILE_CONCAT_(a, b)
#define BIOIMAGE_PROFILE_SCOPE(var, name) auto BIOIMAGE_PROFILE_CONCAT(_bp_, __LINE__) = ::bioimage_cpp::detail::make_profile_timer(var, name);
#define BIOIMAGE_PROFILE_REPORT(var) (var).report();

#else
Expand Down
5 changes: 4 additions & 1 deletion include/bioimage_cpp/flow/flow_density.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,10 @@ inline std::ptrdiff_t round_to_flat_index(
} else if (clipped > grid.upper[axis]) {
clipped = grid.upper[axis];
}
const auto coord = static_cast<std::ptrdiff_t>(std::nearbyint(clipped));
// Round half up, matching the nearest-neighbor convention in
// transformation/affine.hxx and segmentation/watershed.hxx.
// std::nearbyint would honor the FP rounding mode (round-half-to-even).
const auto coord = static_cast<std::ptrdiff_t>(std::floor(clipped + 0.5f));
flat += coord * grid.strides[axis];
}
return flat;
Expand Down
5 changes: 3 additions & 2 deletions include/bioimage_cpp/graph/agglomeration/detail.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -93,8 +93,9 @@ inline std::size_t agglo_merge_dynamic_nodes(
if (new_priority != current_priority) {
edge.weight = new_priority;
// The edge may have been previously popped via kRejectEdge
// (GASP cannot-link), in which case it is no longer in the
// heap. Use push_or_change to handle both cases.
// (GASP cannot-link). In that case it must stay out of the heap
// (re-adding it would let it be re-popped and merged), so only
// update the priority when the edge is still present.
if (heap.contains(removed_edge_id)) {
heap.change(removed_edge_id, new_priority);
}
Expand Down
Loading
Loading