Skip to content

Commit 20de8e7

Browse files
authored
Adjust CLEM GridSquare registration logic (#776)
* Move CLEM atlas determination logic from a repeatedly used function into a computed field in the CLEM model instead
* Added logic to determine whether incoming data is for a denoised dataset of a pre-existing one, and to overwrite existing ImagingSite database entry if it is
* Switch from using 'series_name' to using 'site_name' from the CLEM Pydantic model when registering things in the databases
* Added '_Lng_LVCC' files to test dataset and added more checks to ensure that the ISPyB entries all register the denoised datasets in their final state
1 parent 1fafd8f commit 20de8e7

2 files changed

Lines changed: 138 additions & 73 deletions

File tree

src/murfey/workflows/clem/register_preprocessing_results.py

Lines changed: 103 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,12 @@
1111
import logging
1212
import traceback
1313
from collections.abc import Collection
14+
from functools import cached_property
1415
from importlib.metadata import entry_points
1516
from pathlib import Path
1617
from typing import Literal, Optional
1718

18-
from pydantic import BaseModel
19+
from pydantic import BaseModel, computed_field
1920
from sqlmodel import Session, select
2021

2122
import murfey.util.db as MurfeyDB
@@ -53,16 +54,42 @@ class CLEMPreprocessingResult(BaseModel):
5354
resolution: float
5455
extent: list[float] # [x0, x1, y0, y1]
5556

56-
57-
def _is_clem_atlas(result: CLEMPreprocessingResult):
58-
# If an image has a width/height of at least 1.5 mm, it should qualify as an atlas
59-
return (
60-
max(
61-
result.pixels_x * result.pixel_size,
62-
result.pixels_y * result.pixel_size,
57+
# Valid Pydantic decorator not supported by MyPy
58+
@computed_field # type: ignore
59+
@cached_property
60+
def is_denoised(self) -> bool:
61+
"""
62+
The "_Lng_LVCC" suffix appended to a CLEM dataset's position name indicates
63+
that it's a denoised image set of the same position. These results should
64+
override or supersede the original ones once they're available.
65+
"""
66+
return "_Lng_LVCC" in self.series_name
67+
68+
# Valid Pydantic decorator not supported by MyPy
69+
@computed_field # type: ignore
70+
@cached_property
71+
def site_name(self) -> str:
72+
"""
73+
Extract just the name of the site by removing the "_Lng_LVCC" suffix from
74+
the series name.
75+
"""
76+
return self.series_name.replace("_Lng_LVCC", "")
77+
78+
# Valid Pydantic decorator not supported by MyPy
79+
@computed_field # type: ignore
80+
@cached_property
81+
def is_atlas(self) -> bool:
82+
"""
83+
Incoming image sets with a width/height greater/equal to the pre-set threshold
84+
should qualify as an atlas.
85+
"""
86+
return (
87+
max(
88+
self.pixels_x * self.pixel_size,
89+
self.pixels_y * self.pixel_size,
90+
)
91+
>= processing_params.atlas_threshold
6392
)
64-
>= processing_params.atlas_threshold
65-
)
6693

6794

6895
COLOR_FLAGS_MURFEY = {
@@ -91,51 +118,71 @@ def _register_clem_imaging_site(
91118
result: CLEMPreprocessingResult,
92119
murfey_db: Session,
93120
):
121+
def _register(
122+
entry: MurfeyDB.ImagingSite,
123+
result: CLEMPreprocessingResult,
124+
):
125+
"""
126+
Helper function to update the ImagingSite column values with those from the preprocessing result.
127+
"""
128+
129+
# Is this an atlas or grid square
130+
entry.data_type = "atlas" if result.is_atlas else "grid_square"
131+
# Register file paths
132+
output_file = list(result.output_files.values())[0]
133+
entry.image_path = str(output_file.parent / "*.tiff")
134+
# Shape and resolution information
135+
entry.image_pixels_x = result.pixels_x
136+
entry.image_pixels_y = result.pixels_y
137+
entry.image_pixel_size = result.pixel_size
138+
entry.units = result.units
139+
# Extent of imaged area in real space
140+
entry.x0 = result.extent[0]
141+
entry.x1 = result.extent[1]
142+
entry.y0 = result.extent[2]
143+
entry.y1 = result.extent[3]
144+
145+
# Iteratively add colour channel information
146+
entry.number_of_members = result.number_of_members
147+
for col_name, value in _get_color_flags(result.output_files.keys()).items():
148+
setattr(entry, col_name, value)
149+
entry.collection_mode = _determine_collection_mode(result.output_files.keys())
150+
151+
# Register thumbnail information if present
152+
if result.thumbnails and result.thumbnail_size:
153+
thumbnail = list(result.thumbnails.values())[0]
154+
entry.thumbnail_path = str(thumbnail.parent / "*.png")
155+
156+
thumbnail_height, thumbnail_width = result.thumbnail_size
157+
scaling_factor = min(
158+
thumbnail_height / result.pixels_y, thumbnail_width / result.pixels_x
159+
)
160+
entry.thumbnail_pixel_size = result.pixel_size / scaling_factor
161+
entry.thumbnail_pixels_x = int(round(result.pixels_x * scaling_factor)) or 1
162+
entry.thumbnail_pixels_y = int(round(result.pixels_y * scaling_factor)) or 1
163+
return entry
164+
165+
# Create a new entry if one doesn't already exist
94166
if not (
95167
clem_img_site := murfey_db.exec(
96168
select(MurfeyDB.ImagingSite)
97169
.where(MurfeyDB.ImagingSite.session_id == session_id)
98-
.where(MurfeyDB.ImagingSite.site_name == result.series_name)
170+
.where(MurfeyDB.ImagingSite.site_name == result.site_name)
99171
).one_or_none()
100172
):
101173
clem_img_site = MurfeyDB.ImagingSite(
102-
session_id=session_id, site_name=result.series_name
174+
session_id=session_id,
175+
site_name=result.site_name,
103176
)
177+
clem_img_site = _register(clem_img_site, result)
178+
179+
# Prepare to overwrite existing entry if current result is a denoised dataset
180+
if result.is_denoised:
181+
# Proceed with overwrite if current result is different from existing entry
182+
output_file = list(result.output_files.values())[0]
183+
if str(output_file.parent / "*.tiff") != clem_img_site.image_path:
184+
clem_img_site = _register(clem_img_site, result)
104185

105-
# Add metadata for this series
106-
output_file = list(result.output_files.values())[0]
107-
clem_img_site.image_path = str(output_file.parent / "*tiff")
108-
clem_img_site.data_type = "atlas" if _is_clem_atlas(result) else "grid_square"
109-
clem_img_site.number_of_members = result.number_of_members
110-
for col_name, value in _get_color_flags(result.output_files.keys()).items():
111-
setattr(clem_img_site, col_name, value)
112-
clem_img_site.collection_mode = _determine_collection_mode(
113-
result.output_files.keys()
114-
)
115-
clem_img_site.image_pixels_x = result.pixels_x
116-
clem_img_site.image_pixels_y = result.pixels_y
117-
clem_img_site.image_pixel_size = result.pixel_size
118-
clem_img_site.units = result.units
119-
clem_img_site.x0 = result.extent[0]
120-
clem_img_site.x1 = result.extent[1]
121-
clem_img_site.y0 = result.extent[2]
122-
clem_img_site.y1 = result.extent[3]
123-
# Register thumbnails if they are present
124-
if result.thumbnails and result.thumbnail_size:
125-
thumbnail = list(result.thumbnails.values())[0]
126-
clem_img_site.thumbnail_path = str(thumbnail.parent / "*.png")
127-
128-
thumbnail_height, thumbnail_width = result.thumbnail_size
129-
scaling_factor = min(
130-
thumbnail_height / result.pixels_y, thumbnail_width / result.pixels_x
131-
)
132-
clem_img_site.thumbnail_pixel_size = result.pixel_size / scaling_factor
133-
clem_img_site.thumbnail_pixels_x = (
134-
int(round(result.pixels_x * scaling_factor)) or 1
135-
)
136-
clem_img_site.thumbnail_pixels_y = (
137-
int(round(result.pixels_y * scaling_factor)) or 1
138-
)
139186
murfey_db.add(clem_img_site)
140187
murfey_db.commit()
141188
murfey_db.close()
@@ -183,12 +230,12 @@ def _register_dcg_and_atlas(
183230
visit_number = visit_name.split("-")[-1]
184231

185232
# Generate name/tag for data collection group based on site name
186-
dcg_name = result.series_name.split("--")[0]
187-
if result.series_name.split("--")[1].isdigit():
188-
dcg_name += f"--{result.series_name.split('--')[1]}"
233+
dcg_name = result.site_name.split("--")[0]
234+
if result.site_name.split("--")[1].isdigit():
235+
dcg_name += f"--{result.site_name.split('--')[1]}"
189236

190237
# Determine values for atlas
191-
if _is_clem_atlas(result):
238+
if result.is_atlas:
192239
output_file = list(result.output_files.values())[0]
193240
# Register the thumbnail entries if they are provided
194241
if result.thumbnails and result.thumbnail_size is not None:
@@ -227,7 +274,7 @@ def _register_dcg_and_atlas(
227274
dcg_entry = dcg_search[0]
228275
# Update atlas if registering atlas dataset
229276
# and data collection group already exists
230-
if _is_clem_atlas(result):
277+
if result.is_atlas:
231278
atlas_message = {
232279
"session_id": session_id,
233280
"dcgid": dcg_entry.id,
@@ -287,11 +334,11 @@ def _register_dcg_and_atlas(
287334
clem_img_site := murfey_db.exec(
288335
select(MurfeyDB.ImagingSite)
289336
.where(MurfeyDB.ImagingSite.session_id == session_id)
290-
.where(MurfeyDB.ImagingSite.site_name == result.series_name)
337+
.where(MurfeyDB.ImagingSite.site_name == result.site_name)
291338
).one_or_none()
292339
):
293340
clem_img_site = MurfeyDB.ImagingSite(
294-
session_id=session_id, site_name=result.series_name
341+
session_id=session_id, site_name=result.site_name
295342
)
296343

297344
clem_img_site.dcg_id = dcg_entry.id
@@ -311,9 +358,9 @@ def _register_grid_square(
311358
logger.error("Unable to find transport manager")
312359
return
313360
# Load all entries for the current data collection group
314-
dcg_name = result.series_name.split("--")[0]
315-
if result.series_name.split("--")[1].isdigit():
316-
dcg_name += f"--{result.series_name.split('--')[1]}"
361+
dcg_name = result.site_name.split("--")[0]
362+
if result.site_name.split("--")[1].isdigit():
363+
dcg_name += f"--{result.site_name.split('--')[1]}"
317364

318365
# Check if an atlas has been registered
319366
if not (

tests/workflows/clem/test_register_preprocessing_results.py

Lines changed: 35 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ def generate_preprocessing_messages(
4747
# Construct all the datasets to be tested
4848
datasets: list[tuple[Path, bool, bool, tuple[int, int], float, list[float]]] = [
4949
(
50-
grid_dir / "Overview_1" / "Image_1",
50+
grid_dir / "Overview 1" / "Image 1",
5151
False,
5252
True,
5353
(2400, 2400),
@@ -59,22 +59,38 @@ def generate_preprocessing_messages(
5959
datasets.extend(
6060
[
6161
(
62-
grid_dir / "TileScan_1" / f"Position_{n}",
62+
grid_dir / "TileScan 1" / f"Position {n + 1}",
6363
True,
6464
False,
6565
(2048, 2048),
6666
1.6e-7,
6767
[0.003, 0.00332768, 0.003, 0.00332768],
6868
)
69-
for n in range(5)
69+
for n in range(3)
70+
]
71+
)
72+
datasets.extend(
73+
[
74+
(
75+
grid_dir / "TileScan 1" / f"Position {n + 1}_Lng_LVCC",
76+
True,
77+
False,
78+
(2048, 2048),
79+
1.6e-7,
80+
[0.003, 0.00332768, 0.003, 0.00332768],
81+
)
82+
for n in range(3)
7083
]
7184
)
7285

7386
messages: list[dict[str, Any]] = []
74-
for dataset in datasets:
87+
for series_path, is_stack, is_montage, shape, pixel_size, extent in datasets:
7588
# Unpack items from list of dataset parameters
76-
series_path = dataset[0]
77-
series_name = str(series_path.relative_to(processed_dir)).replace("/", "--")
89+
series_name = (
90+
str(series_path.relative_to(processed_dir))
91+
.replace("/", "--")
92+
.replace(" ", "_")
93+
)
7894
metadata = series_path / "metadata" / f"{series_path.stem}.xml"
7995
metadata.parent.mkdir(parents=True, exist_ok=True)
8096
metadata.touch(exist_ok=True)
@@ -89,11 +105,6 @@ def generate_preprocessing_messages(
89105
thumbnail.parent.mkdir(parents=True)
90106
thumbnail.touch(exist_ok=True)
91107
thumbnail_size = (512, 512)
92-
is_stack = dataset[1]
93-
is_montage = dataset[2]
94-
shape = dataset[3]
95-
pixel_size = dataset[4]
96-
extent = dataset[5]
97108

98109
message = {
99110
"session_id": session_id,
@@ -373,21 +384,23 @@ def test_run_with_db(
373384
else:
374385
assert mock_align_and_merge_call.call_count == len(preprocessing_messages) * 3
375386

376-
# Both databases should have entries for data collection group, and grid squares
377-
# ISPyB database should additionally have an atlas entry
387+
# Murfey's DataCollectionGroup should have an entry
378388
murfey_dcg_search = murfey_db_session.exec(
379389
sm_select(MurfeyDB.DataCollectionGroup).where(
380390
MurfeyDB.DataCollectionGroup.session_id == murfey_session.id
381391
)
382392
).all()
383393
assert len(murfey_dcg_search) == 1
394+
395+
# GridSquare entries should be half the initial number of entries due to overwrites
384396
murfey_gs_search = murfey_db_session.exec(
385397
sm_select(MurfeyDB.GridSquare).where(
386398
MurfeyDB.GridSquare.session_id == murfey_session.id
387399
)
388400
).all()
389-
assert len(murfey_gs_search) == len(preprocessing_messages) - 1
401+
assert len(murfey_gs_search) == (len(preprocessing_messages) - 1) // 2
390402

403+
# ISPyB's DataCollectionGroup should have an entry
391404
murfey_dcg = murfey_dcg_search[0]
392405
ispyb_dcg_search = (
393406
ispyb_db_session.execute(
@@ -400,6 +413,7 @@ def test_run_with_db(
400413
)
401414
assert len(ispyb_dcg_search) == 1
402415

416+
# Atlas should have an entry
403417
ispyb_dcg = ispyb_dcg_search[0]
404418
ispyb_atlas_search = (
405419
ispyb_db_session.execute(
@@ -419,12 +433,13 @@ def test_run_with_db(
419433
}
420434
collection_mode = _determine_collection_mode(colors)
421435

436+
# Atlas color flags and collection mode should be set correctly
422437
ispyb_atlas = ispyb_atlas_search[0]
423-
# Check that the Atlas color flags and collection mode are set correctly
424438
for flag, value in color_flags.items():
425439
assert getattr(ispyb_atlas, flag) == value
426440
assert ispyb_atlas.mode == collection_mode
427441

442+
# ISPyB's GridSquare should have half the number of initial entries
428443
ispyb_gs_search = (
429444
ispyb_db_session.execute(
430445
sa_select(ISPyBDB.GridSquare).where(
@@ -434,9 +449,12 @@ def test_run_with_db(
434449
.scalars()
435450
.all()
436451
)
437-
assert len(ispyb_gs_search) == len(preprocessing_messages) - 1
452+
assert len(ispyb_gs_search) == (len(preprocessing_messages) - 1) // 2
438453
for gs in ispyb_gs_search:
439-
# Check that the Atlas color flags and collection mode are set correctly
454+
# Check that all entries point to the denoised images ("_Lng_LVCC")
455+
assert gs.gridSquareImage is not None and "_Lng_LVCC" in gs.gridSquareImage
456+
457+
# Check that the GridSquare color flags and collection mode are set correctly
440458
for flag, value in color_flags.items():
441459
assert getattr(gs, flag) == value
442460
assert gs.mode == collection_mode

0 commit comments

Comments
 (0)