From b6c84c4094a7a2742e908ad13b7c9fa401ea6d9a Mon Sep 17 00:00:00 2001 From: Cail Daley Date: Fri, 19 Jun 2026 23:17:55 +0200 Subject: [PATCH 01/30] ngmix: restore original-PSF fit; split the two PSF families by name (#749) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Metacalibration has two PSFs and ngmix was exporting only one of them under both column families, mislabeled. Fix the substance and the names together. Substance — the original image PSF is fit again. average_original_psf fits each epoch's pristine gal_obs.psf (the psfex/mccd model stamp) with the existing psf_runner, BEFORE boot.go reconvolves it, weight-averaged by the galaxy obs.weight.sum() — the same scheme average_multiepoch_psf uses for the reconvolution kernel. A shared _average_psf_fits core backs both averagers; do_ngmix_metacal now returns (resdict, psf_res, psfo_res), with psfo_res computed before boot.go so it sees the un-reconvolved PSF. Names — the two families are now self-naming and never alias: *_psf_orig original image PSF (average_original_psf) *_psf_reconv reconvolution kernel (average_multiepoch_psf) Each carries g1/g2 (+_err), T (+_err) and r50 (+_err). The back-fill and compile_results emit loop copy these keys straight through, so the column name is the value's provenance. Galaxy ellipticity is the scalar g1/g2 pair. The previous code emitted one reconvolution-kernel result under both g*_psfo_ngmix and g1_psf/g2_psf; that double-emit is gone. Docstrings (compile_results, the two averagers, do_ngmix_metacal) now name both PSFs explicitly; the old "psfo is the original image psf" note — false since the original fit was dropped — is replaced by an accurate one. Co-Authored-By: Claude Opus 4.8 --- src/shapepipe/modules/ngmix_package/ngmix.py | 278 +++++++++++++------ 1 file changed, 199 insertions(+), 79 deletions(-) diff --git a/src/shapepipe/modules/ngmix_package/ngmix.py b/src/shapepipe/modules/ngmix_package/ngmix.py index 386c25ef..ced2d392 100644 --- a/src/shapepipe/modules/ngmix_package/ngmix.py +++ b/src/shapepipe/modules/ngmix_package/ngmix.py @@ -355,8 +355,19 @@ def compile_results(self, results): Returns ------- dict - Compiled results ready to be written to a file - note: psfo is the original image psf from psfex or mccd + Compiled results ready to be written to a file. + + Two PSF column families — each carrying ellipticity *and* size, + for *different* PSFs (shapepipe#749): + + * ``*_psf_orig`` (``g1``/``g2`` + ``*_err``, ``T``, ``r50``) — the + ORIGINAL image PSF (the psfex/mccd model stamp), fit before + metacal reconvolution by :func:`average_original_psf`. This is + the PSF whose true ellipticity and size enter object-wise + PSF-leakage diagnostics. + * ``*_psf_reconv`` — the metacal RECONVOLUTION kernel (round and + enlarged by construction, used for the Tgal/Tpsf size cut and a + g~0 sanity check), fit by :func:`average_multiepoch_psf`. Raises ------ @@ -378,31 +389,40 @@ def compile_results(self, results): 'n_epoch_model', 'mcal_types_fail', 'nfev_fit', - 'g1_psfo_ngmix', - 'g2_psfo_ngmix', - 'g1_err_psfo_ngmix', - 'g2_err_psfo_ngmix', + # galaxy 'g1', 'g1_err', 'g2', 'g2_err', 'T', 'T_err', - 'Tpsf', - 'Tpsf_err', 'r50', 'r50_err', - 'r50psf', - 'r50psf_err', - 'g1_psf', - 'g2_psf', 'flux', 'flux_err', 's2n', 'mag', 'mag_err', 'flags', - 'mcal_flags' + 'mcal_flags', + # original image PSF (psfex/mccd), fit by average_original_psf + 'g1_psf_orig', + 'g2_psf_orig', + 'g1_err_psf_orig', + 'g2_err_psf_orig', + 'T_psf_orig', + 'T_err_psf_orig', + 'r50_psf_orig', + 'r50_err_psf_orig', + # metacal reconvolution kernel, fit by average_multiepoch_psf + 'g1_psf_reconv', + 'g2_psf_reconv', + 'g1_err_psf_reconv', + 'g2_err_psf_reconv', + 'T_psf_reconv', + 'T_err_psf_reconv', + 'r50_psf_reconv', + 'r50_err_psf_reconv', ] output_dict = {k: {kk: [] for kk in names2} for k in names} for idx in range(len(results)): @@ -439,24 +459,23 @@ def compile_results(self, results): output_dict[name]["nfev_fit"].append( fit.get("nfev", np.nan) ) - output_dict[name]["g1_psfo_ngmix"].append( - results[idx]["g_PSFo"][0] - ) - output_dict[name]["g2_psfo_ngmix"].append( - results[idx]["g_PSFo"][1] - ) - output_dict[name]["g1_err_psfo_ngmix"].append( - results[idx]["g_err_PSFo"][0] - ) - output_dict[name]["g2_err_psfo_ngmix"].append( - results[idx]["g_err_PSFo"][1] - ) - output_dict[name]["T"].append(T_gal) - output_dict[name]["T_err"].append(T_gal_err) - output_dict[name]["Tpsf"].append(results[idx]["T_PSFo"]) - output_dict[name]["Tpsf_err"].append(results[idx]["T_err_PSFo"]) - output_dict[name]["g1_psf"].append(results[idx]["g_PSFo"][0]) - output_dict[name]["g2_psf"].append(results[idx]["g_PSFo"][1]) + # The two PSF families are object-level (one value per + # object, not per shear type) and self-named: every key + # below is copied straight through from compile-loop input to + # output, so the column name *is* the value's provenance. + # *_psf_orig = original image PSF (average_original_psf) + # *_psf_reconv = reconvolution kernel (average_multiepoch_psf) + for psf_key in ( + 'g1_psf_orig', 'g2_psf_orig', + 'g1_err_psf_orig', 'g2_err_psf_orig', + 'T_psf_orig', 'T_err_psf_orig', + 'r50_psf_orig', 'r50_err_psf_orig', + 'g1_psf_reconv', 'g2_psf_reconv', + 'g1_err_psf_reconv', 'g2_err_psf_reconv', + 'T_psf_reconv', 'T_err_psf_reconv', + 'r50_psf_reconv', 'r50_err_psf_reconv', + ): + output_dict[name][psf_key].append(results[idx][psf_key]) # Galaxy half-light radius from the fitted area T = 2 sigma^2: # r50 = sqrt(ln 2 * T), with d r50 / d T = r50 / (2 T) @@ -465,16 +484,14 @@ def compile_results(self, results): r50_gal_err = r50_gal * T_gal_err / (2 * T_gal) else: r50_gal = r50_gal_err = np.nan - output_dict[name]['r50'].append(r50_gal) - output_dict[name]['r50_err'].append(r50_gal_err) - output_dict[name]['r50psf'].append(results[idx]["r50_PSFo"]) - output_dict[name]['r50psf_err'].append( - results[idx]["r50_err_PSFo"] - ) output_dict[name]["g1"].append(g[0]) output_dict[name]["g2"].append(g[1]) output_dict[name]["g1_err"].append(np.sqrt(g_cov[0, 0])) output_dict[name]["g2_err"].append(np.sqrt(g_cov[1, 1])) + output_dict[name]["T"].append(T_gal) + output_dict[name]["T_err"].append(T_gal_err) + output_dict[name]['r50'].append(r50_gal) + output_dict[name]['r50_err'].append(r50_gal_err) output_dict[name]["flux"].append(flux) output_dict[name]["flux_err"].append(flux_err) output_dict[name]["mag"].append(mag) @@ -662,7 +679,7 @@ def process(self): if tile_cat.flux is not None else 1.0 ) - res, psf_res = do_ngmix_metacal( + res, psf_res, psfo_res = do_ngmix_metacal( stamp, prior, flux_guess, @@ -689,17 +706,38 @@ def process(self): if res.get(k, {}).get('flags', 0) != 0 ) res['mcal_flags'] = get_mcal_flags(res) + # Two distinct PSF families (shapepipe#749), each with its own + # ellipticity AND size, written under self-naming res-keys: + # reconvolution kernel (psf_res) -> *_psf_reconv + # original image PSF (psfo_res) -> *_psf_orig # PSF half-light radius r50 = sqrt(2 ln 2) * sigma with - # sigma = sqrt(T / 2); error from d sigma / d T = 1 / (4 sigma) - sigma_psfo = np.sqrt(max(psf_res['T_psf'], 0) / 2) - res['g_PSFo'] = psf_res['g_psf'] - res['g_err_PSFo'] = psf_res['g_psf_err'] - res['T_PSFo'] = psf_res['T_psf'] - res['T_err_PSFo'] = psf_res['T_psf_err'] - res['r50_PSFo'] = SIGMA_TO_R50 * sigma_psfo - res['r50_err_PSFo'] = ( - SIGMA_TO_R50 * psf_res['T_psf_err'] / (4 * sigma_psfo) - if sigma_psfo > 0 else np.nan + # sigma = sqrt(T / 2); error from d sigma / d T = 1 / (4 sigma). + def _psf_r50(T_psf, T_psf_err): + sigma = np.sqrt(max(T_psf, 0) / 2) + r50 = SIGMA_TO_R50 * sigma + r50_err = ( + SIGMA_TO_R50 * T_psf_err / (4 * sigma) + if sigma > 0 else np.nan + ) + return r50, r50_err + + res['g1_psf_reconv'] = psf_res['g_psf'][0] + res['g2_psf_reconv'] = psf_res['g_psf'][1] + res['g1_err_psf_reconv'] = psf_res['g_psf_err'][0] + res['g2_err_psf_reconv'] = psf_res['g_psf_err'][1] + res['T_psf_reconv'] = psf_res['T_psf'] + res['T_err_psf_reconv'] = psf_res['T_psf_err'] + res['r50_psf_reconv'], res['r50_err_psf_reconv'] = _psf_r50( + psf_res['T_psf'], psf_res['T_psf_err'] + ) + res['g1_psf_orig'] = psfo_res['g_psf'][0] + res['g2_psf_orig'] = psfo_res['g_psf'][1] + res['g1_err_psf_orig'] = psfo_res['g_psf_err'][0] + res['g2_err_psf_orig'] = psfo_res['g_psf_err'][1] + res['T_psf_orig'] = psfo_res['T_psf'] + res['T_err_psf_orig'] = psfo_res['T_psf_err'] + res['r50_psf_orig'], res['r50_err_psf_orig'] = _psf_r50( + psfo_res['T_psf'], psfo_res['T_psf_err'] ) final_res.append(res) n_fitted += 1 @@ -1122,55 +1160,126 @@ def make_ngmix_observation( noise=noise_img, ) -def average_multiepoch_psf(obsdict): - """ averages psf information over multiple epochs - we may need to do this for original psf as well +def _average_psf_fits(results_and_weights): + """Weight-average a set of per-epoch ngmix PSF-fit results. + + Shared core for both PSF families this module exports: the metacal + reconvolution kernel (:func:`average_multiepoch_psf`) and the original + image PSF (:func:`average_original_psf`). Epochs whose PSF fit failed + (``flags != 0``, carrying only flags/pars and no T/g) are dropped. + Parameters ---------- - obsdict : dict - Observation dict returned by MetacalBootstrapper.go(). + results_and_weights : iterable of (dict, float) + Per-epoch ``(result, weight)`` pairs, where ``result`` is an ngmix + Fitter result with keys ``flags``, ``g``, ``g_err``, ``T``, + ``T_err`` and ``weight`` is the epoch's averaging weight. Returns ------- dict - Keys: 'g_psf', 'g_psf_err', 'T_psf', 'T_psf_err' (weighted - averages over the epochs whose PSF fit succeeded) and 'n_epoch' - (the number of those surviving epochs). + Keys ``g_psf``, ``g_psf_err``, ``T_psf``, ``T_psf_err`` (weighted + averages over the surviving epochs) and ``n_epoch`` (their count). """ - psf_dict = {} - nepoch = len(obsdict['noshear']) n_epoch_used = 0 wsum = 0 g_psf_sum = np.array([0., 0.]) g_psf_err_sum = np.array([0., 0.]) T_psf_sum = 0 T_psf_err_sum = 0 - for n_e in np.arange(nepoch): - result = obsdict['noshear'][n_e].psf.meta['result'] - # ignore_failed_psf=True drops failed-PSF epochs from the galaxy - # fit but keeps them in obsdict; their result carries only - # flags/pars (no T/g), so skip them here too. + for result, weight in results_and_weights: if result['flags'] != 0: continue - ne_wsum = obsdict['noshear'][n_e].weight.sum() - n_epoch_used += 1 - wsum += ne_wsum - g_psf_sum += result['g'] * ne_wsum - g_psf_err_sum += result['g_err'] * ne_wsum - T_psf_sum += result['T'] * ne_wsum - T_psf_err_sum += result['T_err'] * ne_wsum + wsum += weight + g_psf_sum += result['g'] * weight + g_psf_err_sum += result['g_err'] * weight + T_psf_sum += result['T'] * weight + T_psf_err_sum += result['T_err'] * weight if wsum == 0: raise ZeroDivisionError('Sum of weights = 0, division by zero') - psf_dict['g_psf'] = g_psf_sum / wsum - psf_dict['g_psf_err'] = g_psf_err_sum / wsum - psf_dict['T_psf'] = T_psf_sum / wsum - psf_dict['T_psf_err'] = T_psf_err_sum / wsum - psf_dict['n_epoch'] = n_epoch_used + return { + 'g_psf': g_psf_sum / wsum, + 'g_psf_err': g_psf_err_sum / wsum, + 'T_psf': T_psf_sum / wsum, + 'T_psf_err': T_psf_err_sum / wsum, + 'n_epoch': n_epoch_used, + } + + +def average_multiepoch_psf(obsdict): + """Average the metacal *reconvolution* PSF over epochs. + + The PSF carried by each metacal observation (``obs.psf``) is the + Gaussian reconvolution kernel that metacal fit and convolved back in — + round by construction and slightly enlarged relative to the original + PSF. This is the kernel defining the sheared galaxy images, exported to + the reconvolution-kernel columns + (``NGMIX_G1/G2_PSF_RECONV``, ``NGMIX_T_PSF_RECONV``). The independent fit + of the *original* image PSF is :func:`average_original_psf`. + + Parameters + ---------- + obsdict : dict + Observation dict returned by MetacalBootstrapper.go(). - return psf_dict + Returns + ------- + dict + Keys: 'g_psf', 'g_psf_err', 'T_psf', 'T_psf_err' (weighted + averages over the epochs whose PSF fit succeeded) and 'n_epoch' + (the number of those surviving epochs). + """ + # ignore_failed_psf=True drops failed-PSF epochs from the galaxy fit but + # keeps them in obsdict; _average_psf_fits skips them on flags != 0. + return _average_psf_fits( + (obs.psf.meta['result'], obs.weight.sum()) + for obs in obsdict['noshear'] + ) + + +def average_original_psf(gal_obs_list, psf_runner): + """Fit and average the *original* image PSF over epochs. + + The original PSF is the psfex/mccd model stamp handed to ngmix + (``gal_obs.psf``), fit here with the same ``psf_runner`` machinery — and + so the same ``psf_fit_prior`` — used inside metacal, but on the PSF + *before* metacal's reconvolution. Exported to the original-PSF columns + (``NGMIX_G1/G2_PSF_ORIG``, ``NGMIX_T_PSF_ORIG``). Distinct from the + reconvolution-kernel fit (:func:`average_multiepoch_psf`): the original + PSF retains its true ellipticity and size, whereas the reconvolution + kernel is round and enlarged by construction. This is the PSF whose true + shape and size enter object-wise PSF-leakage diagnostics. + + Epochs are weighted by the *galaxy* inverse-variance weight + (``gal_obs.weight.sum()``), matching :func:`average_multiepoch_psf`, so + the two PSF families share an averaging scheme and differ only in which + PSF is fit. + + Parameters + ---------- + gal_obs_list : ngmix.observation.ObsList + Per-epoch galaxy observations; each ``gal_obs.psf`` is the original + (pre-metacal) PSF observation to fit, with no further ``.psf`` of + its own so the runner fits the stamp itself. + psf_runner : ngmix.runners.PSFRunner + The module's PSF runner, carrying the resolved ``psf_fit_prior``. + + Returns + ------- + dict + Same keys as :func:`average_multiepoch_psf`. + """ + def fit(gal_obs): + # PSFRunner.go fits gal_obs.psf (the original PSF stamp) and sets + # gal_obs.psf.meta['result']; failed fits keep flags != 0 and are + # dropped by _average_psf_fits. + psf_runner.go(gal_obs) + return gal_obs.psf.meta['result'], gal_obs.weight.sum() + + return _average_psf_fits(fit(gal_obs) for gal_obs in gal_obs_list) def make_runners(prior, flux_guess, rng): @@ -1233,8 +1342,12 @@ def do_ngmix_metacal(stamp, prior, flux_guess, rng, centroid_source="hsm"): Returns ------- tuple - (resdict, psf_res) where resdict is the MetacalBootstrapper result - dict and psf_res is the averaged PSF dict from average_multiepoch_psf. + ``(resdict, psf_res, psfo_res)`` where ``resdict`` is the + MetacalBootstrapper result dict, ``psf_res`` is the averaged metacal + *reconvolution*-kernel PSF dict (:func:`average_multiepoch_psf`), and + ``psfo_res`` is the averaged *original* image-PSF dict + (:func:`average_original_psf`). The two PSF dicts share keys but + describe different PSFs. """ n_epoch = len(stamp.gals) if n_epoch == 0: @@ -1260,6 +1373,13 @@ def do_ngmix_metacal(stamp, prior, flux_guess, rng, centroid_source="hsm"): runner, psf_runner = make_runners(prior, flux_guess, rng) + # Fit the ORIGINAL (psfex/mccd) PSF before metacal reconvolves it, using + # the same psf_runner (so the same psf_fit_prior and centroid). This is + # the PSF_ORIG family; metacal below produces the PSF_RECONV family. Run + # first so the original PSF is fit on its own stamp, distinct from the + # round, enlarged kernel metacal convolves back in. + psfo_res = average_original_psf(gal_obs_list, psf_runner) + metacal_pars = { 'types': ['noshear', '1p', '1m', '2p', '2m'], 'step': 0.01, @@ -1277,4 +1397,4 @@ def do_ngmix_metacal(stamp, prior, flux_guess, rng, centroid_source="hsm"): ) resdict, obsdict = boot.go(gal_obs_list) psf_res = average_multiepoch_psf(obsdict) - return resdict, psf_res + return resdict, psf_res, psfo_res From 7e7887b22c93f1a2e1cf499a51159c19f287786a Mon Sep 17 00:00:00 2001 From: Cail Daley Date: Fri, 19 Jun 2026 23:18:08 +0200 Subject: [PATCH 02/30] make_cat: rename ngmix columns to the ESTIMATOR_COMPONENT_OBJECT grammar (#749) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adopt NGMIX[m]_[_ERR]__, with OBJECT one of GAL, PSF_ORIG (original psfex/mccd image PSF) or PSF_RECONV (metacal reconvolution kernel), reading the self-named keys ngmix now writes: galaxy ELL (2-vec) -> scalar NGMIX_G1/G2_GAL, NGMIX_G1/G2_ERR_GAL galaxy NGMIX_T_ -> NGMIX_T_GAL (+ _ERR), SNR/FLUX/MAG/FLAGS_GAL NGMIX_ELL_PSFo (mis- -> genuine NGMIX_G1/G2_PSF_ORIG (from the restored labeled reconv) original fit) + NGMIX_G1/G2_PSF_RECONV NGMIX_T_PSFo, NGMIX_ -> NGMIX_T_PSF_ORIG and NGMIX_T_PSF_RECONV, each its Tpsf own family's size (+ _ERR on both) The reconvolution-kernel ellipticity gains its _ERR columns too, for symmetry with PSF_ORIG. The d6531b1b dedup (NGMIX_T_PSFo <- "Tpsf") is reverted: PSF_ORIG size now comes from the original fit ("T_psf_orig"), not the reconvolution-kernel "Tpsf" — they are different PSFs again. N_EPOCH / MCAL_FLAGS / MCAL_TYPES_FAIL keep their names (no PSF meaning). Co-Authored-By: Claude Opus 4.8 --- .../modules/make_cat_package/make_cat.py | 196 +++++++++++++----- 1 file changed, 142 insertions(+), 54 deletions(-) diff --git a/src/shapepipe/modules/make_cat_package/make_cat.py b/src/shapepipe/modules/make_cat_package/make_cat.py index 79b0571a..235fd14f 100644 --- a/src/shapepipe/modules/make_cat_package/make_cat.py +++ b/src/shapepipe/modules/make_cat_package/make_cat.py @@ -329,10 +329,22 @@ def _save_ngmix_data(self, ngmix_cat_path, moments=False): Save the NGMIX catalogue into the final one. + Column grammar: ``NGMIX[m]_[_ERR]__``, with + ``OBJECT`` one of ``GAL`` (galaxy), ``PSF_ORIG`` (the original + psfex/mccd image PSF, fit by ``ngmix.average_original_psf`` — its true + ellipticity and size, feeding object-wise PSF leakage), or + ``PSF_RECONV`` (the metacal reconvolution kernel, round and enlarged + by construction, fit by ``ngmix.average_multiepoch_psf`` — the Tgal / + Tpsf size cut and a g~0 sanity check). ``PSF_ORIG`` and ``PSF_RECONV`` + are independent fits of different PSFs, no longer the single aliased + value of the pre-fix code (shapepipe#749). + Parameters ---------- ngmix_cat_path : str Path to NGMIX catalogue + moments : bool, optional + If True, write the parallel ``NGMIXm_*`` (moments-branch) columns. """ self._key_ends = ["1M", "1P", "2M", "2P", "NOSHEAR"] @@ -374,29 +386,47 @@ def _save_ngmix_data(self, ngmix_cat_path, moments=False): prefix = f"NGMIX{m}" + # Galaxy (GAL), original image PSF (PSF_ORIG) and metacal + # reconvolution kernel (PSF_RECONV). G1/G2 are scalar reduced-shear + # components, not a 2-vector. Sentinels: sizes/fluxes/mags/flags 0, + # *_ERR fluxes/mags -1, ellipticities -10, *_ERR sizes 1e30. for key_str in ( - f"{prefix}_T_", - f"{prefix}_Tpsf_", - f"{prefix}_SNR_", - f"{prefix}_FLUX_", - f"{prefix}_MAG_", - f"{prefix}_FLAGS_", - f"{prefix}_T_PSFo_", + f"{prefix}_T_GAL_", + f"{prefix}_SNR_GAL_", + f"{prefix}_FLUX_GAL_", + f"{prefix}_MAG_GAL_", + f"{prefix}_FLAGS_GAL_", + f"{prefix}_T_PSF_ORIG_", + f"{prefix}_T_PSF_RECONV_", ): self._update_dict(key_str, np.zeros(n_obj)) - for key_str in (f"NGMIX{m}_FLUX_ERR_", f"NGMIX{m}_MAG_ERR_"): - self._update_dict(key_str, np.ones(len(self._obj_id)) * -1) for key_str in ( - f"NGMIX{m}_ELL_", - f"NGMIX{m}_ELL_ERR_", - f"NGMIX{m}_ELL_PSFo_", + f"{prefix}_FLUX_ERR_GAL_", + f"{prefix}_MAG_ERR_GAL_", ): - self._update_dict(key_str, np.ones((len(self._obj_id), 2)) * -10.0) - self._update_dict( - f"NGMIX{m}_T_ERR_", - np.ones(len(self._obj_id)) * 1e30, - ) - self._add2dict(f"NGMIX{m}_MCAL_FLAGS", np.zeros(len(self._obj_id))) + self._update_dict(key_str, np.ones(n_obj) * -1) + for key_str in ( + f"{prefix}_G1_GAL_", + f"{prefix}_G2_GAL_", + f"{prefix}_G1_ERR_GAL_", + f"{prefix}_G2_ERR_GAL_", + f"{prefix}_G1_PSF_ORIG_", + f"{prefix}_G2_PSF_ORIG_", + f"{prefix}_G1_ERR_PSF_ORIG_", + f"{prefix}_G2_ERR_PSF_ORIG_", + f"{prefix}_G1_PSF_RECONV_", + f"{prefix}_G2_PSF_RECONV_", + f"{prefix}_G1_ERR_PSF_RECONV_", + f"{prefix}_G2_ERR_PSF_RECONV_", + ): + self._update_dict(key_str, np.ones(n_obj) * -10.0) + for key_str in ( + f"{prefix}_T_ERR_GAL_", + f"{prefix}_T_ERR_PSF_ORIG_", + f"{prefix}_T_ERR_PSF_RECONV_", + ): + self._update_dict(key_str, np.ones(n_obj) * 1e30) + self._add2dict(f"{prefix}_MCAL_FLAGS", np.zeros(n_obj)) for idx, id_tmp in enumerate(self._obj_id): ind = np.where(id_tmp == ngmix_id)[0] @@ -406,46 +436,104 @@ def _save_ngmix_data(self, ngmix_cat_path, moments=False): ncf_data = ngmix_cat_file.get_data(key) - g = (ncf_data["g1"][ind[0]], ncf_data["g2"][ind[0]]) - g_err = ( - ncf_data["g1_err"][ind[0]], - ncf_data["g2_err"][ind[0]], + # Galaxy shape, size and photometry. + self._add2dict( + f"{prefix}_G1_GAL_{key}", ncf_data["g1"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_G2_GAL_{key}", ncf_data["g2"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_G1_ERR_GAL_{key}", + ncf_data["g1_err"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_G2_ERR_GAL_{key}", + ncf_data["g2_err"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_T_GAL_{key}", ncf_data["T"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_T_ERR_GAL_{key}", + ncf_data["T_err"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_SNR_GAL_{key}", ncf_data["s2n"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_FLUX_GAL_{key}", + ncf_data["flux"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_FLUX_ERR_GAL_{key}", + ncf_data["flux_err"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_MAG_GAL_{key}", ncf_data["mag"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_MAG_ERR_GAL_{key}", + ncf_data["mag_err"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_FLAGS_GAL_{key}", + ncf_data["flags"][ind[0]], idx ) - self._add2dict(f"NGMIX{m}_ELL_{key}", g, idx) - self._add2dict(f"NGMIX{m}_ELL_ERR_{key}", g_err, idx) - - t = ncf_data["T"][ind[0]] - t_err = ncf_data["T_err"][ind[0]] - tpsf = ncf_data["Tpsf"][ind[0]] - self._add2dict(f"NGMIX{m}_T_{key}", t, idx) - self._add2dict(f"NGMIX{m}_T_ERR_{key}", t_err, idx) - self._add2dict(f"NGMIX{m}_Tpsf_{key}", tpsf, idx) - - s2n = ncf_data["s2n"][ind[0]] - self._add2dict(f"NGMIX{m}_SNR_{key}", s2n, idx) - - flux = ncf_data["flux"][ind[0]] - flux_err = ncf_data["flux_err"][ind[0]] - self._add2dict(f"NGMIX{m}_FLUX_{key}", flux, idx) - self._add2dict(f"NGMIX{m}_FLUX_ERR_{key}", flux_err, idx) - - mag = ncf_data["mag"][ind[0]] - mag_err = ncf_data["mag_err"][ind[0]] - self._add2dict(f"NGMIX{m}_MAG_{key}", mag, idx) - self._add2dict(f"NGMIX{m}_MAG_ERR_{key}", mag_err, idx) - - flags = ncf_data["flags"][ind[0]] - self._add2dict(f"NGMIX{m}_FLAGS_{key}", flags, idx) - - g_psf = ( - ncf_data["g1_psfo_ngmix"][ind[0]], - ncf_data["g2_psfo_ngmix"][ind[0]], + # Original image PSF (psfex/mccd) — its genuine, + # un-reconvolved ellipticity and size. + self._add2dict( + f"{prefix}_G1_PSF_ORIG_{key}", + ncf_data["g1_psf_orig"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_G2_PSF_ORIG_{key}", + ncf_data["g2_psf_orig"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_G1_ERR_PSF_ORIG_{key}", + ncf_data["g1_err_psf_orig"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_G2_ERR_PSF_ORIG_{key}", + ncf_data["g2_err_psf_orig"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_T_PSF_ORIG_{key}", + ncf_data["T_psf_orig"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_T_ERR_PSF_ORIG_{key}", + ncf_data["T_err_psf_orig"][ind[0]], idx ) - self._add2dict(f"NGMIX{m}_ELL_PSFo_{key}", g_psf, idx) - t_psfo = ncf_data["Tpsf"][ind[0]] - self._add2dict(f"NGMIX{m}_T_PSFo_{key}", t_psfo, idx) + # Metacal reconvolution kernel — round, enlarged by + # construction; the Tgal/Tpsf cut and g~0 sanity check. + self._add2dict( + f"{prefix}_G1_PSF_RECONV_{key}", + ncf_data["g1_psf_reconv"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_G2_PSF_RECONV_{key}", + ncf_data["g2_psf_reconv"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_G1_ERR_PSF_RECONV_{key}", + ncf_data["g1_err_psf_reconv"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_G2_ERR_PSF_RECONV_{key}", + ncf_data["g2_err_psf_reconv"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_T_PSF_RECONV_{key}", + ncf_data["T_psf_reconv"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_T_ERR_PSF_RECONV_{key}", + ncf_data["T_err_psf_reconv"][ind[0]], idx + ) self._add2dict( f"NGMIX{m}_MCAL_FLAGS", From c3fbebebd648d350916d840b7e5518eda598913b Mon Sep 17 00:00:00 2001 From: Cail Daley Date: Fri, 19 Jun 2026 23:18:20 +0200 Subject: [PATCH 03/30] tests: cover both PSF families and the 3-tuple do_ngmix_metacal (#749) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit _fake_metacal_result now emits both self-named PSF key families with distinct values (original PSF elliptical and smaller; reconvolution kernel rounder and larger), so any regression to the old single-source aliasing fails loudly. New tests: test_compile_results_psf_families_are_unaliased — g1/g2_psf_orig differ from g1/g2_psf_reconv, each tracing its own source test_average_original_psf_fits_each_gal_psf_with_galaxy_weight — the runner fits every epoch's gal_obs.psf, weight-averaged by the galaxy inverse-variance weight, failed-PSF epochs dropped The size-columns test now checks both families' r50/T (orig != reconv). Every do_ngmix_metacal call site unpacks the (res, psf_res, psfo_res) 3-tuple: test_ngmix, test_ngmix_weight_validation, test_mbias, test_star_response. Co-Authored-By: Claude Opus 4.8 --- tests/module/test_ngmix.py | 206 +++++++++++++++---- tests/module/test_ngmix_weight_validation.py | 4 +- tests/science/test_mbias.py | 2 +- 3 files changed, 175 insertions(+), 37 deletions(-) diff --git a/tests/module/test_ngmix.py b/tests/module/test_ngmix.py index dcdcb2d2..e035a0e8 100644 --- a/tests/module/test_ngmix.py +++ b/tests/module/test_ngmix.py @@ -119,7 +119,7 @@ def _metacal_noshear_g(seed): flags, jacobs, ) - res, _ = do_ngmix_metacal(stamp, prior, 1.0, rng) + res, _, _ = do_ngmix_metacal(stamp, prior, 1.0, rng) return np.asarray(res["noshear"]["g"]) @@ -135,15 +135,46 @@ def test_metacal_is_reproducible_with_fixed_seed(): npt.assert_array_equal(_metacal_noshear_g(42), _metacal_noshear_g(42)) +# The two PSF families carry distinct ellipticity AND size (shapepipe#749): +# the original image PSF (-> *_psf_orig) and the metacal reconvolution kernel +# (-> *_psf_reconv) are independent fits, so a regression to the old aliasing +# (both from one source) would surface here. The original PSF is elliptical +# and smaller than the round, enlarged reconvolution kernel. +RECONV_PSF_G = [0.001, 0.002] +RECONV_PSF_G_ERR = [1e-5, 1e-5] +ORIG_PSF_G = [0.004, -0.003] +ORIG_PSF_G_ERR = [2e-5, 3e-5] +# original-PSF size, deliberately != the reconvolution T_psf the builder is +# handed, so the size un-aliasing is exercised end to end. +ORIG_PSF_T = 0.07 +ORIG_PSF_T_ERR = 0.0008 + + +def _psf_r50_pair(T_psf, T_psf_err, sigma_to_r50): + """``(r50, r50_err)`` for a PSF size, mirroring the source conversion.""" + sigma = np.sqrt(max(T_psf, 0) / 2) + return ( + sigma_to_r50 * sigma, + sigma_to_r50 * T_psf_err / (4 * sigma) if sigma > 0 else np.nan, + ) + + def _fake_metacal_result(T, T_err, T_psf, T_psf_err): """Build one minimal metacal result as produced by the process loop. - The PSF size entries mirror the conversion done at the source: - ``r50_PSFo = sqrt(2 ln 2) * sigma`` with ``sigma = sqrt(T_psf / 2)``. + Both PSF families are filled with self-named scalar keys, as the process + loop back-fills them: ``*_psf_orig`` (original image PSF) and + ``*_psf_reconv`` (metacal reconvolution kernel). The reconvolution-kernel + size is the builder's ``T_psf`` argument; the original-PSF size is the + fixed ``ORIG_PSF_T``, deliberately different so the un-aliasing is tested. + PSF r50 = ``sqrt(2 ln 2) * sigma`` with ``sigma = sqrt(T / 2)``. """ from shapepipe.modules.ngmix_package.ngmix import SIGMA_TO_R50 - sigma_psf = np.sqrt(max(T_psf, 0) / 2) + r50_reconv, r50_err_reconv = _psf_r50_pair(T_psf, T_psf_err, SIGMA_TO_R50) + r50_orig, r50_err_orig = _psf_r50_pair( + ORIG_PSF_T, ORIG_PSF_T_ERR, SIGMA_TO_R50 + ) per_type = { "nfev": 1, "g": [0.01, -0.02], @@ -159,16 +190,24 @@ def _fake_metacal_result(T, T_err, T_psf, T_psf_err): "obj_id": 1, "n_epoch_model": 1, "mcal_types_fail": 0, - "g_PSFo": [0.001, 0.002], - "g_err_PSFo": [1e-5, 1e-5], - "T_PSFo": T_psf, - "T_err_PSFo": T_psf_err, - "r50_PSFo": SIGMA_TO_R50 * sigma_psf, - "r50_err_PSFo": ( - SIGMA_TO_R50 * T_psf_err / (4 * sigma_psf) - if sigma_psf > 0 - else np.nan - ), + # original image PSF (psfex/mccd) family + "g1_psf_orig": ORIG_PSF_G[0], + "g2_psf_orig": ORIG_PSF_G[1], + "g1_err_psf_orig": ORIG_PSF_G_ERR[0], + "g2_err_psf_orig": ORIG_PSF_G_ERR[1], + "T_psf_orig": ORIG_PSF_T, + "T_err_psf_orig": ORIG_PSF_T_ERR, + "r50_psf_orig": r50_orig, + "r50_err_psf_orig": r50_err_orig, + # metacal reconvolution-kernel family + "g1_psf_reconv": RECONV_PSF_G[0], + "g2_psf_reconv": RECONV_PSF_G[1], + "g1_err_psf_reconv": RECONV_PSF_G_ERR[0], + "g2_err_psf_reconv": RECONV_PSF_G_ERR[1], + "T_psf_reconv": T_psf, + "T_err_psf_reconv": T_psf_err, + "r50_psf_reconv": r50_reconv, + "r50_err_psf_reconv": r50_err_reconv, "mcal_flags": 0, } res.update( @@ -178,11 +217,13 @@ def _fake_metacal_result(T, T_err, T_psf, T_psf_err): def test_compile_results_size_columns_are_half_light_radii(): - """Every r50 column is a true half-light radius, on both sides. + """Every r50 column is a true half-light radius, on every side. - Galaxy ``r50 = sqrt(ln 2 * T)`` (not the raw area ``pars[4]``), PSF - ``r50psf = sqrt(2 ln 2) * sigma_psf`` (not bare sigma), and the - redundant ``*_psfo_ngmix`` size duplicates are gone. + Galaxy ``r50 = sqrt(ln 2 * T)`` (not the raw area ``pars[4]``) and both + PSF families ``r50 = sqrt(2 ln 2) * sigma`` (not bare sigma), in the same + convention so galaxy / PSF radii are commensurable. The reconvolution + kernel (``*_psf_reconv``) and the original image PSF (``*_psf_orig``) are + independent fits with their own, different sizes (shapepipe#749). """ from shapepipe.modules.ngmix_package.ngmix import Ngmix, SIGMA_TO_R50 @@ -200,31 +241,66 @@ def test_compile_results_size_columns_are_half_light_radii(): npt.assert_allclose(noshear["r50"], [r50_expected]) npt.assert_allclose(noshear["r50_err"], [r50_expected * T_err / (2 * T)]) - # PSF: r50psf = sqrt(2 ln2) * sigma, sigma = sqrt(T_psf / 2) - sigma_psf = np.sqrt(T_psf / 2) - npt.assert_allclose(noshear["r50psf"], [SIGMA_TO_R50 * sigma_psf]) + # reconvolution kernel: r50 = sqrt(2 ln2) * sigma, sigma = sqrt(T_psf / 2) + sigma_reconv = np.sqrt(T_psf / 2) + npt.assert_allclose(noshear["r50_psf_reconv"], [SIGMA_TO_R50 * sigma_reconv]) npt.assert_allclose( - noshear["r50psf_err"], [SIGMA_TO_R50 * T_psf_err / (4 * sigma_psf)] + noshear["r50_err_psf_reconv"], + [SIGMA_TO_R50 * T_psf_err / (4 * sigma_reconv)], ) + npt.assert_allclose(noshear["T_psf_reconv"], [T_psf]) + npt.assert_allclose(noshear["T_err_psf_reconv"], [T_psf_err]) - # galaxy/PSF r50 are now commensurable: same convention on both sides + # galaxy/reconv-PSF r50 are commensurable: same convention on both sides npt.assert_allclose( - np.array(noshear["r50"]) / np.array(noshear["r50psf"]), + np.array(noshear["r50"]) / np.array(noshear["r50_psf_reconv"]), [np.sqrt(T / T_psf)], ) - # areas pass through untouched, with the err asymmetry fixed - npt.assert_allclose(noshear["Tpsf"], [T_psf]) - npt.assert_allclose(noshear["Tpsf_err"], [T_psf_err]) + # original image PSF carries its OWN size, un-aliased from the kernel + sigma_orig = np.sqrt(ORIG_PSF_T / 2) + npt.assert_allclose(noshear["T_psf_orig"], [ORIG_PSF_T]) + npt.assert_allclose(noshear["T_err_psf_orig"], [ORIG_PSF_T_ERR]) + npt.assert_allclose(noshear["r50_psf_orig"], [SIGMA_TO_R50 * sigma_orig]) + npt.assert_allclose( + noshear["r50_err_psf_orig"], + [SIGMA_TO_R50 * ORIG_PSF_T_ERR / (4 * sigma_orig)], + ) + assert noshear["T_psf_orig"] != noshear["T_psf_reconv"] + assert noshear["r50_psf_orig"] != noshear["r50_psf_reconv"] - # the *_psfo_ngmix size duplicates are retired - for retired in ( - "T_psfo_ngmix", - "T_err_psfo_ngmix", - "r50_psfo_ngmix", - "r50_err_psfo_ngmix", - ): - assert retired not in noshear + +def test_compile_results_psf_families_are_unaliased(): + """The two PSF-ellipticity families document *different* PSFs (#749). + + ``*_psf_reconv`` carries the metacal RECONVOLUTION kernel; ``*_psf_orig`` + carries the ORIGINAL image PSF, fit independently. Before the fix both + came from one ``average_multiepoch_psf`` result and so were byte- + identical; here they must differ, each tracing its own source. The + companion size un-aliasing is checked in + ``test_compile_results_size_columns_are_half_light_radii``. + """ + from shapepipe.modules.ngmix_package.ngmix import Ngmix + + inst = object.__new__(Ngmix) + inst._zero_point = 30.0 + noshear = inst.compile_results( + [_fake_metacal_result(0.18, 0.02, 0.09, 0.001)] + )["noshear"] + + # reconvolution kernel + npt.assert_allclose(noshear["g1_psf_reconv"], [RECONV_PSF_G[0]]) + npt.assert_allclose(noshear["g2_psf_reconv"], [RECONV_PSF_G[1]]) + npt.assert_allclose(noshear["g1_err_psf_reconv"], [RECONV_PSF_G_ERR[0]]) + npt.assert_allclose(noshear["g2_err_psf_reconv"], [RECONV_PSF_G_ERR[1]]) + # original image PSF + npt.assert_allclose(noshear["g1_psf_orig"], [ORIG_PSF_G[0]]) + npt.assert_allclose(noshear["g2_psf_orig"], [ORIG_PSF_G[1]]) + npt.assert_allclose(noshear["g1_err_psf_orig"], [ORIG_PSF_G_ERR[0]]) + npt.assert_allclose(noshear["g2_err_psf_orig"], [ORIG_PSF_G_ERR[1]]) + # the un-aliasing: the two families are no longer the same value + assert noshear["g1_psf_orig"] != noshear["g1_psf_reconv"] + assert noshear["g2_psf_orig"] != noshear["g2_psf_reconv"] def test_compile_results_nan_fills_failed_fit_types(): @@ -333,6 +409,66 @@ def epoch(result, weight_value): assert psf_res["n_epoch"] == 1 +def test_average_original_psf_fits_each_gal_psf_with_galaxy_weight(): + """The original-PSF average fits ``gal_obs.psf`` and weights by gal S/N. + + Restores the independent original-image-PSF fit (shapepipe#749): each + epoch's psfex/mccd PSF stamp (``gal_obs.psf``) is fit by the shared + ``psf_runner``, then weight-averaged by the *galaxy* inverse-variance + weight (matching :func:`average_multiepoch_psf`). A pre-set + ``psf.meta['result']`` per epoch stands in for the runner's fit so the + averaging math is checked in isolation. + """ + from types import SimpleNamespace + from shapepipe.modules.ngmix_package.ngmix import average_original_psf + + def gal_epoch(psf_result, weight_value): + # gal_obs carries a .psf whose meta['result'] is what the runner + # would set; .weight is the galaxy inverse-variance map. + return SimpleNamespace( + psf=SimpleNamespace(meta={"result": psf_result}), + weight=np.full((2, 2), weight_value), + ) + + # runner stub: a no-op .go, since meta['result'] is pre-populated. It + # must be *called* (the fit happens), so record that it was. + calls = [] + runner = SimpleNamespace(go=lambda obs: calls.append(obs)) + + good_a = { + "flags": 0, "T": 0.30, "T_err": 0.02, + "g": np.array([0.04, -0.03]), "g_err": np.array([1e-3, 1e-3]), + } + good_b = { + "flags": 0, "T": 0.20, "T_err": 0.01, + "g": np.array([0.06, -0.01]), "g_err": np.array([2e-3, 2e-3]), + } + failed = {"flags": 3, "nfev": 5, "pars": np.zeros(6)} # no T/g keys + + gal_obs_list = [ + gal_epoch(good_a, 1.0), # galaxy weight-sum = 4 + gal_epoch(good_b, 2.0), # galaxy weight-sum = 8 + gal_epoch(failed, 4.0), # dropped on flags != 0 + ] + psfo_res = average_original_psf(gal_obs_list, runner) + + # the runner fit every epoch's PSF + assert len(calls) == 3 + # weighted over the two surviving epochs (weights 4 and 8) + w = np.array([4.0, 8.0]) + npt.assert_allclose( + psfo_res["g_psf"], + (good_a["g"] * w[0] + good_b["g"] * w[1]) / w.sum(), + ) + npt.assert_allclose( + psfo_res["T_psf"], + (good_a["T"] * w[0] + good_b["T"] * w[1]) / w.sum(), + ) + assert psfo_res["n_epoch"] == 2 + # original PSF is elliptical — not the round reconvolution kernel + assert abs(psfo_res["g_psf"][0]) > 1e-3 + + def test_weight_map_recovers_injected_inverse_variance(): """A supplied inverse-variance map must not be renormalized twice.""" from shapepipe.modules.ngmix_package.ngmix import prepare_ngmix_weights diff --git a/tests/module/test_ngmix_weight_validation.py b/tests/module/test_ngmix_weight_validation.py index 243cbe83..2dad3a8c 100644 --- a/tests/module/test_ngmix_weight_validation.py +++ b/tests/module/test_ngmix_weight_validation.py @@ -145,7 +145,9 @@ def fit_metacal(gal, psf_im, bkg_rms, seed): stamp.jacobs = [WCS.jacobian()] if bkg_rms is not None: stamp.bkg_rms = [bkg_rms] - res, _ = do_ngmix_metacal(stamp, get_prior(PIX_SCALE, rng), GAL_FLUX, rng) + res, _, _ = do_ngmix_metacal( + stamp, get_prior(PIX_SCALE, rng), GAL_FLUX, rng + ) return res["noshear"] diff --git a/tests/science/test_mbias.py b/tests/science/test_mbias.py index 347bb5be..d7a0ebec 100644 --- a/tests/science/test_mbias.py +++ b/tests/science/test_mbias.py @@ -56,7 +56,7 @@ def _recover_g1_with_response(seed=42): stamp.gals, stamp.psfs, stamp.weights, stamp.flags, stamp.jacobs = ( gals, psfs, weights, flags, jacobs, ) - res, _ = do_ngmix_metacal(stamp, prior, 1.0, rng) + res, _, _ = do_ngmix_metacal(stamp, prior, 1.0, rng) g1_noshear = res["noshear"]["g"][0] R11 = (res["1p"]["g"][0] - res["1m"]["g"][0]) / (2 * METACAL_STEP) From b7ababa3a28376a951eaf1da5efbbd0a206d5ed5 Mon Sep 17 00:00:00 2001 From: Cail Daley Date: Fri, 19 Jun 2026 23:48:35 +0200 Subject: [PATCH 04/30] make_cat: update shipped param files to the new ngmix column grammar (#749) The two in-repo param files are the renamed columns' shipped consumer (read by scripts/python/create_final_cat.py). Map every NGMIX token to the NGMIX_[_ERR]_[_] grammar: NGMIX_ELL_PSFo_NOSHEAR -> NGMIX_G1/G2_PSF_ORIG_NOSHEAR NGMIX_ELL_ -> NGMIX_G1/G2_GAL_ NGMIX_ELL_ERR_NOSHEAR -> NGMIX_G1/G2_ERR_GAL_NOSHEAR NGMIX_T[_ERR]_ -> NGMIX_T[_ERR]_GAL_ NGMIX_Tpsf_ -> NGMIX_T_PSF_RECONV_ NGMIX_T_PSFo_NOSHEAR -> NGMIX_T_PSF_ORIG_NOSHEAR NGMIX_FLUX[_ERR]_ -> NGMIX_FLUX[_ERR]_GAL_ NGMIX_FLAGS_ -> NGMIX_FLAGS_GAL_ Non-NGMIX columns (NGMIX_MCAL_FLAGS, NGMIX_N_EPOCH, NGMIX_MOM_FAIL, and all SExtractor/external columns) are untouched. Every uncommented NGMIX column now matches a column make_cat._save_ngmix_data produces. Co-Authored-By: Claude Opus 4.8 --- example/cfis/final_cat.param | 99 +++++++++++++++------------- example/unions_800/cat_matched.param | 99 +++++++++++++++------------- 2 files changed, 110 insertions(+), 88 deletions(-) diff --git a/example/cfis/final_cat.param b/example/cfis/final_cat.param index eae7a2c0..a469aac7 100644 --- a/example/cfis/final_cat.param +++ b/example/cfis/final_cat.param @@ -11,8 +11,9 @@ FLAGS IMAFLAGS_ISO NGMIX_MCAL_FLAGS -# PSF ellipticity -NGMIX_ELL_PSFo_NOSHEAR +# PSF ellipticity (original image PSF) +NGMIX_G1_PSF_ORIG_NOSHEAR +NGMIX_G2_PSF_ORIG_NOSHEAR # spread class #SPREAD_CLASS @@ -29,52 +30,62 @@ NGMIX_N_EPOCH ## Ngmix: model fitting # galaxy ellipticity -NGMIX_ELL_1M -NGMIX_ELL_1P -NGMIX_ELL_2M -NGMIX_ELL_2P -NGMIX_ELL_NOSHEAR -#NGMIX_ELL_ERR_1M -#NGMIX_ELL_ERR_1P -#NGMIX_ELL_ERR_2M -#NGMIX_ELL_ERR_2P -NGMIX_ELL_ERR_NOSHEAR +NGMIX_G1_GAL_1M +NGMIX_G2_GAL_1M +NGMIX_G1_GAL_1P +NGMIX_G2_GAL_1P +NGMIX_G1_GAL_2M +NGMIX_G2_GAL_2M +NGMIX_G1_GAL_2P +NGMIX_G2_GAL_2P +NGMIX_G1_GAL_NOSHEAR +NGMIX_G2_GAL_NOSHEAR +#NGMIX_G1_ERR_GAL_1M +#NGMIX_G2_ERR_GAL_1M +#NGMIX_G1_ERR_GAL_1P +#NGMIX_G2_ERR_GAL_1P +#NGMIX_G1_ERR_GAL_2M +#NGMIX_G2_ERR_GAL_2M +#NGMIX_G1_ERR_GAL_2P +#NGMIX_G2_ERR_GAL_2P +NGMIX_G1_ERR_GAL_NOSHEAR +NGMIX_G2_ERR_GAL_NOSHEAR # flags -NGMIX_FLAGS_1M -NGMIX_FLAGS_1P -NGMIX_FLAGS_2M -NGMIX_FLAGS_2P -NGMIX_FLAGS_NOSHEAR +NGMIX_FLAGS_GAL_1M +NGMIX_FLAGS_GAL_1P +NGMIX_FLAGS_GAL_2M +NGMIX_FLAGS_GAL_2P +NGMIX_FLAGS_GAL_NOSHEAR # size and error -NGMIX_T_1M -NGMIX_T_1P -NGMIX_T_2M -NGMIX_T_2P -NGMIX_T_NOSHEAR -NGMIX_T_ERR_1M -NGMIX_T_ERR_1P -NGMIX_T_ERR_2M -NGMIX_T_ERR_2P -NGMIX_T_ERR_NOSHEAR -NGMIX_Tpsf_1M -NGMIX_Tpsf_1P -NGMIX_Tpsf_2M -NGMIX_Tpsf_2P -NGMIX_Tpsf_NOSHEAR +NGMIX_T_GAL_1M +NGMIX_T_GAL_1P +NGMIX_T_GAL_2M +NGMIX_T_GAL_2P +NGMIX_T_GAL_NOSHEAR +NGMIX_T_ERR_GAL_1M +NGMIX_T_ERR_GAL_1P +NGMIX_T_ERR_GAL_2M +NGMIX_T_ERR_GAL_2P +NGMIX_T_ERR_GAL_NOSHEAR +NGMIX_T_PSF_RECONV_1M +NGMIX_T_PSF_RECONV_1P +NGMIX_T_PSF_RECONV_2M +NGMIX_T_PSF_RECONV_2P +NGMIX_T_PSF_RECONV_NOSHEAR # flux and error -NGMIX_FLUX_1M -NGMIX_FLUX_1P -NGMIX_FLUX_2M -NGMIX_FLUX_2P -NGMIX_FLUX_NOSHEAR -NGMIX_FLUX_ERR_1M -NGMIX_FLUX_ERR_1P -NGMIX_FLUX_ERR_2M -NGMIX_FLUX_ERR_2P -NGMIX_FLUX_ERR_NOSHEAR +NGMIX_FLUX_GAL_1M +NGMIX_FLUX_GAL_1P +NGMIX_FLUX_GAL_2M +NGMIX_FLUX_GAL_2P +NGMIX_FLUX_GAL_NOSHEAR +NGMIX_FLUX_ERR_GAL_1M +NGMIX_FLUX_ERR_GAL_1P +NGMIX_FLUX_ERR_GAL_2M +NGMIX_FLUX_ERR_GAL_2P +NGMIX_FLUX_ERR_GAL_NOSHEAR # magnitudes MAG_AUTO @@ -94,10 +105,10 @@ FWHM_IMAGE FWHM_WORLD # PSF size measured on original image -NGMIX_T_PSFo_NOSHEAR +NGMIX_T_PSF_ORIG_NOSHEAR # PSF size measured on reconvolved image -# NGMIX_Tpsf_NOSHEAR +# NGMIX_T_PSF_RECONV_NOSHEAR # ngmix moment failure flag NGMIX_MOM_FAIL diff --git a/example/unions_800/cat_matched.param b/example/unions_800/cat_matched.param index 0df13407..e1869df5 100644 --- a/example/unions_800/cat_matched.param +++ b/example/unions_800/cat_matched.param @@ -11,8 +11,9 @@ FLAGS IMAFLAGS_ISO NGMIX_MCAL_FLAGS -# PSF ellipticity -NGMIX_ELL_PSFo_NOSHEAR +# PSF ellipticity (original image PSF) +NGMIX_G1_PSF_ORIG_NOSHEAR +NGMIX_G2_PSF_ORIG_NOSHEAR # spread class SPREAD_CLASS @@ -29,52 +30,62 @@ NGMIX_N_EPOCH ## Ngmix: model fitting # galaxy ellipticity -NGMIX_ELL_1M -NGMIX_ELL_1P -NGMIX_ELL_2M -NGMIX_ELL_2P -NGMIX_ELL_NOSHEAR -#NGMIX_ELL_ERR_1M -#NGMIX_ELL_ERR_1P -#NGMIX_ELL_ERR_2M -#NGMIX_ELL_ERR_2P -NGMIX_ELL_ERR_NOSHEAR +NGMIX_G1_GAL_1M +NGMIX_G2_GAL_1M +NGMIX_G1_GAL_1P +NGMIX_G2_GAL_1P +NGMIX_G1_GAL_2M +NGMIX_G2_GAL_2M +NGMIX_G1_GAL_2P +NGMIX_G2_GAL_2P +NGMIX_G1_GAL_NOSHEAR +NGMIX_G2_GAL_NOSHEAR +#NGMIX_G1_ERR_GAL_1M +#NGMIX_G2_ERR_GAL_1M +#NGMIX_G1_ERR_GAL_1P +#NGMIX_G2_ERR_GAL_1P +#NGMIX_G1_ERR_GAL_2M +#NGMIX_G2_ERR_GAL_2M +#NGMIX_G1_ERR_GAL_2P +#NGMIX_G2_ERR_GAL_2P +NGMIX_G1_ERR_GAL_NOSHEAR +NGMIX_G2_ERR_GAL_NOSHEAR # flags -NGMIX_FLAGS_1M -NGMIX_FLAGS_1P -NGMIX_FLAGS_2M -NGMIX_FLAGS_2P -NGMIX_FLAGS_NOSHEAR +NGMIX_FLAGS_GAL_1M +NGMIX_FLAGS_GAL_1P +NGMIX_FLAGS_GAL_2M +NGMIX_FLAGS_GAL_2P +NGMIX_FLAGS_GAL_NOSHEAR # size and error -NGMIX_T_1M -NGMIX_T_1P -NGMIX_T_2M -NGMIX_T_2P -NGMIX_T_NOSHEAR -NGMIX_T_ERR_1M -NGMIX_T_ERR_1P -NGMIX_T_ERR_2M -NGMIX_T_ERR_2P -NGMIX_T_ERR_NOSHEAR -NGMIX_Tpsf_1M -NGMIX_Tpsf_1P -NGMIX_Tpsf_2M -NGMIX_Tpsf_2P -NGMIX_Tpsf_NOSHEAR +NGMIX_T_GAL_1M +NGMIX_T_GAL_1P +NGMIX_T_GAL_2M +NGMIX_T_GAL_2P +NGMIX_T_GAL_NOSHEAR +NGMIX_T_ERR_GAL_1M +NGMIX_T_ERR_GAL_1P +NGMIX_T_ERR_GAL_2M +NGMIX_T_ERR_GAL_2P +NGMIX_T_ERR_GAL_NOSHEAR +NGMIX_T_PSF_RECONV_1M +NGMIX_T_PSF_RECONV_1P +NGMIX_T_PSF_RECONV_2M +NGMIX_T_PSF_RECONV_2P +NGMIX_T_PSF_RECONV_NOSHEAR # flux and error -NGMIX_FLUX_1M -NGMIX_FLUX_1P -NGMIX_FLUX_2M -NGMIX_FLUX_2P -NGMIX_FLUX_NOSHEAR -NGMIX_FLUX_ERR_1M -NGMIX_FLUX_ERR_1P -NGMIX_FLUX_ERR_2M -NGMIX_FLUX_ERR_2P -NGMIX_FLUX_ERR_NOSHEAR +NGMIX_FLUX_GAL_1M +NGMIX_FLUX_GAL_1P +NGMIX_FLUX_GAL_2M +NGMIX_FLUX_GAL_2P +NGMIX_FLUX_GAL_NOSHEAR +NGMIX_FLUX_ERR_GAL_1M +NGMIX_FLUX_ERR_GAL_1P +NGMIX_FLUX_ERR_GAL_2M +NGMIX_FLUX_ERR_GAL_2P +NGMIX_FLUX_ERR_GAL_NOSHEAR # Flag for multiple objects due to overlapping # tiles @@ -87,10 +98,10 @@ MAG_AUTO SNR_WIN # PSF size measured on original image -NGMIX_T_PSFo_NOSHEAR +NGMIX_T_PSF_ORIG_NOSHEAR # PSF size measured on reconvolved image -# NGMIX_Tpsf_NOSHEAR +# NGMIX_T_PSF_RECONV_NOSHEAR # ngmix moment failure flag NGMIX_MOM_FAIL From 9b3025dcedf4662af5a18cd8ec28fbe70e5b8f8d Mon Sep 17 00:00:00 2001 From: Cail Daley Date: Fri, 19 Jun 2026 23:48:51 +0200 Subject: [PATCH 05/30] make_cat: add _save_ngmix_data tests; soften the grammar docstring (#749) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No make_cat test existed for the renamed ngmix columns. Add four tests driving SaveCatalogue._save_ngmix_data against a synthetic ngmix catalogue (distinct orig vs reconv sentinels): - produced columns follow the new grammar; no old name (_PSFo, _Tpsf, NGMIX_ELL_) survives - NGMIX_G1_PSF_ORIG_* traces the orig source and NGMIX_G1_PSF_RECONV_* the reconv source, and the two differ (the un-aliasing #749 restores) - NGMIX_T_PSF_ORIG_* != NGMIX_T_PSF_RECONV_* - an obj_id absent from the ngmix cat keeps its sentinel fill - end-to-end: make_cat reads a catalogue ngmix's own compile_results + save_results serialised, guarding the reader against key-set drift Soften the _save_ngmix_data grammar docstring so OBJECT and SHEAR read as optional (NGMIX[m]_[_ERR][_][_]) — the metadata columns NGMIX_MCAL_FLAGS / NGMIX_N_EPOCH / NGMIX_MCAL_TYPES_FAIL carry neither. Co-Authored-By: Claude Opus 4.8 --- .../modules/make_cat_package/make_cat.py | 21 +- tests/module/test_make_cat.py | 297 ++++++++++++++++++ 2 files changed, 309 insertions(+), 9 deletions(-) create mode 100644 tests/module/test_make_cat.py diff --git a/src/shapepipe/modules/make_cat_package/make_cat.py b/src/shapepipe/modules/make_cat_package/make_cat.py index 235fd14f..0122f9e4 100644 --- a/src/shapepipe/modules/make_cat_package/make_cat.py +++ b/src/shapepipe/modules/make_cat_package/make_cat.py @@ -329,15 +329,18 @@ def _save_ngmix_data(self, ngmix_cat_path, moments=False): Save the NGMIX catalogue into the final one. - Column grammar: ``NGMIX[m]_[_ERR]__``, with - ``OBJECT`` one of ``GAL`` (galaxy), ``PSF_ORIG`` (the original - psfex/mccd image PSF, fit by ``ngmix.average_original_psf`` — its true - ellipticity and size, feeding object-wise PSF leakage), or - ``PSF_RECONV`` (the metacal reconvolution kernel, round and enlarged - by construction, fit by ``ngmix.average_multiepoch_psf`` — the Tgal / - Tpsf size cut and a g~0 sanity check). ``PSF_ORIG`` and ``PSF_RECONV`` - are independent fits of different PSFs, no longer the single aliased - value of the pre-fix code (shapepipe#749). + Column grammar: ``NGMIX[m]_[_ERR][_][_]``. + ``OBJECT`` and ``SHEAR`` are both optional: the per-object metadata + columns (``NGMIX_MCAL_FLAGS``, ``NGMIX_N_EPOCH``, + ``NGMIX_MCAL_TYPES_FAIL``) carry neither. When present, ``OBJECT`` is + one of ``GAL`` (galaxy), ``PSF_ORIG`` (the original psfex/mccd image + PSF, fit by ``ngmix.average_original_psf`` — its true ellipticity and + size, feeding object-wise PSF leakage), or ``PSF_RECONV`` (the metacal + reconvolution kernel, round and enlarged by construction, fit by + ``ngmix.average_multiepoch_psf`` — the Tgal / Tpsf size cut and a g~0 + sanity check). ``PSF_ORIG`` and ``PSF_RECONV`` are independent fits of + different PSFs, no longer the single aliased value of the pre-fix code + (shapepipe#749). Parameters ---------- diff --git a/tests/module/test_make_cat.py b/tests/module/test_make_cat.py new file mode 100644 index 00000000..c3728cef --- /dev/null +++ b/tests/module/test_make_cat.py @@ -0,0 +1,297 @@ +"""UNIT TESTS FOR MODULE PACKAGE: MAKE_CAT. + +Drives ``SaveCatalogue._save_ngmix_data`` against a synthetic ngmix catalogue +to lock in the renamed PSF-column grammar +``NGMIX_[_ERR]_[_]`` (shapepipe#749): the original +image PSF (``PSF_ORIG``) and the metacal reconvolution kernel (``PSF_RECONV``) +are independent fits of *different* PSFs, no longer the single aliased value of +the pre-fix code. +""" + +import numpy as np +import numpy.testing as npt +from astropy.io import fits + +from shapepipe.modules.make_cat_package.make_cat import SaveCatalogue +from shapepipe.modules.ngmix_package.ngmix import Ngmix + + +class _NullLogger: + def info(self, *_args, **_kwargs): + pass + + +# Per-object key set that compile_results emits into every shear-type +# extension (mirrors ngmix.compile_results ``names2``). Sentinel values are +# distinct per key so a mis-routed column is caught by value, and the orig vs +# reconv PSF families carry *different* sentinels so any aliasing surfaces. +NGMIX_KEYS = [ + "id", + "n_epoch_model", + "mcal_types_fail", + "nfev_fit", + "g1", "g1_err", "g2", "g2_err", + "T", "T_err", "r50", "r50_err", + "flux", "flux_err", "s2n", "mag", "mag_err", + "flags", "mcal_flags", + "g1_psf_orig", "g2_psf_orig", + "g1_err_psf_orig", "g2_err_psf_orig", + "T_psf_orig", "T_err_psf_orig", "r50_psf_orig", "r50_err_psf_orig", + "g1_psf_reconv", "g2_psf_reconv", + "g1_err_psf_reconv", "g2_err_psf_reconv", + "T_psf_reconv", "T_err_psf_reconv", "r50_psf_reconv", "r50_err_psf_reconv", +] + +SHEAR_EXTS = ["1M", "1P", "2M", "2P", "NOSHEAR"] +SHEAR_EXTS_LOWER = ["1m", "1p", "2m", "2p", "noshear"] + + +def _ngmix_row(obj_id): + """One object's per-key values, distinct for orig vs reconv PSF families. + + Built so every PSF column carries a sentinel that names its source: the + orig family from ``*_orig`` integers, the reconv family from ``*_reconv`` + integers, deliberately disjoint. + """ + return { + "id": obj_id, + "n_epoch_model": 3, + "mcal_types_fail": 0, + "nfev_fit": 7, + "g1": 0.10, "g1_err": 0.011, "g2": -0.20, "g2_err": 0.022, + "T": 0.30, "T_err": 0.033, "r50": 0.40, "r50_err": 0.044, + "flux": 100.0, "flux_err": 1.0, "s2n": 55.0, + "mag": 21.0, "mag_err": 0.05, + "flags": 0, "mcal_flags": 0, + # original image PSF — its own ellipticity and size + "g1_psf_orig": 0.0041, "g2_psf_orig": -0.0031, + "g1_err_psf_orig": 2e-5, "g2_err_psf_orig": 3e-5, + "T_psf_orig": 0.071, "T_err_psf_orig": 8e-4, + "r50_psf_orig": 0.221, "r50_err_psf_orig": 1.2e-3, + # metacal reconvolution kernel — round and enlarged, distinct values + "g1_psf_reconv": 0.0012, "g2_psf_reconv": 0.0022, + "g1_err_psf_reconv": 1e-5, "g2_err_psf_reconv": 1e-5, + "T_psf_reconv": 0.092, "T_err_psf_reconv": 1e-3, + "r50_psf_reconv": 0.252, "r50_err_psf_reconv": 1.0e-3, + } + + +def _write_ngmix_cat(path, obj_ids): + """Write a synthetic ngmix FITS with the five shear-type extensions. + + Each extension carries the exact ``compile_results`` key set, with the + same per-object values across extensions (the PSF families are + object-level, not per-shear). + """ + rows = [_ngmix_row(oid) for oid in obj_ids] + hdus = [fits.PrimaryHDU()] + for ext in SHEAR_EXTS: + cols = [ + fits.Column( + name=key, + format="K" if key in ("id", "n_epoch_model", "mcal_types_fail", + "nfev_fit", "flags", "mcal_flags") else "D", + array=np.array([row[key] for row in rows]), + ) + for key in NGMIX_KEYS + ] + hdus.append(fits.BinTableHDU.from_columns(cols, name=ext)) + fits.HDUList(hdus).writeto(path, overwrite=True) + + +def _run_save_ngmix(ngmix_path, obj_id, cat_size_target=None): + """Drive ``_save_ngmix_data`` and return its populated output dict.""" + inst = object.__new__(SaveCatalogue) + inst._obj_id = np.asarray(obj_id) + inst._output_dict = {} + inst._cat_size_target = ( + len(inst._obj_id) if cat_size_target is None else cat_size_target + ) + inst._w_log = _NullLogger() + + err_msg = inst._save_ngmix_data(str(ngmix_path)) + assert err_msg is None + return inst._output_dict + + +def test_save_ngmix_data_uses_new_grammar_and_no_old_names(tmp_path): + """Produced columns follow the new grammar; no old token survives. + + The renamed grammar drops ``_PSFo``, ``_Tpsf`` and the ellipticity + 2-vector ``NGMIX_ELL_*`` entirely (shapepipe#749). A regression to any of + those would be a silent break for the shipped param-file consumer + (``create_final_cat.py``), so the absence is asserted directly. + """ + ngmix_path = tmp_path / "ngmix-0.fits" + obj_ids = [11, 22, 33] + _write_ngmix_cat(ngmix_path, obj_ids) + + out = _run_save_ngmix(ngmix_path, obj_ids) + + # Every per-shear family is present under the new grammar. + for shear in SHEAR_EXTS: + for col in ( + f"NGMIX_G1_GAL_{shear}", f"NGMIX_G2_GAL_{shear}", + f"NGMIX_G1_ERR_GAL_{shear}", f"NGMIX_G2_ERR_GAL_{shear}", + f"NGMIX_T_GAL_{shear}", f"NGMIX_T_ERR_GAL_{shear}", + f"NGMIX_SNR_GAL_{shear}", + f"NGMIX_FLUX_GAL_{shear}", f"NGMIX_FLUX_ERR_GAL_{shear}", + f"NGMIX_MAG_GAL_{shear}", f"NGMIX_MAG_ERR_GAL_{shear}", + f"NGMIX_FLAGS_GAL_{shear}", + f"NGMIX_G1_PSF_ORIG_{shear}", f"NGMIX_G2_PSF_ORIG_{shear}", + f"NGMIX_T_PSF_ORIG_{shear}", + f"NGMIX_G1_PSF_RECONV_{shear}", f"NGMIX_G2_PSF_RECONV_{shear}", + f"NGMIX_T_PSF_RECONV_{shear}", + ): + assert col in out, f"missing {col}" + + # Object-level metadata columns carry no OBJECT/SHEAR token. + for col in ("NGMIX_MCAL_FLAGS", "NGMIX_N_EPOCH", "NGMIX_MCAL_TYPES_FAIL"): + assert col in out, f"missing {col}" + + # No old name survives anywhere in the produced columns. + for col in out: + assert "_PSFo" not in col, col + assert "_Tpsf" not in col and "TPSF" not in col, col + assert not col.startswith("NGMIX_ELL_"), col + assert "NGMIXm" not in col, col # moments branch off by default + + +def test_save_ngmix_data_psf_families_trace_distinct_sources(tmp_path): + """The two PSF families read from their own ngmix columns, un-aliased. + + ``NGMIX_G1_PSF_ORIG_*`` must equal the orig source and + ``NGMIX_G1_PSF_RECONV_*`` the reconv source, and the two must differ — + the un-aliasing that shapepipe#749 restores. Likewise the sizes: + ``NGMIX_T_PSF_ORIG_*`` != ``NGMIX_T_PSF_RECONV_*``. + """ + ngmix_path = tmp_path / "ngmix-1.fits" + obj_ids = [11, 22, 33] + _write_ngmix_cat(ngmix_path, obj_ids) + row = _ngmix_row(11) + + out = _run_save_ngmix(ngmix_path, obj_ids) + + for shear in SHEAR_EXTS: + npt.assert_allclose( + out[f"NGMIX_G1_PSF_ORIG_{shear}"], + [row["g1_psf_orig"]] * len(obj_ids), + ) + npt.assert_allclose( + out[f"NGMIX_G1_PSF_RECONV_{shear}"], + [row["g1_psf_reconv"]] * len(obj_ids), + ) + # the un-aliasing: orig and reconv ellipticities differ + assert not np.allclose( + out[f"NGMIX_G1_PSF_ORIG_{shear}"], + out[f"NGMIX_G1_PSF_RECONV_{shear}"], + ) + # sizes are independent too + npt.assert_allclose( + out[f"NGMIX_T_PSF_ORIG_{shear}"], [row["T_psf_orig"]] * len(obj_ids) + ) + npt.assert_allclose( + out[f"NGMIX_T_PSF_RECONV_{shear}"], + [row["T_psf_reconv"]] * len(obj_ids), + ) + assert not np.allclose( + out[f"NGMIX_T_PSF_ORIG_{shear}"], + out[f"NGMIX_T_PSF_RECONV_{shear}"], + ) + + +def test_save_ngmix_data_fills_sentinels_for_absent_objects(tmp_path): + """An obj_id absent from the ngmix cat keeps its sentinel fill. + + make_cat pre-fills every column with a type-specific sentinel and only + overwrites the rows whose ``NUMBER`` matches an ngmix ``id``. An object + SExtractor saw but ngmix never fit (no matching id) must therefore keep + the sentinels: 0 for sizes/flags, -10 for ellipticities, 1e30 for + ``T_ERR``, -1 for flux/mag errors. + """ + ngmix_path = tmp_path / "ngmix-2.fits" + # ngmix fit only object 22; the final cat also carries 11 and 99. + _write_ngmix_cat(ngmix_path, [22]) + obj_ids = [11, 22, 99] + + out = _run_save_ngmix(ngmix_path, obj_ids, cat_size_target=3) + + present = obj_ids.index(22) + absent = [obj_ids.index(11), obj_ids.index(99)] + + # The matched object got its measured value; the others keep sentinels. + row = _ngmix_row(22) + g1_orig = np.asarray(out["NGMIX_G1_PSF_ORIG_NOSHEAR"]) + npt.assert_allclose(g1_orig[present], row["g1_psf_orig"]) + npt.assert_allclose(g1_orig[absent], [-10.0, -10.0]) + + t_orig = np.asarray(out["NGMIX_T_PSF_ORIG_NOSHEAR"]) + npt.assert_allclose(t_orig[present], row["T_psf_orig"]) + npt.assert_allclose(t_orig[absent], [0.0, 0.0]) + + t_err_gal = np.asarray(out["NGMIX_T_ERR_GAL_NOSHEAR"]) + npt.assert_allclose(t_err_gal[present], row["T_err"]) + npt.assert_allclose(t_err_gal[absent], [1e30, 1e30]) + + flux_err = np.asarray(out["NGMIX_FLUX_ERR_GAL_NOSHEAR"]) + npt.assert_allclose(flux_err[present], row["flux_err"]) + npt.assert_allclose(flux_err[absent], [-1.0, -1.0]) + + n_epoch = np.asarray(out["NGMIX_N_EPOCH"]) + npt.assert_allclose(n_epoch[present], row["n_epoch_model"]) + npt.assert_allclose(n_epoch[absent], [0.0, 0.0]) + + +def test_save_ngmix_data_matches_module_serialised_catalogue(tmp_path): + """End-to-end: make_cat reads a catalogue ngmix itself serialised. + + Rather than hand-rolling the FITS, drive ngmix's own + ``compile_results`` + ``save_results`` (the real write path), then read it + back through ``_save_ngmix_data``. This guards the make_cat reader against + drift in the ngmix key set: the keys must line up end to end. + """ + obj_ids = [5, 7] + results = [] + for oid in obj_ids: + row = _ngmix_row(oid) + per_type = { + "nfev": row["nfev_fit"], + "g": [row["g1"], row["g2"]], + "g_cov": np.diag([row["g1_err"] ** 2, row["g2_err"] ** 2]), + "T": row["T"], "T_err": row["T_err"], + "flux": row["flux"], "flux_err": row["flux_err"], + "s2n": row["s2n"], "flags": 0, + } + res = { + "obj_id": oid, + "n_epoch_model": row["n_epoch_model"], + "mcal_types_fail": row["mcal_types_fail"], + "mcal_flags": row["mcal_flags"], + } + for key in NGMIX_KEYS: + if key.endswith("_psf_orig") or key.endswith("_psf_reconv"): + res[key] = row[key] + res.update({name: dict(per_type) for name in SHEAR_EXTS_LOWER}) + results.append(res) + + ngmix_inst = object.__new__(Ngmix) + ngmix_inst._zero_point = 30.0 + ngmix_inst._output_dir = str(tmp_path) + ngmix_inst._file_number_string = "-0" + out_dict = ngmix_inst.compile_results(results) + ngmix_inst.save_results(out_dict) + + ngmix_path = ngmix_inst.get_output_path(str(tmp_path)) + out = _run_save_ngmix(ngmix_path, obj_ids) + + # Both PSF families survive the round trip, distinct, on every object. + npt.assert_allclose( + out["NGMIX_G1_PSF_ORIG_NOSHEAR"], [_ngmix_row(o)["g1_psf_orig"] for o in obj_ids] + ) + npt.assert_allclose( + out["NGMIX_G1_PSF_RECONV_NOSHEAR"], + [_ngmix_row(o)["g1_psf_reconv"] for o in obj_ids], + ) + assert not np.allclose( + out["NGMIX_G1_PSF_ORIG_NOSHEAR"], out["NGMIX_G1_PSF_RECONV_NOSHEAR"] + ) From 9ab43af0a73cfe224955dda0ddeca3569720d146 Mon Sep 17 00:00:00 2001 From: Cail Daley Date: Fri, 19 Jun 2026 23:49:10 +0200 Subject: [PATCH 06/30] ngmix: fit a copy of the original PSF so gal_obs.psf is never mutated (#749) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SCIENCE-CRITICAL. average_original_psf called psf_runner.go(gal_obs), and PSFRunner.go sets .gmix (and .meta['result']) on the observation it fits — here gal_obs.psf, the PSF metacal later deep-copies and consumes via boot.go(gal_obs_list). A stray gmix survives that copy and is reused as the MetacalFitGaussPSF fallback when its own admom+ML PSF fits both fail, so objects the base branch dropped (BootPSFFailure) silently survived on this branch, changing the galaxy/shear result set. A rename+add-column refactor must be bit-identical on galaxy results, so fit gal_obs.psf.copy() and read from the copy, leaving gal_obs pristine (verified: gal_obs.psf carries no gmix and no leaked meta['result'] afterward, matching base-branch state). Also in do_ngmix_metacal: - return a MetacalResult NamedTuple (resdict, reconv, orig) so the two PSF families are named not positional, guarding against a reconv/orig transposition; still a tuple, so positional unpacking is unchanged. - rename psfo_res -> psf_orig_res everywhere (no trailing-o shorthand). - soften the average_original_psf weighting note: same form as average_multiepoch_psf (weight.sum(), skip flags!=0) but the reconv path weights by the fixnoise-combined inverse variance of the noshear metacal image while the orig path uses the raw galaxy inverse variance. - note the r50_psf_orig/reconv columns are intermediate-only (make_cat reads T_psf_*/g, not r50, so they do not reach the final catalogue). Tests: a do_ngmix_metacal guard asserting gal_obs.psf carries no gmix after the original-PSF pre-fit; and, on an elliptical PSF stamp, T_psf_reconv > T_psf_orig and |g_psf_reconv| < |g_psf_orig| (true by construction — the reconvolution kernel is round and dilated), which catches a psf_res / psf_orig_res transposition. Co-Authored-By: Claude Opus 4.8 --- src/shapepipe/modules/ngmix_package/ngmix.py | 109 +++++++++++----- tests/module/test_ngmix.py | 129 ++++++++++++++++++- 2 files changed, 201 insertions(+), 37 deletions(-) diff --git a/src/shapepipe/modules/ngmix_package/ngmix.py b/src/shapepipe/modules/ngmix_package/ngmix.py index ced2d392..b50f94f9 100644 --- a/src/shapepipe/modules/ngmix_package/ngmix.py +++ b/src/shapepipe/modules/ngmix_package/ngmix.py @@ -8,6 +8,8 @@ import os import re +from typing import NamedTuple + import ngmix import galsim import numpy as np @@ -26,6 +28,30 @@ METACAL_TYPES = ('noshear', '1p', '1m', '2p', '2m') +class MetacalResult(NamedTuple): + """Return of :func:`do_ngmix_metacal`: the metacal fit plus both PSFs. + + A NamedTuple so the two PSF families are named, not positional — guarding + against a reconv/orig transposition at call sites. Still a plain tuple, so + ``resdict, psf_res, psf_orig_res = do_ngmix_metacal(...)`` keeps working. + + Attributes + ---------- + resdict : dict + MetacalBootstrapper result dict (one entry per metacal type). + reconv : dict + Averaged metacal *reconvolution*-kernel PSF (round, enlarged): + :func:`average_multiepoch_psf`. + orig : dict + Averaged *original* image-PSF (psfex/mccd, its true shape and size): + :func:`average_original_psf`. + """ + + resdict: dict + reconv: dict + orig: dict + + def get_mcal_flags(res): """Get Metacal Flags. @@ -405,7 +431,10 @@ def compile_results(self, results): 'mag_err', 'flags', 'mcal_flags', - # original image PSF (psfex/mccd), fit by average_original_psf + # original image PSF (psfex/mccd), fit by average_original_psf. + # The r50_psf_* columns here are intermediate-only: make_cat reads + # the T_psf_* (and g) columns, not r50, so these do not reach the + # final catalogue. 'g1_psf_orig', 'g2_psf_orig', 'g1_err_psf_orig', @@ -679,7 +708,7 @@ def process(self): if tile_cat.flux is not None else 1.0 ) - res, psf_res, psfo_res = do_ngmix_metacal( + res, psf_res, psf_orig_res = do_ngmix_metacal( stamp, prior, flux_guess, @@ -708,8 +737,8 @@ def process(self): res['mcal_flags'] = get_mcal_flags(res) # Two distinct PSF families (shapepipe#749), each with its own # ellipticity AND size, written under self-naming res-keys: - # reconvolution kernel (psf_res) -> *_psf_reconv - # original image PSF (psfo_res) -> *_psf_orig + # reconvolution kernel (psf_res) -> *_psf_reconv + # original image PSF (psf_orig_res) -> *_psf_orig # PSF half-light radius r50 = sqrt(2 ln 2) * sigma with # sigma = sqrt(T / 2); error from d sigma / d T = 1 / (4 sigma). def _psf_r50(T_psf, T_psf_err): @@ -730,14 +759,14 @@ def _psf_r50(T_psf, T_psf_err): res['r50_psf_reconv'], res['r50_err_psf_reconv'] = _psf_r50( psf_res['T_psf'], psf_res['T_psf_err'] ) - res['g1_psf_orig'] = psfo_res['g_psf'][0] - res['g2_psf_orig'] = psfo_res['g_psf'][1] - res['g1_err_psf_orig'] = psfo_res['g_psf_err'][0] - res['g2_err_psf_orig'] = psfo_res['g_psf_err'][1] - res['T_psf_orig'] = psfo_res['T_psf'] - res['T_err_psf_orig'] = psfo_res['T_psf_err'] + res['g1_psf_orig'] = psf_orig_res['g_psf'][0] + res['g2_psf_orig'] = psf_orig_res['g_psf'][1] + res['g1_err_psf_orig'] = psf_orig_res['g_psf_err'][0] + res['g2_err_psf_orig'] = psf_orig_res['g_psf_err'][1] + res['T_psf_orig'] = psf_orig_res['T_psf'] + res['T_err_psf_orig'] = psf_orig_res['T_psf_err'] res['r50_psf_orig'], res['r50_err_psf_orig'] = _psf_r50( - psfo_res['T_psf'], psfo_res['T_psf_err'] + psf_orig_res['T_psf'], psf_orig_res['T_psf_err'] ) final_res.append(res) n_fitted += 1 @@ -1254,16 +1283,31 @@ def average_original_psf(gal_obs_list, psf_runner): shape and size enter object-wise PSF-leakage diagnostics. Epochs are weighted by the *galaxy* inverse-variance weight - (``gal_obs.weight.sum()``), matching :func:`average_multiepoch_psf`, so - the two PSF families share an averaging scheme and differ only in which - PSF is fit. + (``gal_obs.weight.sum()``). This is the same *form* as + :func:`average_multiepoch_psf` (``weight.sum()`` per epoch, skipping + ``flags != 0`` fits) but not the identical weight: the reconvolution path + weights by the fixnoise-combined inverse variance of the noshear metacal + image, whereas this path uses the raw galaxy inverse variance. So the two + PSF families share an averaging *scheme* and differ in which PSF is fit + and in the precise per-epoch weighting factor. + + The fit runs on a *copy* of each PSF observation so ``gal_obs.psf`` — + the object metacal later deep-copies and consumes via + ``boot.go(gal_obs_list)`` — is never mutated. ``PSFRunner.go`` sets both + ``.meta['result']`` and, on success, the ``.gmix`` attribute of the + observation it fits; were that ``gal_obs.psf`` itself, the stray gmix + would survive metacal's deep copy and be reused as the + ``MetacalFitGaussPSF`` fallback when its own admom+ML PSF fits both fail, + silently rescuing objects the base branch dropped (``BootPSFFailure``) and + changing the galaxy/shear result set. Fitting a copy keeps this add-column + refactor bit-identical on the galaxy results. Parameters ---------- gal_obs_list : ngmix.observation.ObsList Per-epoch galaxy observations; each ``gal_obs.psf`` is the original (pre-metacal) PSF observation to fit, with no further ``.psf`` of - its own so the runner fits the stamp itself. + its own so the runner fits the stamp itself. Left pristine. psf_runner : ngmix.runners.PSFRunner The module's PSF runner, carrying the resolved ``psf_fit_prior``. @@ -1273,11 +1317,14 @@ def average_original_psf(gal_obs_list, psf_runner): Same keys as :func:`average_multiepoch_psf`. """ def fit(gal_obs): - # PSFRunner.go fits gal_obs.psf (the original PSF stamp) and sets - # gal_obs.psf.meta['result']; failed fits keep flags != 0 and are - # dropped by _average_psf_fits. - psf_runner.go(gal_obs) - return gal_obs.psf.meta['result'], gal_obs.weight.sum() + # Fit a COPY of the PSF observation: PSFRunner.go fits the PSF stamp + # and sets its .meta['result'] (and .gmix on success). Reading from + # the copy leaves gal_obs.psf pristine — no gmix to leak through + # metacal's deep copy into the MetacalFitGaussPSF fallback. Failed + # fits keep flags != 0 and are dropped by _average_psf_fits. + psf_obs = gal_obs.psf.copy() + psf_runner.go(psf_obs) + return psf_obs.meta['result'], gal_obs.weight.sum() return _average_psf_fits(fit(gal_obs) for gal_obs in gal_obs_list) @@ -1341,13 +1388,13 @@ def do_ngmix_metacal(stamp, prior, flux_guess, rng, centroid_source="hsm"): Returns ------- - tuple - ``(resdict, psf_res, psfo_res)`` where ``resdict`` is the - MetacalBootstrapper result dict, ``psf_res`` is the averaged metacal - *reconvolution*-kernel PSF dict (:func:`average_multiepoch_psf`), and - ``psfo_res`` is the averaged *original* image-PSF dict - (:func:`average_original_psf`). The two PSF dicts share keys but - describe different PSFs. + MetacalResult + Named 3-tuple ``(resdict, reconv, orig)``: the MetacalBootstrapper + result dict, the averaged metacal *reconvolution*-kernel PSF dict + (:func:`average_multiepoch_psf`), and the averaged *original* image-PSF + dict (:func:`average_original_psf`). The two PSF dicts share keys but + describe different PSFs; the named fields guard against transposing + them. Unpacks positionally as ``resdict, psf_res, psf_orig_res``. """ n_epoch = len(stamp.gals) if n_epoch == 0: @@ -1377,8 +1424,10 @@ def do_ngmix_metacal(stamp, prior, flux_guess, rng, centroid_source="hsm"): # the same psf_runner (so the same psf_fit_prior and centroid). This is # the PSF_ORIG family; metacal below produces the PSF_RECONV family. Run # first so the original PSF is fit on its own stamp, distinct from the - # round, enlarged kernel metacal convolves back in. - psfo_res = average_original_psf(gal_obs_list, psf_runner) + # round, enlarged kernel metacal convolves back in. average_original_psf + # fits a COPY of each gal_obs.psf, so gal_obs_list reaches boot.go below + # pristine — no stray gmix to leak into MetacalFitGaussPSF's fallback. + psf_orig_res = average_original_psf(gal_obs_list, psf_runner) metacal_pars = { 'types': ['noshear', '1p', '1m', '2p', '2m'], @@ -1397,4 +1446,4 @@ def do_ngmix_metacal(stamp, prior, flux_guess, rng, centroid_source="hsm"): ) resdict, obsdict = boot.go(gal_obs_list) psf_res = average_multiepoch_psf(obsdict) - return resdict, psf_res, psfo_res + return MetacalResult(resdict=resdict, reconv=psf_res, orig=psf_orig_res) diff --git a/tests/module/test_ngmix.py b/tests/module/test_ngmix.py index e035a0e8..157dc726 100644 --- a/tests/module/test_ngmix.py +++ b/tests/module/test_ngmix.py @@ -424,9 +424,16 @@ def test_average_original_psf_fits_each_gal_psf_with_galaxy_weight(): def gal_epoch(psf_result, weight_value): # gal_obs carries a .psf whose meta['result'] is what the runner - # would set; .weight is the galaxy inverse-variance map. + # would set; .weight is the galaxy inverse-variance map. The PSF + # observation must expose .copy() (average_original_psf fits a copy + # so gal_obs.psf is never mutated); the copy carries the same + # pre-populated meta so the averaging math is unchanged. + psf_obs = SimpleNamespace(meta={"result": psf_result}) + psf_obs.copy = lambda _self=psf_obs: SimpleNamespace( + meta=dict(_self.meta) + ) return SimpleNamespace( - psf=SimpleNamespace(meta={"result": psf_result}), + psf=psf_obs, weight=np.full((2, 2), weight_value), ) @@ -450,23 +457,131 @@ def gal_epoch(psf_result, weight_value): gal_epoch(good_b, 2.0), # galaxy weight-sum = 8 gal_epoch(failed, 4.0), # dropped on flags != 0 ] - psfo_res = average_original_psf(gal_obs_list, runner) + psf_orig_res = average_original_psf(gal_obs_list, runner) # the runner fit every epoch's PSF assert len(calls) == 3 # weighted over the two surviving epochs (weights 4 and 8) w = np.array([4.0, 8.0]) npt.assert_allclose( - psfo_res["g_psf"], + psf_orig_res["g_psf"], (good_a["g"] * w[0] + good_b["g"] * w[1]) / w.sum(), ) npt.assert_allclose( - psfo_res["T_psf"], + psf_orig_res["T_psf"], (good_a["T"] * w[0] + good_b["T"] * w[1]) / w.sum(), ) - assert psfo_res["n_epoch"] == 2 + assert psf_orig_res["n_epoch"] == 2 # original PSF is elliptical — not the round reconvolution kernel - assert abs(psfo_res["g_psf"][0]) > 1e-3 + assert abs(psf_orig_res["g_psf"][0]) > 1e-3 + + +def _do_ngmix_metacal_on_psf(psf_shear, seed=7, n_epochs=2): + """Run do_ngmix_metacal on a simulated stamp with a given PSF shape. + + Returns ``(resdict, psf_reconv, psf_orig, gal_obs_list)``, where the last + is the list metacal consumed — so a caller can assert that the original-PSF + pre-fit left it pristine. + """ + from shapepipe.modules.ngmix_package.ngmix import ( + Postage_stamp, + do_ngmix_metacal, + get_prior, + ) + from shapepipe.testing.simulate import make_data + + rng = np.random.RandomState(seed) + prior = get_prior(0.1857, rng) + gals, psfs, _, weights, flags, jacobs = make_data( + rng=np.random.RandomState(123), + shear=(0.0, 0.0), + noise=1e-5, + n_epochs=n_epochs, + img_size=51, + psf_shear=psf_shear, + ) + stamp = Postage_stamp(bkg_sub=False, megacam_flip=False) + stamp.gals, stamp.psfs, stamp.weights, stamp.flags, stamp.jacobs = ( + gals, + psfs, + weights, + flags, + jacobs, + ) + resdict, psf_reconv, psf_orig = do_ngmix_metacal(stamp, prior, 1.0, rng) + return resdict, psf_reconv, psf_orig + + +def test_original_psf_prefit_leaves_gal_obs_psf_pristine(): + """The original-PSF pre-fit must not mutate the PSF metacal consumes (#749). + + ``average_original_psf`` fits each epoch's psfex/mccd PSF with the shared + ``psf_runner``. ``PSFRunner.go`` sets ``.gmix`` (and ``.meta['result']``) + on the observation it fits; if that were ``gal_obs.psf`` itself, the gmix + would survive metacal's deep copy and be reused as the + ``MetacalFitGaussPSF`` fallback when admom+ML both fail — silently + rescuing objects the base branch dropped (``BootPSFFailure``). The fix + fits a *copy*, so ``gal_obs.psf`` carries NO gmix afterward, matching the + base-branch state that makes this add-column refactor bit-identical on the + galaxy results. + """ + from ngmix.observation import ObsList + from shapepipe.modules.ngmix_package.ngmix import ( + average_original_psf, + get_prior, + make_ngmix_observation, + make_runners, + ) + from shapepipe.testing.simulate import make_data + + rng = np.random.RandomState(7) + prior = get_prior(0.1857, rng) + gals, psfs, _, weights, flags, jacobs = make_data( + rng=np.random.RandomState(123), + shear=(0.0, 0.0), + noise=1e-5, + n_epochs=2, + img_size=51, + psf_shear=(0.05, -0.04), + ) + gal_obs_list = ObsList() + for n_e in range(2): + gal_obs_list.append( + make_ngmix_observation( + gals[n_e], weights[n_e], flags[n_e], psfs[n_e], jacobs[n_e], + rng, + ) + ) + + # Pre-state: a freshly built PSF observation carries no gmix. + assert not any(obs.psf.has_gmix() for obs in gal_obs_list) + + runner, psf_runner = make_runners(prior, 1.0, rng) + average_original_psf(gal_obs_list, psf_runner) + + # The pre-fit fit a copy, so the originals are untouched: no gmix and no + # leaked fit result in meta. + assert not any(obs.psf.has_gmix() for obs in gal_obs_list), ( + "original-PSF pre-fit leaked a gmix into gal_obs.psf" + ) + assert not any("result" in obs.psf.meta for obs in gal_obs_list), ( + "original-PSF pre-fit leaked a fit result into gal_obs.psf.meta" + ) + + +def test_reconv_psf_is_rounder_and_larger_than_orig_on_elliptical_psf(): + """Reconvolution kernel is round + dilated relative to the original PSF. + + On an ELLIPTICAL PSF stamp the metacal reconvolution kernel is round and + enlarged by construction, so ``T_psf_reconv > T_psf_orig`` and + ``|g_psf_reconv| < |g_psf_orig|``. This end-to-end guard catches a + ``psf_res`` / ``psf_orig_res`` transposition in :func:`do_ngmix_metacal` + (the two families share keys, so a swap is otherwise silent). + """ + _, psf_reconv, psf_orig = _do_ngmix_metacal_on_psf((0.05, -0.04)) + + assert psf_reconv["T_psf"] > psf_orig["T_psf"] + assert np.hypot(*psf_reconv["g_psf"]) < np.hypot(*psf_orig["g_psf"]) def test_weight_map_recovers_injected_inverse_variance(): From b690d3f41d3f4d59ff75bf445b83f6edaf02f8ce Mon Sep 17 00:00:00 2001 From: Cail Daley Date: Sat, 20 Jun 2026 00:35:03 +0200 Subject: [PATCH 07/30] ngmix: drop dead r50 columns (derivable from T) r50 = SIGMA_TO_R50*sqrt(T/2) is a deterministic function of T, which is already exported; the r50_* keys were written to the intermediate ngmix catalogue and read by nobody (confirmed across shapepipe + sp_validation). Remove the computation, schema entries, and the local SIGMA_TO_R50 constant; downstream r50 is derivable via cs_util.size.T_to_r50. Co-Authored-By: Claude Opus 4.8 --- src/shapepipe/modules/ngmix_package/ngmix.py | 46 +----------- tests/module/test_make_cat.py | 10 +-- tests/module/test_ngmix.py | 78 ++++---------------- 3 files changed, 19 insertions(+), 115 deletions(-) diff --git a/src/shapepipe/modules/ngmix_package/ngmix.py b/src/shapepipe/modules/ngmix_package/ngmix.py index b50f94f9..da21af66 100644 --- a/src/shapepipe/modules/ngmix_package/ngmix.py +++ b/src/shapepipe/modules/ngmix_package/ngmix.py @@ -20,11 +20,6 @@ from shapepipe.pipeline import file_io -# Gaussian half-light radius per unit sigma: r50 = sqrt(2 ln 2) * sigma -# = 1.17741 * sigma. With the ngmix area parameter T = 2 sigma^2 this -# gives r50 = SIGMA_TO_R50 * sqrt(T / 2) = sqrt(ln 2 * T). -SIGMA_TO_R50 = np.sqrt(2.0 * np.log(2.0)) - METACAL_TYPES = ('noshear', '1p', '1m', '2p', '2m') @@ -386,7 +381,7 @@ def compile_results(self, results): Two PSF column families — each carrying ellipticity *and* size, for *different* PSFs (shapepipe#749): - * ``*_psf_orig`` (``g1``/``g2`` + ``*_err``, ``T``, ``r50``) — the + * ``*_psf_orig`` (``g1``/``g2`` + ``*_err``, ``T``) — the ORIGINAL image PSF (the psfex/mccd model stamp), fit before metacal reconvolution by :func:`average_original_psf`. This is the PSF whose true ellipticity and size enter object-wise @@ -422,8 +417,6 @@ def compile_results(self, results): 'g2_err', 'T', 'T_err', - 'r50', - 'r50_err', 'flux', 'flux_err', 's2n', @@ -431,18 +424,13 @@ def compile_results(self, results): 'mag_err', 'flags', 'mcal_flags', - # original image PSF (psfex/mccd), fit by average_original_psf. - # The r50_psf_* columns here are intermediate-only: make_cat reads - # the T_psf_* (and g) columns, not r50, so these do not reach the - # final catalogue. + # original image PSF (psfex/mccd), fit by average_original_psf 'g1_psf_orig', 'g2_psf_orig', 'g1_err_psf_orig', 'g2_err_psf_orig', 'T_psf_orig', 'T_err_psf_orig', - 'r50_psf_orig', - 'r50_err_psf_orig', # metacal reconvolution kernel, fit by average_multiepoch_psf 'g1_psf_reconv', 'g2_psf_reconv', @@ -450,8 +438,6 @@ def compile_results(self, results): 'g2_err_psf_reconv', 'T_psf_reconv', 'T_err_psf_reconv', - 'r50_psf_reconv', - 'r50_err_psf_reconv', ] output_dict = {k: {kk: [] for kk in names2} for k in names} for idx in range(len(results)): @@ -498,29 +484,18 @@ def compile_results(self, results): 'g1_psf_orig', 'g2_psf_orig', 'g1_err_psf_orig', 'g2_err_psf_orig', 'T_psf_orig', 'T_err_psf_orig', - 'r50_psf_orig', 'r50_err_psf_orig', 'g1_psf_reconv', 'g2_psf_reconv', 'g1_err_psf_reconv', 'g2_err_psf_reconv', 'T_psf_reconv', 'T_err_psf_reconv', - 'r50_psf_reconv', 'r50_err_psf_reconv', ): output_dict[name][psf_key].append(results[idx][psf_key]) - # Galaxy half-light radius from the fitted area T = 2 sigma^2: - # r50 = sqrt(ln 2 * T), with d r50 / d T = r50 / (2 T) - if T_gal > 0: - r50_gal = SIGMA_TO_R50 * np.sqrt(T_gal / 2) - r50_gal_err = r50_gal * T_gal_err / (2 * T_gal) - else: - r50_gal = r50_gal_err = np.nan output_dict[name]["g1"].append(g[0]) output_dict[name]["g2"].append(g[1]) output_dict[name]["g1_err"].append(np.sqrt(g_cov[0, 0])) output_dict[name]["g2_err"].append(np.sqrt(g_cov[1, 1])) output_dict[name]["T"].append(T_gal) output_dict[name]["T_err"].append(T_gal_err) - output_dict[name]['r50'].append(r50_gal) - output_dict[name]['r50_err'].append(r50_gal_err) output_dict[name]["flux"].append(flux) output_dict[name]["flux_err"].append(flux_err) output_dict[name]["mag"].append(mag) @@ -739,35 +714,18 @@ def process(self): # ellipticity AND size, written under self-naming res-keys: # reconvolution kernel (psf_res) -> *_psf_reconv # original image PSF (psf_orig_res) -> *_psf_orig - # PSF half-light radius r50 = sqrt(2 ln 2) * sigma with - # sigma = sqrt(T / 2); error from d sigma / d T = 1 / (4 sigma). - def _psf_r50(T_psf, T_psf_err): - sigma = np.sqrt(max(T_psf, 0) / 2) - r50 = SIGMA_TO_R50 * sigma - r50_err = ( - SIGMA_TO_R50 * T_psf_err / (4 * sigma) - if sigma > 0 else np.nan - ) - return r50, r50_err - res['g1_psf_reconv'] = psf_res['g_psf'][0] res['g2_psf_reconv'] = psf_res['g_psf'][1] res['g1_err_psf_reconv'] = psf_res['g_psf_err'][0] res['g2_err_psf_reconv'] = psf_res['g_psf_err'][1] res['T_psf_reconv'] = psf_res['T_psf'] res['T_err_psf_reconv'] = psf_res['T_psf_err'] - res['r50_psf_reconv'], res['r50_err_psf_reconv'] = _psf_r50( - psf_res['T_psf'], psf_res['T_psf_err'] - ) res['g1_psf_orig'] = psf_orig_res['g_psf'][0] res['g2_psf_orig'] = psf_orig_res['g_psf'][1] res['g1_err_psf_orig'] = psf_orig_res['g_psf_err'][0] res['g2_err_psf_orig'] = psf_orig_res['g_psf_err'][1] res['T_psf_orig'] = psf_orig_res['T_psf'] res['T_err_psf_orig'] = psf_orig_res['T_psf_err'] - res['r50_psf_orig'], res['r50_err_psf_orig'] = _psf_r50( - psf_orig_res['T_psf'], psf_orig_res['T_psf_err'] - ) final_res.append(res) n_fitted += 1 count_batch += 1 diff --git a/tests/module/test_make_cat.py b/tests/module/test_make_cat.py index c3728cef..b5077d3a 100644 --- a/tests/module/test_make_cat.py +++ b/tests/module/test_make_cat.py @@ -31,15 +31,15 @@ def info(self, *_args, **_kwargs): "mcal_types_fail", "nfev_fit", "g1", "g1_err", "g2", "g2_err", - "T", "T_err", "r50", "r50_err", + "T", "T_err", "flux", "flux_err", "s2n", "mag", "mag_err", "flags", "mcal_flags", "g1_psf_orig", "g2_psf_orig", "g1_err_psf_orig", "g2_err_psf_orig", - "T_psf_orig", "T_err_psf_orig", "r50_psf_orig", "r50_err_psf_orig", + "T_psf_orig", "T_err_psf_orig", "g1_psf_reconv", "g2_psf_reconv", "g1_err_psf_reconv", "g2_err_psf_reconv", - "T_psf_reconv", "T_err_psf_reconv", "r50_psf_reconv", "r50_err_psf_reconv", + "T_psf_reconv", "T_err_psf_reconv", ] SHEAR_EXTS = ["1M", "1P", "2M", "2P", "NOSHEAR"] @@ -59,7 +59,7 @@ def _ngmix_row(obj_id): "mcal_types_fail": 0, "nfev_fit": 7, "g1": 0.10, "g1_err": 0.011, "g2": -0.20, "g2_err": 0.022, - "T": 0.30, "T_err": 0.033, "r50": 0.40, "r50_err": 0.044, + "T": 0.30, "T_err": 0.033, "flux": 100.0, "flux_err": 1.0, "s2n": 55.0, "mag": 21.0, "mag_err": 0.05, "flags": 0, "mcal_flags": 0, @@ -67,12 +67,10 @@ def _ngmix_row(obj_id): "g1_psf_orig": 0.0041, "g2_psf_orig": -0.0031, "g1_err_psf_orig": 2e-5, "g2_err_psf_orig": 3e-5, "T_psf_orig": 0.071, "T_err_psf_orig": 8e-4, - "r50_psf_orig": 0.221, "r50_err_psf_orig": 1.2e-3, # metacal reconvolution kernel — round and enlarged, distinct values "g1_psf_reconv": 0.0012, "g2_psf_reconv": 0.0022, "g1_err_psf_reconv": 1e-5, "g2_err_psf_reconv": 1e-5, "T_psf_reconv": 0.092, "T_err_psf_reconv": 1e-3, - "r50_psf_reconv": 0.252, "r50_err_psf_reconv": 1.0e-3, } diff --git a/tests/module/test_ngmix.py b/tests/module/test_ngmix.py index 157dc726..9d0e640f 100644 --- a/tests/module/test_ngmix.py +++ b/tests/module/test_ngmix.py @@ -150,15 +150,6 @@ def test_metacal_is_reproducible_with_fixed_seed(): ORIG_PSF_T_ERR = 0.0008 -def _psf_r50_pair(T_psf, T_psf_err, sigma_to_r50): - """``(r50, r50_err)`` for a PSF size, mirroring the source conversion.""" - sigma = np.sqrt(max(T_psf, 0) / 2) - return ( - sigma_to_r50 * sigma, - sigma_to_r50 * T_psf_err / (4 * sigma) if sigma > 0 else np.nan, - ) - - def _fake_metacal_result(T, T_err, T_psf, T_psf_err): """Build one minimal metacal result as produced by the process loop. @@ -167,14 +158,7 @@ def _fake_metacal_result(T, T_err, T_psf, T_psf_err): ``*_psf_reconv`` (metacal reconvolution kernel). The reconvolution-kernel size is the builder's ``T_psf`` argument; the original-PSF size is the fixed ``ORIG_PSF_T``, deliberately different so the un-aliasing is tested. - PSF r50 = ``sqrt(2 ln 2) * sigma`` with ``sigma = sqrt(T / 2)``. """ - from shapepipe.modules.ngmix_package.ngmix import SIGMA_TO_R50 - - r50_reconv, r50_err_reconv = _psf_r50_pair(T_psf, T_psf_err, SIGMA_TO_R50) - r50_orig, r50_err_orig = _psf_r50_pair( - ORIG_PSF_T, ORIG_PSF_T_ERR, SIGMA_TO_R50 - ) per_type = { "nfev": 1, "g": [0.01, -0.02], @@ -197,8 +181,6 @@ def _fake_metacal_result(T, T_err, T_psf, T_psf_err): "g2_err_psf_orig": ORIG_PSF_G_ERR[1], "T_psf_orig": ORIG_PSF_T, "T_err_psf_orig": ORIG_PSF_T_ERR, - "r50_psf_orig": r50_orig, - "r50_err_psf_orig": r50_err_orig, # metacal reconvolution-kernel family "g1_psf_reconv": RECONV_PSF_G[0], "g2_psf_reconv": RECONV_PSF_G[1], @@ -206,8 +188,6 @@ def _fake_metacal_result(T, T_err, T_psf, T_psf_err): "g2_err_psf_reconv": RECONV_PSF_G_ERR[1], "T_psf_reconv": T_psf, "T_err_psf_reconv": T_psf_err, - "r50_psf_reconv": r50_reconv, - "r50_err_psf_reconv": r50_err_reconv, "mcal_flags": 0, } res.update( @@ -216,16 +196,16 @@ def _fake_metacal_result(T, T_err, T_psf, T_psf_err): return res -def test_compile_results_size_columns_are_half_light_radii(): - """Every r50 column is a true half-light radius, on every side. +def test_compile_results_size_columns_are_unaliased(): + """Each PSF family carries its OWN fitted area ``T``, un-aliased (#749). - Galaxy ``r50 = sqrt(ln 2 * T)`` (not the raw area ``pars[4]``) and both - PSF families ``r50 = sqrt(2 ln 2) * sigma`` (not bare sigma), in the same - convention so galaxy / PSF radii are commensurable. The reconvolution - kernel (``*_psf_reconv``) and the original image PSF (``*_psf_orig``) are - independent fits with their own, different sizes (shapepipe#749). + The galaxy area ``T`` is the fitted ``pars[4]``; the reconvolution kernel + (``*_psf_reconv``) and the original image PSF (``*_psf_orig``) are + independent fits with their own, different sizes. Half-light radius is no + longer carried in the catalogue — it is derivable from ``T`` downstream + via :func:`cs_util.size.T_to_r50` — so only the ``T`` columns are checked. """ - from shapepipe.modules.ngmix_package.ngmix import Ngmix, SIGMA_TO_R50 + from shapepipe.modules.ngmix_package.ngmix import Ngmix T, T_err = 0.18, 0.02 T_psf, T_psf_err = 0.09, 0.001 @@ -236,38 +216,18 @@ def test_compile_results_size_columns_are_half_light_radii(): noshear = out["noshear"] - # galaxy: r50 = sqrt(ln2 * T) = 1.17741 * sqrt(T / 2), error dT * r50/(2T) - r50_expected = np.sqrt(np.log(2) * T) - npt.assert_allclose(noshear["r50"], [r50_expected]) - npt.assert_allclose(noshear["r50_err"], [r50_expected * T_err / (2 * T)]) + # galaxy area is the fitted T + npt.assert_allclose(noshear["T"], [T]) + npt.assert_allclose(noshear["T_err"], [T_err]) - # reconvolution kernel: r50 = sqrt(2 ln2) * sigma, sigma = sqrt(T_psf / 2) - sigma_reconv = np.sqrt(T_psf / 2) - npt.assert_allclose(noshear["r50_psf_reconv"], [SIGMA_TO_R50 * sigma_reconv]) - npt.assert_allclose( - noshear["r50_err_psf_reconv"], - [SIGMA_TO_R50 * T_psf_err / (4 * sigma_reconv)], - ) + # reconvolution kernel npt.assert_allclose(noshear["T_psf_reconv"], [T_psf]) npt.assert_allclose(noshear["T_err_psf_reconv"], [T_psf_err]) - # galaxy/reconv-PSF r50 are commensurable: same convention on both sides - npt.assert_allclose( - np.array(noshear["r50"]) / np.array(noshear["r50_psf_reconv"]), - [np.sqrt(T / T_psf)], - ) - # original image PSF carries its OWN size, un-aliased from the kernel - sigma_orig = np.sqrt(ORIG_PSF_T / 2) npt.assert_allclose(noshear["T_psf_orig"], [ORIG_PSF_T]) npt.assert_allclose(noshear["T_err_psf_orig"], [ORIG_PSF_T_ERR]) - npt.assert_allclose(noshear["r50_psf_orig"], [SIGMA_TO_R50 * sigma_orig]) - npt.assert_allclose( - noshear["r50_err_psf_orig"], - [SIGMA_TO_R50 * ORIG_PSF_T_ERR / (4 * sigma_orig)], - ) assert noshear["T_psf_orig"] != noshear["T_psf_reconv"] - assert noshear["r50_psf_orig"] != noshear["r50_psf_reconv"] def test_compile_results_psf_families_are_unaliased(): @@ -330,7 +290,7 @@ def test_compile_results_nan_fills_failed_fit_types(): failed = out["1p"] for col in ( "g1", "g2", "g1_err", "g2_err", "T", "T_err", "flux", "flux_err", - "s2n", "mag", "mag_err", "r50", "r50_err", + "s2n", "mag", "mag_err", ): assert np.isnan(failed[col]).all(), col assert failed["flags"] == [0x8] @@ -359,18 +319,6 @@ def test_get_mcal_flags_ors_per_type_fit_flags(): assert get_mcal_flags(res) == 0xA -def test_compile_results_nonpositive_T_gives_nan_r50(): - """A non-positive fitted area cannot yield a real half-light radius.""" - from shapepipe.modules.ngmix_package.ngmix import Ngmix - - inst = object.__new__(Ngmix) - inst._zero_point = 30.0 - out = inst.compile_results([_fake_metacal_result(-0.05, 0.02, 0.09, 0.001)]) - - assert np.isnan(out["noshear"]["r50"]).all() - assert np.isnan(out["noshear"]["r50_err"]).all() - - def test_average_multiepoch_psf_skips_failed_psf_epochs(): """A failed-PSF epoch must be skipped, not KeyError the whole object. From 85db8c134e366df0d131583500e28db698aa498f Mon Sep 17 00:00:00 2001 From: Cail Daley Date: Sat, 20 Jun 2026 00:35:03 +0200 Subject: [PATCH 08/30] tests: hypothesis property tests for the PSF column refactor Four property-test modules, each mutation-verified non-vacuous: - grammar: orig/reconv never crossed for any distinct inputs; every emitted column matches the NGMIX_[_ERR]_[_] grammar; every NGMIX token in final_cat.param is producible by the writer. - physics: T_reconv > T_orig and |g_reconv| <= |g_orig| over random elliptical PSFs (the dilation clause is the robust transposition catcher). - no-op: average_original_psf leaves gal_obs.psf pristine (no gmix leak), so the restored original-PSF fit cannot alter the galaxy/shear results. - averaging: epoch-weighted mean within per-epoch range, flagged epochs excluded, single survivor passes through, sentinel fills for absent objects. Co-Authored-By: Claude Opus 4.8 --- tests/module/test_psf_averaging_properties.py | 329 ++++++++++++++++ tests/module/test_psf_grammar_properties.py | 353 ++++++++++++++++++ tests/module/test_psf_noop_property.py | 180 +++++++++ tests/module/test_psf_physics_properties.py | 116 ++++++ 4 files changed, 978 insertions(+) create mode 100644 tests/module/test_psf_averaging_properties.py create mode 100644 tests/module/test_psf_grammar_properties.py create mode 100644 tests/module/test_psf_noop_property.py create mode 100644 tests/module/test_psf_physics_properties.py diff --git a/tests/module/test_psf_averaging_properties.py b/tests/module/test_psf_averaging_properties.py new file mode 100644 index 00000000..25c0fc77 --- /dev/null +++ b/tests/module/test_psf_averaging_properties.py @@ -0,0 +1,329 @@ +"""PROPERTY-BASED TESTS FOR PSF EPOCH-AVERAGING + make_cat SENTINELS. + +The two PSF families this module exports (the metacal reconvolution kernel via +``average_multiepoch_psf`` and the original image PSF via +``average_original_psf``) share a single averaging core, +:func:`shapepipe.modules.ngmix_package.ngmix._average_psf_fits`. These +hypothesis properties pin the contract of that core directly — a weighted mean +over the surviving (``flags == 0``) epochs — and the companion sentinel-fill +contract on the make_cat reader, which must leave an obj_id absent from the +ngmix catalogue at its type-specific sentinel. + +Strategies are kept in physically sensible ranges (positive epoch weights, +positive sizes ``T``, ``|g| < 1``) so every generated input is a valid PSF-fit +result the averaging core would actually be handed in production. +""" + +import os +import tempfile +from itertools import zip_longest + +import numpy as np +import numpy.testing as npt +from astropy.io import fits +from hypothesis import given, settings +from hypothesis import strategies as st + +from shapepipe.modules.make_cat_package.make_cat import SaveCatalogue +from shapepipe.modules.ngmix_package.ngmix import _average_psf_fits + + +# --------------------------------------------------------------------------- # +# Strategies for one valid per-epoch PSF-fit result. +# --------------------------------------------------------------------------- # + +# Bounded, non-degenerate floats: positive sizes, |g| < 1, strictly positive +# weights. min_magnitude keeps weights away from 0 so wsum can never vanish on +# the surviving epochs (the core raises ZeroDivisionError there by contract). +_g_component = st.floats(min_value=-0.9, max_value=0.9, allow_nan=False) +_positive_T = st.floats(min_value=1e-3, max_value=10.0, allow_nan=False) +_positive_err = st.floats(min_value=1e-6, max_value=1.0, allow_nan=False) +_weight = st.floats( + min_value=1e-3, max_value=1e3, allow_nan=False, allow_infinity=False +) + + +def _result(g1, g2, t, g1_err, g2_err, t_err, flags=0): + """One ngmix PSF-fit result dict, as the averaging core consumes it.""" + return { + "flags": flags, + "g": np.array([g1, g2]), + "g_err": np.array([g1_err, g2_err]), + "T": t, + "T_err": t_err, + } + + +@st.composite +def _good_epoch(draw): + """A surviving (flags == 0) epoch paired with its positive weight.""" + return ( + _result( + draw(_g_component), draw(_g_component), draw(_positive_T), + draw(_positive_err), draw(_positive_err), draw(_positive_err), + ), + draw(_weight), + ) + + +# --------------------------------------------------------------------------- # +# (a) weighted average lies within [min, max] of the per-epoch values. +# --------------------------------------------------------------------------- # + +@settings(deadline=None, max_examples=50) +@given(st.lists(_good_epoch(), min_size=1, max_size=8)) +def test_average_lies_within_per_epoch_range(epochs): + """A positive-weight weighted mean is a convex combination of its inputs. + + For every averaged component (g1, g2, T) the result must sit within the + [min, max] envelope of the per-epoch values. This is the defining property + of a weighted mean with strictly positive weights; it fails immediately if + the core ever divided by the wrong weight sum, summed unweighted, or let a + value escape the convex hull. + """ + out = _average_psf_fits(epochs) + + g1_vals = np.array([r["g"][0] for r, _ in epochs]) + g2_vals = np.array([r["g"][1] for r, _ in epochs]) + t_vals = np.array([r["T"] for r, _ in epochs]) + + # rtol absorbs only floating-point round-off, not a real bound violation. + npt.assert_array_less(out["g_psf"][0], g1_vals.max() + 1e-9) + npt.assert_array_less(g1_vals.min() - 1e-9, out["g_psf"][0]) + npt.assert_array_less(out["g_psf"][1], g2_vals.max() + 1e-9) + npt.assert_array_less(g2_vals.min() - 1e-9, out["g_psf"][1]) + npt.assert_array_less(out["T_psf"], t_vals.max() + 1e-9) + npt.assert_array_less(t_vals.min() - 1e-9, out["T_psf"]) + + assert out["n_epoch"] == len(epochs) + + +@settings(deadline=None, max_examples=50) +@given(st.lists(_good_epoch(), min_size=1, max_size=8)) +def test_average_matches_explicit_weighted_mean(epochs): + """The core's output equals the textbook weighted mean of the survivors. + + Stronger than the [min, max] envelope: pins the exact value, so a swap of + weighting factor or an off-by-one in the accumulation is caught. + """ + out = _average_psf_fits(epochs) + w = np.array([weight for _, weight in epochs]) + + g = np.array([r["g"] for r, _ in epochs]) + t = np.array([r["T"] for r, _ in epochs]) + npt.assert_allclose(out["g_psf"], (g * w[:, None]).sum(0) / w.sum()) + npt.assert_allclose(out["T_psf"], (t * w).sum() / w.sum()) + + +# --------------------------------------------------------------------------- # +# (b) epochs with flags != 0 are excluded from the average. +# --------------------------------------------------------------------------- # + +@settings(deadline=None, max_examples=50) +@given( + st.lists(_good_epoch(), min_size=1, max_size=6), + st.lists( + st.tuples(_weight, st.integers(min_value=1, max_value=255)), + min_size=1, + max_size=6, + ), +) +def test_flagged_epochs_are_excluded(good_epochs, flagged_specs): + """A failed-PSF epoch (flags != 0) must not enter the average at all. + + Each flagged epoch carries poisoned NaN measurement fields and a huge, + out-of-range T — values that would wreck the mean (NaN-poison it, or shove + it far outside the good-epoch envelope) if they leaked in. The averaged + result and n_epoch must match a clean average over the good epochs only, + proving the flagged ones were dropped, not merely down-weighted. + """ + flagged = [ + ( + _result( + np.nan, np.nan, 1e6, np.nan, np.nan, np.nan, flags=flags + ), + weight, + ) + for weight, flags in flagged_specs + ] + # Interleave good and flagged epochs WITHOUT duplicating any good epoch, so + # exclusion can't be a happy accident of ordering. itertools.zip_longest + # threads them together; the leftover tail of the longer list follows. + mixed = [ + e + for pair in zip_longest(good_epochs, flagged) + for e in pair + if e is not None + ] + + out = _average_psf_fits(mixed) + expected = _average_psf_fits(good_epochs) + + npt.assert_allclose(out["g_psf"], expected["g_psf"]) + npt.assert_allclose(out["T_psf"], expected["T_psf"]) + npt.assert_allclose(out["T_psf_err"], expected["T_psf_err"]) + assert out["n_epoch"] == len(good_epochs) + assert np.isfinite(out["g_psf"]).all() and np.isfinite(out["T_psf"]) + + +# --------------------------------------------------------------------------- # +# (c) a single surviving epoch returns that epoch's value. +# --------------------------------------------------------------------------- # + +@settings(deadline=None, max_examples=50) +@given( + _good_epoch(), + st.lists( + st.tuples(_weight, st.integers(min_value=1, max_value=255)), + max_size=5, + ), +) +def test_single_survivor_returns_its_own_value(survivor, flagged_specs): + """When exactly one epoch survives, its values pass through untouched. + + The lone survivor's weight cancels in mean = (v*w)/w, so the result must be + the survivor's value exactly, regardless of how many flagged epochs (with + arbitrary weights) surround it. n_epoch must be 1. + """ + result, weight = survivor + flagged = [ + (_result(np.nan, np.nan, 1e6, np.nan, np.nan, np.nan, flags=f), w) + for w, f in flagged_specs + ] + out = _average_psf_fits(flagged + [survivor] + flagged) + + npt.assert_allclose(out["g_psf"], result["g"]) + npt.assert_allclose(out["g_psf_err"], result["g_err"]) + npt.assert_allclose(out["T_psf"], result["T"]) + npt.assert_allclose(out["T_psf_err"], result["T_err"]) + assert out["n_epoch"] == 1 + + +# --------------------------------------------------------------------------- # +# (d) make_cat: an obj_id absent from the ngmix cat keeps its sentinel fill. +# --------------------------------------------------------------------------- # + +# The sentinel value each column family is pre-filled with before the matched +# rows are overwritten (mirrors make_cat._save_ngmix_data). The property: a row +# whose obj_id never appears among the ngmix ids keeps exactly these. +_SENTINELS = { + "NGMIX_T_GAL_NOSHEAR": 0.0, + "NGMIX_SNR_GAL_NOSHEAR": 0.0, + "NGMIX_FLAGS_GAL_NOSHEAR": 0.0, + "NGMIX_T_PSF_ORIG_NOSHEAR": 0.0, + "NGMIX_T_PSF_RECONV_NOSHEAR": 0.0, + "NGMIX_FLUX_ERR_GAL_NOSHEAR": -1.0, + "NGMIX_MAG_ERR_GAL_NOSHEAR": -1.0, + "NGMIX_G1_GAL_NOSHEAR": -10.0, + "NGMIX_G2_GAL_NOSHEAR": -10.0, + "NGMIX_G1_PSF_ORIG_NOSHEAR": -10.0, + "NGMIX_G1_PSF_RECONV_NOSHEAR": -10.0, + "NGMIX_T_ERR_GAL_NOSHEAR": 1e30, + "NGMIX_T_ERR_PSF_ORIG_NOSHEAR": 1e30, + "NGMIX_T_ERR_PSF_RECONV_NOSHEAR": 1e30, + "NGMIX_N_EPOCH": 0.0, + "NGMIX_MCAL_FLAGS": 0.0, + "NGMIX_MCAL_TYPES_FAIL": 0.0, +} + +# Per-key write format and a measured value distinct from every sentinel, so a +# matched row is unmistakably "overwritten" and an absent row unmistakably not. +_NGMIX_KEYS = [ + "id", "n_epoch_model", "mcal_types_fail", "nfev_fit", + "g1", "g1_err", "g2", "g2_err", "T", "T_err", + "flux", "flux_err", "s2n", "mag", "mag_err", "flags", "mcal_flags", + "g1_psf_orig", "g2_psf_orig", "g1_err_psf_orig", "g2_err_psf_orig", + "T_psf_orig", "T_err_psf_orig", + "g1_psf_reconv", "g2_psf_reconv", "g1_err_psf_reconv", "g2_err_psf_reconv", + "T_psf_reconv", "T_err_psf_reconv", +] +_INT_KEYS = { + "id", "n_epoch_model", "mcal_types_fail", "nfev_fit", "flags", "mcal_flags" +} +_SHEAR_EXTS = ["1M", "1P", "2M", "2P", "NOSHEAR"] + + +class _NullLogger: + def info(self, *_args, **_kwargs): + pass + + +def _measured_row(obj_id): + """One fit object whose every value is far from any sentinel (5 / 0.5).""" + row = {key: (5 if key in _INT_KEYS else 0.5) for key in _NGMIX_KEYS} + row["id"] = obj_id + return row + + +def _write_ngmix_cat(path, obj_ids): + rows = [_measured_row(oid) for oid in obj_ids] + hdus = [fits.PrimaryHDU()] + for ext in _SHEAR_EXTS: + cols = [ + fits.Column( + name=key, + format="K" if key in _INT_KEYS else "D", + array=np.array([row[key] for row in rows]), + ) + for key in _NGMIX_KEYS + ] + hdus.append(fits.BinTableHDU.from_columns(cols, name=ext)) + fits.HDUList(hdus).writeto(path, overwrite=True) + + +def _run_save_ngmix(ngmix_path, obj_id): + inst = object.__new__(SaveCatalogue) + inst._obj_id = np.asarray(obj_id) + inst._output_dict = {} + inst._cat_size_target = len(inst._obj_id) + inst._w_log = _NullLogger() + err_msg = inst._save_ngmix_data(str(ngmix_path)) + assert err_msg is None + return inst._output_dict + + +# Distinct positive integer obj_ids; split into "fit by ngmix" vs "absent". +_distinct_ids = st.lists( + st.integers(min_value=1, max_value=10_000), + min_size=2, + max_size=8, + unique=True, +) + + +@settings(deadline=None, max_examples=25) +@given(_distinct_ids, st.data()) +def test_absent_obj_id_keeps_sentinel_fill(all_ids, data): + """An obj_id SExtractor saw but ngmix never fit keeps every sentinel. + + The final catalogue carries all of ``all_ids``; ngmix fit only a non-empty + proper subset. The unfit rows must retain the exact per-column sentinel + (0 / -10 / 1e30 / -1), while the fit rows must have been overwritten to the + measured value — so this is not vacuously satisfied by an all-sentinel cat. + """ + fit_ids = data.draw( + st.lists(st.sampled_from(all_ids), min_size=1, unique=True).filter( + lambda s: 0 < len(s) < len(all_ids) + ) + ) + absent_ids = [oid for oid in all_ids if oid not in fit_ids] + + with tempfile.TemporaryDirectory() as tmp: + ngmix_path = os.path.join(tmp, "ngmix.fits") + _write_ngmix_cat(ngmix_path, fit_ids) + out = _run_save_ngmix(ngmix_path, all_ids) + + absent_idx = [all_ids.index(oid) for oid in absent_ids] + fit_idx = [all_ids.index(oid) for oid in fit_ids] + + for col, sentinel in _SENTINELS.items(): + arr = np.asarray(out[col]) + npt.assert_allclose( + arr[absent_idx], sentinel, + err_msg=f"{col}: absent rows lost their sentinel {sentinel}", + ) + # The fit rows were overwritten — measured value 5 (int cols) or 0.5, + # both distinct from every sentinel, so the fill wasn't global. + assert not np.any(np.isclose(arr[fit_idx], sentinel)), ( + f"{col}: a fit row still carries the sentinel {sentinel}" + ) diff --git a/tests/module/test_psf_grammar_properties.py b/tests/module/test_psf_grammar_properties.py new file mode 100644 index 00000000..5123d7f3 --- /dev/null +++ b/tests/module/test_psf_grammar_properties.py @@ -0,0 +1,353 @@ +"""PROPERTY-BASED TESTS FOR THE NGMIX PSF COLUMN GRAMMAR. + +Drives ``SaveCatalogue._save_ngmix_data`` (the make_cat writer) under +hypothesis to pin three invariants of the renamed grammar +``NGMIX[m]_[_ERR]_[_]`` (shapepipe#749), where the +ORIGINAL image PSF (``PSF_ORIG``) and the metacal RECONVOLUTION kernel +(``PSF_RECONV``) are independent fits of *different* PSFs: + +(a) un-aliasing holds for ALL inputs: for arbitrary distinct values fed as the + orig vs reconv ngmix source columns, every emitted ``*_PSF_ORIG_*`` column + traces to its orig source and every ``*_PSF_RECONV_*`` to its reconv + source, and the two families are never crossed; +(b) every column name the writer emits matches the grammar regex; +(c) every ``NGMIX_*`` token the shipped param file + (``example/cfis/final_cat.param``) names is a column the writer can + produce — writer/param-file consistency. + +Setup mirrors ``tests/module/test_make_cat.py``: a synthetic ngmix FITS with +the five shear-type extensions carrying the exact ``compile_results`` key set, +read back through ``_save_ngmix_data``. +""" + +import re +from pathlib import Path + +import numpy as np +import numpy.testing as npt +from astropy.io import fits +from hypothesis import given, settings +from hypothesis import strategies as st + +from shapepipe.modules.make_cat_package.make_cat import SaveCatalogue + + +# --------------------------------------------------------------------------- +# Harness (mirrors tests/module/test_make_cat.py) +# --------------------------------------------------------------------------- + +class _NullLogger: + def info(self, *_args, **_kwargs): + pass + + +SHEAR_EXTS = ["1M", "1P", "2M", "2P", "NOSHEAR"] + +# The full per-object key set ngmix.compile_results emits into every +# shear-type extension. Integer-typed keys carry FITS format "K", the rest +# "D"; everything else mirrors test_make_cat._ngmix_row. +INT_KEYS = { + "id", "n_epoch_model", "mcal_types_fail", "nfev_fit", "flags", "mcal_flags", +} +NGMIX_KEYS = [ + "id", "n_epoch_model", "mcal_types_fail", "nfev_fit", + "g1", "g1_err", "g2", "g2_err", + "T", "T_err", + "flux", "flux_err", "s2n", "mag", "mag_err", + "flags", "mcal_flags", + "g1_psf_orig", "g2_psf_orig", + "g1_err_psf_orig", "g2_err_psf_orig", + "T_psf_orig", "T_err_psf_orig", + "g1_psf_reconv", "g2_psf_reconv", + "g1_err_psf_reconv", "g2_err_psf_reconv", + "T_psf_reconv", "T_err_psf_reconv", +] + +# The six orig-PSF source keys and the six reconv-PSF source keys, paired by +# the output column they feed. Each pair is (output-column infix, source key); +# the infix slots into f"NGMIX_{infix}_{shear}". Holding this mapping +# explicitly is what makes the un-aliasing property check each column against +# its OWN source rather than against a single family-wide value, so a within- +# family swap (e.g. G1 <-> T, or value <-> error) is caught too. +ORIG_COLS = { + "G1_PSF_ORIG": "g1_psf_orig", + "G2_PSF_ORIG": "g2_psf_orig", + "G1_ERR_PSF_ORIG": "g1_err_psf_orig", + "G2_ERR_PSF_ORIG": "g2_err_psf_orig", + "T_PSF_ORIG": "T_psf_orig", + "T_ERR_PSF_ORIG": "T_err_psf_orig", +} +RECONV_COLS = { + "G1_PSF_RECONV": "g1_psf_reconv", + "G2_PSF_RECONV": "g2_psf_reconv", + "G1_ERR_PSF_RECONV": "g1_err_psf_reconv", + "G2_ERR_PSF_RECONV": "g2_err_psf_reconv", + "T_PSF_RECONV": "T_psf_reconv", + "T_ERR_PSF_RECONV": "T_err_psf_reconv", +} + + +def _base_row(obj_id): + """One object's per-key values; PSF keys overwritten by the caller.""" + return { + "id": obj_id, + "n_epoch_model": 3, "mcal_types_fail": 0, "nfev_fit": 7, + "g1": 0.10, "g1_err": 0.011, "g2": -0.20, "g2_err": 0.022, + "T": 0.30, "T_err": 0.033, + "flux": 100.0, "flux_err": 1.0, "s2n": 55.0, + "mag": 21.0, "mag_err": 0.05, + "flags": 0, "mcal_flags": 0, + "g1_psf_orig": 0.0041, "g2_psf_orig": -0.0031, + "g1_err_psf_orig": 2e-5, "g2_err_psf_orig": 3e-5, + "T_psf_orig": 0.071, "T_err_psf_orig": 8e-4, + "g1_psf_reconv": 0.0012, "g2_psf_reconv": 0.0022, + "g1_err_psf_reconv": 1e-5, "g2_err_psf_reconv": 1e-5, + "T_psf_reconv": 0.092, "T_err_psf_reconv": 1e-3, + } + + +def _write_ngmix_cat(path, rows): + """Write a synthetic ngmix FITS with the five shear-type extensions. + + Each extension carries the exact compile_results key set; PSF families are + object-level, so the same per-object values are written across extensions. + """ + hdus = [fits.PrimaryHDU()] + for ext in SHEAR_EXTS: + cols = [ + fits.Column( + name=key, + format="K" if key in INT_KEYS else "D", + array=np.array([row[key] for row in rows]), + ) + for key in NGMIX_KEYS + ] + hdus.append(fits.BinTableHDU.from_columns(cols, name=ext)) + fits.HDUList(hdus).writeto(path, overwrite=True) + + +def _run_save_ngmix(ngmix_path, obj_ids, cat_size_target=None): + """Drive _save_ngmix_data and return its populated output dict.""" + inst = object.__new__(SaveCatalogue) + inst._obj_id = np.asarray(obj_ids) + inst._output_dict = {} + inst._cat_size_target = ( + len(inst._obj_id) if cat_size_target is None else cat_size_target + ) + inst._w_log = _NullLogger() + + err_msg = inst._save_ngmix_data(str(ngmix_path)) + assert err_msg is None + return inst._output_dict + + +# --------------------------------------------------------------------------- +# Grammar regex + producible-column model +# --------------------------------------------------------------------------- + +# NGMIX[m]_[_ERR]__, plus the three per-object +# metadata columns that carry neither OBJECT nor SHEAR. Anchored so a stray +# legacy token (_PSFo, _Tpsf, NGMIX_ELL_*) fails to match. +_COMPONENT = "G1|G2|T|SNR|FLUX|MAG|FLAGS" +_OBJECT = "GAL|PSF_ORIG|PSF_RECONV" +_SHEAR = "NOSHEAR|1P|1M|2P|2M" +GRAMMAR_RE = re.compile( + rf"^NGMIXm?_(?:{_COMPONENT})(?:_ERR)?_(?:{_OBJECT})_(?:{_SHEAR})$" + rf"|^NGMIXm?_(?:MCAL_FLAGS|MCAL_TYPES_FAIL|N_EPOCH)$" +) + +# example/cfis/final_cat.param lives two levels up from tests/module/. +PARAM_PATH = ( + Path(__file__).resolve().parents[2] / "example" / "cfis" / "final_cat.param" +) + +# The one param-file NGMIX token outside the _save_ngmix_data grammar: the +# moments-failure flag, set by a different code path (not the metacal writer). +# Named explicitly so the consistency check stays honest — it is excluded by +# name, not silently dropped, and the test asserts it really is absent from the +# producible set so this carve-out can never mask a real grammar regression. +NON_WRITER_PARAM_TOKENS = {"NGMIX_MOM_FAIL"} + + +def _param_ngmix_tokens(): + """Every distinct NGMIX_* token named in the shipped param file.""" + text = PARAM_PATH.read_text() + return set(re.findall(r"\bNGMIX[A-Za-z0-9_]*", text)) + + +def _produced_columns(obj_ids): + """The set of column names the writer emits for these obj_ids.""" + rows = [_base_row(oid) for oid in obj_ids] + import tempfile + with tempfile.NamedTemporaryFile(suffix=".fits", delete=False) as fh: + path = fh.name + _write_ngmix_cat(path, rows) + out = _run_save_ngmix(path, obj_ids) + Path(path).unlink() + return set(out) + + +# --------------------------------------------------------------------------- +# Strategies (physically-sensible ranges so generated inputs are valid) +# --------------------------------------------------------------------------- + +# Distinct, finite object NUMBERs. +obj_ids_strategy = st.lists( + st.integers(min_value=1, max_value=10_000), + min_size=1, max_size=5, unique=True, +) + +# A finite, well-separated value: ellipticity-/size-scale magnitudes the writer +# copies verbatim. Bounded away from zero and from each other (see below). +_finite = st.floats( + min_value=-1.0, max_value=1.0, + allow_nan=False, allow_infinity=False, +).filter(lambda x: abs(x) > 1e-6) + + +# --------------------------------------------------------------------------- +# (a) Un-aliasing for ALL inputs +# --------------------------------------------------------------------------- + +@settings(deadline=None, max_examples=25) +@given( + orig_seed=_finite, + reconv_seed=_finite, + obj_ids=obj_ids_strategy, +) +def test_psf_orig_reconv_unaliased_for_all_distinct_sources( + orig_seed, reconv_seed, obj_ids, tmp_path_factory +): + """Orig/reconv PSF columns trace to their OWN source, never crossed (#749). + + For arbitrary distinct seeds, fill the six orig-PSF source keys with values + derived from ``orig_seed`` and the six reconv-PSF source keys from + ``reconv_seed`` — each of the twelve keys a DISTINCT value (seed times a + per-key multiplier). Then assert every emitted ``*_PSF_ORIG_*`` / + ``*_PSF_RECONV_*`` column equals the value of the exact source key it must + read. A family swap (orig<->reconv) OR a within-family swap (G1<->T, + value<->error) breaks at least one equality, so the property is non-vacuous. + """ + # Multipliers so each of the six keys per family gets its own value; the + # two families also never collide because orig_seed != reconv_seed is + # enforced below and the multiplier set is shared. + mult = { + "g1": 1.0, "g2": 1.3, "g1_err": 1.7, "g2_err": 2.1, + "T": 2.6, "T_err": 3.1, + } + # Guard against orig_seed and reconv_seed mapping any key to equal values + # (would make a cross undetectable for that key). assume() would discard; + # instead nudge reconv so the families are provably disjoint per key. + if abs(orig_seed - reconv_seed) < 1e-3: + reconv_seed = reconv_seed + (0.5 if reconv_seed < 0 else -0.5) + + def _src_val(suffix, family_seed): + # suffix e.g. "g1", "g1_err", "T"; matched to a multiplier by stripping + # the "_psf_orig"/"_psf_reconv" tail the caller already removed. + return family_seed * mult[suffix] + + rows = [] + for oid in obj_ids: + row = _base_row(oid) + for col_infix, src_key in ORIG_COLS.items(): + suffix = src_key.replace("_psf_orig", "") + row[src_key] = _src_val(suffix, orig_seed) + for col_infix, src_key in RECONV_COLS.items(): + suffix = src_key.replace("_psf_reconv", "") + row[src_key] = _src_val(suffix, reconv_seed) + rows.append(row) + + ngmix_path = tmp_path_factory.mktemp("ng") / "ngmix.fits" + _write_ngmix_cat(ngmix_path, rows) + out = _run_save_ngmix(ngmix_path, obj_ids) + + n = len(obj_ids) + for shear in SHEAR_EXTS: + for col_infix, src_key in ORIG_COLS.items(): + suffix = src_key.replace("_psf_orig", "") + npt.assert_allclose( + out[f"NGMIX_{col_infix}_{shear}"], + [_src_val(suffix, orig_seed)] * n, + err_msg=f"NGMIX_{col_infix}_{shear} not from {src_key}", + ) + for col_infix, src_key in RECONV_COLS.items(): + suffix = src_key.replace("_psf_reconv", "") + npt.assert_allclose( + out[f"NGMIX_{col_infix}_{shear}"], + [_src_val(suffix, reconv_seed)] * n, + err_msg=f"NGMIX_{col_infix}_{shear} not from {src_key}", + ) + # The two families are genuinely distinct everywhere — the un-aliasing. + assert not np.allclose( + out[f"NGMIX_G1_PSF_ORIG_{shear}"], + out[f"NGMIX_G1_PSF_RECONV_{shear}"], + ) + assert not np.allclose( + out[f"NGMIX_T_PSF_ORIG_{shear}"], + out[f"NGMIX_T_PSF_RECONV_{shear}"], + ) + + +# --------------------------------------------------------------------------- +# (b) Every emitted column name matches the grammar +# --------------------------------------------------------------------------- + +@settings(deadline=None, max_examples=25) +@given(obj_ids=obj_ids_strategy) +def test_emitted_column_names_match_grammar(obj_ids, tmp_path_factory): + """Every column _save_ngmix_data emits matches the grammar regex. + + The grammar is ``NGMIX[m]_[_ERR]__`` plus the + three metadata columns. The regex is anchored, so a legacy token (``_PSFo``, + ``_Tpsf``, ``NGMIX_ELL_*``) or a malformed name would fail to match. Run + over hypothesis-varied object sets to confirm the emitted name set is + input-independent and always well-formed. + """ + rows = [_base_row(oid) for oid in obj_ids] + ngmix_path = tmp_path_factory.mktemp("ng") / "ngmix.fits" + _write_ngmix_cat(ngmix_path, rows) + out = _run_save_ngmix(ngmix_path, obj_ids) + + assert out, "writer produced no columns" + for col in out: + assert GRAMMAR_RE.match(col), f"off-grammar column emitted: {col!r}" + + +# --------------------------------------------------------------------------- +# (c) Param-file tokens are all producible by the writer +# --------------------------------------------------------------------------- + +@settings(deadline=None, max_examples=15) +@given(obj_ids=obj_ids_strategy) +def test_param_file_ngmix_tokens_are_producible(obj_ids): + """Every NGMIX_* token the param file names is a column the writer produces. + + ``example/cfis/final_cat.param`` is the consumer contract for the final + catalogue; ``create_final_cat`` keeps only the listed columns, so a token it + names that the writer cannot emit is a silent, empty column downstream. The + one known exception — ``NGMIX_MOM_FAIL``, the moments-failure flag set by a + different path — is excluded BY NAME, and we assert it is genuinely outside + the producible set so the carve-out cannot hide a real grammar regression. + + Driven over varied object sets to confirm the producible set is stable + across inputs (the contract is structural, not data-dependent). + """ + produced = _produced_columns(obj_ids) + param_tokens = _param_ngmix_tokens() + + # The carve-out is real: the excluded token is NOT producible. (If a future + # change made the writer emit it, this fails and we drop the exclusion.) + for tok in NON_WRITER_PARAM_TOKENS: + assert tok not in produced, ( + f"{tok} is now producible — remove it from NON_WRITER_PARAM_TOKENS" + ) + + missing = (param_tokens - NON_WRITER_PARAM_TOKENS) - produced + assert not missing, ( + f"param file names NGMIX columns the writer cannot produce: " + f"{sorted(missing)}" + ) + + # Non-vacuous: the writer DOES cover real grammar tokens from the param + # file (so the assertion above is exercised against a non-empty set). + assert (param_tokens - NON_WRITER_PARAM_TOKENS) & produced diff --git a/tests/module/test_psf_noop_property.py b/tests/module/test_psf_noop_property.py new file mode 100644 index 00000000..003efcaf --- /dev/null +++ b/tests/module/test_psf_noop_property.py @@ -0,0 +1,180 @@ +"""PROPERTY-BASED TESTS: the original-PSF pre-fit is a science no-op. + +``average_original_psf`` fits each epoch's psfex/mccd PSF stamp +(``gal_obs.psf``) with the shared ``psf_runner`` to populate the independent +original-PSF columns (``NGMIX_*_PSF_ORIG``; shapepipe#749). It fits a *copy* of +each PSF observation, so the observation metacal later deep-copies and consumes +(``boot.go(gal_obs_list)``) is left untouched. + +This is the invariant that makes the add-column refactor bit-identical to a +no-original-fit branch on the galaxy/shear results: if the pre-fit cannot alter +``gal_obs.psf``, it cannot alter what metacal consumes, so the galaxy results +are unchanged. ``PSFRunner.go`` sets ``.gmix`` (and ``.meta['result']``) on the +observation it fits; were that ``gal_obs.psf`` itself, a stray gmix would +survive metacal's deep copy and be reused as the ``MetacalFitGaussPSF`` fallback +when admom+ML both fail, silently rescuing objects the base branch dropped +(``BootPSFFailure``). + +The unit guard ``test_original_psf_prefit_leaves_gal_obs_psf_pristine`` in +``tests/module/test_ngmix.py`` pins this on one fixed input; here it is +generalised over varied PSF shapes, noise, epoch counts, stamp sizes, and RNG +seeds with hypothesis, so the no-op holds across the input space rather than at +a single point. The harness (``make_data`` -> ``make_ngmix_observation`` -> +``make_runners`` -> ``average_original_psf``) mirrors that unit test exactly. +""" + +import copy + +from hypothesis import given, settings +from hypothesis import strategies as st +import numpy as np +import numpy.testing as npt +from ngmix.observation import ObsList + +from shapepipe.modules.ngmix_package.ngmix import ( + average_original_psf, + get_prior, + make_ngmix_observation, + make_runners, +) +from shapepipe.testing.simulate import make_data + +PIXEL_SCALE = 0.1857 + + +def _jacobian_snapshot(jacobian): + """The six floats fully defining an ngmix Jacobian (affine + origin).""" + return ( + jacobian.dvdrow, jacobian.dvdcol, + jacobian.dudrow, jacobian.dudcol, + jacobian.row0, jacobian.col0, + ) + + +def _build_gal_obs_list(psf_shear, noise, n_epochs, img_size, data_seed, obs_seed): + """Build the per-epoch galaxy ObsList metacal would consume. + + Mirrors the construction in + ``test_original_psf_prefit_leaves_gal_obs_psf_pristine``: simulate epochs + with ``make_data`` (here over hypothesis-varied PSF shape, noise, epoch + count, stamp size, and seed), then wrap each through + ``make_ngmix_observation`` so each ``gal_obs.psf`` is a real, freshly built + PSF observation carrying no gmix. + """ + rng = np.random.RandomState(obs_seed) + gals, psfs, _, weights, flags, jacobs = make_data( + rng=np.random.RandomState(data_seed), + shear=(0.0, 0.0), + noise=noise, + n_epochs=n_epochs, + img_size=img_size, + psf_shear=psf_shear, + ) + gal_obs_list = ObsList() + for n_e in range(n_epochs): + gal_obs_list.append( + make_ngmix_observation( + gals[n_e], weights[n_e], flags[n_e], psfs[n_e], jacobs[n_e], + rng, + ) + ) + return gal_obs_list + + +# Physically sensible strategies: |g| well below 1 (Moffat shear must stay +# realisable), positive noise, >=1 epoch, odd stamp large enough to host the +# 0.55" FWHM PSF, and bounded RNG seeds. +psf_g_component = st.floats( + min_value=-0.3, max_value=0.3, allow_nan=False, allow_infinity=False +) +noise_strategy = st.floats( + min_value=1e-6, max_value=1e-3, allow_nan=False, allow_infinity=False +) +n_epochs_strategy = st.integers(min_value=1, max_value=3) +img_size_strategy = st.sampled_from([41, 51, 65]) +seed_strategy = st.integers(min_value=0, max_value=2**31 - 1) + + +@given( + g1=psf_g_component, + g2=psf_g_component, + noise=noise_strategy, + n_epochs=n_epochs_strategy, + img_size=img_size_strategy, + data_seed=seed_strategy, + obs_seed=seed_strategy, +) +@settings(deadline=None, max_examples=25) +def test_average_original_psf_leaves_gal_obs_psf_pristine_property( + g1, g2, noise, n_epochs, img_size, data_seed, obs_seed +): + """For arbitrary inputs, the original-PSF pre-fit leaves gal_obs.psf pristine. + + The no-op invariant that guarantees bit-identical galaxy/shear results: + after ``average_original_psf``, every epoch's ``gal_obs.psf`` is unchanged + in every channel metacal reads — + + * ``has_gmix()`` is still False (no fitted gmix leaked in), and no fit + ``result`` leaked into ``.meta``; + * the PSF ``image``, ``weight``, and ``jacobian`` are bit-for-bit what they + were before the call. + + Each assertion is load-bearing: were the pre-fit to fit ``gal_obs.psf`` + itself instead of a copy, ``PSFRunner.go`` would set ``.gmix`` / + ``.meta['result']`` (first two assertions catch that), and centroid/weight + handling inside the fit could perturb the arrays (the array/jacobian + snapshots catch that). The fit still *runs* on every epoch — this pins that + it runs harmlessly on a copy, not that it is skipped. + """ + psf_shear = (g1, g2) + rng = np.random.RandomState(obs_seed) + prior = get_prior(PIXEL_SCALE, rng) + gal_obs_list = _build_gal_obs_list( + psf_shear, noise, n_epochs, img_size, data_seed, obs_seed + ) + + # Pre-state: a freshly built PSF observation carries no gmix or result, and + # we snapshot every array channel metacal will consume. + assert not any(obs.psf.has_gmix() for obs in gal_obs_list) + pre = [ + { + "image": obs.psf.image.copy(), + "weight": obs.psf.weight.copy(), + "jacobian": _jacobian_snapshot(obs.psf.jacobian), + "meta": copy.deepcopy(dict(obs.psf.meta)), + } + for obs in gal_obs_list + ] + + runner, psf_runner = make_runners(prior, 1.0, rng) + psf_orig_res = average_original_psf(gal_obs_list, psf_runner) + + # The pre-fit actually did work (so the no-op is non-vacuous): at least one + # epoch's PSF was fit and averaged in. + assert psf_orig_res["n_epoch"] >= 1 + + for obs, snapshot in zip(gal_obs_list, pre): + psf = obs.psf + # No fitted gmix leaked into the observation metacal consumes. + assert not psf.has_gmix(), ( + "original-PSF pre-fit leaked a gmix into gal_obs.psf" + ) + # No fit result leaked into its metadata. + assert "result" not in psf.meta, ( + "original-PSF pre-fit leaked a fit result into gal_obs.psf.meta" + ) + assert dict(psf.meta) == snapshot["meta"], ( + "original-PSF pre-fit mutated gal_obs.psf.meta" + ) + # The image, weight, and jacobian metacal reads are bit-for-bit intact. + npt.assert_array_equal( + psf.image, snapshot["image"], + err_msg="original-PSF pre-fit mutated gal_obs.psf.image", + ) + npt.assert_array_equal( + psf.weight, snapshot["weight"], + err_msg="original-PSF pre-fit mutated gal_obs.psf.weight", + ) + assert _jacobian_snapshot(psf.jacobian) == snapshot["jacobian"], ( + "original-PSF pre-fit mutated gal_obs.psf.jacobian" + ) diff --git a/tests/module/test_psf_physics_properties.py b/tests/module/test_psf_physics_properties.py new file mode 100644 index 00000000..6b67f164 --- /dev/null +++ b/tests/module/test_psf_physics_properties.py @@ -0,0 +1,116 @@ +"""PROPERTY-BASED TESTS FOR NGMIX METACAL PSF PHYSICS. + +Hypothesis tests over *random elliptical PSF stamps*. The metacal +reconvolution kernel is round and dilated by construction, so for ANY input +PSF ellipticity / size it must come back rounder and larger than the original +image-PSF fit: + + T_reconv >= T_orig and |g_reconv| <= |g_orig|. + +This is a genuine metacal physics invariant (shapepipe#749) and, because the +two PSF families (``reconv`` -> ``average_multiepoch_psf``, ``orig`` -> +``average_original_psf``) share key names, it also catches any orig/reconv +routing transposition in :func:`do_ngmix_metacal` — a swap that is otherwise +silent. The single-example sibling lives in +``test_ngmix.test_reconv_psf_is_rounder_and_larger_than_orig_on_elliptical_psf``; +this widens it to a swept family of PSFs. +""" + +import numpy as np +from hypothesis import given, settings +from hypothesis import strategies as st + + +def _do_ngmix_metacal_on_psf(psf_shear, psf_fwhm=0.55, img_size=51, seed=7, + n_epochs=2): + """Run do_ngmix_metacal on a simulated stamp with a given PSF shape. + + Mirrors ``test_ngmix._do_ngmix_metacal_on_psf`` (the harness this file is + asked to reuse), exposing ``psf_fwhm`` so the PSF *size* can be swept too. + The galaxy stamp is drawn from a fixed seed (123); only the metacal RNG + varies with ``seed``. Returns ``(resdict, reconv, orig)`` — the + :class:`MetacalResult` NamedTuple, still unpacking positionally. + """ + from shapepipe.modules.ngmix_package.ngmix import ( + Postage_stamp, + do_ngmix_metacal, + get_prior, + ) + from shapepipe.testing.simulate import make_data + + rng = np.random.RandomState(seed) + prior = get_prior(0.1857, rng) + gals, psfs, _, weights, flags, jacobs = make_data( + rng=np.random.RandomState(123), + shear=(0.0, 0.0), + noise=1e-5, + n_epochs=n_epochs, + img_size=img_size, + psf_fwhm=psf_fwhm, + psf_shear=psf_shear, + ) + stamp = Postage_stamp(bkg_sub=False, megacam_flip=False) + stamp.gals, stamp.psfs, stamp.weights, stamp.flags, stamp.jacobs = ( + gals, + psfs, + weights, + flags, + jacobs, + ) + return do_ngmix_metacal(stamp, prior, 1.0, rng) + + +# Physically-sensible PSF strategies: ellipticity well inside |g| < 1 (real +# PSFs are mildly elliptical; a magnitude up to ~0.18 keeps the Moffat shear +# valid and the fit well-behaved), and a Moffat FWHM in arcsec spanning the +# realistic ground-based range. NB: the original-PSF fit uses the default galaxy +# prior (psf_fit_prior="galaxy"), which shrinks the *recovered* |g_orig| to +# O(1e-4) even for input |g|~0.1 (the shapepipe#749 prior-domination) -- so the +# dilation (T) inequality, not the rounding (|g|) one, is the primary guard. +_g_complex = st.complex_numbers( + min_magnitude=0.02, max_magnitude=0.18, allow_nan=False, allow_infinity=False +) +_psf_fwhm = st.floats(min_value=0.5, max_value=0.7, allow_nan=False, + allow_infinity=False) + + +@settings(deadline=None, max_examples=25) +@given(g=_g_complex, psf_fwhm=_psf_fwhm) +def test_reconv_psf_rounder_and_larger_than_orig_over_random_psfs(g, psf_fwhm): + """For ANY elliptical PSF, the reconv kernel is rounder and larger. + + Sweeps the input PSF ellipticity ``(g1, g2)`` (magnitude 0.02-0.18, any + orientation) and the Moffat FWHM, runs the full :func:`do_ngmix_metacal`, + and asserts the metacal reconvolution kernel is round + dilated relative to + the independently-fit original image PSF: + + ``T_reconv >= T_orig`` and ``|g_reconv| <= |g_orig|``. + + The ``T`` inequality is the robust transposition catcher: the kernel is + dilated, so ``T_reconv`` exceeds ``T_orig`` by a clear margin (~0.01-0.03) + and a strict ``>`` is asserted below -- a transposition makes ``T_reconv`` + the smaller value and fails it hard. The ``|g|`` inequality corroborates but + with a thin margin: under the default galaxy prior the recovered ``|g_orig|`` + is only O(1e-4) (shapepipe#749 prior-domination), so it is a secondary check. + """ + _, reconv, orig = _do_ngmix_metacal_on_psf( + (g.real, g.imag), psf_fwhm=psf_fwhm + ) + + g_reconv = np.hypot(*reconv["g_psf"]) + g_orig = np.hypot(*orig["g_psf"]) + + # Round: the reconvolution kernel carries less ellipticity than the + # original PSF it was built to dominate (tiny tolerance for fit noise). + assert g_reconv <= g_orig + 1e-6, ( + f"reconv |g|={g_reconv:.5f} not <= orig |g|={g_orig:.5f} " + f"(input PSF g=({g.real:.3f},{g.imag:.3f}), fwhm={psf_fwhm:.3f})" + ) + + # Larger: the reconvolution kernel is strictly dilated relative to the + # original PSF (observed dT ~0.01-0.03, far above fit noise); a transposition + # makes T_reconv the smaller value and fails this hard. + assert reconv["T_psf"] > orig["T_psf"], ( + f"reconv T={reconv['T_psf']:.5f} not dilated above orig T={orig['T_psf']:.5f} " + f"(input PSF g=({g.real:.3f},{g.imag:.3f}), fwhm={psf_fwhm:.3f})" + ) From dfac16211119488031ec871dec82f8dfdc6ebdb4 Mon Sep 17 00:00:00 2001 From: Cail Daley Date: Sat, 20 Jun 2026 01:21:16 +0200 Subject: [PATCH 09/30] deps: bump cs_util 0.2 -> 0.2.1 so cs_util.size is in the image MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pyproject pins cs_util>=0.1.9 and the lock already resolved it from PyPI, but to 0.2 — the last release *before* the size module. So the dev image installs cs_util, yet `import cs_util.size` raises ModuleNotFoundError: every path that would use the size-conversion helpers is broken in-container. cs_util 0.2.1 (the current PyPI release, "first release with cs_util.size") ships cs_util/size.py with the Gaussian size web (T_to_sigma / sigma_to_fwhm / sigma_to_r50 / ...). `uv lock --upgrade-package cs-util` bumps only that pin; its dependency set is unchanged, so no new heavy deps land. After the CI image rebuild (`uv sync --frozen` in the Dockerfile) `import cs_util.size` resolves. Co-Authored-By: Claude Opus 4.8 --- uv.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/uv.lock b/uv.lock index 7360bb6c..b483439a 100644 --- a/uv.lock +++ b/uv.lock @@ -654,7 +654,7 @@ wheels = [ [[package]] name = "cs-util" -version = "0.2" +version = "0.2.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "astropy", marker = "sys_platform == 'linux'" }, @@ -670,9 +670,9 @@ dependencies = [ { name = "swig", marker = "sys_platform == 'linux'" }, { name = "vos", marker = "sys_platform == 'linux'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/5a/1d/068d7522652fed361d80502c958f37080a9f0cfee5ad201b9dea0197b562/cs_util-0.2.tar.gz", hash = "sha256:4cbe01556fb47e832b5d517af812a6cf4a94856ad134be8c21e74874b4d56de6", size = 24932, upload-time = "2026-04-07T07:59:21.174Z" } +sdist = { url = "https://files.pythonhosted.org/packages/e0/08/d759e6af88c8a53b9c1c863a3bc3b9b5b26cf434f7232875a4bb75bec514/cs_util-0.2.1.tar.gz", hash = "sha256:5dd9be50249c412743ed116c0cb3364d5bcf1ad907be692dc52db0059a00412d", size = 26713, upload-time = "2026-06-10T23:58:23.017Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/5f/63/9dcd114438db8b7be74dee98dc3841e43335b1c0cc6aba64ec4bd1de3c01/cs_util-0.2-py3-none-any.whl", hash = "sha256:e329ea5f2a4f3ea9b8f0649f5c9321bc18ddf4ad76bffc8152fb22e3f4168ef4", size = 29723, upload-time = "2026-04-07T07:59:19.764Z" }, + { url = "https://files.pythonhosted.org/packages/9a/2f/c8d89e8013567c3cf5535b3b941b81033e3c4a81101f3953d611af817dca/cs_util-0.2.1-py3-none-any.whl", hash = "sha256:577abc9ba04da8c78bdba9f6d235deecac99c266a0937e2c44ea1df5b50224de", size = 32209, upload-time = "2026-06-10T23:58:21.766Z" }, ] [[package]] From 413c5ddb89067ae0cec5b4cb77fde4243d1fad1a Mon Sep 17 00:00:00 2001 From: Cail Daley Date: Sat, 20 Jun 2026 01:21:26 +0200 Subject: [PATCH 10/30] refactor: route size conversions through cs_util.size The Gaussian size conversions are now sourced from cs_util.size, the single source of truth shared with sp_validation, instead of being re-derived locally: - ngmix.py get_noise window: np.sqrt(guess[4]/2) -> cs_size.T_to_sigma (guess[4] is the ngmix area parameter T = 2 sigma^2; bit-exact). - make_cat.py PSF_FWHM: galaxy.sigma_to_fwhm -> cs_size.sigma_to_fwhm (bare conversion, no pixel_scale). - utilities/galaxy.sigma_to_fwhm becomes a thin wrapper that keeps the pixel_scale rescaling and ShapePipe input validation but delegates the bare 2*sqrt(2 ln 2) factor to cs_util.size; this preserves its second caller (spread_model.py, which uses pixel_scale) and the test_utilities contract. Matches the old hard-coded 2.35482004503 constant to <1e-11 relative. Co-Authored-By: Claude Opus 4.8 --- src/shapepipe/modules/make_cat_package/make_cat.py | 4 ++-- src/shapepipe/modules/ngmix_package/ngmix.py | 3 ++- src/shapepipe/utilities/galaxy.py | 10 +++++++--- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/src/shapepipe/modules/make_cat_package/make_cat.py b/src/shapepipe/modules/make_cat_package/make_cat.py index 0122f9e4..ca78dc75 100644 --- a/src/shapepipe/modules/make_cat_package/make_cat.py +++ b/src/shapepipe/modules/make_cat_package/make_cat.py @@ -13,10 +13,10 @@ from astropy import coordinates as coords from astropy import units as u from astropy.wcs import WCS +from cs_util import size as cs_size from sqlitedict import SqliteDict from shapepipe.pipeline import file_io -from shapepipe.utilities import galaxy def get_output_name(output_dir, file_number_string): @@ -725,7 +725,7 @@ def _save_psf_data(self, galaxy_psf_path): ) self._add2dict(f"PSF_ELL_{epoch + 1}", e_psf, idx) - psf_fwhm = galaxy.sigma_to_fwhm( + psf_fwhm = cs_size.sigma_to_fwhm( gpc_data["SHAPES"]["SIGMA_PSF_HSM"] ) self._add2dict(f"PSF_FWHM_{epoch + 1}", psf_fwhm, idx) diff --git a/src/shapepipe/modules/ngmix_package/ngmix.py b/src/shapepipe/modules/ngmix_package/ngmix.py index da21af66..56b974d5 100644 --- a/src/shapepipe/modules/ngmix_package/ngmix.py +++ b/src/shapepipe/modules/ngmix_package/ngmix.py @@ -14,6 +14,7 @@ import galsim import numpy as np from astropy.io import fits +from cs_util import size as cs_size from modopt.math.stats import sigma_mad from ngmix.observation import Observation, ObsList from sqlitedict import SqliteDict @@ -970,7 +971,7 @@ def get_noise(gal, weight, guess, pixel_scale, thresh=1.2): sig_tmp = sigma_mad(gal[m_weight]) - gauss_win = galsim.Gaussian(sigma=np.sqrt(guess[4] / 2), flux=guess[5]) + gauss_win = galsim.Gaussian(sigma=cs_size.T_to_sigma(guess[4]), flux=guess[5]) gauss_win = gauss_win.shear(g1=guess[2], g2=guess[3]) gauss_win = gauss_win.drawImage( nx=img_shape[0], ny=img_shape[1], scale=pixel_scale diff --git a/src/shapepipe/utilities/galaxy.py b/src/shapepipe/utilities/galaxy.py index 40948a8f..b593df45 100644 --- a/src/shapepipe/utilities/galaxy.py +++ b/src/shapepipe/utilities/galaxy.py @@ -7,6 +7,7 @@ """ import numpy as np +from cs_util import size as cs_size def sigma_to_fwhm(sigma, pixel_scale=1.0): @@ -15,6 +16,11 @@ def sigma_to_fwhm(sigma, pixel_scale=1.0): Transform standard deviation of a 1D Gaussian, sigma, to FWHM (Full Width Half Maximum). + Thin wrapper over :func:`cs_util.size.sigma_to_fwhm`, the single + source of truth for the bare ``FWHM = 2 sqrt(2 ln 2) sigma`` + conversion; this layer adds the optional ``pixel_scale`` rescaling + and ShapePipe's input validation. + Parameters ---------- sigma : numpy.ndarray @@ -89,6 +95,4 @@ def sigma_to_fwhm(sigma, pixel_scale=1.0): f"Invalid pixel scale {pixel_scale}, needs to be greater than 0.0." ) - cst = 2.35482004503 - - return sigma * cst * pixel_scale + return cs_size.sigma_to_fwhm(sigma) * pixel_scale From e2f37be9334cd819c86ae122951e108bca3c26fd Mon Sep 17 00:00:00 2001 From: Cail Daley Date: Sat, 20 Jun 2026 02:15:14 +0200 Subject: [PATCH 11/30] test: make_data gains psf_shear for an elliptical PSF The PSF property tests (reconv rounder/larger than orig, pristine prefit) need a non-round PSF to exercise. Add an optional psf_shear=(0,0) param to the make_data test helper that shears the Moffat PSF; the default is a no-op, so every existing caller is unchanged. Test-helper only, no pipeline behaviour change. Co-Authored-By: Claude Opus 4.8 --- src/shapepipe/testing/simulate.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/shapepipe/testing/simulate.py b/src/shapepipe/testing/simulate.py index 81b9b6ab..8ab5a06f 100644 --- a/src/shapepipe/testing/simulate.py +++ b/src/shapepipe/testing/simulate.py @@ -21,6 +21,7 @@ def make_data( psf_fwhm=0.55, pixel_scale=0.1857, img_size=201, + psf_shear=(0.0, 0.0), ): """Simulate an exponential galaxy with Moffat PSF. @@ -48,6 +49,12 @@ def make_data( Pixel scale in arcsec/pixel. Default 0.1857. img_size : int, optional Stamp size in pixels (square). Default 201. + psf_shear : tuple of float, optional + Ellipticity (g1, g2) applied to the Moffat PSF. Default (0, 0), a + round PSF. A non-round PSF is what the PSF-leakage / additive-bias + guardrail needs: the PSF carries a shape that an unbiased deconvolution + must remove from a round galaxy. The same sheared PSF is used both to + convolve the object and as the PSF model stamp. Returns ------- @@ -71,7 +78,9 @@ def make_data( if not share_shift: dy, dx = rng.uniform(low=-scale / 2, high=scale / 2, size=2) - psf = galsim.Moffat(beta=2.5, fwhm=psf_fwhm) + psf = galsim.Moffat(beta=2.5, fwhm=psf_fwhm).shear( + g1=psf_shear[0], g2=psf_shear[1] + ) obj = galsim.Convolve( psf, galsim.Exponential(half_light_radius=gal_hlr, flux=gal_flux).shear( From 31d2080676863745be0cf2fc2f1c90dfbd1c402f Mon Sep 17 00:00:00 2001 From: Cail Daley Date: Sat, 20 Jun 2026 16:27:28 +0200 Subject: [PATCH 12/30] review: address PR #761 code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - do_ngmix_metacal: seed the original-PSF pre-fit from a snapshot of the metacal RNG so it no longer advances the rng boot.go consumes — galaxy/shear results are now bit-identical to the pre-PR base (only the *_PSF_ORIG columns are added). Conservative: galaxy science unchanged vs base. - remove the redundant galaxy.sigma_to_fwhm wrapper; inline cs_size.sigma_to_fwhm(sigma) * pixel_scale at call sites (spread_model); cs_util unchanged. - drop leftover psf_fit_prior knob references (docstrings/comments/tests). - canonicalize the ESTIMATOR_COMPONENT_OBJECT grammar spec string across all sites. - trim over-explained copy-PSF / leakage / weighting prose to one canonical spot; document that bit-identity rests on BOTH pristine gal_obs.psf AND the snapshot RNG. - add tests: PSF columns survive a failed-fit NaN-fill; all-epochs-failed raises; grammar token check runs over both shipped param files. Fix a stale cross-ref. Co-Authored-By: Claude Opus 4.8 --- .../modules/make_cat_package/make_cat.py | 37 +++--- src/shapepipe/modules/ngmix_package/ngmix.py | 66 +++++----- .../spread_model_package/spread_model.py | 4 +- src/shapepipe/utilities/galaxy.py | 91 ------------- tests/module/test_make_cat.py | 9 +- tests/module/test_ngmix.py | 10 +- tests/module/test_psf_averaging_properties.py | 26 ++++ tests/module/test_psf_grammar_properties.py | 44 ++++--- tests/module/test_psf_noop_property.py | 19 +-- tests/module/test_psf_physics_properties.py | 14 +- tests/module/test_utilities.py | 123 ------------------ 11 files changed, 137 insertions(+), 306 deletions(-) diff --git a/src/shapepipe/modules/make_cat_package/make_cat.py b/src/shapepipe/modules/make_cat_package/make_cat.py index ca78dc75..d9aaa5f2 100644 --- a/src/shapepipe/modules/make_cat_package/make_cat.py +++ b/src/shapepipe/modules/make_cat_package/make_cat.py @@ -329,18 +329,16 @@ def _save_ngmix_data(self, ngmix_cat_path, moments=False): Save the NGMIX catalogue into the final one. - Column grammar: ``NGMIX[m]_[_ERR][_][_]``. - ``OBJECT`` and ``SHEAR`` are both optional: the per-object metadata - columns (``NGMIX_MCAL_FLAGS``, ``NGMIX_N_EPOCH``, - ``NGMIX_MCAL_TYPES_FAIL``) carry neither. When present, ``OBJECT`` is - one of ``GAL`` (galaxy), ``PSF_ORIG`` (the original psfex/mccd image - PSF, fit by ``ngmix.average_original_psf`` — its true ellipticity and - size, feeding object-wise PSF leakage), or ``PSF_RECONV`` (the metacal - reconvolution kernel, round and enlarged by construction, fit by - ``ngmix.average_multiepoch_psf`` — the Tgal / Tpsf size cut and a g~0 - sanity check). ``PSF_ORIG`` and ``PSF_RECONV`` are independent fits of - different PSFs, no longer the single aliased value of the pre-fix code - (shapepipe#749). + Column grammar: ``NGMIX[m]_[_ERR]__``, plus + three OBJECT/SHEAR-less per-object metadata columns + (``NGMIX[m]_MCAL_FLAGS``, ``NGMIX_N_EPOCH``, + ``NGMIX_MCAL_TYPES_FAIL``). ``OBJECT`` is one of ``GAL`` (galaxy), + ``PSF_ORIG`` (the original image PSF, fit by + :func:`ngmix.average_original_psf`), or ``PSF_RECONV`` (the metacal + reconvolution kernel, fit by :func:`ngmix.average_multiepoch_psf`); + see those functions for what each PSF family IS. ``PSF_ORIG`` and + ``PSF_RECONV`` are independent fits of different PSFs, no longer the + single aliased value of the pre-fix code (shapepipe#749). Parameters ---------- @@ -390,9 +388,11 @@ def _save_ngmix_data(self, ngmix_cat_path, moments=False): prefix = f"NGMIX{m}" # Galaxy (GAL), original image PSF (PSF_ORIG) and metacal - # reconvolution kernel (PSF_RECONV). G1/G2 are scalar reduced-shear - # components, not a 2-vector. Sentinels: sizes/fluxes/mags/flags 0, - # *_ERR fluxes/mags -1, ellipticities -10, *_ERR sizes 1e30. + # reconvolution kernel (PSF_RECONV); see ngmix.average_original_psf / + # average_multiepoch_psf for what each PSF family is. G1/G2 are scalar + # reduced-shear components, not a 2-vector. Sentinels: + # sizes/fluxes/mags/flags 0, *_ERR fluxes/mags -1, ellipticities -10, + # *_ERR sizes 1e30. for key_str in ( f"{prefix}_T_GAL_", f"{prefix}_SNR_GAL_", @@ -484,8 +484,7 @@ def _save_ngmix_data(self, ngmix_cat_path, moments=False): ncf_data["flags"][ind[0]], idx ) - # Original image PSF (psfex/mccd) — its genuine, - # un-reconvolved ellipticity and size. + # Original image PSF (see ngmix.average_original_psf). self._add2dict( f"{prefix}_G1_PSF_ORIG_{key}", ncf_data["g1_psf_orig"][ind[0]], idx @@ -511,8 +510,8 @@ def _save_ngmix_data(self, ngmix_cat_path, moments=False): ncf_data["T_err_psf_orig"][ind[0]], idx ) - # Metacal reconvolution kernel — round, enlarged by - # construction; the Tgal/Tpsf cut and g~0 sanity check. + # Metacal reconvolution kernel (see + # ngmix.average_multiepoch_psf). self._add2dict( f"{prefix}_G1_PSF_RECONV_{key}", ncf_data["g1_psf_reconv"][ind[0]], idx diff --git a/src/shapepipe/modules/ngmix_package/ngmix.py b/src/shapepipe/modules/ngmix_package/ngmix.py index 56b974d5..33222106 100644 --- a/src/shapepipe/modules/ngmix_package/ngmix.py +++ b/src/shapepipe/modules/ngmix_package/ngmix.py @@ -380,16 +380,13 @@ def compile_results(self, results): Compiled results ready to be written to a file. Two PSF column families — each carrying ellipticity *and* size, - for *different* PSFs (shapepipe#749): + for *different* PSFs (shapepipe#749). See the function docstrings + for what each family IS: - * ``*_psf_orig`` (``g1``/``g2`` + ``*_err``, ``T``) — the - ORIGINAL image PSF (the psfex/mccd model stamp), fit before - metacal reconvolution by :func:`average_original_psf`. This is - the PSF whose true ellipticity and size enter object-wise - PSF-leakage diagnostics. - * ``*_psf_reconv`` — the metacal RECONVOLUTION kernel (round and - enlarged by construction, used for the Tgal/Tpsf size cut and a - g~0 sanity check), fit by :func:`average_multiepoch_psf`. + * ``*_psf_orig`` (``g1``/``g2`` + ``*_err``, ``T``) — the original + image PSF, fit by :func:`average_original_psf`. + * ``*_psf_reconv`` — the metacal reconvolution kernel, fit by + :func:`average_multiepoch_psf`. Raises ------ @@ -1232,23 +1229,18 @@ def average_original_psf(gal_obs_list, psf_runner): """Fit and average the *original* image PSF over epochs. The original PSF is the psfex/mccd model stamp handed to ngmix - (``gal_obs.psf``), fit here with the same ``psf_runner`` machinery — and - so the same ``psf_fit_prior`` — used inside metacal, but on the PSF - *before* metacal's reconvolution. Exported to the original-PSF columns + (``gal_obs.psf``), fit here with the same ``psf_runner`` (hence the same + fit prior and guesser) used inside metacal, but on the PSF *before* + metacal's reconvolution. Exported to the original-PSF columns (``NGMIX_G1/G2_PSF_ORIG``, ``NGMIX_T_PSF_ORIG``). Distinct from the reconvolution-kernel fit (:func:`average_multiepoch_psf`): the original PSF retains its true ellipticity and size, whereas the reconvolution kernel is round and enlarged by construction. This is the PSF whose true shape and size enter object-wise PSF-leakage diagnostics. - Epochs are weighted by the *galaxy* inverse-variance weight - (``gal_obs.weight.sum()``). This is the same *form* as - :func:`average_multiepoch_psf` (``weight.sum()`` per epoch, skipping - ``flags != 0`` fits) but not the identical weight: the reconvolution path - weights by the fixnoise-combined inverse variance of the noshear metacal - image, whereas this path uses the raw galaxy inverse variance. So the two - PSF families share an averaging *scheme* and differ in which PSF is fit - and in the precise per-epoch weighting factor. + Weighted by the raw galaxy inverse variance (``gal_obs.weight.sum()`` per + epoch); :func:`average_multiepoch_psf` uses the same scheme but the + fixnoise-combined metacal-image weight instead. The fit runs on a *copy* of each PSF observation so ``gal_obs.psf`` — the object metacal later deep-copies and consumes via @@ -1258,8 +1250,11 @@ def average_original_psf(gal_obs_list, psf_runner): would survive metacal's deep copy and be reused as the ``MetacalFitGaussPSF`` fallback when its own admom+ML PSF fits both fail, silently rescuing objects the base branch dropped (``BootPSFFailure``) and - changing the galaxy/shear result set. Fitting a copy keeps this add-column - refactor bit-identical on the galaxy results. + changing the galaxy/shear result set. Fitting a copy closes this + PSF-aliasing channel; combined with :func:`do_ngmix_metacal` seeding this + pre-fit from a *snapshot* of the metacal RNG (so the pre-fit does not + advance it), the add-column refactor stays bit-identical on the galaxy + results. Parameters ---------- @@ -1268,7 +1263,8 @@ def average_original_psf(gal_obs_list, psf_runner): (pre-metacal) PSF observation to fit, with no further ``.psf`` of its own so the runner fits the stamp itself. Left pristine. psf_runner : ngmix.runners.PSFRunner - The module's PSF runner, carrying the resolved ``psf_fit_prior``. + The module's PSF runner (built by :func:`make_runners` from the + shared ``prior``). Returns ------- @@ -1276,11 +1272,8 @@ def average_original_psf(gal_obs_list, psf_runner): Same keys as :func:`average_multiepoch_psf`. """ def fit(gal_obs): - # Fit a COPY of the PSF observation: PSFRunner.go fits the PSF stamp - # and sets its .meta['result'] (and .gmix on success). Reading from - # the copy leaves gal_obs.psf pristine — no gmix to leak through - # metacal's deep copy into the MetacalFitGaussPSF fallback. Failed - # fits keep flags != 0 and are dropped by _average_psf_fits. + # Fit a COPY so gal_obs.psf stays pristine for metacal — see docstring. + # Failed fits keep flags != 0 and are dropped by _average_psf_fits. psf_obs = gal_obs.psf.copy() psf_runner.go(psf_obs) return psf_obs.meta['result'], gal_obs.weight.sum() @@ -1379,14 +1372,15 @@ def do_ngmix_metacal(stamp, prior, flux_guess, rng, centroid_source="hsm"): runner, psf_runner = make_runners(prior, flux_guess, rng) - # Fit the ORIGINAL (psfex/mccd) PSF before metacal reconvolves it, using - # the same psf_runner (so the same psf_fit_prior and centroid). This is - # the PSF_ORIG family; metacal below produces the PSF_RECONV family. Run - # first so the original PSF is fit on its own stamp, distinct from the - # round, enlarged kernel metacal convolves back in. average_original_psf - # fits a COPY of each gal_obs.psf, so gal_obs_list reaches boot.go below - # pristine — no stray gmix to leak into MetacalFitGaussPSF's fallback. - psf_orig_res = average_original_psf(gal_obs_list, psf_runner) + # Fit the ORIGINAL (psfex/mccd) PSF before metacal reconvolves it. Use a + # psf_runner seeded from a snapshot of rng's state so this prefit does NOT + # advance the rng that boot.go consumes below — keeping the galaxy/shear + # results bit-identical to the no-prefit branch. average_original_psf fits a + # COPY of each gal_obs.psf, so gal_obs_list also reaches boot.go pristine. + prefit_rng = np.random.RandomState() + prefit_rng.set_state(rng.get_state()) + _, prefit_psf_runner = make_runners(prior, flux_guess, prefit_rng) + psf_orig_res = average_original_psf(gal_obs_list, prefit_psf_runner) metacal_pars = { 'types': ['noshear', '1p', '1m', '2p', '2m'], diff --git a/src/shapepipe/modules/spread_model_package/spread_model.py b/src/shapepipe/modules/spread_model_package/spread_model.py index 43140598..1461ecce 100644 --- a/src/shapepipe/modules/spread_model_package/spread_model.py +++ b/src/shapepipe/modules/spread_model_package/spread_model.py @@ -8,10 +8,10 @@ import galsim import numpy as np +from cs_util import size as cs_size from sqlitedict import SqliteDict from shapepipe.pipeline import file_io -from shapepipe.utilities import galaxy def get_sm(obj_vign, psf_vign, model_vign, weight_vign): @@ -102,7 +102,7 @@ def get_model(sigma, flux, img_shape, pixel_scale=0.186): """ # Get scale radius - scale_radius = 1 / 16 * galaxy.sigma_to_fwhm(sigma, pixel_scale=pixel_scale) + scale_radius = 1 / 16 * cs_size.sigma_to_fwhm(sigma) * pixel_scale # Get galaxy model gal_obj = galsim.Exponential(scale_radius=scale_radius, flux=flux) diff --git a/src/shapepipe/utilities/galaxy.py b/src/shapepipe/utilities/galaxy.py index b593df45..c533eac5 100644 --- a/src/shapepipe/utilities/galaxy.py +++ b/src/shapepipe/utilities/galaxy.py @@ -5,94 +5,3 @@ :Author: Martin Kilbinger """ - -import numpy as np -from cs_util import size as cs_size - - -def sigma_to_fwhm(sigma, pixel_scale=1.0): - r"""Convert Sigma to FWHM. - - Transform standard deviation of a 1D Gaussian, sigma, to FWHM - (Full Width Half Maximum). - - Thin wrapper over :func:`cs_util.size.sigma_to_fwhm`, the single - source of truth for the bare ``FWHM = 2 sqrt(2 ln 2) sigma`` - conversion; this layer adds the optional ``pixel_scale`` rescaling - and ShapePipe's input validation. - - Parameters - ---------- - sigma : numpy.ndarray - input standard deviation(s) - pixel_scale : float, optional, default=1 - pixel size in arcsec, set to 1 if no scaling - required - - Returns - ------- - fwhm : (array of) float - output fwhm(s) - - Raises - ------ - TypeError - If ``sigma`` is not of type numpy array or float - TypeError - If ``sigma`` array values are not of type float - TypeError - If ``pixel_scale`` is not of type float - ValueError - If ``sigma`` array values are not greater than 0.0 - ValueError - If ``sigma`` is not greater than 0.0 - ValueError - If ``pixel_scale`` is not greater than 0.0 - - Notes - ----- - To compute the FWHMh for a 1D Gaussian N(x), solve the equation - - .. math:: - - N(x) = (\sigma \sqrt{2\pi})^{-1} \exp[x^2/2\sigma^2] = \frac 1 2 N(x) - - - for :math:`x`. The FWHM is :math:`x + (-x) = 2x`. The solution is - - .. math:: - - \textrm{FWHM} = 2 \sqrt(2 \ln 2) \sigma \approx 2.355 \sigma - - """ - if not isinstance(sigma, (np.ndarray, float)): - raise TypeError( - f"Sigma must be of type numpy array or float, not {type(sigma)}." - ) - elif isinstance(sigma, np.ndarray) and sigma.dtype != np.float64: - raise TypeError( - f"Sigma array values must be of type float, not {sigma.dtype}." - ) - - if not isinstance(pixel_scale, float): - raise TypeError( - f"The pixel scale must of type float, not {type(pixel_scale)}." - ) - - if isinstance(sigma, np.ndarray) and np.any(sigma <= 0.0): - raise ValueError( - f"Found {sigma[sigma <=0].size} invalid standard deviation array " - + "values, all elements must to be greater than 0.0." - ) - elif isinstance(sigma, float) and sigma <= 0.0: - raise ValueError( - f"Invalid standard deviation {sigma}, needs to be greater than " - + "0.0." - ) - - if pixel_scale <= 0.0: - raise ValueError( - f"Invalid pixel scale {pixel_scale}, needs to be greater than 0.0." - ) - - return cs_size.sigma_to_fwhm(sigma) * pixel_scale diff --git a/tests/module/test_make_cat.py b/tests/module/test_make_cat.py index b5077d3a..e2960701 100644 --- a/tests/module/test_make_cat.py +++ b/tests/module/test_make_cat.py @@ -2,10 +2,11 @@ Drives ``SaveCatalogue._save_ngmix_data`` against a synthetic ngmix catalogue to lock in the renamed PSF-column grammar -``NGMIX_[_ERR]_[_]`` (shapepipe#749): the original -image PSF (``PSF_ORIG``) and the metacal reconvolution kernel (``PSF_RECONV``) -are independent fits of *different* PSFs, no longer the single aliased value of -the pre-fix code. +``NGMIX[m]_[_ERR]__`` (shapepipe#749), plus the three +OBJECT/SHEAR-less metadata columns (``NGMIX[m]_MCAL_FLAGS``, ``NGMIX_N_EPOCH``, +``NGMIX_MCAL_TYPES_FAIL``): the original image PSF (``PSF_ORIG``) and the metacal +reconvolution kernel (``PSF_RECONV``) are independent fits of *different* PSFs, +no longer the single aliased value of the pre-fix code. """ import numpy as np diff --git a/tests/module/test_ngmix.py b/tests/module/test_ngmix.py index 9d0e640f..37db4e3f 100644 --- a/tests/module/test_ngmix.py +++ b/tests/module/test_ngmix.py @@ -238,7 +238,7 @@ def test_compile_results_psf_families_are_unaliased(): came from one ``average_multiepoch_psf`` result and so were byte- identical; here they must differ, each tracing its own source. The companion size un-aliasing is checked in - ``test_compile_results_size_columns_are_half_light_radii``. + ``test_compile_results_size_columns_are_unaliased``. """ from shapepipe.modules.ngmix_package.ngmix import Ngmix @@ -296,6 +296,14 @@ def test_compile_results_nan_fills_failed_fit_types(): assert failed["flags"] == [0x8] assert failed["mcal_flags"] == [0x8] and failed["mcal_flags"][0] != 0 + # The object-level PSF columns survive on a failed fit type: they are copied + # outside the flags==0 guard, so the failed type keeps a full-length PSF row + # (gating that copy on the galaxy flags would shorten it -> save crash). + npt.assert_allclose(failed["T_psf_orig"], [ORIG_PSF_T]) + npt.assert_allclose(failed["T_psf_reconv"], [0.09]) + npt.assert_allclose(failed["g1_psf_orig"], [ORIG_PSF_G[0]]) + npt.assert_allclose(failed["g1_psf_reconv"], [RECONV_PSF_G[0]]) + # successful types are untouched, but share the object's mcal_flags npt.assert_allclose(out["noshear"]["flux"], [100.0]) assert out["noshear"]["flags"] == [0] diff --git a/tests/module/test_psf_averaging_properties.py b/tests/module/test_psf_averaging_properties.py index 25c0fc77..2314b6ca 100644 --- a/tests/module/test_psf_averaging_properties.py +++ b/tests/module/test_psf_averaging_properties.py @@ -20,6 +20,7 @@ import numpy as np import numpy.testing as npt +import pytest from astropy.io import fits from hypothesis import given, settings from hypothesis import strategies as st @@ -199,6 +200,31 @@ def test_single_survivor_returns_its_own_value(survivor, flagged_specs): assert out["n_epoch"] == 1 +@given( + st.lists( + st.tuples(_weight, st.integers(min_value=1, max_value=255)), + min_size=1, max_size=5, + ) +) +@settings(deadline=None, max_examples=25) +def test_all_epochs_failed_raises_zero_division(flagged_specs): + """All-epochs-failed is the contract that wsum == 0 raises, not returns 0/NaN. + + An object whose PSF fit failed on every epoch reaches both PSF-family + wrappers (reconv and orig) with zero survivors, so ``wsum`` stays 0. The + core must raise ``ZeroDivisionError`` rather than silently divide — a + regression returning NaN/0 would be caught here. Every input epoch is + flagged (``flags != 0``) with an arbitrary positive weight, so no survivor + can rescue the sum. + """ + flagged = [ + (_result(np.nan, np.nan, 1e6, np.nan, np.nan, np.nan, flags=f), w) + for w, f in flagged_specs + ] + with pytest.raises(ZeroDivisionError): + _average_psf_fits(flagged) + + # --------------------------------------------------------------------------- # # (d) make_cat: an obj_id absent from the ngmix cat keeps its sentinel fill. # --------------------------------------------------------------------------- # diff --git a/tests/module/test_psf_grammar_properties.py b/tests/module/test_psf_grammar_properties.py index 5123d7f3..c7c361dc 100644 --- a/tests/module/test_psf_grammar_properties.py +++ b/tests/module/test_psf_grammar_properties.py @@ -2,9 +2,11 @@ Drives ``SaveCatalogue._save_ngmix_data`` (the make_cat writer) under hypothesis to pin three invariants of the renamed grammar -``NGMIX[m]_[_ERR]_[_]`` (shapepipe#749), where the -ORIGINAL image PSF (``PSF_ORIG``) and the metacal RECONVOLUTION kernel -(``PSF_RECONV``) are independent fits of *different* PSFs: +``NGMIX[m]_[_ERR]__`` (shapepipe#749), plus the three +OBJECT/SHEAR-less metadata columns (``NGMIX[m]_MCAL_FLAGS``, ``NGMIX_N_EPOCH``, +``NGMIX_MCAL_TYPES_FAIL``), where the ORIGINAL image PSF (``PSF_ORIG``) and the +metacal RECONVOLUTION kernel (``PSF_RECONV``) are independent fits of +*different* PSFs: (a) un-aliasing holds for ALL inputs: for arbitrary distinct values fed as the orig vs reconv ngmix source columns, every emitted ``*_PSF_ORIG_*`` column @@ -25,6 +27,7 @@ import numpy as np import numpy.testing as npt +import pytest from astropy.io import fits from hypothesis import given, settings from hypothesis import strategies as st @@ -156,10 +159,13 @@ def _run_save_ngmix(ngmix_path, obj_ids, cat_size_target=None): rf"|^NGMIXm?_(?:MCAL_FLAGS|MCAL_TYPES_FAIL|N_EPOCH)$" ) -# example/cfis/final_cat.param lives two levels up from tests/module/. -PARAM_PATH = ( - Path(__file__).resolve().parents[2] / "example" / "cfis" / "final_cat.param" -) +# The shipped final-catalogue param files, two levels up from tests/module/. +# Both are consumer contracts updated to the new grammar, so both are checked. +_EXAMPLE = Path(__file__).resolve().parents[2] / "example" +PARAM_PATHS = [ + _EXAMPLE / "cfis" / "final_cat.param", + _EXAMPLE / "unions_800" / "cat_matched.param", +] # The one param-file NGMIX token outside the _save_ngmix_data grammar: the # moments-failure flag, set by a different code path (not the metacal writer). @@ -169,9 +175,9 @@ def _run_save_ngmix(ngmix_path, obj_ids, cat_size_target=None): NON_WRITER_PARAM_TOKENS = {"NGMIX_MOM_FAIL"} -def _param_ngmix_tokens(): - """Every distinct NGMIX_* token named in the shipped param file.""" - text = PARAM_PATH.read_text() +def _param_ngmix_tokens(param_path): + """Every distinct NGMIX_* token named in a shipped param file.""" + text = param_path.read_text() return set(re.findall(r"\bNGMIX[A-Za-z0-9_]*", text)) @@ -317,15 +323,21 @@ def test_emitted_column_names_match_grammar(obj_ids, tmp_path_factory): # (c) Param-file tokens are all producible by the writer # --------------------------------------------------------------------------- +@pytest.mark.parametrize( + "param_path", PARAM_PATHS, ids=lambda p: f"{p.parent.name}/{p.name}" +) @settings(deadline=None, max_examples=15) @given(obj_ids=obj_ids_strategy) -def test_param_file_ngmix_tokens_are_producible(obj_ids): +def test_param_file_ngmix_tokens_are_producible(param_path, obj_ids): """Every NGMIX_* token the param file names is a column the writer produces. - ``example/cfis/final_cat.param`` is the consumer contract for the final - catalogue; ``create_final_cat`` keeps only the listed columns, so a token it - names that the writer cannot emit is a silent, empty column downstream. The - one known exception — ``NGMIX_MOM_FAIL``, the moments-failure flag set by a + Each shipped final-catalogue param file (``example/cfis/final_cat.param`` + and ``example/unions_800/cat_matched.param``) is a consumer contract for the + final catalogue; ``create_final_cat`` keeps only the listed columns, so a + token it names that the writer cannot emit is a silent, empty column + downstream. Both files are checked so a future divergence in either (a + typo'd or stale NGMIX token) cannot escape the consistency check. The one + known exception — ``NGMIX_MOM_FAIL``, the moments-failure flag set by a different path — is excluded BY NAME, and we assert it is genuinely outside the producible set so the carve-out cannot hide a real grammar regression. @@ -333,7 +345,7 @@ def test_param_file_ngmix_tokens_are_producible(obj_ids): across inputs (the contract is structural, not data-dependent). """ produced = _produced_columns(obj_ids) - param_tokens = _param_ngmix_tokens() + param_tokens = _param_ngmix_tokens(param_path) # The carve-out is real: the excluded token is NOT producible. (If a future # change made the writer emit it, this fails and we drop the exclusion.) diff --git a/tests/module/test_psf_noop_property.py b/tests/module/test_psf_noop_property.py index 003efcaf..98f47b2b 100644 --- a/tests/module/test_psf_noop_property.py +++ b/tests/module/test_psf_noop_property.py @@ -6,14 +6,17 @@ each PSF observation, so the observation metacal later deep-copies and consumes (``boot.go(gal_obs_list)``) is left untouched. -This is the invariant that makes the add-column refactor bit-identical to a -no-original-fit branch on the galaxy/shear results: if the pre-fit cannot alter -``gal_obs.psf``, it cannot alter what metacal consumes, so the galaxy results -are unchanged. ``PSFRunner.go`` sets ``.gmix`` (and ``.meta['result']``) on the -observation it fits; were that ``gal_obs.psf`` itself, a stray gmix would -survive metacal's deep copy and be reused as the ``MetacalFitGaussPSF`` fallback -when admom+ML both fail, silently rescuing objects the base branch dropped -(``BootPSFFailure``). +Pristine ``gal_obs.psf`` is *one* of the two conditions for the add-column +refactor to be bit-identical to a no-original-fit branch on the galaxy/shear +results: it closes the PSF-aliasing channel, which this test pins. +``PSFRunner.go`` sets ``.gmix`` (and ``.meta['result']``) on the observation it +fits; were that ``gal_obs.psf`` itself, a stray gmix would survive metacal's +deep copy and be reused as the ``MetacalFitGaussPSF`` fallback when admom+ML +both fail, silently rescuing objects the base branch dropped +(``BootPSFFailure``). The *second* condition — the pre-fit not advancing the +RNG that metacal consumes — is ensured in ``do_ngmix_metacal`` by seeding the +pre-fit from a snapshot of the RNG state; together they leave the galaxy +results unchanged. The unit guard ``test_original_psf_prefit_leaves_gal_obs_psf_pristine`` in ``tests/module/test_ngmix.py`` pins this on one fixed input; here it is diff --git a/tests/module/test_psf_physics_properties.py b/tests/module/test_psf_physics_properties.py index 6b67f164..b0feeab5 100644 --- a/tests/module/test_psf_physics_properties.py +++ b/tests/module/test_psf_physics_properties.py @@ -63,10 +63,11 @@ def _do_ngmix_metacal_on_psf(psf_shear, psf_fwhm=0.55, img_size=51, seed=7, # Physically-sensible PSF strategies: ellipticity well inside |g| < 1 (real # PSFs are mildly elliptical; a magnitude up to ~0.18 keeps the Moffat shear # valid and the fit well-behaved), and a Moffat FWHM in arcsec spanning the -# realistic ground-based range. NB: the original-PSF fit uses the default galaxy -# prior (psf_fit_prior="galaxy"), which shrinks the *recovered* |g_orig| to -# O(1e-4) even for input |g|~0.1 (the shapepipe#749 prior-domination) -- so the -# dilation (T) inequality, not the rounding (|g|) one, is the primary guard. +# realistic ground-based range. NB: the original-PSF fit uses the same +# (galaxy-shaped) prior as the galaxy fit, which shrinks the *recovered* +# |g_orig| to O(1e-4) even for input |g|~0.1 (the shapepipe#749 +# prior-domination) -- so the dilation (T) inequality, not the rounding (|g|) +# one, is the primary guard. _g_complex = st.complex_numbers( min_magnitude=0.02, max_magnitude=0.18, allow_nan=False, allow_infinity=False ) @@ -90,8 +91,9 @@ def test_reconv_psf_rounder_and_larger_than_orig_over_random_psfs(g, psf_fwhm): dilated, so ``T_reconv`` exceeds ``T_orig`` by a clear margin (~0.01-0.03) and a strict ``>`` is asserted below -- a transposition makes ``T_reconv`` the smaller value and fails it hard. The ``|g|`` inequality corroborates but - with a thin margin: under the default galaxy prior the recovered ``|g_orig|`` - is only O(1e-4) (shapepipe#749 prior-domination), so it is a secondary check. + with a thin margin: under the same (galaxy-shaped) prior as the galaxy fit + the recovered ``|g_orig|`` is only O(1e-4) (shapepipe#749 prior-domination), + so it is a secondary check. """ _, reconv, orig = _do_ngmix_metacal_on_psf( (g.real, g.imag), psf_fwhm=psf_fwhm diff --git a/tests/module/test_utilities.py b/tests/module/test_utilities.py index da098d33..453455cb 100644 --- a/tests/module/test_utilities.py +++ b/tests/module/test_utilities.py @@ -5,126 +5,3 @@ :Author: Samuel Farrens """ - -from unittest import TestCase - -from hypothesis import given -from hypothesis import strategies as st -import numpy as np -import numpy.testing as npt - -from shapepipe.utilities import galaxy - -positive_floats = st.floats( - min_value=1.0e-100, - max_value=1.0e100, - allow_nan=False, - allow_infinity=False, -) - - -class GalaxyTestCase(TestCase): - - def setUp(self): - - self.sigma_float = 5.5 - self.sigma_array = np.arange(1, 4) * 0.1 - self.sigma_int = 1 - self.sigma_array_int = np.arange(3) - self.pixel_scale = 2.0 - self.pixel_int = 1 - self.sigma_float_exp = 12.9515102 - self.sigma_float_ps_exp = 25.9030204 - self.sigma_array_exp = np.array([0.235482, 0.47096401, 0.70644601]) - - def tearDown(self): - - self.sigma_float = None - self.sigma_array = None - self.sigma_int = None - self.sigma_array_int = None - self.pixel_scale = None - self.pixel_int = None - self.sigma_float_exp = None - self.sigma_float_ps_exp = None - self.sigma_array_exp = None - - def test_sigma_to_fwhm(self): - - npt.assert_almost_equal( - galaxy.sigma_to_fwhm(self.sigma_float), - self.sigma_float_exp, - err_msg="sigma_to_fwhm gave invalid result for float input", - ) - - npt.assert_almost_equal( - galaxy.sigma_to_fwhm(self.sigma_float, self.pixel_scale), - self.sigma_float_ps_exp, - err_msg=( - "sigma_to_fwhm gave invalid result for float input with " - + "non-default pixel scale" - ), - ) - - npt.assert_allclose( - galaxy.sigma_to_fwhm(self.sigma_array), - self.sigma_array_exp, - err_msg="sigma_to_fwhm gave invalid result for array input", - ) - - npt.assert_raises(TypeError, galaxy.sigma_to_fwhm, self.sigma_int) - - npt.assert_raises( - TypeError, - galaxy.sigma_to_fwhm, - self.sigma_array_int, - ) - - npt.assert_raises( - TypeError, - galaxy.sigma_to_fwhm, - self.sigma_float, - self.pixel_int, - ) - - npt.assert_raises(ValueError, galaxy.sigma_to_fwhm, -self.sigma_float) - - npt.assert_raises( - ValueError, - galaxy.sigma_to_fwhm, - np.negative(self.sigma_array), - ) - - npt.assert_raises( - ValueError, - galaxy.sigma_to_fwhm, - self.sigma_float, - -self.pixel_scale, - ) - - -@given(positive_floats, positive_floats) -def test_sigma_to_fwhm_is_linear_in_sigma_and_pixel_scale(sigma, pixel_scale): - - fwhm = galaxy.sigma_to_fwhm(sigma, pixel_scale) - - assert fwhm > 0.0 - npt.assert_allclose( - galaxy.sigma_to_fwhm(sigma) * pixel_scale, - fwhm, - rtol=1.0e-12, - ) - - -@given( - st.lists(positive_floats, min_size=1, max_size=20), - positive_floats, -) -def test_sigma_to_fwhm_preserves_array_shape_and_scale(sigmas, pixel_scale): - - sigma_array = np.array(sigmas, dtype=np.float64) - fwhm = galaxy.sigma_to_fwhm(sigma_array, pixel_scale) - - assert fwhm.shape == sigma_array.shape - assert np.all(fwhm > 0.0) - npt.assert_allclose(fwhm / sigma_array, fwhm[0] / sigma_array[0]) From 8ae32035cf2cd2ea49b4fa486b79220a36791102 Mon Sep 17 00:00:00 2001 From: Cail Daley Date: Sat, 20 Jun 2026 17:08:57 +0200 Subject: [PATCH 13/30] refactor(ngmix): build the per-family PSF res-keys from one loop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 12 hand-written g*/T_psf_{reconv,orig} assignments collapse to a single template applied over (family, source) — same keys, same values, but the two PSF families are now generated symmetrically, so one can't gain or misname a column the other lacks. Addresses a drift/hardcoding risk Cail flagged in review. Co-Authored-By: Claude Opus 4.8 --- src/shapepipe/modules/ngmix_package/ngmix.py | 29 +++++++++----------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/src/shapepipe/modules/ngmix_package/ngmix.py b/src/shapepipe/modules/ngmix_package/ngmix.py index 33222106..cc7ab87b 100644 --- a/src/shapepipe/modules/ngmix_package/ngmix.py +++ b/src/shapepipe/modules/ngmix_package/ngmix.py @@ -708,22 +708,19 @@ def process(self): if res.get(k, {}).get('flags', 0) != 0 ) res['mcal_flags'] = get_mcal_flags(res) - # Two distinct PSF families (shapepipe#749), each with its own - # ellipticity AND size, written under self-naming res-keys: - # reconvolution kernel (psf_res) -> *_psf_reconv - # original image PSF (psf_orig_res) -> *_psf_orig - res['g1_psf_reconv'] = psf_res['g_psf'][0] - res['g2_psf_reconv'] = psf_res['g_psf'][1] - res['g1_err_psf_reconv'] = psf_res['g_psf_err'][0] - res['g2_err_psf_reconv'] = psf_res['g_psf_err'][1] - res['T_psf_reconv'] = psf_res['T_psf'] - res['T_err_psf_reconv'] = psf_res['T_psf_err'] - res['g1_psf_orig'] = psf_orig_res['g_psf'][0] - res['g2_psf_orig'] = psf_orig_res['g_psf'][1] - res['g1_err_psf_orig'] = psf_orig_res['g_psf_err'][0] - res['g2_err_psf_orig'] = psf_orig_res['g_psf_err'][1] - res['T_psf_orig'] = psf_orig_res['T_psf'] - res['T_err_psf_orig'] = psf_orig_res['T_psf_err'] + # Two distinct PSF families (shapepipe#749), each carrying its own + # ellipticity AND size: the metacal reconvolution kernel (psf_res) + # and the original image PSF (psf_orig_res). Tag both into res from + # ONE template so the families stay symmetric — same quantities, + # parallel ``*_psf_{family}`` names — and cannot drift apart by a + # hand-edit to one and not the other. + for family, psf in (("reconv", psf_res), ("orig", psf_orig_res)): + res[f"g1_psf_{family}"] = psf["g_psf"][0] + res[f"g2_psf_{family}"] = psf["g_psf"][1] + res[f"g1_err_psf_{family}"] = psf["g_psf_err"][0] + res[f"g2_err_psf_{family}"] = psf["g_psf_err"][1] + res[f"T_psf_{family}"] = psf["T_psf"] + res[f"T_err_psf_{family}"] = psf["T_psf_err"] final_res.append(res) n_fitted += 1 count_batch += 1 From 20878746d8222e1156364e65931810dae4d8d544 Mon Sep 17 00:00:00 2001 From: Cail Daley Date: Sat, 20 Jun 2026 17:23:08 +0200 Subject: [PATCH 14/30] refactor(make_cat): family-loop the PSF column write block The two parallel 6-call blocks (PSF_ORIG / PSF_RECONV) collapse to one template over (family, obj): the FITS column name and the res-key it reads are now generated from the same pair, so the write seam (where the grammar drift bit us) can't desync the two families. Behaviour-identical column set + values. Co-Authored-By: Claude Opus 4.8 --- .../modules/make_cat_package/make_cat.py | 85 +++++++------------ 1 file changed, 33 insertions(+), 52 deletions(-) diff --git a/src/shapepipe/modules/make_cat_package/make_cat.py b/src/shapepipe/modules/make_cat_package/make_cat.py index d9aaa5f2..1c7c5a6b 100644 --- a/src/shapepipe/modules/make_cat_package/make_cat.py +++ b/src/shapepipe/modules/make_cat_package/make_cat.py @@ -484,58 +484,39 @@ def _save_ngmix_data(self, ngmix_cat_path, moments=False): ncf_data["flags"][ind[0]], idx ) - # Original image PSF (see ngmix.average_original_psf). - self._add2dict( - f"{prefix}_G1_PSF_ORIG_{key}", - ncf_data["g1_psf_orig"][ind[0]], idx - ) - self._add2dict( - f"{prefix}_G2_PSF_ORIG_{key}", - ncf_data["g2_psf_orig"][ind[0]], idx - ) - self._add2dict( - f"{prefix}_G1_ERR_PSF_ORIG_{key}", - ncf_data["g1_err_psf_orig"][ind[0]], idx - ) - self._add2dict( - f"{prefix}_G2_ERR_PSF_ORIG_{key}", - ncf_data["g2_err_psf_orig"][ind[0]], idx - ) - self._add2dict( - f"{prefix}_T_PSF_ORIG_{key}", - ncf_data["T_psf_orig"][ind[0]], idx - ) - self._add2dict( - f"{prefix}_T_ERR_PSF_ORIG_{key}", - ncf_data["T_err_psf_orig"][ind[0]], idx - ) - - # Metacal reconvolution kernel (see - # ngmix.average_multiepoch_psf). - self._add2dict( - f"{prefix}_G1_PSF_RECONV_{key}", - ncf_data["g1_psf_reconv"][ind[0]], idx - ) - self._add2dict( - f"{prefix}_G2_PSF_RECONV_{key}", - ncf_data["g2_psf_reconv"][ind[0]], idx - ) - self._add2dict( - f"{prefix}_G1_ERR_PSF_RECONV_{key}", - ncf_data["g1_err_psf_reconv"][ind[0]], idx - ) - self._add2dict( - f"{prefix}_G2_ERR_PSF_RECONV_{key}", - ncf_data["g2_err_psf_reconv"][ind[0]], idx - ) - self._add2dict( - f"{prefix}_T_PSF_RECONV_{key}", - ncf_data["T_psf_reconv"][ind[0]], idx - ) - self._add2dict( - f"{prefix}_T_ERR_PSF_RECONV_{key}", - ncf_data["T_err_psf_reconv"][ind[0]], idx - ) + # Original image PSF (average_original_psf) and metacal + # reconvolution kernel (average_multiepoch_psf). Both PSF + # families share ONE write template, so the FITS column + # name (``{obj}``) and the res-key it reads (``{family}``) + # are generated from the same pair and cannot drift apart. + for family, obj in ( + ("orig", "PSF_ORIG"), + ("reconv", "PSF_RECONV"), + ): + self._add2dict( + f"{prefix}_G1_{obj}_{key}", + ncf_data[f"g1_psf_{family}"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_G2_{obj}_{key}", + ncf_data[f"g2_psf_{family}"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_G1_ERR_{obj}_{key}", + ncf_data[f"g1_err_psf_{family}"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_G2_ERR_{obj}_{key}", + ncf_data[f"g2_err_psf_{family}"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_T_{obj}_{key}", + ncf_data[f"T_psf_{family}"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_T_ERR_{obj}_{key}", + ncf_data[f"T_err_psf_{family}"][ind[0]], idx + ) self._add2dict( f"NGMIX{m}_MCAL_FLAGS", From ade9cf323cf065a86fa527b205b1424b2101eca2 Mon Sep 17 00:00:00 2001 From: Cail Daley Date: Tue, 30 Jun 2026 22:01:05 +0200 Subject: [PATCH 15/30] docs(CLAUDE): PYTHONPATH-shadow recipe for testing a library build in-container MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Document how to run the container against a different pure-Python library build (a worktree, an unreleased branch, this repo's own src/ against a stale image) by prepending the host checkout to PYTHONPATH — the local-testing counterpart of a git-ref dependency like `cs_util @ develop`. Includes the -o addopts='' tip for when a pytest plugin the config references isn't in the image. Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01MgDK9SKn2PsDZGEbr4WFb3 --- CLAUDE.md | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/CLAUDE.md b/CLAUDE.md index c80029b1..aaec7943 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -36,6 +36,30 @@ way to get all of that is the container. sandbox with a host clone of the repo bind-mounted in and `pip install -e` pointed at it, so edits on the host are live inside the container. +**Shadow a library build with `PYTHONPATH` (no image rebuild).** When you need +the container to run a *different* build of a pure-Python library than the one +baked into its venv — a feature worktree, an unreleased branch, this repo's own +`src/` against a stale image — prepend the host checkout to `PYTHONPATH`. Python +resolves the prepended path first, so the on-disk version shadows +`/app/.venv/...` without touching the image. This is the local-testing +counterpart of a git-ref dependency (e.g. `cs_util @ develop` in +`pyproject.toml`): the dep change makes CI build the right version; the shadow +lets you test that version *now*, before any rebuild. The recipe: + +```bash +apptainer exec --bind /n17data,/automnt bash -c \ + "cd && \ + PYTHONPATH=/path/to/libfoo-checkout:/src \ + python -m pytest -o addopts='' -q" +``` + +Notes: the checkout path is the **parent** of the importable package dir (the +dir containing `foo/`, not `foo/` itself); list several `:`-separated to stack +shadows; `-o addopts=''` clears `pyproject.toml`'s pytest defaults when a plugin +they reference (e.g. `pytest-cov`) isn't in the image. Use this for a quick +verify; land the real fix as the `pyproject.toml` / `uv.lock` dep change so CI +and the next image agree. + **Testing container changes: build remotely, pull locally.** Don't `apptainer build` images on a cluster — quotas are tight and the build is slow. The loop for any change to `Dockerfile` / `pyproject.toml` / `uv.lock` is: edit From 98e822ad953a3c3b5a785c8a3691e07b7fd47593 Mon Sep 17 00:00:00 2001 From: Cail Daley Date: Tue, 30 Jun 2026 22:22:40 +0200 Subject: [PATCH 16/30] fix(hsm): store true e-type distortion in E*_HSM columns, not g-type shear MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SCIENCE-AFFECTING — the one value change in the column-grammar refactor. The HSM star/PSF moment columns are named E1/E2_*_HSM (e-type, the distortion convention the star/PSF residual ρ/τ statistics expect), but the producers were storing galsim's reduced shear `observed_shape.g1/.g2` (g-type). Name and value disagreed. This swaps the five producer sites to `observed_shape.e1/.e2` so the stored value is the true e-type distortion (|e| = 2|g|/(1+|g|²)), matching the column name. Sites (galsim.Shear.e1/.e2, distortion): psfex_interp.py _get_psfshapes (325-326), _get_starshapes (440-441) mccd_interpolation_script.py PSF (222-223), STAR (280-281) shapepipe_auxiliary_mccd.py PSF (421-422) Closes Sacha's review point (#761): HSM is e-type, ngmix is g-type — name and value must agree. Downstream consequence: ρ/τ-statistic numbers change. Fabian /Axel to confirm the e-type convention is what the residual stats want. Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01MgDK9SKn2PsDZGEbr4WFb3 --- .../modules/mccd_package/mccd_interpolation_script.py | 8 ++++---- .../modules/mccd_package/shapepipe_auxiliary_mccd.py | 4 ++-- .../modules/psfex_interp_package/psfex_interp.py | 8 ++++---- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/shapepipe/modules/mccd_package/mccd_interpolation_script.py b/src/shapepipe/modules/mccd_package/mccd_interpolation_script.py index d6625bc3..4626ff25 100644 --- a/src/shapepipe/modules/mccd_package/mccd_interpolation_script.py +++ b/src/shapepipe/modules/mccd_package/mccd_interpolation_script.py @@ -219,8 +219,8 @@ def _get_psfshapes(self): self.psf_shapes = np.array( [ [ - moms.observed_shape.g1, - moms.observed_shape.g2, + moms.observed_shape.e1, + moms.observed_shape.e2, moms.moments_sigma, int(bool(moms.error_message)), ] @@ -277,8 +277,8 @@ def _get_starshapes(self, star_vign): self.star_shapes = np.array( [ [ - moms.observed_shape.g1, - moms.observed_shape.g2, + moms.observed_shape.e1, + moms.observed_shape.e2, moms.moments_sigma, int(bool(moms.error_message)), ] diff --git a/src/shapepipe/modules/mccd_package/shapepipe_auxiliary_mccd.py b/src/shapepipe/modules/mccd_package/shapepipe_auxiliary_mccd.py index 44b7cf55..cc38b407 100644 --- a/src/shapepipe/modules/mccd_package/shapepipe_auxiliary_mccd.py +++ b/src/shapepipe/modules/mccd_package/shapepipe_auxiliary_mccd.py @@ -418,8 +418,8 @@ def mccd_interpolation_pipeline( PSF_shapes = np.array( [ [ - moms.observed_shape.g1, - moms.observed_shape.g2, + moms.observed_shape.e1, + moms.observed_shape.e2, moms.moments_sigma, int(bool(moms.error_message)), ] diff --git a/src/shapepipe/modules/psfex_interp_package/psfex_interp.py b/src/shapepipe/modules/psfex_interp_package/psfex_interp.py index 956092f2..2f1a2db8 100644 --- a/src/shapepipe/modules/psfex_interp_package/psfex_interp.py +++ b/src/shapepipe/modules/psfex_interp_package/psfex_interp.py @@ -322,8 +322,8 @@ def _get_psfshapes(self): self.psf_shapes = np.array( [ [ - moms.observed_shape.g1, - moms.observed_shape.g2, + moms.observed_shape.e1, + moms.observed_shape.e2, moms.moments_sigma, int(bool(moms.error_message)), ] @@ -437,8 +437,8 @@ def _get_starshapes(self, star_vign): self.star_shapes = np.array( [ [ - moms.observed_shape.g1, - moms.observed_shape.g2, + moms.observed_shape.e1, + moms.observed_shape.e2, moms.moments_sigma, int(bool(moms.error_message)), ] From 4b17f7ec769306f0e3fd8a586286cd3547e10f88 Mon Sep 17 00:00:00 2001 From: Cail Daley Date: Tue, 30 Jun 2026 22:47:25 +0200 Subject: [PATCH 17/30] refactor(hsm): one column grammar for HSM star/PSF moments, size stored as T Rename every HSM moment column to the ShapePipe-v2 grammar ESTIMATOR_COMPONENT_OBJECT and store size as T = 2*sigma^2 (an area) sourced from cs_util.size, across the PSFEx and MCCD producers and their consumers: E1/E2_PSF_HSM -> HSM_E1/E2_PSF E1/E2_STAR_HSM -> HSM_E1/E2_STAR SIGMA_PSF/STAR_HSM -> HSM_T_PSF / HSM_T_STAR (value now T, not sigma) FLAG_PSF/STAR_HSM -> HSM_FLAG_PSF / HSM_FLAG_STAR Size is converted at the earliest honest point: the producers (psfex_interp, mccd_interpolation_script, shapepipe_auxiliary_mccd) store cs_size.sigma_to_T under the HSM_T_* keys -- in the FITS columns AND the SHAPES SqliteDict -- so every consumer reads T straight through. The hand-coded conversions in the consumers drop out: merge_starcat's np.sqrt(size) / **2 and mccd_plot_utilities' 2*sigma^2 arithmetic become plain HSM_T_* reads. One intended numeric change: the merge_starcat MCCD branch derives its output size from the external PSF_MOM_LIST/STAR_MOM_LIST[:,2] (a sigma), now via cs_size.sigma_to_T -- so that validation catalogue's size column changes from sigma^2 to T = 2*sigma^2. Extends test_psfex_interp with an HSM e-type property test (elliptical-PSF fixture, since e vs g vanishes on round shapes) and a producer test pinning the new grammar + T routing. Downstream consumers (sp_validation) break on both the rename and the sigma->T semantics; that ripple is absorbed by the separate #205 PR. Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01MgDK9SKn2PsDZGEbr4WFb3 --- .../mccd_package/mccd_interpolation_script.py | 37 ++--- .../mccd_package/mccd_plot_utilities.py | 18 +-- .../mccd_package/shapepipe_auxiliary_mccd.py | 9 +- .../merge_starcat_package/merge_starcat.py | 67 +++++---- .../psfex_interp_package/psfex_interp.py | 37 ++--- tests/module/test_psfex_interp.py | 141 ++++++++++++++++++ 6 files changed, 228 insertions(+), 81 deletions(-) diff --git a/src/shapepipe/modules/mccd_package/mccd_interpolation_script.py b/src/shapepipe/modules/mccd_package/mccd_interpolation_script.py index 4626ff25..ddce0194 100644 --- a/src/shapepipe/modules/mccd_package/mccd_interpolation_script.py +++ b/src/shapepipe/modules/mccd_package/mccd_interpolation_script.py @@ -10,6 +10,7 @@ import mccd import numpy as np +from cs_util import size as cs_size from sqlitedict import SqliteDict from shapepipe.pipeline import file_io @@ -243,10 +244,10 @@ def _write_output(self): if self._compute_shape: data = { "VIGNET": self.interp_PSFs, - "E1_PSF_HSM": self.psf_shapes[:, 0], - "E2_PSF_HSM": self.psf_shapes[:, 1], - "SIGMA_PSF_HSM": self.psf_shapes[:, 2], - "FLAG_PSF_HSM": self.psf_shapes[:, 3].astype(int), + "HSM_E1_PSF": self.psf_shapes[:, 0], + "HSM_E2_PSF": self.psf_shapes[:, 1], + "HSM_T_PSF": cs_size.sigma_to_T(self.psf_shapes[:, 2]), + "HSM_FLAG_PSF": self.psf_shapes[:, 3].astype(int), } else: data = {"VIGNET": self.interp_PSFs} @@ -339,14 +340,14 @@ def _write_output_validation(self, star_dict, psfex_cat_dict): ) data = { - "E1_PSF_HSM": self.psf_shapes[:, 0], - "E2_PSF_HSM": self.psf_shapes[:, 1], - "SIGMA_PSF_HSM": self.psf_shapes[:, 2], - "FLAG_PSF_HSM": self.psf_shapes[:, 3].astype(int), - "E1_STAR_HSM": self.star_shapes[:, 0], - "E2_STAR_HSM": self.star_shapes[:, 1], - "SIGMA_STAR_HSM": self.star_shapes[:, 2], - "FLAG_STAR_HSM": self.star_shapes[:, 3].astype(int), + "HSM_E1_PSF": self.psf_shapes[:, 0], + "HSM_E2_PSF": self.psf_shapes[:, 1], + "HSM_T_PSF": cs_size.sigma_to_T(self.psf_shapes[:, 2]), + "HSM_FLAG_PSF": self.psf_shapes[:, 3].astype(int), + "HSM_E1_STAR": self.star_shapes[:, 0], + "HSM_E2_STAR": self.star_shapes[:, 1], + "HSM_T_STAR": cs_size.sigma_to_T(self.star_shapes[:, 2]), + "HSM_FLAG_STAR": self.star_shapes[:, 3].astype(int), } data = {**data, **star_dict} @@ -536,16 +537,16 @@ def _interpolate_me(self): ] = final_list[j][1][where_res[0]] if self._compute_shape: shape_dict = {} - shape_dict["E1_PSF_HSM"] = final_list[j][2][ + shape_dict["HSM_E1_PSF"] = final_list[j][2][ where_res[0] ][0] - shape_dict["E2_PSF_HSM"] = final_list[j][2][ + shape_dict["HSM_E2_PSF"] = final_list[j][2][ where_res[0] ][1] - shape_dict["SIGMA_PSF_HSM"] = final_list[j][2][ - where_res[0] - ][2] - shape_dict["FLAG_PSF_HSM"] = final_list[j][2][ + shape_dict["HSM_T_PSF"] = cs_size.sigma_to_T( + final_list[j][2][where_res[0]][2] + ) + shape_dict["HSM_FLAG_PSF"] = final_list[j][2][ where_res[0] ][3] output_dict[id_tmp][final_list[j][3][where_res[0]]][ diff --git a/src/shapepipe/modules/mccd_package/mccd_plot_utilities.py b/src/shapepipe/modules/mccd_package/mccd_plot_utilities.py index cc20b651..1cf33ff8 100644 --- a/src/shapepipe/modules/mccd_package/mccd_plot_utilities.py +++ b/src/shapepipe/modules/mccd_package/mccd_plot_utilities.py @@ -254,23 +254,23 @@ def plot_meanshapes( ) # Flag mask - star_flags = starcat[hdu_no].data["FLAG_STAR_HSM"] - psf_flags = starcat[hdu_no].data["FLAG_PSF_HSM"] + star_flags = starcat[hdu_no].data["HSM_FLAG_STAR"] + psf_flags = starcat[hdu_no].data["HSM_FLAG_PSF"] flagmask = np.abs(star_flags - 1) * np.abs(psf_flags - 1) - # convert sigma to R^2's + # size column already holds T = 2 sigma^2 all_star_shapes = np.array( [ - starcat[hdu_no].data["E1_STAR_HSM"], - starcat[hdu_no].data["E2_STAR_HSM"], - 2.0 * starcat[hdu_no].data["SIGMA_STAR_HSM"] ** 2, + starcat[hdu_no].data["HSM_E1_STAR"], + starcat[hdu_no].data["HSM_E2_STAR"], + starcat[hdu_no].data["HSM_T_STAR"], ] ) all_psf_shapes = np.array( [ - starcat[hdu_no].data["E1_PSF_HSM"], - starcat[hdu_no].data["E2_PSF_HSM"], - 2.0 * starcat[hdu_no].data["SIGMA_PSF_HSM"] ** 2, + starcat[hdu_no].data["HSM_E1_PSF"], + starcat[hdu_no].data["HSM_E2_PSF"], + starcat[hdu_no].data["HSM_T_PSF"], ] ) all_CCDs = starcat[hdu_no].data["CCD_NB"] diff --git a/src/shapepipe/modules/mccd_package/shapepipe_auxiliary_mccd.py b/src/shapepipe/modules/mccd_package/shapepipe_auxiliary_mccd.py index cc38b407..99dea6b9 100644 --- a/src/shapepipe/modules/mccd_package/shapepipe_auxiliary_mccd.py +++ b/src/shapepipe/modules/mccd_package/shapepipe_auxiliary_mccd.py @@ -14,6 +14,7 @@ import mccd import numpy as np from astropy.io import fits +from cs_util import size as cs_size from shapepipe.pipeline import file_io @@ -471,10 +472,10 @@ def shapepipe_write_output( if get_shapes: data = { "VIGNET": interp_PSFs, - "E1_PSF_HSM": PSF_shapes[:, 0], - "E2_PSF_HSM": PSF_shapes[:, 1], - "SIGMA_PSF_HSM": PSF_shapes[:, 2], - "FLAG_PSF_HSM": PSF_shapes[:, 3].astype(int), + "HSM_E1_PSF": PSF_shapes[:, 0], + "HSM_E2_PSF": PSF_shapes[:, 1], + "HSM_T_PSF": cs_size.sigma_to_T(PSF_shapes[:, 2]), + "HSM_FLAG_PSF": PSF_shapes[:, 3].astype(int), } else: data = {"VIGNET": interp_PSFs} diff --git a/src/shapepipe/modules/merge_starcat_package/merge_starcat.py b/src/shapepipe/modules/merge_starcat_package/merge_starcat.py index 3b06cd07..c5d3eccd 100644 --- a/src/shapepipe/modules/merge_starcat_package/merge_starcat.py +++ b/src/shapepipe/modules/merge_starcat_package/merge_starcat.py @@ -12,6 +12,7 @@ import numpy as np from astropy.io import fits +from cs_util import size as cs_size from shapepipe.pipeline import file_io @@ -344,7 +345,7 @@ def process(self): ) ) - # shapes (convert sigmas to R^2) + # shapes (convert sigmas to T = 2 sigma^2) g1_psf += list( starcat_j[self._hdu_table].data["PSF_MOM_LIST"][:, 0] ) @@ -352,12 +353,16 @@ def process(self): starcat_j[self._hdu_table].data["PSF_MOM_LIST"][:, 1] ) size_psf += list( - starcat_j[self._hdu_table].data["PSF_MOM_LIST"][:, 2] ** 2 + cs_size.sigma_to_T( + starcat_j[self._hdu_table].data["PSF_MOM_LIST"][:, 2] + ) ) g1 += list(starcat_j[self._hdu_table].data["STAR_MOM_LIST"][:, 0]) g2 += list(starcat_j[self._hdu_table].data["STAR_MOM_LIST"][:, 1]) size += list( - starcat_j[self._hdu_table].data["STAR_MOM_LIST"][:, 2] ** 2 + cs_size.sigma_to_T( + starcat_j[self._hdu_table].data["STAR_MOM_LIST"][:, 2] + ) ) # flags @@ -479,21 +484,20 @@ def process(self): SEx_catalogue=True, ) - # Collect columns - # convert back to sigma for consistency + # Collect columns (size stored as T = 2 sigma^2) data = { "X": x, "Y": y, "RA": ra, "DEC": dec, - "E1_PSF_HSM": g1_psf, - "E2_PSF_HSM": g2_psf, - "SIGMA_PSF_HSM": np.sqrt(size_psf), - "E1_STAR_HSM": g1, - "E2_STAR_HSM": g2, - "SIGMA_STAR_HSM": np.sqrt(size), - "FLAG_PSF_HSM": flag_psf, - "FLAG_STAR_HSM": flag_star, + "HSM_E1_PSF": g1_psf, + "HSM_E2_PSF": g2_psf, + "HSM_T_PSF": size_psf, + "HSM_E1_STAR": g1, + "HSM_E2_STAR": g2, + "HSM_T_STAR": size, + "HSM_FLAG_PSF": flag_psf, + "HSM_FLAG_STAR": flag_star, "CCD_NB": ccd_nb, } @@ -580,17 +584,17 @@ def process(self): ra += list(data_j["RA"]) dec += list(data_j["DEC"]) - # shapes (convert sigmas to R^2) - g1_psf += list(data_j["E1_PSF_HSM"]) - g2_psf += list(data_j["E2_PSF_HSM"]) - size_psf += list(data_j["SIGMA_PSF_HSM"] ** 2) - g1 += list(data_j["E1_STAR_HSM"]) - g2 += list(data_j["E2_STAR_HSM"]) - size += list(data_j["SIGMA_STAR_HSM"] ** 2) + # shapes (size column already holds T = 2 sigma^2) + g1_psf += list(data_j["HSM_E1_PSF"]) + g2_psf += list(data_j["HSM_E2_PSF"]) + size_psf += list(data_j["HSM_T_PSF"]) + g1 += list(data_j["HSM_E1_STAR"]) + g2 += list(data_j["HSM_E2_STAR"]) + size += list(data_j["HSM_T_STAR"]) # flags - flag_psf += list(data_j["FLAG_PSF_HSM"]) - flag_star += list(data_j["FLAG_STAR_HSM"]) + flag_psf += list(data_j["HSM_FLAG_PSF"]) + flag_star += list(data_j["HSM_FLAG_STAR"]) # misc @@ -623,21 +627,20 @@ def process(self): SEx_catalogue=False, ) - # Collect columns - # convert back to sigma for consistency + # Collect columns (size stored as T = 2 sigma^2) data = { "X": x, "Y": y, "RA": ra, "DEC": dec, - "E1_PSF_HSM": g1_psf, - "E2_PSF_HSM": g2_psf, - "SIGMA_PSF_HSM": np.sqrt(size_psf), - "E1_STAR_HSM": g1, - "E2_STAR_HSM": g2, - "SIGMA_STAR_HSM": np.sqrt(size), - "FLAG_PSF_HSM": flag_psf, - "FLAG_STAR_HSM": flag_star, + "HSM_E1_PSF": g1_psf, + "HSM_E2_PSF": g2_psf, + "HSM_T_PSF": size_psf, + "HSM_E1_STAR": g1, + "HSM_E2_STAR": g2, + "HSM_T_STAR": size, + "HSM_FLAG_PSF": flag_psf, + "HSM_FLAG_STAR": flag_star, "MAG": mag, "SNR": snr, "ACCEPTED": psfex_acc, diff --git a/src/shapepipe/modules/psfex_interp_package/psfex_interp.py b/src/shapepipe/modules/psfex_interp_package/psfex_interp.py index 2f1a2db8..7428281d 100644 --- a/src/shapepipe/modules/psfex_interp_package/psfex_interp.py +++ b/src/shapepipe/modules/psfex_interp_package/psfex_interp.py @@ -11,6 +11,7 @@ import numpy as np from astropy.io import fits +from cs_util import size as cs_size from sqlitedict import SqliteDict from shapepipe.pipeline import file_io @@ -346,10 +347,10 @@ def _write_output(self): if self._compute_shape: data = { "VIGNET": self.interp_PSFs, - "E1_PSF_HSM": self.psf_shapes[:, 0], - "E2_PSF_HSM": self.psf_shapes[:, 1], - "SIGMA_PSF_HSM": self.psf_shapes[:, 2], - "FLAG_PSF_HSM": self.psf_shapes[:, 3].astype(int), + "HSM_E1_PSF": self.psf_shapes[:, 0], + "HSM_E2_PSF": self.psf_shapes[:, 1], + "HSM_T_PSF": cs_size.sigma_to_T(self.psf_shapes[:, 2]), + "HSM_FLAG_PSF": self.psf_shapes[:, 3].astype(int), } else: data = {"VIGNET": self.interp_PSFs} @@ -499,14 +500,14 @@ def _write_output_validation(self, star_dict, psfex_cat_dict): ) data = { - "E1_PSF_HSM": self.psf_shapes[:, 0], - "E2_PSF_HSM": self.psf_shapes[:, 1], - "SIGMA_PSF_HSM": self.psf_shapes[:, 2], - "FLAG_PSF_HSM": self.psf_shapes[:, 3].astype(int), - "E1_STAR_HSM": self.star_shapes[:, 0], - "E2_STAR_HSM": self.star_shapes[:, 1], - "SIGMA_STAR_HSM": self.star_shapes[:, 2], - "FLAG_STAR_HSM": self.star_shapes[:, 3].astype(int), + "HSM_E1_PSF": self.psf_shapes[:, 0], + "HSM_E2_PSF": self.psf_shapes[:, 1], + "HSM_T_PSF": cs_size.sigma_to_T(self.psf_shapes[:, 2]), + "HSM_FLAG_PSF": self.psf_shapes[:, 3].astype(int), + "HSM_E1_STAR": self.star_shapes[:, 0], + "HSM_E2_STAR": self.star_shapes[:, 1], + "HSM_T_STAR": cs_size.sigma_to_T(self.star_shapes[:, 2]), + "HSM_FLAG_STAR": self.star_shapes[:, 3].astype(int), } data = {**data, **star_dict} @@ -754,16 +755,16 @@ def _interpolate_me(self): ] = final_list[j][1][where_res[0]] if self._compute_shape: shape_dict = {} - shape_dict["E1_PSF_HSM"] = final_list[j][2][ + shape_dict["HSM_E1_PSF"] = final_list[j][2][ where_res[0] ][0] - shape_dict["E2_PSF_HSM"] = final_list[j][2][ + shape_dict["HSM_E2_PSF"] = final_list[j][2][ where_res[0] ][1] - shape_dict["SIGMA_PSF_HSM"] = final_list[j][2][ - where_res[0] - ][2] - shape_dict["FLAG_PSF_HSM"] = final_list[j][2][ + shape_dict["HSM_T_PSF"] = cs_size.sigma_to_T( + final_list[j][2][where_res[0]][2] + ) + shape_dict["HSM_FLAG_PSF"] = final_list[j][2][ where_res[0] ][3] output_dict[id_tmp][final_list[j][3][where_res[0]]][ diff --git a/tests/module/test_psfex_interp.py b/tests/module/test_psfex_interp.py index 34da52e0..cb644875 100644 --- a/tests/module/test_psfex_interp.py +++ b/tests/module/test_psfex_interp.py @@ -2,11 +2,15 @@ from types import SimpleNamespace +import galsim from astropy.io import fits import numpy as np import numpy.testing as npt import pytest +from hypothesis import assume, given, settings +from hypothesis import strategies as st +from shapepipe.pipeline import file_io from shapepipe.modules.psfex_interp_package.psfex_interp import ( PSFExInterpolator, ) @@ -32,6 +36,45 @@ def _write_galaxy_catalogue(path, **columns): ).writeto(path) +def _elliptical_psf_stamps(shears, sigma=3.0, stamp=51, scale=1.0): + """Draw elliptical Gaussian PSF stamps with given e-type distortions. + + Each ``(e1, e2)`` is applied as a galsim *distortion* (``shear(e1=, e2=)``) + so the stamp is genuinely non-round; round stamps make ``e == g == 0`` and + the e/g grammar fix becomes invisible. + """ + return np.array( + [ + galsim.Gaussian(sigma=sigma) + .shear(e1=e1, e2=e2) + .drawImage(nx=stamp, ny=stamp, scale=scale, method="no_pixel") + .array + for e1, e2 in shears + ] + ) + + +@pytest.fixture(scope="module") +def psfex_interpolator(tmp_path_factory): + """A shape-computing interpolator backed by a minimal galaxy catalogue. + + Module-scoped so it can be driven by the hypothesis property test without + tripping the function-scoped-fixture health check; ``_get_psfshapes`` only + reads ``interp_PSFs`` and overwrites ``psf_shapes``, so reuse is safe. + """ + cat_path = tmp_path_factory.mktemp("psfex") / "galaxies.fits" + _write_galaxy_catalogue(cat_path, X_IMAGE=[1.5, 2.5], Y_IMAGE=[3.5, 4.5]) + return PSFExInterpolator( + dotpsf_path=None, + galcat_path=str(cat_path), + output_path=str(cat_path.parent), + img_number="001", + w_log=SimpleNamespace(info=lambda message: None), + pos_params=["X_IMAGE", "Y_IMAGE"], + get_shapes=True, + ) + + def test_get_galaxy_positions_reads_requested_columns(tmp_path): cat_path = tmp_path / "galaxies.fits" @@ -66,3 +109,101 @@ def test_get_galaxy_positions_raises_for_missing_column(tmp_path): with pytest.raises(KeyError, match="Y_IMAGE"): interp._get_galaxy_positions() + + +# Distortion magnitude well inside |e| < 1, large enough that the e-type and +# g-type representations of the same shape differ by far more than fit noise +# (so the property has teeth), any orientation. +_e_complex = st.complex_numbers( + min_magnitude=0.05, max_magnitude=0.6, allow_nan=False, allow_infinity=False +) + + +@settings(deadline=None, max_examples=25) +@given(e=_e_complex) +def test_get_psfshapes_stores_e_type_distortion(psfex_interpolator, e): + """The stored HSM PSF shapes are e-type distortion, not g-type shear. + + ``_get_psfshapes`` reads ``moms.observed_shape.e1/.e2``; this pins it to the + galsim *distortion* accessor and away from the reduced-shear ``.g1/.g2``. + The check is exact against the same image's ``observed_shape`` (independent + of how well HSM recovers the input), and the magnitude inequality is the + tooth: for any non-round shape ``|e| > |g|`` strictly, so storing ``.g`` + instead (the pre-fix bug) would fail it. + """ + psfex_interpolator.interp_PSFs = _elliptical_psf_stamps([(e.real, e.imag)]) + psfex_interpolator._get_psfshapes() + + moms = galsim.hsm.FindAdaptiveMom( + galsim.Image(psfex_interpolator.interp_PSFs[0]), strict=False + ) + e1, e2 = moms.observed_shape.e1, moms.observed_shape.e2 + g1, g2 = moms.observed_shape.g1, moms.observed_shape.g2 + + mag_e = np.hypot(e1, e2) + mag_g = np.hypot(g1, g2) + assume(mag_e - mag_g > 1e-3) # ensure a non-degenerate e-vs-g distinction + + # Stored values are exactly the e-type distortion components ... + npt.assert_array_equal(psfex_interpolator.psf_shapes[0, 0], e1) + npt.assert_array_equal(psfex_interpolator.psf_shapes[0, 1], e2) + + # ... whose magnitude strictly exceeds the g-type shear it would carry if + # the .g/.e swap regressed. + mag_stored = np.hypot( + psfex_interpolator.psf_shapes[0, 0], psfex_interpolator.psf_shapes[0, 1] + ) + npt.assert_allclose(mag_stored, mag_e) + assert mag_stored > mag_g + 1e-4 + + +def test_write_output_uses_hsm_grammar_and_routes_size_to_T( + tmp_path, monkeypatch +): + """``_write_output`` writes the HSM grammar names and stores T, not sigma. + + Captures the dict handed to ``save_as_fits`` and asserts the frozen + grammar (``HSM_E1_PSF`` / ``HSM_E2_PSF`` / ``HSM_T_PSF`` / ``HSM_FLAG_PSF``, + no legacy ``*_PSF_HSM``) and that the size column is routed through + ``cs_util.size.sigma_to_T`` (``T = 2 sigma^2``) at the producer. + """ + cat_path = tmp_path / "galaxies.fits" + _write_galaxy_catalogue(cat_path, X_IMAGE=[1.5, 2.5], Y_IMAGE=[3.5, 4.5]) + interp = PSFExInterpolator( + dotpsf_path=None, + galcat_path=str(cat_path), + output_path=str(tmp_path), + img_number="001", + w_log=SimpleNamespace(info=lambda message: None), + pos_params=["X_IMAGE", "Y_IMAGE"], + get_shapes=True, + ) + interp.interp_PSFs = _elliptical_psf_stamps([(0.3, 0.1), (-0.2, 0.25)]) + interp._get_psfshapes() + + captured = {} + + def _capture(self, data=None, **kwargs): + captured["data"] = data + + monkeypatch.setattr(file_io.FITSCatalogue, "save_as_fits", _capture) + + interp._write_output() + data = captured["data"] + + assert { + "VIGNET", + "HSM_E1_PSF", + "HSM_E2_PSF", + "HSM_T_PSF", + "HSM_FLAG_PSF", + } <= set(data) + for legacy in ("E1_PSF_HSM", "E2_PSF_HSM", "SIGMA_PSF_HSM", "FLAG_PSF_HSM"): + assert legacy not in data + + # e-type distortion stored straight through from _get_psfshapes ... + npt.assert_array_equal(data["HSM_E1_PSF"], interp.psf_shapes[:, 0]) + npt.assert_array_equal(data["HSM_E2_PSF"], interp.psf_shapes[:, 1]) + # ... and the size column is T = 2 sigma^2 (sigma_to_T), not raw sigma. + npt.assert_allclose(data["HSM_T_PSF"], 2.0 * interp.psf_shapes[:, 2] ** 2) + assert not np.allclose(data["HSM_T_PSF"], interp.psf_shapes[:, 2]) From 85a18ab558e445d120f5bb09bab56ce7eb66801d Mon Sep 17 00:00:00 2001 From: Cail Daley Date: Tue, 30 Jun 2026 22:47:44 +0200 Subject: [PATCH 18/30] refactor: remove the spread_model pipeline module (unused, never cut on) Axel's review point #2: spread_model must never be cut on -- and it isn't used as a pipeline step. Delete the module and unwire it: - delete spread_model_package/ and spread_model_runner.py (only the runner imported the package, so the removal is self-contained) - make_cat_runner: drop spread_model_runner from the input modules, drop the sexcat_sm input pattern/ext and the SM_* config, remove the save_sm_data() call, and collapse the now-dead 3/4/5-input branching (the len==5 galsim-dual-shape path was dormant -- no galsim_shapes_runner exists) - example configs, module docstring, pipeline tutorial: drop spread_model The make_cat-side writer (save_sm_data) and its SPREAD_* columns are removed in the column-grammar commit that follows. summary_params_pre_v2.py keeps its historical v1 runner list untouched. Downstream, sp_validation cuts on SPREAD_MODEL (classification_galaxy_base, do_spread_model default True) -- that paired change rides the separate #205 PR. Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01MgDK9SKn2PsDZGEbr4WFb3 --- docs/source/pipeline_tutorial.md | 11 +- example/cfis/config_make_cat_mccd.ini | 13 +- example/cfis/config_make_cat_psfex.ini | 10 +- example/cfis/config_make_cat_psfex_nosm.ini | 2 - .../modules/make_cat_package/__init__.py | 20 +- src/shapepipe/modules/make_cat_runner.py | 69 +--- .../modules/spread_model_package/__init__.py | 36 -- .../spread_model_package/spread_model.py | 309 ------------------ src/shapepipe/modules/spread_model_runner.py | 69 ---- 9 files changed, 24 insertions(+), 515 deletions(-) delete mode 100644 src/shapepipe/modules/spread_model_package/__init__.py delete mode 100644 src/shapepipe/modules/spread_model_package/spread_model.py delete mode 100644 src/shapepipe/modules/spread_model_runner.py diff --git a/docs/source/pipeline_tutorial.md b/docs/source/pipeline_tutorial.md index 95a0aa94..5279b5f9 100644 --- a/docs/source/pipeline_tutorial.md +++ b/docs/source/pipeline_tutorial.md @@ -289,16 +289,15 @@ job_sp TILE_ID -j 16 is the selection of galaxies as extended objects compared to the PSF. First, the PSF model is interpolated to galaxy positions, according to the PSF model with `psfex_interp` or `mccd_interp`. Next, postage stamps around galaxies -of the weights maps are created via `vignetmaker`. Then, the spread model -is computed by the `spread_model` module. Finally, postage stamps +of the weights maps are created via `vignetmaker`. Finally, postage stamps around galaxies of single-exposure data is extracted with another call to `vignetmaker`. The output directory is -- `run_sp_MiViSmVi` if the PSF model is `mccd`; -- `run_sp_tile_PsViSmVi` for the `PSFEx` PSF model. +- `run_sp_MiViVi` if the PSF model is `mccd`; +- `run_sp_tile_PsViVi` for the `PSFEx` PSF model. -This corresponds to the MCCD/PSFex interpolation (`Mi`/`Pi`), `vignetmaker` (`Vi`), `spread_model` (`Sm`), and the +This corresponds to the MCCD/PSFex interpolation (`Mi`/`Pi`), `vignetmaker` (`Vi`), and the second call to `vignetmaker` (`Vi`). @@ -325,7 +324,7 @@ job_sp TILE_ID -j 64 This task first merges the `NJOB` parallel `ngmix` output files from the previous step into one output file. Then, previously obtained information are pasted into a _final_ shape catalogue via `make_cat`. Included are galaxy detection and basic measurement parameters, the PSF model at -galaxy positions, the spread-model classification, and the shape measurement. +galaxy positions, and the shape measurement. Two output directories are created. The first one is `run_sp_Ms` for the `merge_sep` run. diff --git a/example/cfis/config_make_cat_mccd.ini b/example/cfis/config_make_cat_mccd.ini index 5aa33d27..4ac61823 100644 --- a/example/cfis/config_make_cat_mccd.ini +++ b/example/cfis/config_make_cat_mccd.ini @@ -34,7 +34,7 @@ LOG_NAME = log_sp RUN_LOG_NAME = log_run_sp # Input directory, containing input files, single string or list of names with length matching FILE_PATTERN -INPUT_DIR = last:sextractor_runner_run_1, last:spread_model_runner, last:mccd_interp_runner, last:merge_sep_cats_runner +INPUT_DIR = last:sextractor_runner_run_1, last:mccd_interp_runner, last:merge_sep_cats_runner # Output directory OUTPUT_DIR = ./output @@ -56,12 +56,10 @@ TIMEOUT = 96:00:00 # Input file pattern(s), list of strings with length matching number of expected input file types # Cannot contain wild cards -FILE_PATTERN = sexcat, sexcat_sm, galaxy_psf, ngmix -#, galsim +FILE_PATTERN = sexcat, galaxy_psf, ngmix # FILE_EXT (optional) list of string extensions to identify input files -FILE_EXT = .fits, .fits, .sqlite, .fits -#, .fits +FILE_EXT = .fits, .sqlite, .fits # Numbering convention, string that exemplifies a numbering pattern. # Matches input single exposures (with 'p' removed) @@ -69,9 +67,4 @@ FILE_EXT = .fits, .fits, .sqlite, .fits # sections below NUMBERING_SCHEME = -000-000 -SM_DO_CLASSIFICATION = True -SM_STAR_THRESH = 0.003 -SM_GAL_THRESH = 0.01 - SHAPE_MEASUREMENT_TYPE = ngmix -#, galsim diff --git a/example/cfis/config_make_cat_psfex.ini b/example/cfis/config_make_cat_psfex.ini index a7407d99..552c578e 100644 --- a/example/cfis/config_make_cat_psfex.ini +++ b/example/cfis/config_make_cat_psfex.ini @@ -55,14 +55,14 @@ TIMEOUT = 96:00:00 [MAKE_CAT_RUNNER] # Input directory, containing input files, single string or list of names with length matching FILE_PATTERN -INPUT_DIR = run_sp_tile_Sx:sextractor_runner, last:spread_model_runner, last:psfex_interp_runner, last:merge_sep_cats_runner +INPUT_DIR = run_sp_tile_Sx:sextractor_runner, last:psfex_interp_runner, last:merge_sep_cats_runner # Input file pattern(s), list of strings with length matching number of expected input file types # Cannot contain wild cards -FILE_PATTERN = sexcat, sexcat_sm, galaxy_psf, ngmix +FILE_PATTERN = sexcat, galaxy_psf, ngmix # FILE_EXT (optional) list of string extensions to identify input files -FILE_EXT = .fits, .fits, .sqlite, .fits +FILE_EXT = .fits, .sqlite, .fits # Numbering convention, string that exemplifies a numbering pattern. # Matches input single exposures (with 'p' removed) @@ -70,8 +70,4 @@ FILE_EXT = .fits, .fits, .sqlite, .fits # sections below NUMBERING_SCHEME = -000-000 -SM_DO_CLASSIFICATION = True -SM_STAR_THRESH = 0.003 -SM_GAL_THRESH = 0.01 - SHAPE_MEASUREMENT_TYPE = ngmix diff --git a/example/cfis/config_make_cat_psfex_nosm.ini b/example/cfis/config_make_cat_psfex_nosm.ini index 1983f91c..fc458232 100644 --- a/example/cfis/config_make_cat_psfex_nosm.ini +++ b/example/cfis/config_make_cat_psfex_nosm.ini @@ -71,6 +71,4 @@ FILE_EXT = .fits, .sqlite, .fits # sections below NUMBERING_SCHEME = -000-000 -SM_DO_CLASSIFICATION = False - SHAPE_MEASUREMENT_TYPE = ngmix diff --git a/src/shapepipe/modules/make_cat_package/__init__.py b/src/shapepipe/modules/make_cat_package/__init__.py index 1bd1cee5..84e6c97a 100644 --- a/src/shapepipe/modules/make_cat_package/__init__.py +++ b/src/shapepipe/modules/make_cat_package/__init__.py @@ -7,7 +7,6 @@ :Parent modules: - ``sextractor_runner`` -- ``spread_model_runner`` - ``psfex_interp_runner`` or ``mccd_interp_runner`` - ``ngmix_runner`` @@ -21,27 +20,12 @@ This module creates a *final* catalogue combining the output of various previous module runs. This gathers all relevant information on the measured galaxies for weak-lensing post-processing. This includes galaxy detection and -basic measurement parameters, the PSF model at galaxy positions, the -spread-model classification, and the shape measurement. +basic measurement parameters, the PSF model at galaxy positions, and the +shape measurement. Module-specific config file entries =================================== -SM_DO_CLASSIFICATION : bool, optional - Adds spread-model star/galaxy classification flag as the column - ``SPREAD_CLASS`` to the output if ``True`` -SM_STAR_THRESH : float, optional - Threshold :math:`s_{\rm star, thresh}` for star selection; object is - classified as a star if - :math:`|x s + 2 \sigma_s | < s_{\textrm{star, thresh}}` - where :math:`s` is the spread model and :math:`\sigma_s` is the spread - model error; default value is ``0.003`` -SM_GAL_THRESH : float, optional - Threshold :math:`s_{\rm gal, thresh}` for galaxy selection; object is - classified as a galaxy if - :math:`s + 2 \sigma_s > s_{\textrm{gal, thresh}}` where :math:`s` is the - spread model and :math:`\sigma_s` is the spread model error; default value - is ``0.01`` SHAPE_MEASUREMENT_TYPE : list Shape measurement method, valid options are ``ngmix`` and/or ``galsim`` SAVE_PSF_DATA : bool, optional diff --git a/src/shapepipe/modules/make_cat_runner.py b/src/shapepipe/modules/make_cat_runner.py index fa1fc43f..6ab6449d 100644 --- a/src/shapepipe/modules/make_cat_runner.py +++ b/src/shapepipe/modules/make_cat_runner.py @@ -16,17 +16,15 @@ version="1.1", input_module=[ "sextractor_runner", - "spread_model_runner", "psfex_interp_runner", "ngmix_runner", ], file_pattern=[ "tile_sexcat", - "sexcat_sm", "galaxy_psf", "ngmix", ], - file_ext=[".fits", ".fits", ".sqlite", ".fits"], + file_ext=[".fits", ".sqlite", ".fits"], depends=["numpy", "sqlitedict"], ) def make_cat_runner( @@ -39,37 +37,11 @@ def make_cat_runner( ): """Define The Make Catalogue Runner.""" # Set input file paths - if len(input_file_list) == 3: - # No spread model input - ( - tile_sexcat_path, - galaxy_psf_path, - shape1_cat_path, - ) = input_file_list - sexcat_sm_path = None - else: - # With spread model input - ( - tile_sexcat_path, - sexcat_sm_path, - galaxy_psf_path, - shape1_cat_path, - ) = input_file_list[0:4] - if len(input_file_list) == 5: - # With second shape catalogue input - shape2_cat_path = input_file_list[4] - - # Fetch classification options - do_classif = config.getboolean( - module_config_sec, - "SM_DO_CLASSIFICATION", - ) - if do_classif: - star_thresh = config.getfloat(module_config_sec, "SM_STAR_THRESH") - gal_thresh = config.getfloat(module_config_sec, "SM_GAL_THRESH") - else: - star_thresh = None - gal_thresh = None + ( + tile_sexcat_path, + galaxy_psf_path, + shape1_cat_path, + ) = input_file_list # Fetch shape measurement type shape_type_list = config.getlist( @@ -99,35 +71,16 @@ def make_cat_runner( n_obj = make_cat.save_sextractor_data(final_cat_file, tile_sexcat_path) cat_size_sextractor = n_obj - # Save spread-model data - if sexcat_sm_path is None: - w_log.info("No sm cat input, setting spread model to 99") - else: - w_log.info("Save spread-model data") - cat_size_sm = make_cat.save_sm_data( - final_cat_file, - sexcat_sm_path, - do_classif, - star_thresh, - gal_thresh, - n_obj=n_obj - ) - - if cat_size_sextractor != cat_size_sm: - w_log( - f"Warnign: SExtractor catalogue {tile_sexcat_path} has different size" - + f" ({cat_size_sextractor} than spread_model catalogue" - + f" {sexcat_sm_path} ({cat_size_sm})" - ) - # Save shape data sc_inst = make_cat.SaveCatalogue(final_cat_file, cat_size_sextractor, w_log) w_log.info("Save shape measurement data") for shape_type in shape_type_list: w_log.info(f"Save {shape_type.lower()} data") - cat_path = ( - shape2_cat_path if shape_type == "galsim" else shape1_cat_path - ) + # A second, galsim-specific shape catalogue path (shape2_cat_path) + # was never produced by any runner in this pipeline (no + # galsim_shapes_runner exists) — galsim shapes are read from the + # same input as ngmix (shape1_cat_path). + cat_path = shape1_cat_path err_msg = sc_inst.process(shape_type.lower(), cat_path) diff --git a/src/shapepipe/modules/spread_model_package/__init__.py b/src/shapepipe/modules/spread_model_package/__init__.py deleted file mode 100644 index 336283e7..00000000 --- a/src/shapepipe/modules/spread_model_package/__init__.py +++ /dev/null @@ -1,36 +0,0 @@ -"""SPREAD MODEL PACKAGE. - -This package contains the module for ``spread_model``. - -:Author: Axel Guinot - -:Parent modules: - -- ``sextractor_runner`` -- ``psfex_interp_runner`` or ``mccd_interp_runner`` - and ``vignetmaker_runner`` - -:Input: SExtractor output catalogue, PSFEx output catalogue and vignet files - -:Output: Updated SExtractor or new FITS file - -Description -=========== - -This module refines the galaxy sample that will be used for shape measurement -using the spread model method of :cite:`desai:12` and :cite:`mohr:12`. - -Module-specific config file entries -=================================== - -PREFIX : str, optional - Ouput file prefix -PIXEL_SCALE : float - Pixel scale in arcseconds -OUTPUT_MODE : str - Output mode, options are ``add`` to add the outputs to the input - SExtractor catalogue or ``new`` to generte a new output catalogue - -""" - -__all__ = ["spread_model"] diff --git a/src/shapepipe/modules/spread_model_package/spread_model.py b/src/shapepipe/modules/spread_model_package/spread_model.py deleted file mode 100644 index 1461ecce..00000000 --- a/src/shapepipe/modules/spread_model_package/spread_model.py +++ /dev/null @@ -1,309 +0,0 @@ -"""SPREAD MODEL. - -Class to compute the spread model, criterion to select galaxies - -:Author: Axel Guinot - -""" - -import galsim -import numpy as np -from cs_util import size as cs_size -from sqlitedict import SqliteDict - -from shapepipe.pipeline import file_io - - -def get_sm(obj_vign, psf_vign, model_vign, weight_vign): - """Get Spread Model. - - This method compute the spread moel for an object. - - Parameters - ---------- - obj_vign : numpy.ndarray - Vignet of the object - psf_vign : numpy.ndarray - Vignet of the gaussian model of the PSF - model_vign : numpy.ndarray - Vignet of the galaxy model - weight_vign : numpy.ndarray - Vignet of the weight at the object position - - Returns - ------- - tuple - Spread model and corresponding error values - - """ - # Mask invalid pixels - m = (obj_vign > -1e29) & (weight_vign > 0) - w = m.astype(float) - - # Set noise as inverse weight - noise_v = (1 / weight_vign).ravel() - - # Remove infinite noise pixels - noise_v[np.isinf(noise_v)] = 0 - - # Transform 2D vignets to 1D vectors - t_v = model_vign.ravel() - g_v = obj_vign.ravel() - psf_v = psf_vign.ravel() - w_v = w.ravel() - - # Compute scalar products used in spread model - tg = np.sum(t_v * w_v * g_v) - pg = np.sum(psf_v * w_v * g_v) - tp = np.sum(t_v * w_v * psf_v) - pp = np.sum(psf_v * w_v * psf_v) - - tnt = np.sum(t_v * noise_v * t_v * w_v) - pnp = np.sum(psf_v * noise_v * psf_v * w_v) - tnp = np.sum(t_v * noise_v * psf_v * w_v) - err = tnt * pg**2 + pnp * tg**2 - 2 * tnp * pg * tg - - # Compute spread model - if pg > 0: - sm = (tg / pg) - (tp / pp) - else: - sm = 1 - - if (pg > 0) & (err > 0): - sm_err = np.sqrt(err) / pg**2 - else: - sm_err = 1 - - return sm, sm_err - - -def get_model(sigma, flux, img_shape, pixel_scale=0.186): - """Get Model. - - This method computes - - an exponential galaxy model with scale radius = 1/16 FWHM - - a Gaussian model for the PSF - - Parameters - ---------- - sigma : float - Sigma of the PSF (in pixel units) - flux : float - Flux of the galaxy for the model - img_shape : list - Size of the output vignet ``[xsize, ysize]`` - pixel_scale : float, optional - Pixel scale to use for the model (in arcsec); default is ``0.186`` - - Returns - ------- - tuple - Vignet of the galaxy model and of the PSF model - - """ - # Get scale radius - scale_radius = 1 / 16 * cs_size.sigma_to_fwhm(sigma) * pixel_scale - - # Get galaxy model - gal_obj = galsim.Exponential(scale_radius=scale_radius, flux=flux) - - # Get PSF - psf_obj = galsim.Gaussian(sigma=sigma * pixel_scale) - - # Convolve both - gal_obj = galsim.Convolve(gal_obj, psf_obj) - - # Draw galaxy and PSF on vignets - gal_vign = gal_obj.drawImage( - nx=img_shape[0], ny=img_shape[1], scale=pixel_scale - ).array - - psf_vign = psf_obj.drawImage( - nx=img_shape[0], ny=img_shape[1], scale=pixel_scale - ).array - - return gal_vign, psf_vign - - -class SpreadModel(object): - """The Spread Model Class. - - Parameters - ---------- - sex_cat_path : str - Path to SExtractor catalogue - psf_cat_path : str - Path to PSF catalogue - weight_cat_path : str - Path to weight catalogue - output_path : str - Output file path of pasted catalog - pixel_scale : float - Pixel scale in arcsec - output_mode : str - Options are ``new`` or ``add`` - - Notes - ----- - For the ``output_mode``: - - - ``new`` will create a new catalogue with - ``[number, mag, sm, sm_err]`` - - ``add`` will output a copy of the input SExtractor with the columns - ``sm`` and ``sm_err`` - - """ - - def __init__( - self, - sex_cat_path, - psf_cat_path, - weight_cat_path, - output_path, - pixel_scale, - output_mode, - ): - - self._sex_cat_path = sex_cat_path - self._psf_cat_path = psf_cat_path - self._weight_cat_path = weight_cat_path - self._output_path = output_path - self._pixel_scale = pixel_scale - self._output_mode = output_mode - - def process(self): - """Process. - - Process the spread model computation - - """ - # Get data - sex_cat = file_io.FITSCatalogue(self._sex_cat_path, SEx_catalogue=True) - sex_cat.open() - obj_id = np.copy(sex_cat.get_data()["NUMBER"]) - obj_vign = np.copy(sex_cat.get_data()["VIGNET"]) - obj_mag = None - if self._output_mode == "new": - obj_mag = np.copy(sex_cat.get_data()["MAG_AUTO"]) - sex_cat.close() - - psf_cat = SqliteDict(self._psf_cat_path) - - weight_cat = file_io.FITSCatalogue( - self._weight_cat_path, - SEx_catalogue=True, - ) - weight_cat.open() - weigh_vign = weight_cat.get_data()["VIGNET"] - weight_cat.close() - - # Get spread model - skip_obj = False - spread_model_final = [] - spread_model_err_final = [] - for idx, id_tmp in enumerate(obj_id): - sigma_list = [] - - if psf_cat[str(id_tmp)] == "empty": - spread_model_final.append(-1) - spread_model_err_final.append(1) - continue - - psf_expccd_name = list(psf_cat[str(id_tmp)].keys()) - - for expccd_name_tmp in psf_expccd_name: - psf_cat_id_ccd = psf_cat[str(id_tmp)][expccd_name_tmp] - sigma_list.append(psf_cat_id_ccd["SHAPES"]["SIGMA_PSF_HSM"]) - - obj_sigma_tmp = np.mean(sigma_list) - if obj_sigma_tmp > 0: - obj_vign_tmp = obj_vign[idx] - obj_flux_tmp = 1.0 - obj_weight_tmp = weigh_vign[idx] - obj_model_tmp, obj_psf_tmp = get_model( - obj_sigma_tmp, - obj_flux_tmp, - obj_vign_tmp.shape, - self._pixel_scale, - ) - - obj_sm, obj_sm_err = get_sm( - obj_vign_tmp, obj_psf_tmp, obj_model_tmp, obj_weight_tmp - ) - else: - # size < 0, something is not right with this object - obj_sm, obj_sm_err = -1.0, -1.0 - - spread_model_final.append(obj_sm) - spread_model_err_final.append(obj_sm_err) - - spread_model_final = np.array(spread_model_final, dtype="float64") - spread_model_err_final = np.array( - spread_model_err_final, - dtype="float64", - ) - - psf_cat.close() - - self.save_results( - spread_model_final, spread_model_err_final, obj_mag, obj_id - ) - - def save_results(self, sm, sm_err, mag, number): - """Save Results. - - Save output catalogue with spread model and errors. - - Parameters - ---------- - sm : numpy.ndarray - Value of the spread model for all objects - sm_err : numpy.ndarray - Value of the spread model error for all objects - mag : numpy.ndarray - Magnitude of all objects (only for a new catalogue) - number : numpy.ndarray - ID of all objects (only for a new catalogue) - - Raises - ------ - ValueError - For incorrect output mode - - """ - if self._output_mode == "new": - new_cat = file_io.FITSCatalogue( - self._output_path, - SEx_catalogue=True, - open_mode=file_io.BaseCatalogue.OpenMode.ReadWrite, - ) - dict_data = { - "NUMBER": number, - "MAG": mag, - "SPREAD_MODEL": sm, - "SPREADERR_MODEL": sm_err, - } - new_cat.save_as_fits( - data=dict_data, sex_cat_path=self._sex_cat_path - ) - elif self._output_mode == "add": - ori_cat = file_io.FITSCatalogue( - self._sex_cat_path, - SEx_catalogue=True, - ) - ori_cat.open() - new_cat = file_io.FITSCatalogue( - self._output_path, - SEx_catalogue=True, - open_mode=file_io.BaseCatalogue.OpenMode.ReadWrite, - ) - ori_cat.add_col( - "SPREAD_MODEL", sm, new_cat=True, new_cat_inst=new_cat - ) - ori_cat.add_col("SPREAD_MODEL", sm, new_cat=True, new_cat_inst=new_cat) - ori_cat.close() - new_cat.open() - new_cat.add_col("SPREADERR_MODEL", sm_err) - new_cat.close() - else: - raise ValueError("Mode must be in [new, add].") diff --git a/src/shapepipe/modules/spread_model_runner.py b/src/shapepipe/modules/spread_model_runner.py deleted file mode 100644 index 16c9a2d3..00000000 --- a/src/shapepipe/modules/spread_model_runner.py +++ /dev/null @@ -1,69 +0,0 @@ -"""SPREAD MODEL RUNNER. - -Module runner for ``spread_model``. - -:Author: Axel Guinot - -""" - -from shapepipe.modules.module_decorator import module_runner -from shapepipe.modules.spread_model_package.spread_model import SpreadModel - - -@module_runner( - version="1.1", - input_module=[ - "sextractor_runner", - "psfex_interp_runner", - "vignetmaker_runner", - ], - file_pattern=["sexcat", "galaxy_psf", "weight_vign"], - file_ext=[".fits", ".sqlite", ".fits"], - depends=["numpy", "galsim"], - run_method="parallel", -) -def spread_model_runner( - input_file_list, - run_dirs, - file_number_string, - config, - module_config_sec, - w_log, -): - """Define The Spread Model Runner.""" - # Get input files - sex_cat_path, psf_cat_path, weight_cat_path = input_file_list - - # Get file prefix (optional) - if config.has_option(module_config_sec, "PREFIX"): - prefix = config.get(module_config_sec, "PREFIX") - if (prefix.lower() != "none") & (prefix != ""): - prefix = prefix + "_" - else: - prefix = "" - else: - prefix = "" - - # Get pixel scale and output mode - pixel_scale = config.getfloat(module_config_sec, "PIXEL_SCALE") - output_mode = config.get(module_config_sec, "OUTPUT_MODE") - - # Set output file path - file_name = f"{prefix}sexcat_sm{file_number_string}.fits" - output_path = f'{run_dirs["output"]}/{file_name}' - - # Create spread model class instance - sm_inst = SpreadModel( - sex_cat_path, - psf_cat_path, - weight_cat_path, - output_path, - pixel_scale, - output_mode, - ) - - # Process spread model computation - sm_inst.process() - - # No return objects - return None, None From 4f0756d0e60d343ecfba18582f37cff649661804 Mon Sep 17 00:00:00 2001 From: Cail Daley Date: Tue, 30 Jun 2026 22:48:04 +0200 Subject: [PATCH 19/30] refactor(make_cat): one column grammar for ngmix, galsim & HSM-sink columns Apply the ShapePipe-v2 grammar (ESTIMATOR_COMPONENT_OBJECT, uppercase; galaxy the implicit default object with no token; ellipticity split into named scalar G1/G2 for g-type and E1/E2 for e-type; one stored size T = 2*sigma^2 via cs_util.size) to every column make_cat writes, and drop the spread_model writer: ngmix galaxy: NGMIX_*_GAL_ -> NGMIX_*_ (drop GAL; realigns with what sp_validation already reads, e.g. NGMIX_T_NOSHEAR) galsim: GALSIM_GAL_ELL[_ERR|_UNCORR]_ packed(N,2) -> scalar GALSIM_{G1,G2}[_ERR|_UNCORR]_; GALSIM_PSF_ELL_ -> GALSIM_{G1,G2}_PSF_; GALSIM_*_SIGMA_ -> GALSIM_T[_PSF]_ via cs_size.sigma_to_T. Also fixes a latent NameError (undefined n_obj -> len(self._obj_id)) in this dormant block. HSM sink: _save_psf_data's PSF_ELL_ -> HSM_E1/E2_PSF_, PSF_FWHM_ -> HSM_T_PSF_ (read straight from the SHAPES dict, now T), PSF_FLAG_ -> HSM_FLAG_PSF_ spread_model: delete the save_sm_data writer and its SPREAD_MODEL / SPREADERR_MODEL / SPREAD_CLASS columns Updates the example .param allow-lists (final_cat, cat_matched) and the make_cat /grammar/averaging property tests; adds test_galsim_grammar_properties. ngmix PSF (PSF_ORIG/PSF_RECONV) columns were already grammar-correct. Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01MgDK9SKn2PsDZGEbr4WFb3 --- example/cfis/final_cat.param | 97 +++---- example/unions_800/cat_matched.param | 97 +++---- .../modules/make_cat_package/make_cat.py | 272 ++++++++---------- .../module/test_galsim_grammar_properties.py | 251 ++++++++++++++++ tests/module/test_make_cat.py | 36 ++- tests/module/test_psf_averaging_properties.py | 16 +- tests/module/test_psf_grammar_properties.py | 128 ++++++++- 7 files changed, 609 insertions(+), 288 deletions(-) create mode 100644 tests/module/test_galsim_grammar_properties.py diff --git a/example/cfis/final_cat.param b/example/cfis/final_cat.param index a469aac7..d403b034 100644 --- a/example/cfis/final_cat.param +++ b/example/cfis/final_cat.param @@ -15,13 +15,6 @@ NGMIX_MCAL_FLAGS NGMIX_G1_PSF_ORIG_NOSHEAR NGMIX_G2_PSF_ORIG_NOSHEAR -# spread class -#SPREAD_CLASS - -# spread model flag and error -#SPREAD_MODEL -#SPREADERR_MODEL - # Number of epochs (exposures) N_EPOCH NGMIX_N_EPOCH @@ -30,45 +23,45 @@ NGMIX_N_EPOCH ## Ngmix: model fitting # galaxy ellipticity -NGMIX_G1_GAL_1M -NGMIX_G2_GAL_1M -NGMIX_G1_GAL_1P -NGMIX_G2_GAL_1P -NGMIX_G1_GAL_2M -NGMIX_G2_GAL_2M -NGMIX_G1_GAL_2P -NGMIX_G2_GAL_2P -NGMIX_G1_GAL_NOSHEAR -NGMIX_G2_GAL_NOSHEAR -#NGMIX_G1_ERR_GAL_1M -#NGMIX_G2_ERR_GAL_1M -#NGMIX_G1_ERR_GAL_1P -#NGMIX_G2_ERR_GAL_1P -#NGMIX_G1_ERR_GAL_2M -#NGMIX_G2_ERR_GAL_2M -#NGMIX_G1_ERR_GAL_2P -#NGMIX_G2_ERR_GAL_2P -NGMIX_G1_ERR_GAL_NOSHEAR -NGMIX_G2_ERR_GAL_NOSHEAR +NGMIX_G1_1M +NGMIX_G2_1M +NGMIX_G1_1P +NGMIX_G2_1P +NGMIX_G1_2M +NGMIX_G2_2M +NGMIX_G1_2P +NGMIX_G2_2P +NGMIX_G1_NOSHEAR +NGMIX_G2_NOSHEAR +#NGMIX_G1_ERR_1M +#NGMIX_G2_ERR_1M +#NGMIX_G1_ERR_1P +#NGMIX_G2_ERR_1P +#NGMIX_G1_ERR_2M +#NGMIX_G2_ERR_2M +#NGMIX_G1_ERR_2P +#NGMIX_G2_ERR_2P +NGMIX_G1_ERR_NOSHEAR +NGMIX_G2_ERR_NOSHEAR # flags -NGMIX_FLAGS_GAL_1M -NGMIX_FLAGS_GAL_1P -NGMIX_FLAGS_GAL_2M -NGMIX_FLAGS_GAL_2P -NGMIX_FLAGS_GAL_NOSHEAR +NGMIX_FLAGS_1M +NGMIX_FLAGS_1P +NGMIX_FLAGS_2M +NGMIX_FLAGS_2P +NGMIX_FLAGS_NOSHEAR # size and error -NGMIX_T_GAL_1M -NGMIX_T_GAL_1P -NGMIX_T_GAL_2M -NGMIX_T_GAL_2P -NGMIX_T_GAL_NOSHEAR -NGMIX_T_ERR_GAL_1M -NGMIX_T_ERR_GAL_1P -NGMIX_T_ERR_GAL_2M -NGMIX_T_ERR_GAL_2P -NGMIX_T_ERR_GAL_NOSHEAR +NGMIX_T_1M +NGMIX_T_1P +NGMIX_T_2M +NGMIX_T_2P +NGMIX_T_NOSHEAR +NGMIX_T_ERR_1M +NGMIX_T_ERR_1P +NGMIX_T_ERR_2M +NGMIX_T_ERR_2P +NGMIX_T_ERR_NOSHEAR NGMIX_T_PSF_RECONV_1M NGMIX_T_PSF_RECONV_1P NGMIX_T_PSF_RECONV_2M @@ -76,16 +69,16 @@ NGMIX_T_PSF_RECONV_2P NGMIX_T_PSF_RECONV_NOSHEAR # flux and error -NGMIX_FLUX_GAL_1M -NGMIX_FLUX_GAL_1P -NGMIX_FLUX_GAL_2M -NGMIX_FLUX_GAL_2P -NGMIX_FLUX_GAL_NOSHEAR -NGMIX_FLUX_ERR_GAL_1M -NGMIX_FLUX_ERR_GAL_1P -NGMIX_FLUX_ERR_GAL_2M -NGMIX_FLUX_ERR_GAL_2P -NGMIX_FLUX_ERR_GAL_NOSHEAR +NGMIX_FLUX_1M +NGMIX_FLUX_1P +NGMIX_FLUX_2M +NGMIX_FLUX_2P +NGMIX_FLUX_NOSHEAR +NGMIX_FLUX_ERR_1M +NGMIX_FLUX_ERR_1P +NGMIX_FLUX_ERR_2M +NGMIX_FLUX_ERR_2P +NGMIX_FLUX_ERR_NOSHEAR # magnitudes MAG_AUTO diff --git a/example/unions_800/cat_matched.param b/example/unions_800/cat_matched.param index e1869df5..f387be00 100644 --- a/example/unions_800/cat_matched.param +++ b/example/unions_800/cat_matched.param @@ -15,13 +15,6 @@ NGMIX_MCAL_FLAGS NGMIX_G1_PSF_ORIG_NOSHEAR NGMIX_G2_PSF_ORIG_NOSHEAR -# spread class -SPREAD_CLASS - -# spread model flag and error -SPREAD_MODEL -SPREADERR_MODEL - # Number of epochs (exposures) N_EPOCH NGMIX_N_EPOCH @@ -30,45 +23,45 @@ NGMIX_N_EPOCH ## Ngmix: model fitting # galaxy ellipticity -NGMIX_G1_GAL_1M -NGMIX_G2_GAL_1M -NGMIX_G1_GAL_1P -NGMIX_G2_GAL_1P -NGMIX_G1_GAL_2M -NGMIX_G2_GAL_2M -NGMIX_G1_GAL_2P -NGMIX_G2_GAL_2P -NGMIX_G1_GAL_NOSHEAR -NGMIX_G2_GAL_NOSHEAR -#NGMIX_G1_ERR_GAL_1M -#NGMIX_G2_ERR_GAL_1M -#NGMIX_G1_ERR_GAL_1P -#NGMIX_G2_ERR_GAL_1P -#NGMIX_G1_ERR_GAL_2M -#NGMIX_G2_ERR_GAL_2M -#NGMIX_G1_ERR_GAL_2P -#NGMIX_G2_ERR_GAL_2P -NGMIX_G1_ERR_GAL_NOSHEAR -NGMIX_G2_ERR_GAL_NOSHEAR +NGMIX_G1_1M +NGMIX_G2_1M +NGMIX_G1_1P +NGMIX_G2_1P +NGMIX_G1_2M +NGMIX_G2_2M +NGMIX_G1_2P +NGMIX_G2_2P +NGMIX_G1_NOSHEAR +NGMIX_G2_NOSHEAR +#NGMIX_G1_ERR_1M +#NGMIX_G2_ERR_1M +#NGMIX_G1_ERR_1P +#NGMIX_G2_ERR_1P +#NGMIX_G1_ERR_2M +#NGMIX_G2_ERR_2M +#NGMIX_G1_ERR_2P +#NGMIX_G2_ERR_2P +NGMIX_G1_ERR_NOSHEAR +NGMIX_G2_ERR_NOSHEAR # flags -NGMIX_FLAGS_GAL_1M -NGMIX_FLAGS_GAL_1P -NGMIX_FLAGS_GAL_2M -NGMIX_FLAGS_GAL_2P -NGMIX_FLAGS_GAL_NOSHEAR +NGMIX_FLAGS_1M +NGMIX_FLAGS_1P +NGMIX_FLAGS_2M +NGMIX_FLAGS_2P +NGMIX_FLAGS_NOSHEAR # size and error -NGMIX_T_GAL_1M -NGMIX_T_GAL_1P -NGMIX_T_GAL_2M -NGMIX_T_GAL_2P -NGMIX_T_GAL_NOSHEAR -NGMIX_T_ERR_GAL_1M -NGMIX_T_ERR_GAL_1P -NGMIX_T_ERR_GAL_2M -NGMIX_T_ERR_GAL_2P -NGMIX_T_ERR_GAL_NOSHEAR +NGMIX_T_1M +NGMIX_T_1P +NGMIX_T_2M +NGMIX_T_2P +NGMIX_T_NOSHEAR +NGMIX_T_ERR_1M +NGMIX_T_ERR_1P +NGMIX_T_ERR_2M +NGMIX_T_ERR_2P +NGMIX_T_ERR_NOSHEAR NGMIX_T_PSF_RECONV_1M NGMIX_T_PSF_RECONV_1P NGMIX_T_PSF_RECONV_2M @@ -76,16 +69,16 @@ NGMIX_T_PSF_RECONV_2P NGMIX_T_PSF_RECONV_NOSHEAR # flux and error -NGMIX_FLUX_GAL_1M -NGMIX_FLUX_GAL_1P -NGMIX_FLUX_GAL_2M -NGMIX_FLUX_GAL_2P -NGMIX_FLUX_GAL_NOSHEAR -NGMIX_FLUX_ERR_GAL_1M -NGMIX_FLUX_ERR_GAL_1P -NGMIX_FLUX_ERR_GAL_2M -NGMIX_FLUX_ERR_GAL_2P -NGMIX_FLUX_ERR_GAL_NOSHEAR +NGMIX_FLUX_1M +NGMIX_FLUX_1P +NGMIX_FLUX_2M +NGMIX_FLUX_2P +NGMIX_FLUX_NOSHEAR +NGMIX_FLUX_ERR_1M +NGMIX_FLUX_ERR_1P +NGMIX_FLUX_ERR_2M +NGMIX_FLUX_ERR_2P +NGMIX_FLUX_ERR_NOSHEAR # Flag for multiple objects due to overlapping # tiles diff --git a/src/shapepipe/modules/make_cat_package/make_cat.py b/src/shapepipe/modules/make_cat_package/make_cat.py index 1c7c5a6b..e28ac643 100644 --- a/src/shapepipe/modules/make_cat_package/make_cat.py +++ b/src/shapepipe/modules/make_cat_package/make_cat.py @@ -139,77 +139,6 @@ def save_sextractor_data(final_cat_file, sexcat_path, remove_vignet=True): return cat_size -def save_sm_data( - final_cat_file, - sexcat_sm_path, - do_classif=True, - star_thresh=0.003, - gal_thresh=0.01, - n_obj=-1, -): - r"""Save Spread-Model Data. - - Save the spread-model data into the final catalogue. - - Parameters - ---------- - final_cat_file : file_io.FITSCatalogue - Final catalogue - sexcat_sm_path : str - Path to spread-model catalogue to save. If ``None``, spread_model is - set to 99 - do_classif : bool - If ``True`` objects will be classified into stars, galaxies, and other, - using the classifier - :math:`{\rm class} = {\rm sm} + 2 * {\rm sm}_{\rm err}` - star_thresh : float - Threshold for star selection; object is classified as star if - :math:`|{\rm class}| <` ``star_thresh`` - gal_thresh : float - Threshold for galaxy selection; object is classified as galaxy if - :math:`{\rm class} >` ``gal_thresh`` - nobj : int, optional - Number of objects, only used if sexcat_sm_path is ``-1`` - - Returns - ------- - int - Number of objects saved - """ - final_cat_file.open() - - if sexcat_sm_path is not None: - sexcat_sm_file = file_io.FITSCatalogue( - sexcat_sm_path, - SEx_catalogue=True, - ) - sexcat_sm_file.open() - - sm = np.copy(sexcat_sm_file.get_data()["SPREAD_MODEL"]) - sm_err = np.copy(sexcat_sm_file.get_data()["SPREADERR_MODEL"]) - - sexcat_sm_file.close() - - else: - sm = np.ones(n_obj) * 99 - sm_err = np.ones(n_obj) * 99 - - final_cat_file.add_col("SPREAD_MODEL", sm) - final_cat_file.add_col("SPREADERR_MODEL", sm_err) - - if do_classif: - obj_flag = np.ones_like(sm, dtype="int16") * 2 - classif = sm + 2.0 * sm_err - obj_flag[np.where(np.abs(classif) < star_thresh)] = 0 - obj_flag[np.where(classif > gal_thresh)] = 1 - - final_cat_file.add_col("SPREAD_CLASS", obj_flag) - - final_cat_file.close() - - return n_obj - - class SaveCatalogue: """Save Catalogue. @@ -329,12 +258,15 @@ def _save_ngmix_data(self, ngmix_cat_path, moments=False): Save the NGMIX catalogue into the final one. - Column grammar: ``NGMIX[m]_[_ERR]__``, plus - three OBJECT/SHEAR-less per-object metadata columns + Column grammar: ``NGMIX[m]_[_ERR][_]_``, + plus three OBJECT/SHEAR-less per-object metadata columns (``NGMIX[m]_MCAL_FLAGS``, ``NGMIX_N_EPOCH``, - ``NGMIX_MCAL_TYPES_FAIL``). ``OBJECT`` is one of ``GAL`` (galaxy), - ``PSF_ORIG`` (the original image PSF, fit by - :func:`ngmix.average_original_psf`), or ``PSF_RECONV`` (the metacal + ``NGMIX_MCAL_TYPES_FAIL``). The galaxy is the implicit default object + and carries NO ``OBJECT`` token (``NGMIX_G1_NOSHEAR``, dropping the + ``GAL`` segment carried by the pre-#761 names). The explicit PSF + objects are ``PSF_ORIG`` + (the original image PSF, fit by + :func:`ngmix.average_original_psf`) and ``PSF_RECONV`` (the metacal reconvolution kernel, fit by :func:`ngmix.average_multiepoch_psf`); see those functions for what each PSF family IS. ``PSF_ORIG`` and ``PSF_RECONV`` are independent fits of different PSFs, no longer the @@ -387,32 +319,32 @@ def _save_ngmix_data(self, ngmix_cat_path, moments=False): prefix = f"NGMIX{m}" - # Galaxy (GAL), original image PSF (PSF_ORIG) and metacal + # Galaxy (no object token), original image PSF (PSF_ORIG) and metacal # reconvolution kernel (PSF_RECONV); see ngmix.average_original_psf / # average_multiepoch_psf for what each PSF family is. G1/G2 are scalar # reduced-shear components, not a 2-vector. Sentinels: # sizes/fluxes/mags/flags 0, *_ERR fluxes/mags -1, ellipticities -10, # *_ERR sizes 1e30. for key_str in ( - f"{prefix}_T_GAL_", - f"{prefix}_SNR_GAL_", - f"{prefix}_FLUX_GAL_", - f"{prefix}_MAG_GAL_", - f"{prefix}_FLAGS_GAL_", + f"{prefix}_T_", + f"{prefix}_SNR_", + f"{prefix}_FLUX_", + f"{prefix}_MAG_", + f"{prefix}_FLAGS_", f"{prefix}_T_PSF_ORIG_", f"{prefix}_T_PSF_RECONV_", ): self._update_dict(key_str, np.zeros(n_obj)) for key_str in ( - f"{prefix}_FLUX_ERR_GAL_", - f"{prefix}_MAG_ERR_GAL_", + f"{prefix}_FLUX_ERR_", + f"{prefix}_MAG_ERR_", ): self._update_dict(key_str, np.ones(n_obj) * -1) for key_str in ( - f"{prefix}_G1_GAL_", - f"{prefix}_G2_GAL_", - f"{prefix}_G1_ERR_GAL_", - f"{prefix}_G2_ERR_GAL_", + f"{prefix}_G1_", + f"{prefix}_G2_", + f"{prefix}_G1_ERR_", + f"{prefix}_G2_ERR_", f"{prefix}_G1_PSF_ORIG_", f"{prefix}_G2_PSF_ORIG_", f"{prefix}_G1_ERR_PSF_ORIG_", @@ -424,7 +356,7 @@ def _save_ngmix_data(self, ngmix_cat_path, moments=False): ): self._update_dict(key_str, np.ones(n_obj) * -10.0) for key_str in ( - f"{prefix}_T_ERR_GAL_", + f"{prefix}_T_ERR_", f"{prefix}_T_ERR_PSF_ORIG_", f"{prefix}_T_ERR_PSF_RECONV_", ): @@ -441,46 +373,46 @@ def _save_ngmix_data(self, ngmix_cat_path, moments=False): # Galaxy shape, size and photometry. self._add2dict( - f"{prefix}_G1_GAL_{key}", ncf_data["g1"][ind[0]], idx + f"{prefix}_G1_{key}", ncf_data["g1"][ind[0]], idx ) self._add2dict( - f"{prefix}_G2_GAL_{key}", ncf_data["g2"][ind[0]], idx + f"{prefix}_G2_{key}", ncf_data["g2"][ind[0]], idx ) self._add2dict( - f"{prefix}_G1_ERR_GAL_{key}", + f"{prefix}_G1_ERR_{key}", ncf_data["g1_err"][ind[0]], idx ) self._add2dict( - f"{prefix}_G2_ERR_GAL_{key}", + f"{prefix}_G2_ERR_{key}", ncf_data["g2_err"][ind[0]], idx ) self._add2dict( - f"{prefix}_T_GAL_{key}", ncf_data["T"][ind[0]], idx + f"{prefix}_T_{key}", ncf_data["T"][ind[0]], idx ) self._add2dict( - f"{prefix}_T_ERR_GAL_{key}", + f"{prefix}_T_ERR_{key}", ncf_data["T_err"][ind[0]], idx ) self._add2dict( - f"{prefix}_SNR_GAL_{key}", ncf_data["s2n"][ind[0]], idx + f"{prefix}_SNR_{key}", ncf_data["s2n"][ind[0]], idx ) self._add2dict( - f"{prefix}_FLUX_GAL_{key}", + f"{prefix}_FLUX_{key}", ncf_data["flux"][ind[0]], idx ) self._add2dict( - f"{prefix}_FLUX_ERR_GAL_{key}", + f"{prefix}_FLUX_ERR_{key}", ncf_data["flux_err"][ind[0]], idx ) self._add2dict( - f"{prefix}_MAG_GAL_{key}", ncf_data["mag"][ind[0]], idx + f"{prefix}_MAG_{key}", ncf_data["mag"][ind[0]], idx ) self._add2dict( - f"{prefix}_MAG_ERR_GAL_{key}", + f"{prefix}_MAG_ERR_{key}", ncf_data["mag_err"][ind[0]], idx ) self._add2dict( - f"{prefix}_FLAGS_GAL_{key}", + f"{prefix}_FLAGS_{key}", ncf_data["flags"][ind[0]], idx ) @@ -559,21 +491,25 @@ def _save_galsim_shapes(self, galsim_cat_path): galsim_id = galsim_cat_file.get_data()["id"] for key_str in ( - "GALSIM_GAL_SIGMA_", - "GALSIM_PSF_SIGMA_", + "GALSIM_T_", + "GALSIM_T_PSF_", "GALSIM_FLUX_", "GALSIM_MAG_", ): self._update_dict(key_str, np.zeros(len(self._obj_id))) for key_str in ("GALSIM_FLUX_ERR_", "GALSIM_MAG_ERR_", "GALSIM_RES_"): - self._update_dict(key_str, np.ones(n_obj) * -1) + self._update_dict(key_str, np.ones(len(self._obj_id)) * -1) for key_str in ( - "GALSIM_GAL_ELL_", - "GALSIM_GAL_ELL_ERR_", - "GALSIM_GAL_ELL_UNCORR_", - "GALSIM_PSF_ELL_", + "GALSIM_G1_", + "GALSIM_G2_", + "GALSIM_G1_ERR_", + "GALSIM_G2_ERR_", + "GALSIM_G1_UNCORR_", + "GALSIM_G2_UNCORR_", + "GALSIM_G1_PSF_", + "GALSIM_G2_PSF_", ): - self._update_dict(key_str, np.ones((len(self._obj_id), 2)) * -10.0) + self._update_dict(key_str, np.ones(len(self._obj_id)) * -10.0) self._update_dict( "GALSIM_FLAGS_", np.ones(len(self._obj_id), dtype="int16"), @@ -589,50 +525,66 @@ def _save_galsim_shapes(self, galsim_cat_path): if key == "ORIGINAL_PSF": - uncorr_g = ( - gcf_data["gal_uncorr_g1"][ind[0]], - gcf_data["gal_uncorr_g2"][ind[0]], + # PSF columns sourced from the galaxy uncorr fields + # for this special extension (asymmetry preserved). + self._add2dict( + f"GALSIM_G1_PSF_{key}", + gcf_data["gal_uncorr_g1"][ind[0]], idx + ) + self._add2dict( + f"GALSIM_G2_PSF_{key}", + gcf_data["gal_uncorr_g2"][ind[0]], idx + ) + self._add2dict( + f"GALSIM_T_PSF_{key}", + cs_size.sigma_to_T(gcf_data["gal_sigma"][ind[0]]), + idx ) - psf_sig = gcf_data["gal_sigma"][ind[0]] - self._add2dict(f"GALSIM_PSF_ELL_{key}", uncorr_g, idx) - self._add2dict(f"GALSIM_PSF_SIGMA_{key}", psf_sig, idx) else: - g = ( - gcf_data["gal_g1"][ind[0]], - gcf_data["gal_g2"][ind[0]], + self._add2dict( + f"GALSIM_G1_{key}", gcf_data["gal_g1"][ind[0]], idx + ) + self._add2dict( + f"GALSIM_G2_{key}", gcf_data["gal_g2"][ind[0]], idx + ) + self._add2dict( + f"GALSIM_G1_ERR_{key}", + gcf_data["gal_g1_err"][ind[0]], idx ) - g_err = ( - gcf_data["gal_g1_err"][ind[0]], - gcf_data["gal_g2_err"][ind[0]], + self._add2dict( + f"GALSIM_G2_ERR_{key}", + gcf_data["gal_g2_err"][ind[0]], idx ) - self._add2dict(f"GALSIM_GAL_ELL_{key}", g, idx) - self._add2dict(f"GALSIM_GAL_ELL_ERR_{key}", g_err, idx) - uncorr_g = ( - gcf_data["gal_uncorr_g1"][ind[0]], - gcf_data["gal_uncorr_g2"][ind[0]], + self._add2dict( + f"GALSIM_G1_UNCORR_{key}", + gcf_data["gal_uncorr_g1"][ind[0]], idx ) self._add2dict( - f"GALSIM_GAL_ELL_UNCORR_{key}", - uncorr_g, - idx, + f"GALSIM_G2_UNCORR_{key}", + gcf_data["gal_uncorr_g2"][ind[0]], idx ) - sigma = gcf_data["gal_sigma"][ind[0]] - self._add2dict(f"GALSIM_GAL_SIGMA_{key}", sigma, idx) + self._add2dict( + f"GALSIM_T_{key}", + cs_size.sigma_to_T(gcf_data["gal_sigma"][ind[0]]), + idx + ) - psf_g = ( - gcf_data["psf_g1"][ind[0]], - gcf_data["psf_g2"][ind[0]], + self._add2dict( + f"GALSIM_G1_PSF_{key}", + gcf_data["psf_g1"][ind[0]], idx + ) + self._add2dict( + f"GALSIM_G2_PSF_{key}", + gcf_data["psf_g2"][ind[0]], idx ) - psf_sigma = gcf_data["psf_sigma"][ind[0]] - self._add2dict(f"GALSIM_PSF_ELL_{key}", psf_g, idx) self._add2dict( - f"GALSIM_PSF_SIGMA_{key}", - psf_sigma, - idx, + f"GALSIM_T_PSF_{key}", + cs_size.sigma_to_T(gcf_data["psf_sigma"][ind[0]]), + idx ) flux = gcf_data["gal_flux"][ind[0]] @@ -669,20 +621,29 @@ def _save_psf_data(self, galaxy_psf_path): max_epoch = np.max(self._final_cat_file.get_data()["N_EPOCH"]) + 1 self._output_dict = { - f"PSF_ELL_{idx + 1}": np.ones((len(self._obj_id), 2)) * -10.0 + f"HSM_E1_PSF_{idx + 1}": np.ones(len(self._obj_id)) * -10.0 for idx in range(max_epoch) } self._output_dict = { **self._output_dict, **{ - f"PSF_FWHM_{idx + 1}": np.zeros(len(self._obj_id)) + f"HSM_E2_PSF_{idx + 1}": np.ones(len(self._obj_id)) * -10.0 for idx in range(max_epoch) }, } self._output_dict = { **self._output_dict, **{ - f"PSF_FLAG_{idx + 1}": np.ones(len(self._obj_id), dtype="int16") + f"HSM_T_PSF_{idx + 1}": np.zeros(len(self._obj_id)) + for idx in range(max_epoch) + }, + } + self._output_dict = { + **self._output_dict, + **{ + f"HSM_FLAG_PSF_{idx + 1}": np.ones( + len(self._obj_id), dtype="int16" + ) for idx in range(max_epoch) }, } @@ -696,21 +657,28 @@ def _save_psf_data(self, galaxy_psf_path): gpc_data = galaxy_psf_cat[str(id_tmp)][key] - if gpc_data["SHAPES"]["FLAG_PSF_HSM"] != 0: + if gpc_data["SHAPES"]["HSM_FLAG_PSF"] != 0: continue - e_psf = ( - gpc_data["SHAPES"]["E1_PSF_HSM"], - gpc_data["SHAPES"]["E2_PSF_HSM"], + self._add2dict( + f"HSM_E1_PSF_{epoch + 1}", + gpc_data["SHAPES"]["HSM_E1_PSF"], idx + ) + self._add2dict( + f"HSM_E2_PSF_{epoch + 1}", + gpc_data["SHAPES"]["HSM_E2_PSF"], idx ) - self._add2dict(f"PSF_ELL_{epoch + 1}", e_psf, idx) - psf_fwhm = cs_size.sigma_to_fwhm( - gpc_data["SHAPES"]["SIGMA_PSF_HSM"] + # HSM_T_PSF already holds T (sigma_to_T applied at the + # producer's _interpolate_me); read straight through. + self._add2dict( + f"HSM_T_PSF_{epoch + 1}", + gpc_data["SHAPES"]["HSM_T_PSF"], idx ) - self._add2dict(f"PSF_FWHM_{epoch + 1}", psf_fwhm, idx) - flag_psf = gpc_data["SHAPES"]["FLAG_PSF_HSM"] - self._add2dict(f"PSF_FLAG_{epoch + 1}", flag_psf, idx) + self._add2dict( + f"HSM_FLAG_PSF_{epoch + 1}", + gpc_data["SHAPES"]["HSM_FLAG_PSF"], idx + ) galaxy_psf_cat.close() diff --git a/tests/module/test_galsim_grammar_properties.py b/tests/module/test_galsim_grammar_properties.py new file mode 100644 index 00000000..28346eaf --- /dev/null +++ b/tests/module/test_galsim_grammar_properties.py @@ -0,0 +1,251 @@ +"""PROPERTY-BASED TESTS FOR THE GALSIM GALAXY/PSF COLUMN GRAMMAR. + +Drives ``SaveCatalogue._save_galsim_shapes`` (the make_cat writer) to pin the +renamed galsim grammar (shapepipe#761): the packed length-2 +``GALSIM_GAL_ELL_*`` / ``GALSIM_PSF_ELL_*`` columns are split into scalar +``GALSIM_G1_*`` / ``GALSIM_G2_*`` (g-type — galsim is correctly g-type, no +e/g value fix needed here), the galaxy carries no ``GAL`` token (implicit +default object, same rule as ngmix), and the raw ``GALSIM_GAL_SIGMA_*`` / +``GALSIM_PSF_SIGMA_*`` columns are replaced by the single stored size +``GALSIM_T_*`` (``T = 2 sigma^2``, via ``cs_util.size.sigma_to_T``). + +The ``ORIGINAL_PSF`` extension is a deliberate asymmetry, preserved rather +than normalized away: its PSF columns are sourced from the *galaxy's* +uncorrected fields (``gal_uncorr_g1/g2``, ``gal_sigma``), not the +per-extension ``psf_g1/g2``/``psf_sigma`` fields every other extension uses. +""" + +import re +import tempfile +from pathlib import Path + +import numpy as np +import numpy.testing as npt +from astropy.io import fits +from cs_util import size as cs_size +from hypothesis import given, settings +from hypothesis import strategies as st + +from shapepipe.modules.make_cat_package.make_cat import SaveCatalogue + +# --------------------------------------------------------------------------- +# Harness +# --------------------------------------------------------------------------- + +# Source columns _save_galsim_shapes reads per extension. The ORIGINAL_PSF +# extension only consumes the gal_uncorr_g1/g2 + gal_sigma subset, but the +# real galsim catalogue carries the full set on every extension, so the +# fixture mirrors that rather than special-casing the row shape. +GALSIM_KEYS = [ + "id", + "gal_g1", "gal_g2", "gal_g1_err", "gal_g2_err", + "gal_uncorr_g1", "gal_uncorr_g2", "gal_sigma", + "psf_g1", "psf_g2", "psf_sigma", + "gal_flux", "gal_flux_err", "gal_mag", "gal_mag_err", + "gal_flag", "gal_resolution", +] +INT_KEYS = {"id", "gal_flag"} + +# Per-key multiplier so a single scalar ``seed`` fans out into twelve +# distinct values — a swapped source (e.g. psf_g1 <-> gal_uncorr_g1) breaks +# at least one equality below. +_MULT = { + "gal_g1": 1.0, "gal_g2": 1.3, "gal_g1_err": 1.7, "gal_g2_err": 2.1, + "gal_uncorr_g1": 2.6, "gal_uncorr_g2": 3.1, "gal_sigma": 0.4, + "psf_g1": 0.6, "psf_g2": 0.8, "psf_sigma": 0.5, + "gal_flux": 10.0, "gal_flux_err": 1.0, "gal_mag": 5.0, + "gal_mag_err": 0.2, +} + + +def _galsim_row(obj_id, seed): + """One object's per-key values for one extension, scaled by ``seed``.""" + row = {"id": obj_id, "gal_flag": 0, "gal_resolution": 0.5} + row.update({key: mult * seed for key, mult in _MULT.items()}) + return row + + +def _write_galsim_cat(path, obj_ids, seed_by_ext): + """Write a synthetic galsim FITS, one extension per ``seed_by_ext`` key. + + Extension order is preserved (dict order), matching how + ``get_ext_name()[1:]`` drives ``self._key_ends`` in production. + """ + hdus = [fits.PrimaryHDU()] + for ext, seed in seed_by_ext.items(): + rows = [_galsim_row(oid, seed) for oid in obj_ids] + cols = [ + fits.Column( + name=key, + format="K" if key in INT_KEYS else "D", + array=np.array([row[key] for row in rows]), + ) + for key in GALSIM_KEYS + ] + hdus.append(fits.BinTableHDU.from_columns(cols, name=ext)) + fits.HDUList(hdus).writeto(path, overwrite=True) + + +def _run_save_galsim(galsim_path, obj_ids): + """Drive ``_save_galsim_shapes`` and return its populated output dict.""" + inst = object.__new__(SaveCatalogue) + inst._obj_id = np.asarray(obj_ids) + inst._output_dict = {} + inst._save_galsim_shapes(str(galsim_path)) + return inst._output_dict + + +# --------------------------------------------------------------------------- +# Strategies +# --------------------------------------------------------------------------- + +obj_ids_strategy = st.lists( + st.integers(min_value=1, max_value=10_000), min_size=1, max_size=5, + unique=True, +) +# A finite seed bounded away from zero, so every per-key value it scales is +# itself non-zero and the family-vs-family / key-vs-key comparisons below are +# never vacuously satisfied by an accidental 0 == 0. +_seed = st.floats( + min_value=0.05, max_value=5.0, allow_nan=False, allow_infinity=False, +) + + +# --------------------------------------------------------------------------- +# (a) Scalar split + size routing, regular extension +# --------------------------------------------------------------------------- + +@settings(deadline=None, max_examples=25) +@given(obj_ids=obj_ids_strategy, seed=_seed) +def test_regular_extension_scalar_split_and_size(obj_ids, seed): + """A non-``ORIGINAL_PSF`` extension routes each source to its own column. + + ``GALSIM_G1/G2_`` from ``gal_g1/g2``, the ``_ERR``/``_UNCORR`` + variants from their namesake sources, ``GALSIM_T_`` from + ``sigma_to_T(gal_sigma)``, and the PSF family + (``GALSIM_G1/G2_PSF_``, ``GALSIM_T_PSF_``) from ``psf_g1/g2`` / + ``sigma_to_T(psf_sigma)`` — never from the galaxy's own fields. + """ + ext = "NOSHEAR" + path = tempfile.NamedTemporaryFile(suffix=".fits", delete=False).name + _write_galsim_cat(path, obj_ids, {ext: seed}) + out = _run_save_galsim(path, obj_ids) + Path(path).unlink() + + n = len(obj_ids) + row = _galsim_row(obj_ids[0], seed) + npt.assert_allclose(out[f"GALSIM_G1_{ext}"], [row["gal_g1"]] * n) + npt.assert_allclose(out[f"GALSIM_G2_{ext}"], [row["gal_g2"]] * n) + npt.assert_allclose(out[f"GALSIM_G1_ERR_{ext}"], [row["gal_g1_err"]] * n) + npt.assert_allclose(out[f"GALSIM_G2_ERR_{ext}"], [row["gal_g2_err"]] * n) + npt.assert_allclose( + out[f"GALSIM_G1_UNCORR_{ext}"], [row["gal_uncorr_g1"]] * n + ) + npt.assert_allclose( + out[f"GALSIM_G2_UNCORR_{ext}"], [row["gal_uncorr_g2"]] * n + ) + npt.assert_allclose( + out[f"GALSIM_T_{ext}"], [cs_size.sigma_to_T(row["gal_sigma"])] * n + ) + npt.assert_allclose(out[f"GALSIM_G1_PSF_{ext}"], [row["psf_g1"]] * n) + npt.assert_allclose(out[f"GALSIM_G2_PSF_{ext}"], [row["psf_g2"]] * n) + npt.assert_allclose( + out[f"GALSIM_T_PSF_{ext}"], [cs_size.sigma_to_T(row["psf_sigma"])] * n + ) + # The galaxy and PSF families must stay distinct (un-aliased), and the + # raw-sigma value must never leak through unconverted. + assert not np.allclose(out[f"GALSIM_T_{ext}"], row["gal_sigma"]) + assert not np.allclose(out[f"GALSIM_G1_{ext}"], out[f"GALSIM_G1_PSF_{ext}"]) + + npt.assert_allclose(out[f"GALSIM_FLUX_{ext}"], [row["gal_flux"]] * n) + npt.assert_allclose(out[f"GALSIM_FLUX_ERR_{ext}"], [row["gal_flux_err"]] * n) + npt.assert_allclose(out[f"GALSIM_MAG_{ext}"], [row["gal_mag"]] * n) + npt.assert_allclose(out[f"GALSIM_MAG_ERR_{ext}"], [row["gal_mag_err"]] * n) + npt.assert_allclose(out[f"GALSIM_RES_{ext}"], [row["gal_resolution"]] * n) + + +# --------------------------------------------------------------------------- +# (b) ORIGINAL_PSF asymmetry: sourced from the galaxy's uncorrected fields +# --------------------------------------------------------------------------- + +@settings(deadline=None, max_examples=25) +@given(obj_ids=obj_ids_strategy, seed_main=_seed, seed_origpsf=_seed) +def test_original_psf_extension_sourced_from_gal_uncorr( + obj_ids, seed_main, seed_origpsf +): + """``ORIGINAL_PSF`` columns trace to ``gal_uncorr_*``/``gal_sigma``, NOT + ``psf_*`` — the one preserved asymmetry in the galsim grammar. + + The two extensions are written with independent seeds, and the + ``ORIGINAL_PSF`` row's own ``psf_g1/g2``/``psf_sigma`` are deliberately + distinct from its ``gal_uncorr_*``/``gal_sigma`` (the seed multipliers + differ), so a regression that wired the regular per-extension ``psf_*`` + sourcing into ``ORIGINAL_PSF`` would be caught, not masked by equal + values. + """ + seed_by_ext = {"NOSHEAR": seed_main, "ORIGINAL_PSF": seed_origpsf} + path = tempfile.NamedTemporaryFile(suffix=".fits", delete=False).name + _write_galsim_cat(path, obj_ids, seed_by_ext) + out = _run_save_galsim(path, obj_ids) + Path(path).unlink() + + n = len(obj_ids) + op_row = _galsim_row(obj_ids[0], seed_origpsf) + npt.assert_allclose( + out["GALSIM_G1_PSF_ORIGINAL_PSF"], [op_row["gal_uncorr_g1"]] * n + ) + npt.assert_allclose( + out["GALSIM_G2_PSF_ORIGINAL_PSF"], [op_row["gal_uncorr_g2"]] * n + ) + npt.assert_allclose( + out["GALSIM_T_PSF_ORIGINAL_PSF"], + [cs_size.sigma_to_T(op_row["gal_sigma"])] * n, + ) + # Not sourced from this extension's own psf_g1/g2/psf_sigma (those are + # scaled by the same seed but through different multipliers, so equality + # here would mean the asymmetry was lost). + assert not np.allclose( + out["GALSIM_G1_PSF_ORIGINAL_PSF"], [op_row["psf_g1"]] * n + ) + assert not np.allclose( + out["GALSIM_T_PSF_ORIGINAL_PSF"], + [cs_size.sigma_to_T(op_row["psf_sigma"])] * n, + ) + + +# --------------------------------------------------------------------------- +# (c) Every emitted column matches the new grammar; no legacy tokens survive +# --------------------------------------------------------------------------- + +@settings(deadline=None, max_examples=25) +@given(obj_ids=obj_ids_strategy, seed=_seed) +def test_emitted_columns_match_grammar_no_legacy_tokens(obj_ids, seed): + """Every ``GALSIM_*`` column matches the scalar grammar; nothing packed. + + No ``GALSIM_GAL_ELL*``/``GALSIM_PSF_ELL*`` (packed length-2 ellipticity) + and no ``GALSIM_*_SIGMA*`` (raw size) survives — both replaced by scalar + ``G1``/``G2`` components and the single stored size ``T``. The galaxy + also carries no ``GAL`` token (implicit default object, same rule as + ngmix). + """ + ext_names = ["NOSHEAR", "ORIGINAL_PSF"] + seed_by_ext = {ext: seed * (i + 1) for i, ext in enumerate(ext_names)} + path = tempfile.NamedTemporaryFile(suffix=".fits", delete=False).name + _write_galsim_cat(path, obj_ids, seed_by_ext) + out = _run_save_galsim(path, obj_ids) + Path(path).unlink() + + assert out, "writer produced no columns" + + _component = "G1|G2|T" + _qual = "(?:_ERR|_UNCORR)?" + _ext = "|".join(re.escape(e) for e in ext_names) + grammar_re = re.compile( + rf"^GALSIM_(?:{_component}){_qual}(?:_PSF)?_(?:{_ext})$" + rf"|^GALSIM_(?:FLUX|FLUX_ERR|MAG|MAG_ERR|FLAGS|RES)_(?:{_ext})$" + ) + for col in out: + assert grammar_re.match(col), f"off-grammar column emitted: {col!r}" + assert "_ELL" not in col, col + assert "SIGMA" not in col, col + assert "_GAL_" not in col and not col.endswith("_GAL"), col diff --git a/tests/module/test_make_cat.py b/tests/module/test_make_cat.py index e2960701..d5d24942 100644 --- a/tests/module/test_make_cat.py +++ b/tests/module/test_make_cat.py @@ -1,12 +1,16 @@ """UNIT TESTS FOR MODULE PACKAGE: MAKE_CAT. Drives ``SaveCatalogue._save_ngmix_data`` against a synthetic ngmix catalogue -to lock in the renamed PSF-column grammar -``NGMIX[m]_[_ERR]__`` (shapepipe#749), plus the three -OBJECT/SHEAR-less metadata columns (``NGMIX[m]_MCAL_FLAGS``, ``NGMIX_N_EPOCH``, -``NGMIX_MCAL_TYPES_FAIL``): the original image PSF (``PSF_ORIG``) and the metacal -reconvolution kernel (``PSF_RECONV``) are independent fits of *different* PSFs, -no longer the single aliased value of the pre-fix code. +to lock in the column grammar ``ESTIMATOR_COMPONENT[_ERR]_OBJECT[_SHEAR]`` +(shapepipe#749, #761): galaxy is the implicit default object and carries no +``GAL`` token (``NGMIX_G1_NOSHEAR``, never ``NGMIX_G1_GAL_NOSHEAR``), while +the PSF families keep an explicit object token — +``NGMIX_[_ERR]__`` for ``PSF_ORIG``/``PSF_RECONV`` — +plus the three OBJECT/SHEAR-less metadata columns (``NGMIX[m]_MCAL_FLAGS``, +``NGMIX_N_EPOCH``, ``NGMIX_MCAL_TYPES_FAIL``). The original image PSF +(``PSF_ORIG``) and the metacal reconvolution kernel (``PSF_RECONV``) are +independent fits of *different* PSFs, no longer the single aliased value of +the pre-#749 code. """ import numpy as np @@ -130,13 +134,13 @@ def test_save_ngmix_data_uses_new_grammar_and_no_old_names(tmp_path): # Every per-shear family is present under the new grammar. for shear in SHEAR_EXTS: for col in ( - f"NGMIX_G1_GAL_{shear}", f"NGMIX_G2_GAL_{shear}", - f"NGMIX_G1_ERR_GAL_{shear}", f"NGMIX_G2_ERR_GAL_{shear}", - f"NGMIX_T_GAL_{shear}", f"NGMIX_T_ERR_GAL_{shear}", - f"NGMIX_SNR_GAL_{shear}", - f"NGMIX_FLUX_GAL_{shear}", f"NGMIX_FLUX_ERR_GAL_{shear}", - f"NGMIX_MAG_GAL_{shear}", f"NGMIX_MAG_ERR_GAL_{shear}", - f"NGMIX_FLAGS_GAL_{shear}", + f"NGMIX_G1_{shear}", f"NGMIX_G2_{shear}", + f"NGMIX_G1_ERR_{shear}", f"NGMIX_G2_ERR_{shear}", + f"NGMIX_T_{shear}", f"NGMIX_T_ERR_{shear}", + f"NGMIX_SNR_{shear}", + f"NGMIX_FLUX_{shear}", f"NGMIX_FLUX_ERR_{shear}", + f"NGMIX_MAG_{shear}", f"NGMIX_MAG_ERR_{shear}", + f"NGMIX_FLAGS_{shear}", f"NGMIX_G1_PSF_ORIG_{shear}", f"NGMIX_G2_PSF_ORIG_{shear}", f"NGMIX_T_PSF_ORIG_{shear}", f"NGMIX_G1_PSF_RECONV_{shear}", f"NGMIX_G2_PSF_RECONV_{shear}", @@ -154,6 +158,8 @@ def test_save_ngmix_data_uses_new_grammar_and_no_old_names(tmp_path): assert "_Tpsf" not in col and "TPSF" not in col, col assert not col.startswith("NGMIX_ELL_"), col assert "NGMIXm" not in col, col # moments branch off by default + # Galaxy is the implicit default object — no GAL token (shapepipe#761). + assert "_GAL_" not in col and not col.endswith("_GAL"), col def test_save_ngmix_data_psf_families_trace_distinct_sources(tmp_path): @@ -228,11 +234,11 @@ def test_save_ngmix_data_fills_sentinels_for_absent_objects(tmp_path): npt.assert_allclose(t_orig[present], row["T_psf_orig"]) npt.assert_allclose(t_orig[absent], [0.0, 0.0]) - t_err_gal = np.asarray(out["NGMIX_T_ERR_GAL_NOSHEAR"]) + t_err_gal = np.asarray(out["NGMIX_T_ERR_NOSHEAR"]) npt.assert_allclose(t_err_gal[present], row["T_err"]) npt.assert_allclose(t_err_gal[absent], [1e30, 1e30]) - flux_err = np.asarray(out["NGMIX_FLUX_ERR_GAL_NOSHEAR"]) + flux_err = np.asarray(out["NGMIX_FLUX_ERR_NOSHEAR"]) npt.assert_allclose(flux_err[present], row["flux_err"]) npt.assert_allclose(flux_err[absent], [-1.0, -1.0]) diff --git a/tests/module/test_psf_averaging_properties.py b/tests/module/test_psf_averaging_properties.py index 2314b6ca..3afaa7bb 100644 --- a/tests/module/test_psf_averaging_properties.py +++ b/tests/module/test_psf_averaging_properties.py @@ -233,18 +233,18 @@ def test_all_epochs_failed_raises_zero_division(flagged_specs): # rows are overwritten (mirrors make_cat._save_ngmix_data). The property: a row # whose obj_id never appears among the ngmix ids keeps exactly these. _SENTINELS = { - "NGMIX_T_GAL_NOSHEAR": 0.0, - "NGMIX_SNR_GAL_NOSHEAR": 0.0, - "NGMIX_FLAGS_GAL_NOSHEAR": 0.0, + "NGMIX_T_NOSHEAR": 0.0, + "NGMIX_SNR_NOSHEAR": 0.0, + "NGMIX_FLAGS_NOSHEAR": 0.0, "NGMIX_T_PSF_ORIG_NOSHEAR": 0.0, "NGMIX_T_PSF_RECONV_NOSHEAR": 0.0, - "NGMIX_FLUX_ERR_GAL_NOSHEAR": -1.0, - "NGMIX_MAG_ERR_GAL_NOSHEAR": -1.0, - "NGMIX_G1_GAL_NOSHEAR": -10.0, - "NGMIX_G2_GAL_NOSHEAR": -10.0, + "NGMIX_FLUX_ERR_NOSHEAR": -1.0, + "NGMIX_MAG_ERR_NOSHEAR": -1.0, + "NGMIX_G1_NOSHEAR": -10.0, + "NGMIX_G2_NOSHEAR": -10.0, "NGMIX_G1_PSF_ORIG_NOSHEAR": -10.0, "NGMIX_G1_PSF_RECONV_NOSHEAR": -10.0, - "NGMIX_T_ERR_GAL_NOSHEAR": 1e30, + "NGMIX_T_ERR_NOSHEAR": 1e30, "NGMIX_T_ERR_PSF_ORIG_NOSHEAR": 1e30, "NGMIX_T_ERR_PSF_RECONV_NOSHEAR": 1e30, "NGMIX_N_EPOCH": 0.0, diff --git a/tests/module/test_psf_grammar_properties.py b/tests/module/test_psf_grammar_properties.py index c7c361dc..09eaf181 100644 --- a/tests/module/test_psf_grammar_properties.py +++ b/tests/module/test_psf_grammar_properties.py @@ -2,7 +2,9 @@ Drives ``SaveCatalogue._save_ngmix_data`` (the make_cat writer) under hypothesis to pin three invariants of the renamed grammar -``NGMIX[m]_[_ERR]__`` (shapepipe#749), plus the three +``NGMIX[m]_[_ERR][_]_`` (shapepipe#749, #761): the +galaxy is the implicit default object and carries NO ``OBJECT`` token +(``NGMIX_G1_NOSHEAR``, never ``NGMIX_G1_GAL_NOSHEAR``), plus the three OBJECT/SHEAR-less metadata columns (``NGMIX[m]_MCAL_FLAGS``, ``NGMIX_N_EPOCH``, ``NGMIX_MCAL_TYPES_FAIL``), where the ORIGINAL image PSF (``PSF_ORIG``) and the metacal RECONVOLUTION kernel (``PSF_RECONV``) are independent fits of @@ -12,14 +14,25 @@ orig vs reconv ngmix source columns, every emitted ``*_PSF_ORIG_*`` column traces to its orig source and every ``*_PSF_RECONV_*`` to its reconv source, and the two families are never crossed; -(b) every column name the writer emits matches the grammar regex; +(b) every column name the writer emits matches the grammar regex (the OBJECT + group is optional, so a regressed ``GAL`` token is rejected, not + absorbed); (c) every ``NGMIX_*`` token the shipped param file (``example/cfis/final_cat.param``) names is a column the writer can - produce — writer/param-file consistency. - -Setup mirrors ``tests/module/test_make_cat.py``: a synthetic ngmix FITS with -the five shear-type extensions carrying the exact ``compile_results`` key set, -read back through ``_save_ngmix_data``. + produce — writer/param-file consistency; +(d) the FULL frozen grammar (shapepipe#761) — ``ESTIMATOR_COMPONENT[_ERR]_ + OBJECT[_metacaltype]`` — holds across all three estimator families: ngmix + and galsim split ellipticity into ``G1``/``G2`` (g-type), HSM into + ``E1``/``E2`` (e-type); every family stores exactly one size, ``T``; HSM + keeps the singular ``FLAG`` token, ngmix/galsim keep plural ``FLAGS``. + This part is a static example-based check (not writer-driven), so it + covers galsim/HSM without depending on those producer modules — see + ``test_galsim_grammar_properties.py`` and ``test_psfex_interp.py`` for + the writer-driven coverage of those families. + +Setup for (a)-(c) mirrors ``tests/module/test_make_cat.py``: a synthetic +ngmix FITS with the five shear-type extensions carrying the exact +``compile_results`` key set, read back through ``_save_ngmix_data``. """ import re @@ -151,11 +164,18 @@ def _run_save_ngmix(ngmix_path, obj_ids, cat_size_target=None): # NGMIX[m]_[_ERR]__, plus the three per-object # metadata columns that carry neither OBJECT nor SHEAR. Anchored so a stray # legacy token (_PSFo, _Tpsf, NGMIX_ELL_*) fails to match. +# Galaxy is the implicit default object — no token (shapepipe#761), so the +# OBJECT group is OPTIONAL: ``NGMIX_G1_NOSHEAR`` (galaxy, no object token) +# matches alongside ``NGMIX_G1_PSF_ORIG_NOSHEAR`` (explicit PSF object). This +# also makes the regex auto-reject any future ``GAL``-token regression: a +# stray ``_GAL_`` between COMPONENT and SHEAR cannot be absorbed by the +# optional group (``GAL`` is not one of its alternatives) and there is no +# other slot for it to land in. _COMPONENT = "G1|G2|T|SNR|FLUX|MAG|FLAGS" -_OBJECT = "GAL|PSF_ORIG|PSF_RECONV" +_OBJECT = "PSF_ORIG|PSF_RECONV" _SHEAR = "NOSHEAR|1P|1M|2P|2M" GRAMMAR_RE = re.compile( - rf"^NGMIXm?_(?:{_COMPONENT})(?:_ERR)?_(?:{_OBJECT})_(?:{_SHEAR})$" + rf"^NGMIXm?_(?:{_COMPONENT})(?:_ERR)?(?:_(?:{_OBJECT}))?_(?:{_SHEAR})$" rf"|^NGMIXm?_(?:MCAL_FLAGS|MCAL_TYPES_FAIL|N_EPOCH)$" ) @@ -317,6 +337,10 @@ def test_emitted_column_names_match_grammar(obj_ids, tmp_path_factory): assert out, "writer produced no columns" for col in out: assert GRAMMAR_RE.match(col), f"off-grammar column emitted: {col!r}" + # Galaxy is the implicit default object — a regressed GAL token would + # match neither the optional OBJECT group nor any other slot, but + # assert it directly so the failure message names the real defect. + assert "_GAL_" not in col and not col.endswith("_GAL"), col # --------------------------------------------------------------------------- @@ -363,3 +387,89 @@ def test_param_file_ngmix_tokens_are_producible(param_path, obj_ids): # Non-vacuous: the writer DOES cover real grammar tokens from the param # file (so the assertion above is exercised against a non-empty set). assert (param_tokens - NON_WRITER_PARAM_TOKENS) & produced + + +# --------------------------------------------------------------------------- +# (d) The FULL frozen grammar, across all three estimator families +# --------------------------------------------------------------------------- +# +# ``ESTIMATOR_COMPONENT[_ERR]_OBJECT[_metacaltype]`` (shapepipe#761). This +# section encodes the grammar itself as a static property — example column +# names, not a driven writer — so it covers ngmix, galsim, and HSM together +# without depending on the galsim/HSM producer modules (covered by their own +# writer-driving tests: ``test_galsim_grammar_properties.py``, +# ``test_psfex_interp.py``). What it pins: +# +# - galaxy is the implicit default object, no token, for BOTH g-type writers +# (ngmix, galsim); +# - ellipticity is split into named scalar components, never a packed +# 2-vector: ``G1``/``G2`` for g-type (ngmix, galsim), ``E1``/``E2`` for +# e-type (HSM); +# - exactly one stored size, ``T`` (no ``SIGMA``/``FWHM``/``r50``); +# - HSM keeps the singular ``FLAG`` token; ngmix/galsim keep plural ``FLAGS``. + +FROZEN_GRAMMAR_RE = re.compile( + r"^(?:" + # ngmix: galaxy (no object token) or explicit PSF_ORIG/PSF_RECONV. + r"NGMIXm?_(?:G1|G2|T|SNR|FLUX|MAG|FLAGS)(?:_ERR)?" + r"(?:_(?:PSF_ORIG|PSF_RECONV))?_(?:NOSHEAR|1P|1M|2P|2M)" + r"|NGMIXm?_(?:MCAL_FLAGS|MCAL_TYPES_FAIL|N_EPOCH)" + # galsim: galaxy (no object token) or explicit PSF object; is a + # data-driven FITS extension name, so left as a generic uppercase token. + r"|GALSIM_(?:G1|G2|T)(?:_ERR|_UNCORR)?(?:_PSF)?_[A-Z0-9_]+" + r"|GALSIM_(?:FLUX|FLUX_ERR|MAG|MAG_ERR|FLAGS|RES)_[A-Z0-9_]+" + # HSM: e-type, explicit PSF/STAR object, singular FLAG; the multi-epoch + # sink in make_cat._save_psf_data appends a bare epoch index. + r"|HSM_(?:E1|E2|T)_(?:PSF|STAR)(?:_\d+)?" + r"|HSM_FLAG_(?:PSF|STAR)(?:_\d+)?" + r")$" +) + +# Representative columns the new grammar PRODUCES — one or more per family, +# chosen to exercise every rule above (GAL-drop, G1/G2 vs E1/E2, single T, +# FLAG vs FLAGS). +_GRAMMAR_VALID_EXAMPLES = [ + "NGMIX_G1_NOSHEAR", # galaxy, no object token + "NGMIX_T_ERR_PSF_ORIG_1M", + "NGMIX_FLAGS_2P", + "NGMIXm_G1_PSF_RECONV_NOSHEAR", + "NGMIX_MCAL_FLAGS", + "GALSIM_G1_NOSHEAR", # galaxy, no object token + "GALSIM_G2_UNCORR_NOSHEAR", + "GALSIM_T_PSF_ORIGINAL_PSF", + "GALSIM_FLAGS_NOSHEAR", + "HSM_E1_PSF", + "HSM_T_STAR", + "HSM_FLAG_PSF", + "HSM_E1_PSF_3", # make_cat multi-epoch sink + "HSM_T_PSF_2", + "HSM_FLAG_STAR_1", +] + +# Pre-#761 / off-grammar columns the rename REMOVES — each violates exactly +# one rule, named so a failure here points at the specific regression. +_GRAMMAR_INVALID_EXAMPLES = [ + "NGMIX_G1_GAL_NOSHEAR", # GAL token regression + "NGMIX_ELL_NOSHEAR", # packed ellipticity + "E1_PSF_HSM", # pre-rename HSM naming (not HSM_-prefixed) + "SIGMA_PSF_HSM", # raw sigma, not T + "HSM_SIGMA_PSF", # stored sigma instead of T + "HSM_FLAGS_PSF", # plural — HSM is singular FLAG + "GALSIM_GAL_ELL_NOSHEAR", # GAL token + packed ellipticity + "GALSIM_GAL_SIGMA_NOSHEAR", # GAL token + raw sigma + "GALSIM_PSF_ELL_NOSHEAR", # packed ellipticity + "GALSIM_FWHM_NOSHEAR", # FWHM, not T + "SPREAD_MODEL", # removed entirely, not renamed +] + + +@pytest.mark.parametrize("col", _GRAMMAR_VALID_EXAMPLES) +def test_frozen_grammar_accepts_new_grammar_examples(col): + """One representative column per family/rule matches the frozen grammar.""" + assert FROZEN_GRAMMAR_RE.match(col), f"should match grammar: {col!r}" + + +@pytest.mark.parametrize("col", _GRAMMAR_INVALID_EXAMPLES) +def test_frozen_grammar_rejects_legacy_examples(col): + """Each pre-#761 / removed column is rejected, not silently absorbed.""" + assert not FROZEN_GRAMMAR_RE.match(col), f"should NOT match grammar: {col!r}" From f9a8f0509e8b0e198e1c2676ef849113fa85ebe8 Mon Sep 17 00:00:00 2001 From: Cail Daley Date: Tue, 30 Jun 2026 22:49:52 +0200 Subject: [PATCH 20/30] build: track cs_util @ develop (git ref) for the size/shape helpers shapepipe's column grammar sources size (T = 2*sigma^2) from cs_util.size, which landed in cs_util 0.2.x. Point the dependency at cs_util's develop branch via [tool.uv.sources] (a git ref, mirroring the ngmix entry) so new cs_util helpers are available without shipping a release and rebuilding the image each time, and raise the floor cs_util>=0.2.1 to make the cs_util.size requirement explicit rather than coincidental on PyPI's current latest. uv.lock pins the resolved develop commit. CI rebuilds the image from this pyproject/lock; local in-container runs use the PYTHONPATH-shadow of an on-disk cs_util checkout until the dev image is rebuilt (see CLAUDE.md). Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01MgDK9SKn2PsDZGEbr4WFb3 --- pyproject.toml | 7 ++++++- uv.lock | 8 ++------ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 9097b3cc..69c73f06 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,7 +20,7 @@ dependencies = [ "astropy>=7.0", # major 6 → 7 "astroquery", "canfar", - "cs_util>=0.1.9", + "cs_util>=0.2.1", "galsim>=2.8", "h5py", "joblib>=1.4", @@ -90,6 +90,11 @@ environments = ["sys_platform == 'linux'"] [tool.uv.sources] ngmix = { git = "https://github.com/esheldon/ngmix", tag = "v2.4.0" } +# Track cs_util's develop branch (a git ref, not a pinned release) so new +# cs_util.size / cs_util.shape helpers are available without shipping a release +# and rebuilding the image each time. shapepipe's column grammar sources size +# (T = 2*sigma^2) and ellipticity conversions from cs_util. +cs_util = { git = "https://github.com/CosmoStat/cs_util", branch = "develop" } [build-system] # Declaring a build backend makes shapepipe an installed package (not a uv diff --git a/uv.lock b/uv.lock index b483439a..f5fe9c70 100644 --- a/uv.lock +++ b/uv.lock @@ -655,7 +655,7 @@ wheels = [ [[package]] name = "cs-util" version = "0.2.1" -source = { registry = "https://pypi.org/simple" } +source = { git = "https://github.com/CosmoStat/cs_util?branch=develop#ff8fa2b2168737353ac867cc21a8e95be082f17b" } dependencies = [ { name = "astropy", marker = "sys_platform == 'linux'" }, { name = "camb", marker = "sys_platform == 'linux'" }, @@ -670,10 +670,6 @@ dependencies = [ { name = "swig", marker = "sys_platform == 'linux'" }, { name = "vos", marker = "sys_platform == 'linux'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/e0/08/d759e6af88c8a53b9c1c863a3bc3b9b5b26cf434f7232875a4bb75bec514/cs_util-0.2.1.tar.gz", hash = "sha256:5dd9be50249c412743ed116c0cb3364d5bcf1ad907be692dc52db0059a00412d", size = 26713, upload-time = "2026-06-10T23:58:23.017Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/9a/2f/c8d89e8013567c3cf5535b3b941b81033e3c4a81101f3953d611af817dca/cs_util-0.2.1-py3-none-any.whl", hash = "sha256:577abc9ba04da8c78bdba9f6d235deecac99c266a0937e2c44ea1df5b50224de", size = 32209, upload-time = "2026-06-10T23:58:21.766Z" }, -] [[package]] name = "cycler" @@ -3515,7 +3511,7 @@ requires-dist = [ { name = "black", marker = "extra == 'lint'" }, { name = "build", marker = "extra == 'release'" }, { name = "canfar" }, - { name = "cs-util", specifier = ">=0.1.9" }, + { name = "cs-util", git = "https://github.com/CosmoStat/cs_util?branch=develop" }, { name = "fitsio", marker = "extra == 'fitsio'" }, { name = "galsim", specifier = ">=2.8" }, { name = "h5py" }, From d5b3e9251cdafc13b01b88cefe6fb016fb9b3d93 Mon Sep 17 00:00:00 2001 From: Sacha Guerrini Date: Wed, 1 Jul 2026 08:51:21 +0200 Subject: [PATCH 21/30] Go back to g-type ellipticities --- .../modules/mccd_package/mccd_interpolation_script.py | 8 ++++---- .../modules/mccd_package/shapepipe_auxiliary_mccd.py | 4 ++-- .../modules/psfex_interp_package/psfex_interp.py | 8 ++++---- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/shapepipe/modules/mccd_package/mccd_interpolation_script.py b/src/shapepipe/modules/mccd_package/mccd_interpolation_script.py index ddce0194..69d6c181 100644 --- a/src/shapepipe/modules/mccd_package/mccd_interpolation_script.py +++ b/src/shapepipe/modules/mccd_package/mccd_interpolation_script.py @@ -220,8 +220,8 @@ def _get_psfshapes(self): self.psf_shapes = np.array( [ [ - moms.observed_shape.e1, - moms.observed_shape.e2, + moms.observed_shape.g1, + moms.observed_shape.g2, moms.moments_sigma, int(bool(moms.error_message)), ] @@ -278,8 +278,8 @@ def _get_starshapes(self, star_vign): self.star_shapes = np.array( [ [ - moms.observed_shape.e1, - moms.observed_shape.e2, + moms.observed_shape.g1, + moms.observed_shape.g2, moms.moments_sigma, int(bool(moms.error_message)), ] diff --git a/src/shapepipe/modules/mccd_package/shapepipe_auxiliary_mccd.py b/src/shapepipe/modules/mccd_package/shapepipe_auxiliary_mccd.py index 99dea6b9..9b2c94a7 100644 --- a/src/shapepipe/modules/mccd_package/shapepipe_auxiliary_mccd.py +++ b/src/shapepipe/modules/mccd_package/shapepipe_auxiliary_mccd.py @@ -419,8 +419,8 @@ def mccd_interpolation_pipeline( PSF_shapes = np.array( [ [ - moms.observed_shape.e1, - moms.observed_shape.e2, + moms.observed_shape.g1, + moms.observed_shape.g2, moms.moments_sigma, int(bool(moms.error_message)), ] diff --git a/src/shapepipe/modules/psfex_interp_package/psfex_interp.py b/src/shapepipe/modules/psfex_interp_package/psfex_interp.py index 7428281d..db7ac3fe 100644 --- a/src/shapepipe/modules/psfex_interp_package/psfex_interp.py +++ b/src/shapepipe/modules/psfex_interp_package/psfex_interp.py @@ -323,8 +323,8 @@ def _get_psfshapes(self): self.psf_shapes = np.array( [ [ - moms.observed_shape.e1, - moms.observed_shape.e2, + moms.observed_shape.g1, + moms.observed_shape.g2, moms.moments_sigma, int(bool(moms.error_message)), ] @@ -438,8 +438,8 @@ def _get_starshapes(self, star_vign): self.star_shapes = np.array( [ [ - moms.observed_shape.e1, - moms.observed_shape.e2, + moms.observed_shape.g1, + moms.observed_shape.g2, moms.moments_sigma, int(bool(moms.error_message)), ] From 7d527d3da3d6646053e2fab9e271aebba3fcb1ba Mon Sep 17 00:00:00 2001 From: Sacha Guerrini Date: Wed, 1 Jul 2026 08:56:08 +0200 Subject: [PATCH 22/30] Rename the HSM columns with G1/2 instead of E1/2 --- .../mccd_package/mccd_interpolation_script.py | 12 +++++----- .../mccd_package/mccd_plot_utilities.py | 8 +++---- .../mccd_package/shapepipe_auxiliary_mccd.py | 4 ++-- .../merge_starcat_package/merge_starcat.py | 24 +++++++++---------- .../psfex_interp_package/psfex_interp.py | 16 ++++++------- 5 files changed, 32 insertions(+), 32 deletions(-) diff --git a/src/shapepipe/modules/mccd_package/mccd_interpolation_script.py b/src/shapepipe/modules/mccd_package/mccd_interpolation_script.py index 69d6c181..c288ac9e 100644 --- a/src/shapepipe/modules/mccd_package/mccd_interpolation_script.py +++ b/src/shapepipe/modules/mccd_package/mccd_interpolation_script.py @@ -340,12 +340,12 @@ def _write_output_validation(self, star_dict, psfex_cat_dict): ) data = { - "HSM_E1_PSF": self.psf_shapes[:, 0], - "HSM_E2_PSF": self.psf_shapes[:, 1], + "HSM_G1_PSF": self.psf_shapes[:, 0], + "HSM_G2_PSF": self.psf_shapes[:, 1], "HSM_T_PSF": cs_size.sigma_to_T(self.psf_shapes[:, 2]), "HSM_FLAG_PSF": self.psf_shapes[:, 3].astype(int), - "HSM_E1_STAR": self.star_shapes[:, 0], - "HSM_E2_STAR": self.star_shapes[:, 1], + "HSM_G1_STAR": self.star_shapes[:, 0], + "HSM_G2_STAR": self.star_shapes[:, 1], "HSM_T_STAR": cs_size.sigma_to_T(self.star_shapes[:, 2]), "HSM_FLAG_STAR": self.star_shapes[:, 3].astype(int), } @@ -537,10 +537,10 @@ def _interpolate_me(self): ] = final_list[j][1][where_res[0]] if self._compute_shape: shape_dict = {} - shape_dict["HSM_E1_PSF"] = final_list[j][2][ + shape_dict["HSM_G1_PSF"] = final_list[j][2][ where_res[0] ][0] - shape_dict["HSM_E2_PSF"] = final_list[j][2][ + shape_dict["HSM_G2_PSF"] = final_list[j][2][ where_res[0] ][1] shape_dict["HSM_T_PSF"] = cs_size.sigma_to_T( diff --git a/src/shapepipe/modules/mccd_package/mccd_plot_utilities.py b/src/shapepipe/modules/mccd_package/mccd_plot_utilities.py index 1cf33ff8..02bc55a8 100644 --- a/src/shapepipe/modules/mccd_package/mccd_plot_utilities.py +++ b/src/shapepipe/modules/mccd_package/mccd_plot_utilities.py @@ -261,15 +261,15 @@ def plot_meanshapes( # size column already holds T = 2 sigma^2 all_star_shapes = np.array( [ - starcat[hdu_no].data["HSM_E1_STAR"], - starcat[hdu_no].data["HSM_E2_STAR"], + starcat[hdu_no].data["HSM_G1_STAR"], + starcat[hdu_no].data["HSM_G2_STAR"], starcat[hdu_no].data["HSM_T_STAR"], ] ) all_psf_shapes = np.array( [ - starcat[hdu_no].data["HSM_E1_PSF"], - starcat[hdu_no].data["HSM_E2_PSF"], + starcat[hdu_no].data["HSM_G1_PSF"], + starcat[hdu_no].data["HSM_G2_PSF"], starcat[hdu_no].data["HSM_T_PSF"], ] ) diff --git a/src/shapepipe/modules/mccd_package/shapepipe_auxiliary_mccd.py b/src/shapepipe/modules/mccd_package/shapepipe_auxiliary_mccd.py index 9b2c94a7..943433e8 100644 --- a/src/shapepipe/modules/mccd_package/shapepipe_auxiliary_mccd.py +++ b/src/shapepipe/modules/mccd_package/shapepipe_auxiliary_mccd.py @@ -472,8 +472,8 @@ def shapepipe_write_output( if get_shapes: data = { "VIGNET": interp_PSFs, - "HSM_E1_PSF": PSF_shapes[:, 0], - "HSM_E2_PSF": PSF_shapes[:, 1], + "HSM_G1_PSF": PSF_shapes[:, 0], + "HSM_G2_PSF": PSF_shapes[:, 1], "HSM_T_PSF": cs_size.sigma_to_T(PSF_shapes[:, 2]), "HSM_FLAG_PSF": PSF_shapes[:, 3].astype(int), } diff --git a/src/shapepipe/modules/merge_starcat_package/merge_starcat.py b/src/shapepipe/modules/merge_starcat_package/merge_starcat.py index c5d3eccd..7bc0cb76 100644 --- a/src/shapepipe/modules/merge_starcat_package/merge_starcat.py +++ b/src/shapepipe/modules/merge_starcat_package/merge_starcat.py @@ -490,11 +490,11 @@ def process(self): "Y": y, "RA": ra, "DEC": dec, - "HSM_E1_PSF": g1_psf, - "HSM_E2_PSF": g2_psf, + "HSM_G1_PSF": g1_psf, + "HSM_G2_PSF": g2_psf, "HSM_T_PSF": size_psf, - "HSM_E1_STAR": g1, - "HSM_E2_STAR": g2, + "HSM_G1_STAR": g1, + "HSM_G2_STAR": g2, "HSM_T_STAR": size, "HSM_FLAG_PSF": flag_psf, "HSM_FLAG_STAR": flag_star, @@ -585,11 +585,11 @@ def process(self): dec += list(data_j["DEC"]) # shapes (size column already holds T = 2 sigma^2) - g1_psf += list(data_j["HSM_E1_PSF"]) - g2_psf += list(data_j["HSM_E2_PSF"]) + g1_psf += list(data_j["HSM_G1_PSF"]) + g2_psf += list(data_j["HSM_G2_PSF"]) size_psf += list(data_j["HSM_T_PSF"]) - g1 += list(data_j["HSM_E1_STAR"]) - g2 += list(data_j["HSM_E2_STAR"]) + g1 += list(data_j["HSM_G1_STAR"]) + g2 += list(data_j["HSM_G2_STAR"]) size += list(data_j["HSM_T_STAR"]) # flags @@ -633,11 +633,11 @@ def process(self): "Y": y, "RA": ra, "DEC": dec, - "HSM_E1_PSF": g1_psf, - "HSM_E2_PSF": g2_psf, + "HSM_G1_PSF": g1_psf, + "HSM_G2_PSF": g2_psf, "HSM_T_PSF": size_psf, - "HSM_E1_STAR": g1, - "HSM_E2_STAR": g2, + "HSM_G1_STAR": g1, + "HSM_G2_STAR": g2, "HSM_T_STAR": size, "HSM_FLAG_PSF": flag_psf, "HSM_FLAG_STAR": flag_star, diff --git a/src/shapepipe/modules/psfex_interp_package/psfex_interp.py b/src/shapepipe/modules/psfex_interp_package/psfex_interp.py index db7ac3fe..c14aeb13 100644 --- a/src/shapepipe/modules/psfex_interp_package/psfex_interp.py +++ b/src/shapepipe/modules/psfex_interp_package/psfex_interp.py @@ -347,8 +347,8 @@ def _write_output(self): if self._compute_shape: data = { "VIGNET": self.interp_PSFs, - "HSM_E1_PSF": self.psf_shapes[:, 0], - "HSM_E2_PSF": self.psf_shapes[:, 1], + "HSM_G1_PSF": self.psf_shapes[:, 0], + "HSM_G2_PSF": self.psf_shapes[:, 1], "HSM_T_PSF": cs_size.sigma_to_T(self.psf_shapes[:, 2]), "HSM_FLAG_PSF": self.psf_shapes[:, 3].astype(int), } @@ -500,12 +500,12 @@ def _write_output_validation(self, star_dict, psfex_cat_dict): ) data = { - "HSM_E1_PSF": self.psf_shapes[:, 0], - "HSM_E2_PSF": self.psf_shapes[:, 1], + "HSM_G1_PSF": self.psf_shapes[:, 0], + "HSM_G2_PSF": self.psf_shapes[:, 1], "HSM_T_PSF": cs_size.sigma_to_T(self.psf_shapes[:, 2]), "HSM_FLAG_PSF": self.psf_shapes[:, 3].astype(int), - "HSM_E1_STAR": self.star_shapes[:, 0], - "HSM_E2_STAR": self.star_shapes[:, 1], + "HSM_G1_STAR": self.star_shapes[:, 0], + "HSM_G2_STAR": self.star_shapes[:, 1], "HSM_T_STAR": cs_size.sigma_to_T(self.star_shapes[:, 2]), "HSM_FLAG_STAR": self.star_shapes[:, 3].astype(int), } @@ -755,10 +755,10 @@ def _interpolate_me(self): ] = final_list[j][1][where_res[0]] if self._compute_shape: shape_dict = {} - shape_dict["HSM_E1_PSF"] = final_list[j][2][ + shape_dict["HSM_G1_PSF"] = final_list[j][2][ where_res[0] ][0] - shape_dict["HSM_E2_PSF"] = final_list[j][2][ + shape_dict["HSM_G2_PSF"] = final_list[j][2][ where_res[0] ][1] shape_dict["HSM_T_PSF"] = cs_size.sigma_to_T( From 0df4f2c313c06e061edf45354d7fa25fe8cfa008 Mon Sep 17 00:00:00 2001 From: Sacha Guerrini Date: Wed, 1 Jul 2026 09:31:00 +0200 Subject: [PATCH 23/30] (fix) Remaining E1/2 HSM column instead of G1/2 --- .../modules/mccd_package/mccd_interpolation_script.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/shapepipe/modules/mccd_package/mccd_interpolation_script.py b/src/shapepipe/modules/mccd_package/mccd_interpolation_script.py index c288ac9e..bfd9f341 100644 --- a/src/shapepipe/modules/mccd_package/mccd_interpolation_script.py +++ b/src/shapepipe/modules/mccd_package/mccd_interpolation_script.py @@ -244,8 +244,8 @@ def _write_output(self): if self._compute_shape: data = { "VIGNET": self.interp_PSFs, - "HSM_E1_PSF": self.psf_shapes[:, 0], - "HSM_E2_PSF": self.psf_shapes[:, 1], + "HSM_G1_PSF": self.psf_shapes[:, 0], + "HSM_G2_PSF": self.psf_shapes[:, 1], "HSM_T_PSF": cs_size.sigma_to_T(self.psf_shapes[:, 2]), "HSM_FLAG_PSF": self.psf_shapes[:, 3].astype(int), } From 726207b5ff277e3f13c549abfa0407a40084dd21 Mon Sep 17 00:00:00 2001 From: Sacha Guerrini Date: Wed, 1 Jul 2026 09:33:00 +0200 Subject: [PATCH 24/30] (fix) Change the HSM label in the associated pytest --- tests/module/test_psfex_interp.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/module/test_psfex_interp.py b/tests/module/test_psfex_interp.py index cb644875..ba8bf550 100644 --- a/tests/module/test_psfex_interp.py +++ b/tests/module/test_psfex_interp.py @@ -163,7 +163,7 @@ def test_write_output_uses_hsm_grammar_and_routes_size_to_T( """``_write_output`` writes the HSM grammar names and stores T, not sigma. Captures the dict handed to ``save_as_fits`` and asserts the frozen - grammar (``HSM_E1_PSF`` / ``HSM_E2_PSF`` / ``HSM_T_PSF`` / ``HSM_FLAG_PSF``, + grammar (``HSM_G1_PSF`` / ``HSM_G2_PSF`` / ``HSM_T_PSF`` / ``HSM_FLAG_PSF``, no legacy ``*_PSF_HSM``) and that the size column is routed through ``cs_util.size.sigma_to_T`` (``T = 2 sigma^2``) at the producer. """ @@ -193,17 +193,17 @@ def _capture(self, data=None, **kwargs): assert { "VIGNET", - "HSM_E1_PSF", - "HSM_E2_PSF", + "HSM_G1_PSF", + "HSM_G2_PSF", "HSM_T_PSF", "HSM_FLAG_PSF", } <= set(data) for legacy in ("E1_PSF_HSM", "E2_PSF_HSM", "SIGMA_PSF_HSM", "FLAG_PSF_HSM"): assert legacy not in data - # e-type distortion stored straight through from _get_psfshapes ... - npt.assert_array_equal(data["HSM_E1_PSF"], interp.psf_shapes[:, 0]) - npt.assert_array_equal(data["HSM_E2_PSF"], interp.psf_shapes[:, 1]) + # g-type distortion stored straight through from _get_psfshapes ... + npt.assert_array_equal(data["HSM_G1_PSF"], interp.psf_shapes[:, 0]) + npt.assert_array_equal(data["HSM_G2_PSF"], interp.psf_shapes[:, 1]) # ... and the size column is T = 2 sigma^2 (sigma_to_T), not raw sigma. npt.assert_allclose(data["HSM_T_PSF"], 2.0 * interp.psf_shapes[:, 2] ** 2) assert not np.allclose(data["HSM_T_PSF"], interp.psf_shapes[:, 2]) From bcf4def2ba2884d16e6108c3d20067dafb432b69 Mon Sep 17 00:00:00 2001 From: Sacha Guerrini Date: Wed, 1 Jul 2026 09:45:28 +0200 Subject: [PATCH 25/30] (fix) Non-passing test because test was based on e-type ellipticity instead of g-type --- tests/module/test_psfex_interp.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/module/test_psfex_interp.py b/tests/module/test_psfex_interp.py index ba8bf550..52c21e5a 100644 --- a/tests/module/test_psfex_interp.py +++ b/tests/module/test_psfex_interp.py @@ -144,9 +144,9 @@ def test_get_psfshapes_stores_e_type_distortion(psfex_interpolator, e): mag_g = np.hypot(g1, g2) assume(mag_e - mag_g > 1e-3) # ensure a non-degenerate e-vs-g distinction - # Stored values are exactly the e-type distortion components ... - npt.assert_array_equal(psfex_interpolator.psf_shapes[0, 0], e1) - npt.assert_array_equal(psfex_interpolator.psf_shapes[0, 1], e2) + # Stored values are exactly the g-type distortion components ... + npt.assert_array_equal(psfex_interpolator.psf_shapes[0, 0], g1) + npt.assert_array_equal(psfex_interpolator.psf_shapes[0, 1], g2) # ... whose magnitude strictly exceeds the g-type shear it would carry if # the .g/.e swap regressed. From c5b428b6015ad9d8ebd93937d66e84379960ed61 Mon Sep 17 00:00:00 2001 From: Sacha Guerrini Date: Wed, 1 Jul 2026 10:02:39 +0200 Subject: [PATCH 26/30] (fix) Fix the magnitude comparison test between e- and g-type ellipticities. --- tests/module/test_psfex_interp.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/module/test_psfex_interp.py b/tests/module/test_psfex_interp.py index 52c21e5a..5e133fda 100644 --- a/tests/module/test_psfex_interp.py +++ b/tests/module/test_psfex_interp.py @@ -148,13 +148,12 @@ def test_get_psfshapes_stores_e_type_distortion(psfex_interpolator, e): npt.assert_array_equal(psfex_interpolator.psf_shapes[0, 0], g1) npt.assert_array_equal(psfex_interpolator.psf_shapes[0, 1], g2) - # ... whose magnitude strictly exceeds the g-type shear it would carry if - # the .g/.e swap regressed. + # ... whose magnitude strictly subceeds the e-type shear. mag_stored = np.hypot( psfex_interpolator.psf_shapes[0, 0], psfex_interpolator.psf_shapes[0, 1] ) - npt.assert_allclose(mag_stored, mag_e) - assert mag_stored > mag_g + 1e-4 + npt.assert_allclose(mag_stored, mag_g) + assert mag_e > mag_stored + 1e-4 def test_write_output_uses_hsm_grammar_and_routes_size_to_T( From 66b338d29a7b09d47756192b15f86da065e7e0f2 Mon Sep 17 00:00:00 2001 From: Cail McLean Daley Date: Wed, 1 Jul 2026 13:41:40 +0200 Subject: [PATCH 27/30] ngmix: give the PSF observation a finite weight map (#749) (#779) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * ngmix: give the PSF observation a finite weight map (#749) make_ngmix_observation built psf_obs weightless, so ngmix defaulted to unit weight on the unit-flux PSF stamp and the galaxy GPriorBA(0.4) prior handed to the diagnostic PSF fitter swamped the likelihood. The recovered PSF shape collapsed toward the prior (zero ellipticity), corrupting the *_psf_orig diagnostic columns (#749), and the small finite-weight noise budget present in the pre-v2.0 esheldon/aguinot code was lost in the rewrite (the now-retired #774 — the same bug from the noise-budget angle). Build psf_obs with a flat weight = 1/PSF_NOISE**2 (PSF_NOISE = 1e-5, the esheldon/aguinot value Axel's reproduction used), forced float for parity with the galaxy weight — the PSF analogue of the galaxy weight built just below. Weight only; PSFEx/MCCD models already carry a little noise, so no explicit stamp-noise injection is needed (Axel's guidance on #749). The exact value is non-critical: validated flat across 1e-4..1e-6 on the twin. Calibration is untouched: metacal fits this same psf_obs with a prior-free AdmomFitter, which is insensitive to a flat weight's absolute scale, so the calibrated shear is invariant (verified bit-for-bit on the twin). Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_017KQn6mXoL4XMgscZDsaTkB * tests: guard the PSF-obs weight end-to-end (#749) Three assertions in test_ngmix_weight_validation.py, on the real production path now that this sits on the column grammar (#761): - structural: make_ngmix_observation builds psf_obs with the finite 1/PSF_NOISE**2 weight, never unit weight; - recovery (#749 done-condition): through do_ngmix_metacal / average_original_psf, the *_psf_orig column recovers the injected PSF moment ellipticity to 1e-3 with the weight, and collapses below 10% without it (the contrast that proves the weight is load-bearing for the diagnostic columns); - invariance: every metacal-type shear is bit-for-bit identical between the unit-weight (pre-fix) and shipped PSF_NOISE builds (metacal's prior-free AdmomFitter is insensitive to the flat weight's scale). Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_017KQn6mXoL4XMgscZDsaTkB --------- Co-authored-by: Claude Opus 4.8 --- src/shapepipe/modules/ngmix_package/ngmix.py | 23 +++- tests/module/test_ngmix_weight_validation.py | 112 +++++++++++++++++++ 2 files changed, 134 insertions(+), 1 deletion(-) diff --git a/src/shapepipe/modules/ngmix_package/ngmix.py b/src/shapepipe/modules/ngmix_package/ngmix.py index cc7ab87b..371264e2 100644 --- a/src/shapepipe/modules/ngmix_package/ngmix.py +++ b/src/shapepipe/modules/ngmix_package/ngmix.py @@ -23,6 +23,15 @@ METACAL_TYPES = ('noshear', '1p', '1m', '2p', '2m') +# Noise budget for the PSF observation's flat weight map (psf_wt = +# 1/PSF_NOISE**2). Mirrors the esheldon/aguinot pattern (sigma ~ 1e-5/1e-6); +# 1e-5 is the value Axel Guinot's #749 reproduction used. The fit is driven +# by the *relative* weighting of the likelihood vs the prior, so the exact +# value is non-critical once it is finite (validated on the digital twin: the +# recovered PSF shape/size are flat across 1e-4..1e-6). See +# make_ngmix_observation. +PSF_NOISE = 1e-5 + class MetacalResult(NamedTuple): """Return of :func:`do_ngmix_metacal`: the metacal fit plus both PSFs. @@ -1091,7 +1100,19 @@ def make_ngmix_observation( col=(psf.shape[1] - 1) / 2, wcs=wcs, ) - psf_obs = Observation(psf, jacobian=psf_jacob) + # A model fit always needs a weight: without one ngmix defaults to unit + # weights on a unit-flux PSF stamp, so the galaxy GPriorBA(0.4) prior + # handed to the diagnostic PSF fitter swamps the (tiny) likelihood and the + # recovered PSF shape collapses toward the prior (#749). PSFEx/MCCD models + # already carry a little noise, so a finite flat weight matching the + # esheldon/aguinot noise budget (psf_noise ~ 1e-5) suffices to restore the + # likelihood — no explicit stamp-noise injection needed (#774). This is the + # PSF analogue of the galaxy weight built just below; force float so an + # integer-typed stamp cannot truncate the weight (matching that build). + # Metacal's own PSF fit is prior-free (AdmomFitter) and so is insensitive + # to this flat weight's scale, leaving the calibration untouched. + psf_wt = np.full_like(psf, 1.0 / PSF_NOISE ** 2, dtype=float) + psf_obs = Observation(psf, weight=psf_wt, jacobian=psf_jacob) gal_masked, weight_map, noise_img = prepare_ngmix_weights( gal, weight, flag, rng, bkg_rms=bkg_rms diff --git a/tests/module/test_ngmix_weight_validation.py b/tests/module/test_ngmix_weight_validation.py index 2dad3a8c..abc908fd 100644 --- a/tests/module/test_ngmix_weight_validation.py +++ b/tests/module/test_ngmix_weight_validation.py @@ -41,6 +41,7 @@ import pytest from shapepipe.modules.ngmix_package.ngmix import ( + PSF_NOISE, Postage_stamp, do_ngmix_metacal, get_prior, @@ -318,3 +319,114 @@ def test_metacal_ivar_beats_binary_under_noise_gradient( f"metacal ivar/binary shape-scatter ratio {ratio:.3f} >= 0.9: the" " rms-weight advantage does not survive the metacal/fixnoise path" ) + + +# --------------------------------------------------------------------------- +# PSF-observation weight (#749 / #774) +# +# make_ngmix_observation built the PSF observation weightless, so ngmix +# defaulted to unit weight on the unit-flux PSF stamp and the galaxy +# GPriorBA(0.4) prior shared with the PSF fitter swamped the likelihood. The +# recovered PSF *shape* then collapsed toward the prior (zero ellipticity). +# With the column grammar (#761) this corrupts the *_psf_orig diagnostic +# columns, fit from the original image PSF by average_original_psf. A finite +# flat weight = 1/PSF_NOISE**2 restores the likelihood. The fix touches only +# the diagnostic original-PSF fit (run with the galaxy prior); metacal fits the +# same psf_obs with a prior-free AdmomFitter, which is insensitive to a flat +# weight's scale, so the calibrated shear is unchanged. +# --------------------------------------------------------------------------- + +PSF_SHEAR = (0.05, -0.03) # injected PSF ellipticity for the recovery test + + +def _psf_orig_via_metacal(psf_noise, psf_shear=PSF_SHEAR, seed=7): + """Run the real production path and return (HSM truth, psf_orig, resdict). + + Builds a sheared-PSF stamp with the module's own ``make_data`` simulator, + pushes it through ``do_ngmix_metacal`` (which fits the original image PSF + via ``average_original_psf`` into the ``*_psf_orig`` columns), under a + given ``PSF_NOISE``. ``psf_noise=1.0`` reproduces the pre-fix unit weight. + """ + import shapepipe.modules.ngmix_package.ngmix as ngm + from shapepipe.testing.simulate import make_data + + old = ngm.PSF_NOISE + ngm.PSF_NOISE = psf_noise + try: + rng = np.random.RandomState(seed) + prior = get_prior(PIX_SCALE, rng) + gals, psfs, _, weights, flags, jacobs = make_data( + rng=np.random.RandomState(123), shear=(0.0, 0.0), noise=1e-5, + n_epochs=2, img_size=51, psf_shear=psf_shear, + ) + hsm = galsim.hsm.FindAdaptiveMom(galsim.Image(psfs[0], scale=PIX_SCALE)) + g_truth = np.array([hsm.observed_shape.g1, hsm.observed_shape.g2]) + stamp = Postage_stamp(bkg_sub=False, megacam_flip=False) + (stamp.gals, stamp.psfs, stamp.weights, stamp.flags, stamp.jacobs) = ( + gals, psfs, weights, flags, jacobs, + ) + resdict, _psf_reconv, psf_orig = do_ngmix_metacal( + stamp, prior, 1.0, rng, + ) + return g_truth, psf_orig, resdict + finally: + ngm.PSF_NOISE = old + + +def test_make_ngmix_observation_psf_obs_is_weighted(): + """Structural guard: psf_obs carries the finite 1/PSF_NOISE**2 weight. + + The whole #749 fix is this weight; a regression to the weightless build + (unit weight) is what we must never ship again. + """ + from shapepipe.testing.simulate import make_data + + gals, psfs, _, weights, flags, jacobs = make_data( + rng=np.random.RandomState(123), shear=(0.0, 0.0), noise=1e-5, + n_epochs=1, img_size=51, psf_shear=PSF_SHEAR, + ) + obs = make_ngmix_observation( + gals[0], weights[0], flags[0], psfs[0], jacobs[0], + np.random.RandomState(0), + ) + npt.assert_allclose(obs.psf.weight, 1.0 / PSF_NOISE ** 2) + assert (obs.psf.weight > 1.0).all(), "PSF obs fell back to unit weight" + + +def test_psf_weight_recovers_original_psf_ellipticity(): + """#749 done-condition, end to end: *_psf_orig recovers the PSF shape. + + Through the real ``do_ngmix_metacal`` / ``average_original_psf`` path, the + original-PSF ellipticity matches the injected PSF moment shape with the + weight, and collapses toward the prior (zero) without it — the contrast + that proves the weight is load-bearing for the diagnostic columns. + """ + g_truth, psf_orig_w, _ = _psf_orig_via_metacal(PSF_NOISE) + _, psf_orig_u, _ = _psf_orig_via_metacal(1.0) # pre-fix unit weight + + # The fix: original-PSF column recovers the PSF moment shape. + npt.assert_allclose(psf_orig_w["g_psf"], g_truth, atol=1e-3) + + # The bug it cures: weightless collapses the shape toward the prior. + assert np.linalg.norm(psf_orig_u["g_psf"]) < 0.1 * np.linalg.norm(g_truth), ( + "weightless original-PSF fit did not collapse -- the #749 contrast is" + " gone, so this test no longer proves the weight is load-bearing" + ) + + +def test_psf_weight_leaves_metacal_shear_invariant(): + """The fix must not move the calibration: metacal shear flat in PSF_NOISE. + + Metacal fits the same psf_obs with a prior-free AdmomFitter (then dilates + to a round reconvolution kernel); a prior-free moment fit is insensitive to + a flat weight's absolute scale, so every metacal-type shear is invariant to + the diagnostic weight. Asserted bit-for-bit between the unit weight + (pre-fix) and the shipped PSF_NOISE. + """ + _, _, res_w = _psf_orig_via_metacal(PSF_NOISE) + _, _, res_u = _psf_orig_via_metacal(1.0) + for mtype in ("noshear", "1p", "1m", "2p", "2m"): + npt.assert_allclose( + res_w[mtype]["g"], res_u[mtype]["g"], rtol=0, atol=1e-6, + err_msg=f"PSF-obs weight moved the metacal {mtype} shear", + ) From e036a95aaca5e393002b359c7258dbebe1e2054c Mon Sep 17 00:00:00 2001 From: Cail Daley Date: Wed, 1 Jul 2026 18:33:33 +0200 Subject: [PATCH 28/30] refactor(make_cat): remove the dead galsim shape serialization path ShapePipe has no galsim shape-measurement runner -- only ngmix_runner measures shapes and writes NGMIX_* columns. The galsim family in make_cat (_save_galsim_shapes, the process() "galsim" branch) could only be populated by reading an external shape catalogue that nothing in the pipeline produces; every production config pins SHAPE_MEASUREMENT_TYPE = ngmix. #761 renamed these columns onto the v2 grammar, but they are not creatable end-to-end -- confirmed from the sp_validation side, where the mirror galsim consumer was removed as dead code (sp_validation #201). Remove the dormant writer rather than carry grammar-renamed columns no pipeline can exercise. - delete SaveCatalogue._save_galsim_shapes and its process() "galsim" dispatch branch; process modes are now "ngmix" or "psf" - drop the now-orphaned cs_util.size import (only the galsim block used it) - delete tests/module/test_galsim_grammar_properties.py, which drove the removed writer Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_0191bMtgT2gcmcT7fNYbGLS1 --- .../modules/make_cat_package/make_cat.py | 142 +--------- .../module/test_galsim_grammar_properties.py | 251 ------------------ 2 files changed, 3 insertions(+), 390 deletions(-) delete mode 100644 tests/module/test_galsim_grammar_properties.py diff --git a/src/shapepipe/modules/make_cat_package/make_cat.py b/src/shapepipe/modules/make_cat_package/make_cat.py index e28ac643..ff8dd455 100644 --- a/src/shapepipe/modules/make_cat_package/make_cat.py +++ b/src/shapepipe/modules/make_cat_package/make_cat.py @@ -13,7 +13,6 @@ from astropy import coordinates as coords from astropy import units as u from astropy.wcs import WCS -from cs_util import size as cs_size from sqlitedict import SqliteDict from shapepipe.pipeline import file_io @@ -172,7 +171,7 @@ def process( Parameters ---------- mode : str - Run mode, options are ``ngmix``, ``galsim`` or ``psf`` + Run mode, options are ``ngmix`` or ``psf`` cat_path : str Path to input catalogue moments : bool @@ -192,15 +191,13 @@ def process( err_msg = None if mode == "ngmix": err_msg = self._save_ngmix_data(cat_path, moments) - elif mode == "galsim": - self._save_galsim_shapes(cat_path) elif mode == "psf": self._save_psf_data(cat_path) else: err_msg = ( f"Invalid process mode ({mode}) for " - + '``make_cat.Savecatalogue``. Options are "ngmix", ' - + '"galsim" or "psf".' + + '``make_cat.Savecatalogue``. Options are "ngmix" ' + + 'or "psf".' ) if err_msg is None: @@ -472,139 +469,6 @@ def _save_ngmix_data(self, ngmix_cat_path, moments=False): return None - def _save_galsim_shapes(self, galsim_cat_path): - """Save GalSim Shapes. - - Save the GalSim catalogue into the final one. - - Parameters - ---------- - galsim_cat_path : str - Path to GalSim catalogue to save - - """ - galsim_cat_file = file_io.FITSCatalogue(galsim_cat_path) - galsim_cat_file.open() - - self._key_ends = galsim_cat_file.get_ext_name()[1:] - - galsim_id = galsim_cat_file.get_data()["id"] - - for key_str in ( - "GALSIM_T_", - "GALSIM_T_PSF_", - "GALSIM_FLUX_", - "GALSIM_MAG_", - ): - self._update_dict(key_str, np.zeros(len(self._obj_id))) - for key_str in ("GALSIM_FLUX_ERR_", "GALSIM_MAG_ERR_", "GALSIM_RES_"): - self._update_dict(key_str, np.ones(len(self._obj_id)) * -1) - for key_str in ( - "GALSIM_G1_", - "GALSIM_G2_", - "GALSIM_G1_ERR_", - "GALSIM_G2_ERR_", - "GALSIM_G1_UNCORR_", - "GALSIM_G2_UNCORR_", - "GALSIM_G1_PSF_", - "GALSIM_G2_PSF_", - ): - self._update_dict(key_str, np.ones(len(self._obj_id)) * -10.0) - self._update_dict( - "GALSIM_FLAGS_", - np.ones(len(self._obj_id), dtype="int16"), - ) - - for idx, id_tmp in enumerate(self._obj_id): - ind = np.where(id_tmp == galsim_id)[0] - if len(ind) > 0: - - for key in self._key_ends: - - gcf_data = galsim_cat_file.get_data(key) - - if key == "ORIGINAL_PSF": - - # PSF columns sourced from the galaxy uncorr fields - # for this special extension (asymmetry preserved). - self._add2dict( - f"GALSIM_G1_PSF_{key}", - gcf_data["gal_uncorr_g1"][ind[0]], idx - ) - self._add2dict( - f"GALSIM_G2_PSF_{key}", - gcf_data["gal_uncorr_g2"][ind[0]], idx - ) - self._add2dict( - f"GALSIM_T_PSF_{key}", - cs_size.sigma_to_T(gcf_data["gal_sigma"][ind[0]]), - idx - ) - - else: - - self._add2dict( - f"GALSIM_G1_{key}", gcf_data["gal_g1"][ind[0]], idx - ) - self._add2dict( - f"GALSIM_G2_{key}", gcf_data["gal_g2"][ind[0]], idx - ) - self._add2dict( - f"GALSIM_G1_ERR_{key}", - gcf_data["gal_g1_err"][ind[0]], idx - ) - self._add2dict( - f"GALSIM_G2_ERR_{key}", - gcf_data["gal_g2_err"][ind[0]], idx - ) - - self._add2dict( - f"GALSIM_G1_UNCORR_{key}", - gcf_data["gal_uncorr_g1"][ind[0]], idx - ) - self._add2dict( - f"GALSIM_G2_UNCORR_{key}", - gcf_data["gal_uncorr_g2"][ind[0]], idx - ) - - self._add2dict( - f"GALSIM_T_{key}", - cs_size.sigma_to_T(gcf_data["gal_sigma"][ind[0]]), - idx - ) - - self._add2dict( - f"GALSIM_G1_PSF_{key}", - gcf_data["psf_g1"][ind[0]], idx - ) - self._add2dict( - f"GALSIM_G2_PSF_{key}", - gcf_data["psf_g2"][ind[0]], idx - ) - self._add2dict( - f"GALSIM_T_PSF_{key}", - cs_size.sigma_to_T(gcf_data["psf_sigma"][ind[0]]), - idx - ) - - flux = gcf_data["gal_flux"][ind[0]] - flux_err = gcf_data["gal_flux_err"][ind[0]] - self._add2dict(f"GALSIM_FLUX_{key}", flux, idx) - self._add2dict(f"GALSIM_FLUX_ERR_{key}", flux_err, idx) - - mag = gcf_data["gal_mag"][ind[0]] - mag_err = gcf_data["gal_mag_err"][ind[0]] - self._add2dict(f"GALSIM_MAG_{key}", mag, idx) - self._add2dict(f"GALSIM_MAG_ERR_{key}", mag_err, idx) - - flags = gcf_data["gal_flag"][ind[0]] - self._add2dict(f"GALSIM_FLAGS_{key}", flags, idx) - - res = gcf_data["gal_resolution"][ind[0]] - self._add2dict(f"GALSIM_RES_{key}", res, idx) - - galsim_cat_file.close() - def _save_psf_data(self, galaxy_psf_path): """Save PSF data. diff --git a/tests/module/test_galsim_grammar_properties.py b/tests/module/test_galsim_grammar_properties.py deleted file mode 100644 index 28346eaf..00000000 --- a/tests/module/test_galsim_grammar_properties.py +++ /dev/null @@ -1,251 +0,0 @@ -"""PROPERTY-BASED TESTS FOR THE GALSIM GALAXY/PSF COLUMN GRAMMAR. - -Drives ``SaveCatalogue._save_galsim_shapes`` (the make_cat writer) to pin the -renamed galsim grammar (shapepipe#761): the packed length-2 -``GALSIM_GAL_ELL_*`` / ``GALSIM_PSF_ELL_*`` columns are split into scalar -``GALSIM_G1_*`` / ``GALSIM_G2_*`` (g-type — galsim is correctly g-type, no -e/g value fix needed here), the galaxy carries no ``GAL`` token (implicit -default object, same rule as ngmix), and the raw ``GALSIM_GAL_SIGMA_*`` / -``GALSIM_PSF_SIGMA_*`` columns are replaced by the single stored size -``GALSIM_T_*`` (``T = 2 sigma^2``, via ``cs_util.size.sigma_to_T``). - -The ``ORIGINAL_PSF`` extension is a deliberate asymmetry, preserved rather -than normalized away: its PSF columns are sourced from the *galaxy's* -uncorrected fields (``gal_uncorr_g1/g2``, ``gal_sigma``), not the -per-extension ``psf_g1/g2``/``psf_sigma`` fields every other extension uses. -""" - -import re -import tempfile -from pathlib import Path - -import numpy as np -import numpy.testing as npt -from astropy.io import fits -from cs_util import size as cs_size -from hypothesis import given, settings -from hypothesis import strategies as st - -from shapepipe.modules.make_cat_package.make_cat import SaveCatalogue - -# --------------------------------------------------------------------------- -# Harness -# --------------------------------------------------------------------------- - -# Source columns _save_galsim_shapes reads per extension. The ORIGINAL_PSF -# extension only consumes the gal_uncorr_g1/g2 + gal_sigma subset, but the -# real galsim catalogue carries the full set on every extension, so the -# fixture mirrors that rather than special-casing the row shape. -GALSIM_KEYS = [ - "id", - "gal_g1", "gal_g2", "gal_g1_err", "gal_g2_err", - "gal_uncorr_g1", "gal_uncorr_g2", "gal_sigma", - "psf_g1", "psf_g2", "psf_sigma", - "gal_flux", "gal_flux_err", "gal_mag", "gal_mag_err", - "gal_flag", "gal_resolution", -] -INT_KEYS = {"id", "gal_flag"} - -# Per-key multiplier so a single scalar ``seed`` fans out into twelve -# distinct values — a swapped source (e.g. psf_g1 <-> gal_uncorr_g1) breaks -# at least one equality below. -_MULT = { - "gal_g1": 1.0, "gal_g2": 1.3, "gal_g1_err": 1.7, "gal_g2_err": 2.1, - "gal_uncorr_g1": 2.6, "gal_uncorr_g2": 3.1, "gal_sigma": 0.4, - "psf_g1": 0.6, "psf_g2": 0.8, "psf_sigma": 0.5, - "gal_flux": 10.0, "gal_flux_err": 1.0, "gal_mag": 5.0, - "gal_mag_err": 0.2, -} - - -def _galsim_row(obj_id, seed): - """One object's per-key values for one extension, scaled by ``seed``.""" - row = {"id": obj_id, "gal_flag": 0, "gal_resolution": 0.5} - row.update({key: mult * seed for key, mult in _MULT.items()}) - return row - - -def _write_galsim_cat(path, obj_ids, seed_by_ext): - """Write a synthetic galsim FITS, one extension per ``seed_by_ext`` key. - - Extension order is preserved (dict order), matching how - ``get_ext_name()[1:]`` drives ``self._key_ends`` in production. - """ - hdus = [fits.PrimaryHDU()] - for ext, seed in seed_by_ext.items(): - rows = [_galsim_row(oid, seed) for oid in obj_ids] - cols = [ - fits.Column( - name=key, - format="K" if key in INT_KEYS else "D", - array=np.array([row[key] for row in rows]), - ) - for key in GALSIM_KEYS - ] - hdus.append(fits.BinTableHDU.from_columns(cols, name=ext)) - fits.HDUList(hdus).writeto(path, overwrite=True) - - -def _run_save_galsim(galsim_path, obj_ids): - """Drive ``_save_galsim_shapes`` and return its populated output dict.""" - inst = object.__new__(SaveCatalogue) - inst._obj_id = np.asarray(obj_ids) - inst._output_dict = {} - inst._save_galsim_shapes(str(galsim_path)) - return inst._output_dict - - -# --------------------------------------------------------------------------- -# Strategies -# --------------------------------------------------------------------------- - -obj_ids_strategy = st.lists( - st.integers(min_value=1, max_value=10_000), min_size=1, max_size=5, - unique=True, -) -# A finite seed bounded away from zero, so every per-key value it scales is -# itself non-zero and the family-vs-family / key-vs-key comparisons below are -# never vacuously satisfied by an accidental 0 == 0. -_seed = st.floats( - min_value=0.05, max_value=5.0, allow_nan=False, allow_infinity=False, -) - - -# --------------------------------------------------------------------------- -# (a) Scalar split + size routing, regular extension -# --------------------------------------------------------------------------- - -@settings(deadline=None, max_examples=25) -@given(obj_ids=obj_ids_strategy, seed=_seed) -def test_regular_extension_scalar_split_and_size(obj_ids, seed): - """A non-``ORIGINAL_PSF`` extension routes each source to its own column. - - ``GALSIM_G1/G2_`` from ``gal_g1/g2``, the ``_ERR``/``_UNCORR`` - variants from their namesake sources, ``GALSIM_T_`` from - ``sigma_to_T(gal_sigma)``, and the PSF family - (``GALSIM_G1/G2_PSF_``, ``GALSIM_T_PSF_``) from ``psf_g1/g2`` / - ``sigma_to_T(psf_sigma)`` — never from the galaxy's own fields. - """ - ext = "NOSHEAR" - path = tempfile.NamedTemporaryFile(suffix=".fits", delete=False).name - _write_galsim_cat(path, obj_ids, {ext: seed}) - out = _run_save_galsim(path, obj_ids) - Path(path).unlink() - - n = len(obj_ids) - row = _galsim_row(obj_ids[0], seed) - npt.assert_allclose(out[f"GALSIM_G1_{ext}"], [row["gal_g1"]] * n) - npt.assert_allclose(out[f"GALSIM_G2_{ext}"], [row["gal_g2"]] * n) - npt.assert_allclose(out[f"GALSIM_G1_ERR_{ext}"], [row["gal_g1_err"]] * n) - npt.assert_allclose(out[f"GALSIM_G2_ERR_{ext}"], [row["gal_g2_err"]] * n) - npt.assert_allclose( - out[f"GALSIM_G1_UNCORR_{ext}"], [row["gal_uncorr_g1"]] * n - ) - npt.assert_allclose( - out[f"GALSIM_G2_UNCORR_{ext}"], [row["gal_uncorr_g2"]] * n - ) - npt.assert_allclose( - out[f"GALSIM_T_{ext}"], [cs_size.sigma_to_T(row["gal_sigma"])] * n - ) - npt.assert_allclose(out[f"GALSIM_G1_PSF_{ext}"], [row["psf_g1"]] * n) - npt.assert_allclose(out[f"GALSIM_G2_PSF_{ext}"], [row["psf_g2"]] * n) - npt.assert_allclose( - out[f"GALSIM_T_PSF_{ext}"], [cs_size.sigma_to_T(row["psf_sigma"])] * n - ) - # The galaxy and PSF families must stay distinct (un-aliased), and the - # raw-sigma value must never leak through unconverted. - assert not np.allclose(out[f"GALSIM_T_{ext}"], row["gal_sigma"]) - assert not np.allclose(out[f"GALSIM_G1_{ext}"], out[f"GALSIM_G1_PSF_{ext}"]) - - npt.assert_allclose(out[f"GALSIM_FLUX_{ext}"], [row["gal_flux"]] * n) - npt.assert_allclose(out[f"GALSIM_FLUX_ERR_{ext}"], [row["gal_flux_err"]] * n) - npt.assert_allclose(out[f"GALSIM_MAG_{ext}"], [row["gal_mag"]] * n) - npt.assert_allclose(out[f"GALSIM_MAG_ERR_{ext}"], [row["gal_mag_err"]] * n) - npt.assert_allclose(out[f"GALSIM_RES_{ext}"], [row["gal_resolution"]] * n) - - -# --------------------------------------------------------------------------- -# (b) ORIGINAL_PSF asymmetry: sourced from the galaxy's uncorrected fields -# --------------------------------------------------------------------------- - -@settings(deadline=None, max_examples=25) -@given(obj_ids=obj_ids_strategy, seed_main=_seed, seed_origpsf=_seed) -def test_original_psf_extension_sourced_from_gal_uncorr( - obj_ids, seed_main, seed_origpsf -): - """``ORIGINAL_PSF`` columns trace to ``gal_uncorr_*``/``gal_sigma``, NOT - ``psf_*`` — the one preserved asymmetry in the galsim grammar. - - The two extensions are written with independent seeds, and the - ``ORIGINAL_PSF`` row's own ``psf_g1/g2``/``psf_sigma`` are deliberately - distinct from its ``gal_uncorr_*``/``gal_sigma`` (the seed multipliers - differ), so a regression that wired the regular per-extension ``psf_*`` - sourcing into ``ORIGINAL_PSF`` would be caught, not masked by equal - values. - """ - seed_by_ext = {"NOSHEAR": seed_main, "ORIGINAL_PSF": seed_origpsf} - path = tempfile.NamedTemporaryFile(suffix=".fits", delete=False).name - _write_galsim_cat(path, obj_ids, seed_by_ext) - out = _run_save_galsim(path, obj_ids) - Path(path).unlink() - - n = len(obj_ids) - op_row = _galsim_row(obj_ids[0], seed_origpsf) - npt.assert_allclose( - out["GALSIM_G1_PSF_ORIGINAL_PSF"], [op_row["gal_uncorr_g1"]] * n - ) - npt.assert_allclose( - out["GALSIM_G2_PSF_ORIGINAL_PSF"], [op_row["gal_uncorr_g2"]] * n - ) - npt.assert_allclose( - out["GALSIM_T_PSF_ORIGINAL_PSF"], - [cs_size.sigma_to_T(op_row["gal_sigma"])] * n, - ) - # Not sourced from this extension's own psf_g1/g2/psf_sigma (those are - # scaled by the same seed but through different multipliers, so equality - # here would mean the asymmetry was lost). - assert not np.allclose( - out["GALSIM_G1_PSF_ORIGINAL_PSF"], [op_row["psf_g1"]] * n - ) - assert not np.allclose( - out["GALSIM_T_PSF_ORIGINAL_PSF"], - [cs_size.sigma_to_T(op_row["psf_sigma"])] * n, - ) - - -# --------------------------------------------------------------------------- -# (c) Every emitted column matches the new grammar; no legacy tokens survive -# --------------------------------------------------------------------------- - -@settings(deadline=None, max_examples=25) -@given(obj_ids=obj_ids_strategy, seed=_seed) -def test_emitted_columns_match_grammar_no_legacy_tokens(obj_ids, seed): - """Every ``GALSIM_*`` column matches the scalar grammar; nothing packed. - - No ``GALSIM_GAL_ELL*``/``GALSIM_PSF_ELL*`` (packed length-2 ellipticity) - and no ``GALSIM_*_SIGMA*`` (raw size) survives — both replaced by scalar - ``G1``/``G2`` components and the single stored size ``T``. The galaxy - also carries no ``GAL`` token (implicit default object, same rule as - ngmix). - """ - ext_names = ["NOSHEAR", "ORIGINAL_PSF"] - seed_by_ext = {ext: seed * (i + 1) for i, ext in enumerate(ext_names)} - path = tempfile.NamedTemporaryFile(suffix=".fits", delete=False).name - _write_galsim_cat(path, obj_ids, seed_by_ext) - out = _run_save_galsim(path, obj_ids) - Path(path).unlink() - - assert out, "writer produced no columns" - - _component = "G1|G2|T" - _qual = "(?:_ERR|_UNCORR)?" - _ext = "|".join(re.escape(e) for e in ext_names) - grammar_re = re.compile( - rf"^GALSIM_(?:{_component}){_qual}(?:_PSF)?_(?:{_ext})$" - rf"|^GALSIM_(?:FLUX|FLUX_ERR|MAG|MAG_ERR|FLAGS|RES)_(?:{_ext})$" - ) - for col in out: - assert grammar_re.match(col), f"off-grammar column emitted: {col!r}" - assert "_ELL" not in col, col - assert "SIGMA" not in col, col - assert "_GAL_" not in col and not col.endswith("_GAL"), col From df84064f1bf569f234e44c803dc8fe4006c59935 Mon Sep 17 00:00:00 2001 From: Cail Daley Date: Wed, 1 Jul 2026 18:33:48 +0200 Subject: [PATCH 29/30] refactor(make_cat): restrict SHAPE_MEASUREMENT_TYPE to ngmix ngmix is now the pipeline's only shape estimator, so make_cat_runner validates SHAPE_MEASUREMENT_TYPE against ngmix alone and raises on anything else. The knob is kept (rather than hardcoded) as the extension point for a future estimator family, mirroring sp_validation's retained `prefix`. The shape-write loop drops the galsim-vs-ngmix catalogue selection -- shapes are always read from the single ngmix input. Docstring and the cat_matched.param comment lose their galsim references. Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_0191bMtgT2gcmcT7fNYbGLS1 --- example/unions_800/cat_matched.param | 2 +- src/shapepipe/modules/make_cat_package/__init__.py | 2 +- src/shapepipe/modules/make_cat_runner.py | 14 +++----------- 3 files changed, 5 insertions(+), 13 deletions(-) diff --git a/example/unions_800/cat_matched.param b/example/unions_800/cat_matched.param index f387be00..bfe7c160 100644 --- a/example/unions_800/cat_matched.param +++ b/example/unions_800/cat_matched.param @@ -87,7 +87,7 @@ FLAG_TILING # magnitude, mainly for plots MAG_AUTO -# SNR from SExtractor, used for cuts on GALSIM shapes +# SNR from SExtractor, used for shape cuts SNR_WIN # PSF size measured on original image diff --git a/src/shapepipe/modules/make_cat_package/__init__.py b/src/shapepipe/modules/make_cat_package/__init__.py index 84e6c97a..22b48d81 100644 --- a/src/shapepipe/modules/make_cat_package/__init__.py +++ b/src/shapepipe/modules/make_cat_package/__init__.py @@ -27,7 +27,7 @@ =================================== SHAPE_MEASUREMENT_TYPE : list - Shape measurement method, valid options are ``ngmix`` and/or ``galsim`` + Shape measurement method; the only supported option is ``ngmix`` SAVE_PSF_DATA : bool, optional Save PSF information if ``True``; default value is ``False`` TILE_LIST : str, optional diff --git a/src/shapepipe/modules/make_cat_runner.py b/src/shapepipe/modules/make_cat_runner.py index 6ab6449d..7bf293cd 100644 --- a/src/shapepipe/modules/make_cat_runner.py +++ b/src/shapepipe/modules/make_cat_runner.py @@ -49,10 +49,8 @@ def make_cat_runner( "SHAPE_MEASUREMENT_TYPE", ) for shape_type in shape_type_list: - if shape_type.lower() not in ["ngmix", "galsim"]: - raise ValueError( - "SHAPE_MEASUREMENT_TYPE must be in [ngmix, galsim]" - ) + if shape_type.lower() != "ngmix": + raise ValueError("SHAPE_MEASUREMENT_TYPE must be ngmix") # Fetch PSF data option if config.has_option(module_config_sec, "SAVE_PSF_DATA"): @@ -76,13 +74,7 @@ def make_cat_runner( w_log.info("Save shape measurement data") for shape_type in shape_type_list: w_log.info(f"Save {shape_type.lower()} data") - # A second, galsim-specific shape catalogue path (shape2_cat_path) - # was never produced by any runner in this pipeline (no - # galsim_shapes_runner exists) — galsim shapes are read from the - # same input as ngmix (shape1_cat_path). - cat_path = shape1_cat_path - err_msg = sc_inst.process(shape_type.lower(), cat_path) - + err_msg = sc_inst.process(shape_type.lower(), shape1_cat_path) # If error message: delete (incomplete) output file and raise error if err_msg is not None: From 1c69af55fca03d253982eb796f23d35856e123f5 Mon Sep 17 00:00:00 2001 From: Cail Daley Date: Wed, 1 Jul 2026 18:33:58 +0200 Subject: [PATCH 30/30] test: drop galsim from the frozen column-grammar examples The static grammar test covered ngmix, galsim, and HSM. With the galsim writer gone, drop its regex branch and its valid/invalid example columns; the frozen grammar now spans the two families ShapePipe actually produces (ngmix, HSM). Also drop the docstring pointer to the deleted test_galsim_grammar_properties.py. Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_0191bMtgT2gcmcT7fNYbGLS1 --- tests/module/test_psf_grammar_properties.py | 46 +++++++-------------- 1 file changed, 16 insertions(+), 30 deletions(-) diff --git a/tests/module/test_psf_grammar_properties.py b/tests/module/test_psf_grammar_properties.py index 09eaf181..593614a1 100644 --- a/tests/module/test_psf_grammar_properties.py +++ b/tests/module/test_psf_grammar_properties.py @@ -21,14 +21,13 @@ (``example/cfis/final_cat.param``) names is a column the writer can produce — writer/param-file consistency; (d) the FULL frozen grammar (shapepipe#761) — ``ESTIMATOR_COMPONENT[_ERR]_ - OBJECT[_metacaltype]`` — holds across all three estimator families: ngmix - and galsim split ellipticity into ``G1``/``G2`` (g-type), HSM into - ``E1``/``E2`` (e-type); every family stores exactly one size, ``T``; HSM - keeps the singular ``FLAG`` token, ngmix/galsim keep plural ``FLAGS``. - This part is a static example-based check (not writer-driven), so it - covers galsim/HSM without depending on those producer modules — see - ``test_galsim_grammar_properties.py`` and ``test_psfex_interp.py`` for - the writer-driven coverage of those families. + OBJECT[_metacaltype]`` — holds across both estimator families: ngmix + splits ellipticity into ``G1``/``G2`` (g-type), HSM into ``E1``/``E2`` + (e-type); every family stores exactly one size, ``T``; HSM keeps the + singular ``FLAG`` token, ngmix keeps plural ``FLAGS``. This part is a + static example-based check (not writer-driven), so it covers HSM without + depending on its producer module — see ``test_psfex_interp.py`` for the + writer-driven coverage of that family. Setup for (a)-(c) mirrors ``tests/module/test_make_cat.py``: a synthetic ngmix FITS with the five shear-type extensions carrying the exact @@ -390,23 +389,22 @@ def test_param_file_ngmix_tokens_are_producible(param_path, obj_ids): # --------------------------------------------------------------------------- -# (d) The FULL frozen grammar, across all three estimator families +# (d) The FULL frozen grammar, across both estimator families # --------------------------------------------------------------------------- # # ``ESTIMATOR_COMPONENT[_ERR]_OBJECT[_metacaltype]`` (shapepipe#761). This # section encodes the grammar itself as a static property — example column -# names, not a driven writer — so it covers ngmix, galsim, and HSM together -# without depending on the galsim/HSM producer modules (covered by their own -# writer-driving tests: ``test_galsim_grammar_properties.py``, -# ``test_psfex_interp.py``). What it pins: +# names, not a driven writer — so it covers ngmix and HSM together without +# depending on the HSM producer module (covered by its own writer-driving +# test, ``test_psfex_interp.py``). What it pins: # -# - galaxy is the implicit default object, no token, for BOTH g-type writers -# (ngmix, galsim); +# - galaxy is the implicit default object, no token, for the g-type ngmix +# writer; # - ellipticity is split into named scalar components, never a packed -# 2-vector: ``G1``/``G2`` for g-type (ngmix, galsim), ``E1``/``E2`` for -# e-type (HSM); +# 2-vector: ``G1``/``G2`` for g-type (ngmix), ``E1``/``E2`` for e-type +# (HSM); # - exactly one stored size, ``T`` (no ``SIGMA``/``FWHM``/``r50``); -# - HSM keeps the singular ``FLAG`` token; ngmix/galsim keep plural ``FLAGS``. +# - HSM keeps the singular ``FLAG`` token; ngmix keeps plural ``FLAGS``. FROZEN_GRAMMAR_RE = re.compile( r"^(?:" @@ -414,10 +412,6 @@ def test_param_file_ngmix_tokens_are_producible(param_path, obj_ids): r"NGMIXm?_(?:G1|G2|T|SNR|FLUX|MAG|FLAGS)(?:_ERR)?" r"(?:_(?:PSF_ORIG|PSF_RECONV))?_(?:NOSHEAR|1P|1M|2P|2M)" r"|NGMIXm?_(?:MCAL_FLAGS|MCAL_TYPES_FAIL|N_EPOCH)" - # galsim: galaxy (no object token) or explicit PSF object; is a - # data-driven FITS extension name, so left as a generic uppercase token. - r"|GALSIM_(?:G1|G2|T)(?:_ERR|_UNCORR)?(?:_PSF)?_[A-Z0-9_]+" - r"|GALSIM_(?:FLUX|FLUX_ERR|MAG|MAG_ERR|FLAGS|RES)_[A-Z0-9_]+" # HSM: e-type, explicit PSF/STAR object, singular FLAG; the multi-epoch # sink in make_cat._save_psf_data appends a bare epoch index. r"|HSM_(?:E1|E2|T)_(?:PSF|STAR)(?:_\d+)?" @@ -434,10 +428,6 @@ def test_param_file_ngmix_tokens_are_producible(param_path, obj_ids): "NGMIX_FLAGS_2P", "NGMIXm_G1_PSF_RECONV_NOSHEAR", "NGMIX_MCAL_FLAGS", - "GALSIM_G1_NOSHEAR", # galaxy, no object token - "GALSIM_G2_UNCORR_NOSHEAR", - "GALSIM_T_PSF_ORIGINAL_PSF", - "GALSIM_FLAGS_NOSHEAR", "HSM_E1_PSF", "HSM_T_STAR", "HSM_FLAG_PSF", @@ -455,10 +445,6 @@ def test_param_file_ngmix_tokens_are_producible(param_path, obj_ids): "SIGMA_PSF_HSM", # raw sigma, not T "HSM_SIGMA_PSF", # stored sigma instead of T "HSM_FLAGS_PSF", # plural — HSM is singular FLAG - "GALSIM_GAL_ELL_NOSHEAR", # GAL token + packed ellipticity - "GALSIM_GAL_SIGMA_NOSHEAR", # GAL token + raw sigma - "GALSIM_PSF_ELL_NOSHEAR", # packed ellipticity - "GALSIM_FWHM_NOSHEAR", # FWHM, not T "SPREAD_MODEL", # removed entirely, not renamed ]