From a3314bc7a158d6dc93a4ff517bed64c0c2f54554 Mon Sep 17 00:00:00 2001
From: Cail Daley <cail.daley@cea.fr>
Date: Sat, 20 Jun 2026 03:46:07 +0200
Subject: [PATCH 01/16] refactor(src): dissolve basic.py into calibration +
 statistics
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The module named "basic" was a grab-bag: the 546-line `metacal` response
class (the heart of shear calibration) plus galaxy-selection masks and a
handful of cosmology-independent statistics helpers — none of which "basic"
described. Its symbols now live where they belong, and basic.py is deleted.

- `metacal` class + `mask_gal_size`/`mask_gal_SNR` (galaxy selection) →
  calibration.py, joining the m/c routines that already consumed a
  `gal_metacal` instance. One subsystem, one module.
- `jackknif_weighted_average2`, `corr_from_cov`, `chi2_and_pte`,
  `cov_from_one_covariance` → new statistics.py (a clean leaf: numpy/scipy
  only; calibration imports the jackknife from it).
- Every importer repointed (papers, scripts, the two scratch/guerrini
  import lines — path-only, his logic untouched); dead `from sp_validation
  import basic` lines removed from calibration.py and cat.py; `__all__` and
  the architecture docs updated.
- Tests split: metacal + mask pins → test_calibration.py, jackknife pin →
  test_statistics.py; test_basic.py removed.

All moved code is byte-identical to the original (md5-verified); value-drift
pins (metacal R-matrix rtol 1e-12) and the full suite pass in-container,
except the pre-existing galaxy/cs_util.size old-sandbox gap. No circular
imports. Verified by an adversarial multi-agent pass (byte-identity,
no-stale-refs, value-pins, no-cycles).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 CLAUDE.md                                     |   4 +-
 papers/catalog/hist_mag.py                    |   2 +-
 scratch/guerrini/compute_pte_cell.py          |   2 +-
 scratch/guerrini/one_covariance.py            |   2 +-
 .../calibrate_comprehensive_cat.py            |   2 +-
 scripts/calibration/extract_info.py           |   4 +-
 .../examples/demo_calibrate_minimal_cat.py    |   2 +-
 src/sp_validation/__init__.py                 |   3 +-
 src/sp_validation/basic.py                    | 714 ------------------
 src/sp_validation/calibration.py              | 580 +++++++++++++-
 src/sp_validation/cat.py                      |   1 -
 src/sp_validation/statistics.py               | 125 +++
 src/sp_validation/tests/test_basic.py         | 279 -------
 src/sp_validation/tests/test_calibration.py   | 228 +++++-
 src/sp_validation/tests/test_statistics.py    |  59 ++
 15 files changed, 993 insertions(+), 1014 deletions(-)
 delete mode 100644 src/sp_validation/basic.py
 create mode 100644 src/sp_validation/statistics.py
 delete mode 100644 src/sp_validation/tests/test_basic.py
 create mode 100644 src/sp_validation/tests/test_statistics.py

diff --git a/CLAUDE.md b/CLAUDE.md
index 2d8e5d8b..e985d6c2 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -38,8 +38,7 @@ is the container (full scientific stack pre-built). For a local dev environment:
 
 ### Core Package Structure (`src/sp_validation/`)
 - `b_modes.py`: Pure E-/B-mode decomposition (COSEBIS, pseudo-Cℓ)
-- `basic.py`: Basic utilities and mathematical functions
-- `calibration.py`: Shear calibration routines
+- `calibration.py`: Shear calibration — the `metacal` response class, galaxy selection masks (size/SNR), and m/c calibration routines
 - `cat.py`: Catalogue handling and manipulation
 - `cosmo_val.py`: Cosmology validation routines
 - `cosmology.py`: Cosmological calculations and theory
@@ -47,6 +46,7 @@ is the container (full scientific stack pre-built). For a local dev environment:
 - `io.py`: Input/output utilities
 - `plots.py`: Plotting functions
 - `rho_tau.py`: Rho and tau statistics calculations
+- `statistics.py`: Cosmology-independent statistics (jackknife resampling, χ²/PTE, covariance↔correlation, OneCovariance reshaping)
 - `survey.py`: Survey-level operations
 - `util.py`: General utilities
 
diff --git a/papers/catalog/hist_mag.py b/papers/catalog/hist_mag.py
index 7d202d7e..edc45fa1 100644
--- a/papers/catalog/hist_mag.py
+++ b/papers/catalog/hist_mag.py
@@ -26,7 +26,7 @@
 
 from sp_validation import run_joint_cat as sp_joint
 from sp_validation import util
-from sp_validation.basic import metacal
+from sp_validation.calibration import metacal
 from sp_validation import calibration
 import sp_validation.cat as cat
 
diff --git a/scratch/guerrini/compute_pte_cell.py b/scratch/guerrini/compute_pte_cell.py
index 63f7a52a..e543eef6 100644
--- a/scratch/guerrini/compute_pte_cell.py
+++ b/scratch/guerrini/compute_pte_cell.py
@@ -10,7 +10,7 @@
 import numpy as np
 import treecorr
 from sp_validation.cosmo_val import CosmologyValidation
-from sp_validation.basic import chi2_and_pte
+from sp_validation.statistics import chi2_and_pte
 import scipy.stats as stats
 import camb
 
diff --git a/scratch/guerrini/one_covariance.py b/scratch/guerrini/one_covariance.py
index 2b7f1657..378c505d 100644
--- a/scratch/guerrini/one_covariance.py
+++ b/scratch/guerrini/one_covariance.py
@@ -13,7 +13,7 @@
 
 import matplotlib.scale as mscale
 from sp_validation.rho_tau import SquareRootScale
-from sp_validation.basic import corr_from_cov, cov_from_one_covariance
+from sp_validation.statistics import corr_from_cov, cov_from_one_covariance
 
 mscale.register_scale(SquareRootScale)
 
diff --git a/scripts/calibration/calibrate_comprehensive_cat.py b/scripts/calibration/calibrate_comprehensive_cat.py
index 5f51cdc2..9596523e 100644
--- a/scripts/calibration/calibrate_comprehensive_cat.py
+++ b/scripts/calibration/calibrate_comprehensive_cat.py
@@ -25,7 +25,7 @@
 import sp_validation.cat as cat
 from sp_validation import calibration
 from sp_validation import run_joint_cat as sp_joint
-from sp_validation.basic import metacal
+from sp_validation.calibration import metacal
 
 # %%
 # Initialize calibration class instance
diff --git a/scripts/calibration/extract_info.py b/scripts/calibration/extract_info.py
index 372e3994..bad32a08 100644
--- a/scripts/calibration/extract_info.py
+++ b/scripts/calibration/extract_info.py
@@ -37,7 +37,7 @@
 
 #from sp_validation.cat import *
 from sp_validation import cat as spv_cat
-from sp_validation.basic import *
+from sp_validation.calibration import metacal
 from sp_validation.calibration import *
 from sp_validation.galaxy import *
 from sp_validation.io import *
@@ -319,7 +319,7 @@
 from lenspack.geometry.projections.gnom import radec2xy
 from uncertainties import ufloat
 
-from sp_validation.basic import *
+from sp_validation.calibration import metacal
 from sp_validation.calibration import *
 from sp_validation.plot_style import *
 from sp_validation.plots import *
diff --git a/scripts/examples/demo_calibrate_minimal_cat.py b/scripts/examples/demo_calibrate_minimal_cat.py
index 5bc3dd81..0d064585 100644
--- a/scripts/examples/demo_calibrate_minimal_cat.py
+++ b/scripts/examples/demo_calibrate_minimal_cat.py
@@ -25,7 +25,7 @@
 
 from sp_validation import run_joint_cat as sp_joint
 from sp_validation import util
-from sp_validation.basic import metacal
+from sp_validation.calibration import metacal
 import sp_validation.cat as cat
 
 # Initialize calibration class instance
diff --git a/src/sp_validation/__init__.py b/src/sp_validation/__init__.py
index 24bc7577..f1119d7c 100644
--- a/src/sp_validation/__init__.py
+++ b/src/sp_validation/__init__.py
@@ -3,12 +3,12 @@
 __all__ = [
     'util',
     'io',
-    'basic',
     'cosmo_val',
     'galaxy',
     'cosmology',
     'glass_mock',
     'calibration',
+    'statistics',
     'cat',
     'plot_style',
     'plots',
@@ -19,7 +19,6 @@
 # Explicit imports to avoid circular issues
 #from . import util
 #from . import io
-#from . import basic
 #from . import galaxy
 #from . import cosmology
 #from . import calibration
diff --git a/src/sp_validation/basic.py b/src/sp_validation/basic.py
deleted file mode 100644
index 1b5646c1..00000000
--- a/src/sp_validation/basic.py
+++ /dev/null
@@ -1,714 +0,0 @@
-"""BASIC.
-
-:Name: basic.py
-
-:Description: This file contains methods for calibration (metacalibration) and
-              basic validation of a weak-lensing shape catalogue
-              independent of cosmology.
-
-:Author: Axel Guinot, Martin Kilbinger
-
-"""
-
-import numpy as np
-from scipy import stats
-from scipy.interpolate import interp1d
-from scipy.spatial import cKDTree
-from scipy.special import gamma
-
-from tqdm import tqdm
-import operator as op
-import itertools as itools
-from joblib import Parallel, delayed
-
-from astropy.io import fits
-from astropy import units as u
-from astropy import coordinates as coords
-from astropy.wcs import WCS
-from astropy.table import Table
-
-
-class metacal:
-    """Metacal.
-
-    Metacalibration.
-
-    Parameters
-    ----------
-    data :
-        input galaxy catalogue
-    mask : array of bool
-        mask according to galaxy selection, e.g. spread_model
-    masking_type : string, optional, default='gal'
-        masking type, one in 'gal', 'gal_mom', 'star'
-    step : float, optional, default=0.01
-        step h in finite differences
-    prefix : string, optional, default='NGMIX'
-        to specify columns in input catalogue
-    snr_min : float, optional, default=10
-        signal-to-noise minimum
-    snr_max; float, optional, default=500
-        signal-to-noise maximum
-    rel_size_min : float, optional, default=0.5
-        relative size minimum
-    rel_size_max : float, optional, default=3.0
-        relative size maximum
-    size_corr_ell : bool, optional, default=True
-    global_R_weight : str, optional,
-        weight column name for global response matrix; default is ``None``
-        (unweighted mean)
-    sigma_eps : float, optional
-        ellipticity dispersion (one component) for computation
-        of weights; default is 0.34
-    col_2d : bool, optional
-        if `True` (default, ellipticity in one 2D column;
-        if `False`, ellipticity in two columns ELL_0, ELL_1
-    verbose : bool, optional, default=False
-        verbose output if True
-
-    """
-
-    def __init__(
-        self,
-        data,
-        mask,
-        masking_type='gal',
-        step=0.01,
-        prefix='NGMIX',
-        snr_min=10,
-        snr_max=500,
-        rel_size_min=0.5,
-        rel_size_max=3.0,
-        size_corr_ell=True,
-        global_R_weight=None,
-        sigma_eps=0.34,
-        col_2d=True,
-        verbose=False,
-    ):
-
-        self._masking_type = masking_type
-        self._step = step
-
-        # Cuts
-        self._snr_min = snr_min
-        self._snr_max = snr_max
-        self._rel_size_min = rel_size_min
-        self._rel_size_max = rel_size_max
-        self._size_corr_ell = size_corr_ell
-        if verbose:
-            print(
-                f'Metacal cuts: {snr_min}<snr<{snr_max}, '
-                + f'rel_size_min={rel_size_min}, '
-                + f'rel_size_max={rel_size_max}, '
-                + f'size_corr_ell={size_corr_ell}'
-            )
-            
-        self._global_R_weight = global_R_weight
-
-        self._sigma_eps = sigma_eps
-        self._col_2d = col_2d
-
-        self._verbose = verbose
-
-        self._prefix = prefix
-
-        self._read_data(data, mask)
-        self._compute_calibration()
-
-    def _read_data(self, data, mask):
-        """Read Data.
-
-        Read relevant data columns.
-        """
-        m1 = {}
-        p1 = {}
-        m2 = {}
-        p2 = {}
-        ns = {}
-
-        masked_data = data[mask]
-        if self._prefix == 'NGMIX':
-            m1, p1, m2, p2, ns = self._read_data_ngmix(
-                masked_data,
-                m1,
-                p1,
-                m2,
-                p2,
-                ns,
-            )
-        elif self._prefix == 'GALSIM':
-            m1, p1, m2, p2, ns = self._read_data_galsim(
-                masked_data,
-                m1,
-                p1,
-                m2,
-                p2,
-                ns,
-            )
-
-        print("FHP/MK hack using p1 PSF for ns in cuts")
-        indices = np.where(mask)[0]
-        col_noshear = f"{self._prefix}_Tpsf_NOSHEAR"
-        col_1p = f"{self._prefix}_Tpsf_1P"
-        new_psf = data[col_1p][indices]
-
-        # Overwriting incorrect no-shear PSF size to the one from 1p
-        ns["Tpsf"] = new_psf
-
-        self.m1 = m1
-        self.p1 = p1
-        self.m2 = m2
-        self.p2 = p2
-        self.ns = ns
-
-    def _read_data_ngmix(self, masked_data, m1, p1, m2, p2, ns):
-        """Read Data Ngmix.
-
-        Read data from ngmix catalogue.
-        
-        """
-
-        for name_shear, dict_tmp in zip(
-            ['1M', '1P', '2M', '2P', 'NOSHEAR'],
-            [m1, p1, m2, p2, ns]
-        ):
-
-            if self._verbose:
-                print('Extracting {}'.format(name_shear))
-
-            dict_tmp['flag'] = (
-                masked_data[f'{self._prefix}_FLAGS_{name_shear}']
-            )
-
-            if self._col_2d:
-                # Ellipticity in one 2D column
-                for comp in (0, 1):
-                    dict_tmp[f"g{comp+1}"] = (
-                        masked_data[f"{self._prefix}_ELL_{name_shear}"][:, comp]
-                    )
-            else:
-                # Ellipcitiy in two different columns
-                for comp in (0, 1):
-                    dict_tmp[f"g{comp+1}"] = (
-                        masked_data[f"{self._prefix}_ELL_{name_shear}_{comp}"]
-                    )
-
-            for key in ("flux", "flux_err", "T", "T_err"):
-                dict_tmp[key] = (
-                    masked_data[f'{self._prefix}_{key.upper()}_{name_shear}']
-                )
-
-            dict_tmp['Tpsf'] = (
-                masked_data[f'{self._prefix}_Tpsf_{name_shear}']
-            )
-
-        ns["C11"], ns["C22"], ns["w"] = self.get_variance_ivweights(
-            masked_data,
-            self._sigma_eps,
-            self._prefix,
-            mask=None,
-            col_2d=self._col_2d,
-        )
-
-        self._n_input = len(masked_data)
-        self._n_after_gal_mask = len(dict_tmp['flag'])
-        if self._verbose:
-            print(f"Number of objects on metacal input = {self._n_input}")
-            print(
-                "Number of objects after galaxy selection masking ="
-                + f" {self._n_after_gal_mask}"
-            )
-        
-        return m1, p1, m2, p2, ns
-
-    @staticmethod
-    def get_variance_ivweights(data, sigma_eps, prefix="NGMIX", mask=None, col_2d=True):
-        """Get Variance IVWEIGHTS.
-
-        Compute variance and inverse-variance weights.
-
-        Parameters
-        ----------
-        data : numpy.ndarray
-            input data
-        sigma_eps : float
-            ellipticity dispersion
-        prefix : str, optional
-            shape measurement identifier; default is "NGMIX"
-        mask : list, optional
-            indicates valid objects with ``True`` values; default is ``None`` = use all objects
-            type has to be bool
-        col_2d : bool, optional
-            if ``True`` (default), ellipticity is given in single 2D column;
-            if ``False``, ellipticity is expected in two 1D columns.
-
-        Returns
-        -------
-        float
-            variance first component
-        float
-            variance second component
-        float
-            weight
-
-        """
-        if mask is not None:
-            if col_2d:
-                C11 = data[f"{prefix}_ELL_ERR_NOSHEAR"][:, 0][mask]
-                C22 = data[f"{prefix}_ELL_ERR_NOSHEAR"][:, 1][mask]
-            else:
-                C11 = data[f"{prefix}_ELL_ERR_NOSHEAR_0"][mask]
-                C22 = data[f"{prefix}_ELL_ERR_NOSHEAR_1"][mask]
-        else:
-            if col_2d:
-                C11 = data[f"{prefix}_ELL_ERR_NOSHEAR"][:, 0]
-                C22 = data[f"{prefix}_ELL_ERR_NOSHEAR"][:, 1]
-            else:
-                C11 = data[f"{prefix}_ELL_ERR_NOSHEAR_0"]
-                C22 = data[f"{prefix}_ELL_ERR_NOSHEAR_1"]
-
-        iv_w = 1 / (2 * sigma_eps ** 2 + C11 + C22)
-
-        return C11, C22, iv_w
-
-    def _read_data_galsim(self, masked_data, m1, p1, m2, p2, ns):
-        """Read Data Galsim.
-
-        Read data from galsim catalogue.
-
-        """
-        prefix_mom = 'GALSIM_GAL'
-
-        for name_shear, dict_tmp in zip(
-            ['1m', '1p', '2m', '2p', 'noshear'],
-            [m1, p1, m2, p2, ns]
-        ):
-
-            if self._verbose:
-                print('Extracting {}'.format(name_shear))
-
-            dict_tmp['flag'] = (
-                masked_data[f'{self._prefix}_FLAGS_{name_shear.upper()}']
-            )
-            dict_tmp['g1'] = masked_data[
-                f'{prefix_mom}_ELL_UNCORR_{name_shear.upper()}'
-            ][:, 0]
-            dict_tmp['g2'] = masked_data[
-                f'{prefix_mom}_ELL_UNCORR_{name_shear.upper()}'
-            ][:, 1]
-
-            dict_tmp['T'] = (
-                masked_data[f'{prefix_mom}_SIGMA_{name_shear.upper()}']
-            )
-            dict_tmp['Tpsf'] = (
-                masked_data[f'{self._prefix}_PSF_SIGMA_{name_shear.upper()}']
-            )
-
-        self.snr_sextractor = masked_data['SNR_WIN']
-        ns['C11'] = masked_data[f'{prefix_mom}_ELL_ERR_NOSHEAR'][:, 0]
-        ns['C22'] = masked_data[f'{prefix_mom}_ELL_ERR_NOSHEAR'][:, 1]
-        ns['w'] = (
-            1. / (2 * self._sigma_eps ** 2 + dict_tmp['C11'] + dict_tmp['C22'])
-        )
-
-        return m1, p1, m2, p2, ns
-
-    def _compute_calibration(self):
-        """Compute Calibration.
-
-        Perform masking and compute calibration.
-        """
-        if self._masking_type == 'gal':
-            self._masking_gal()
-        elif self._masking_type == 'galmom':
-            self._masking_gal_mom()
-        elif self._masking_type == 'star':
-            self._masking_star()
-        else:
-            raise ValueError(f'Invalid masking type \'{self._masking_type}\'')
-
-        self._shear_response()
-        self._selection_response()
-        self._total_response()
-        # self._shear_response_std(stat_operator=lambda x:
-        # jackknif_weighted_average(x, np.ones_like(x)))
-
-    def add_cuts(self, snr_min=10, snr_max=500, rel_size_min=0.5):
-        """Add Cuts.
-
-        Apply additional cuts to metacal galaxy catalogue.
-        """
-        if snr_min < self._snr_min or \
-           snr_max > self._snr_max or \
-           rel_size_min < self._rel_size_min:
-            print(
-                'At least on cut is less stringend than existing one, '
-                + 'skipping...'
-            )
-            return
-
-        self._snr_min = snr_min
-        self._snr_max = snr_max
-        self._rel_size_min = rel_size_min
-        self._rel_size_max = rel_size_max
-        if self._verbose:
-            print(
-                f'Metacal new cuts: {snr_min}<snr<{snr_max}, '
-                + f'rel_size_min={rel_size_min}'
-            )
-
-        self._compute_calibration()
-
-    def _masking_gal(self):
-        """Masking Gal.
-
-        Mask metacal catalogue, i.e. apply cuts.
-        """
-        self.mask_dict = {}
-        for data, name in zip(
-            [self.ns, self.m1, self.p1, self.m2, self.p2],
-            ['ns', 'm1', 'p1', 'm2', 'p2']
-        ):
-            Tr_tmp = data['T']
-            if self._size_corr_ell:
-                Tr_tmp *= (
-                    (1 - (data['g1'] ** 2 + data['g2'] ** 2))
-                    / (1 + (data['g1'] ** 2 + data['g2'] ** 2))
-                )
-            if hasattr(self, 'snr_sextractor'):
-                snr_flux = self.snr_sextractor
-            else:
-                snr_flux = data['flux'] / data['flux_err']
-
-            Tpsf = data['Tpsf']
-
-            mask_tmp = (
-                (data['flag'] == 0)
-                & (Tr_tmp / Tpsf > self._rel_size_min)
-                & (Tr_tmp / Tpsf < self._rel_size_max)
-                & (snr_flux > self._snr_min)
-                & (snr_flux < self._snr_max)
-            )
-
-            # Take care of rotated version
-            ind_masked = np.where(mask_tmp == True)[0]
-
-            self.mask_dict[name] = ind_masked
-
-    def _masking_gal_mom(self):
-        """Add docstring.
-
-        ...
-
-        """
-        self.mask_dict = {}
-        for data, name in zip(
-            [self.ns, self.m1, self.p1, self.m2, self.p2],
-            ['ns', 'm1', 'p1', 'm2', 'p2']
-        ):
-            Tr_tmp = data['T']
-            if self._size_corr_ell:
-                Tr_tmp *= (
-                    (1 - (data['g1'] ** 2 + data['g2'] ** 2))
-                    / (1 + (data['g1'] ** 2 + data['g2'] ** 2))
-                )
-            snr_flux = data['flux'] / data['flux_err']
-
-            mask_tmp = (
-                (data['flag'] == 0)
-                & (1 - data['Tpsf'] / data['T'] > self._rel_size_min)
-                & (data['s2n'] > self._snr_min)
-                & (data['s2n'] < self._snr_max)
-                & (data['g1'] != -10)
-                & (data['g1'] != 0)
-            )
-
-            # Take care of rotated version
-            ind_masked = np.where(mask_tmp == True)[0]
-
-            self.mask_dict[name] = ind_masked
-
-    def _masking_star(self):
-        """Add docstring.
-
-        ...
-
-        """
-        self.mask_dict = {}
-        for data, name in zip(
-            [self.ns, self.m1, self.p1, self.m2, self.p2],
-            ['ns', 'm1', 'p1', 'm2', 'p2']
-        ):
-            if hasattr(self, 'snr_sextractor'):
-                snr_flux = self.snr_sextractor
-            else:
-                snr_flux = data['flux'] / data['flux_err']
-            mask_tmp = (data['flag'] == 0) & (snr_flux > 10) & (snr_flux < 500)
-
-            # Take care of rotated version
-            ind_masked = np.where(mask_tmp == True)[0]
-
-            self.mask_dict[name] = ind_masked
-
-    def _shear_response(self):
-        """Shear Response.
-
-        Compute shear response matrix
-        """
-        sign = 1
-        if self._prefix == 'GALSIM':
-            sign = -1
-
-        ma = self.mask_dict['ns']
-        h2 = 2 * self._step
-
-        self.R11 = (self.p1['g1'][ma] - self.m1['g1'][ma]) / h2
-        self.R22 = sign * (self.p2['g2'][ma] - self.m2['g2'][ma]) / h2
-        self.R12 = (self.p2['g1'][ma] - self.m2['g1'][ma]) / h2
-        self.R21 = (self.p1['g2'][ma] - self.m1['g2'][ma]) / h2
-
-        self.R_shear = np.array([[self.R11, self.R12], [self.R21, self.R22]])
-
-    def _shear_response_std(
-        self,
-        stat_operator=lambda x: jackknif_weighted_average2(x, np.ones_like(x))
-    ):
-        """Shear Response Std.
-
-        Standard deviation of shear response
-        """
-        ma = self.mask_dict['ns']
-        h2 = 2 * self._step
-
-        if len(self.ns['g1'][ma]) == 0:
-            self.R_shear_std = np.array([[np.nan, np.nan], [np.nan, np.nan]])
-        else:
-            self.R11_stds = stat_operator(
-                (self.p1['g1'][ma] - self.m1['g1'][ma]) / h2
-            )[1]
-            self.R22_stds = stat_operator(
-                (self.p2['g2'][ma] - self.m2['g2'][ma]) / h2
-            )[1]
-            self.R12_stds = stat_operator(
-                (self.p2['g1'][ma] - self.m2['g1'][ma]) / h2
-            )[1]
-            self.R21_stds = stat_operator(
-                (self.p1['g2'][ma] - self.m1['g2'][ma]) / h2
-            )[1]
-
-            self.R_shear_std = np.array([
-                [self.R11_stds, self.R12_stds],
-                [self.R21_stds, self.R22_stds]
-            ])
-
-    def _selection_response(self):
-        """Add docstring.
-
-        ...
-
-        """
-        sign = 1
-        if self._prefix == 'GALSIM':
-            sign = -1
-
-        ma_p1 = self.mask_dict['p1']
-        ma_m1 = self.mask_dict['m1']
-        ma_p2 = self.mask_dict['p2']
-        ma_m2 = self.mask_dict['m2']
-        h2 = 2 * self._step
-
-        self.R11_s = (
-            np.mean(self.ns['g1'][ma_p1])
-            - np.mean(self.ns['g1'][ma_m1])
-        ) / h2
-        self.R22_s = sign * (
-            np.mean(self.ns['g2'][ma_p2])
-            - np.mean(self.ns['g2'][ma_m2])
-        ) / h2
-        self.R12_s = (
-            np.mean(self.ns['g1'][ma_p2])
-            - np.mean(self.ns['g1'][ma_m2])
-        ) / h2
-        self.R21_s = (
-            np.mean(self.ns['g2'][ma_p1])
-            - np.mean(self.ns['g2'][ma_m1])
-        ) / h2
-
-        self.R_selection = np.array([
-            [self.R11_s, self.R12_s],
-            [self.R21_s, self.R22_s]
-        ])
-
-    def _total_response(self):
-        """Add docstring.
-
-        ...
-
-        """
-        if self._global_R_weight is None or self._global_R_weight == "None":
-            print("Computing unweighted response")
-            self.R_shear_global = np.mean(self.R_shear, axis=2)
-        else:
-            print("Computing response weighted by", self._global_R_weight)
-            # Get weights of masked no-shear objects
-            weights = self.ns[self._global_R_weight][self.mask_dict['ns']]
-            self.R_shear_global = np.average(self.R_shear, axis=2, weights=weights)
-
-        self.R = self.R_shear_global + self.R_selection
-
-    def _return():
-        """Add docstring.
-
-        ...
-
-        """
-        return (
-            self.m1,
-            self.p1,
-            self.p1,
-            self.p2,
-            self.ns,
-            self.R,
-            self.R_selection_std,
-            self.R_shear_std
-        )
-
-
-def jackknif_weighted_average2(
-    data,
-    weights,
-    remove_size=0.1,
-    n_realization=100,
-):
-    """Add docstring.
-
-    ...
-
-    """
-    samp_size = len(data)
-    keep_size_pc = 1 - remove_size
-
-    if keep_size_pc < 0:
-        raise ValueError('remove size should be in [0, 1]')
-
-    subsamp_size = int(samp_size * keep_size_pc)
-
-    all_ind = np.arange(samp_size)
-
-    all_est = []
-    for i in range(n_realization):
-        sub_data_ind = np.random.choice(all_ind, subsamp_size)
-
-        if (sum(data[sub_data_ind]) == 0):
-            all_est.append(np.nan)
-        else:
-            all_est.append(
-                np.average(data[sub_data_ind], weights=weights[sub_data_ind])
-            )
-
-    all_est = np.array(all_est)
-
-    return np.mean(all_est), np.std(all_est)
-
-
-def mask_gal_size(T, Tpsf, rel_size_min, rel_size_max, size_corr_ell=False, g1=None, g2=None):
-
-    Tr_tmp = T
-    if size_corr_ell:
-        Tr_tmp *= (
-            (1 - g1 **2 + g2 ** 2) / (1 + g1 ** 2 + g2 **2)
-        )
-
-    mask = (
-        (Tr_tmp / Tpsf > rel_size_min)
-        & (Tr_tmp / Tpsf < rel_size_max)
-    )
-
-    return mask
-
-
-def mask_gal_SNR(SNR, snr_min, snr_max):
-
-    mask = (
-        (SNR > snr_min)
-        & (SNR < snr_max)
-    )
-
-    return mask
-
-
-def corr_from_cov(cov):
-    """Correlation matrix from a covariance matrix.
-
-    Parameters
-    ----------
-    cov : numpy.ndarray
-        Covariance matrix.
-
-    Returns
-    -------
-    numpy.ndarray
-        Correlation matrix.
-
-    """
-    std_dev = np.sqrt(np.diag(cov))
-    return cov / np.outer(std_dev, std_dev)
-
-
-def chi2_and_pte(data_vector, cov, verbose=False):
-    """Chi-squared, reduced chi-squared and PTE for a data vector.
-
-    The data vector is assumed to be zero-mean under the null hypothesis,
-    so ``chi2 = d^T C^-1 d``.
-
-    Parameters
-    ----------
-    data_vector : numpy.ndarray
-        Data vector.
-    cov : numpy.ndarray
-        Covariance matrix of the data vector.
-    verbose : bool, optional
-        If ``True``, print the statistics; default is ``False``.
-
-    Returns
-    -------
-    tuple
-        ``(chi2, reduced_chi2, pte)``.
-
-    """
-    chi2 = data_vector @ np.linalg.inv(cov) @ data_vector
-    dof = len(data_vector)
-    reduced_chi2 = chi2 / dof
-    pte = 1 - stats.chi2.cdf(chi2, dof)
-    if verbose:
-        print(f"Chi2: {chi2:.4f}")
-        print(f"Reduced Chi2: {reduced_chi2:.4f}")
-        print(f"PTE: {pte:.4f}")
-    return chi2, reduced_chi2, pte
-
-
-def cov_from_one_covariance(cov_one_cov, gaussian=True):
-    """Reshape a OneCovariance ``covariance_list`` table into a matrix.
-
-    Parameters
-    ----------
-    cov_one_cov : numpy.ndarray
-        Flat OneCovariance output (e.g. from ``covariance_list_..._Cell.dat``),
-        with one row per ``(i, j)`` element pair.
-    gaussian : bool, optional
-        If ``True`` use the Gaussian-only column, otherwise the
-        Gaussian+non-Gaussian column; default is ``True``.
-
-    Returns
-    -------
-    numpy.ndarray
-        Square covariance matrix.
-
-    """
-    n_bins = np.sqrt(cov_one_cov.shape[0]).astype(int)
-    cov = np.zeros((n_bins, n_bins))
-    index_value = 10 if gaussian else 9
-    for i in range(n_bins):
-        for j in range(n_bins):
-            cov[i, j] = cov_one_cov[i * n_bins + j, index_value]
-    return cov
diff --git a/src/sp_validation/calibration.py b/src/sp_validation/calibration.py
index 0272822a..75e9390f 100644
--- a/src/sp_validation/calibration.py
+++ b/src/sp_validation/calibration.py
@@ -17,7 +17,7 @@
 
 from sp_validation import util
 from sp_validation import io
-from sp_validation import basic
+from sp_validation.statistics import jackknif_weighted_average2
 from sp_validation.survey import get_footprint
 from sp_validation import cat as sp_cat
 
@@ -664,3 +664,581 @@ def get_calibrate_no_leakage_e_from_cat(path_cat_gal, weight_type='des', verbose
     e2_noleak = e2 - cat_gal['alpha_2']*cat_gal['e2_PSF']
 
     return e1_noleak, e2_noleak
+
+
+class metacal:
+    """Metacal.
+
+    Metacalibration.
+
+    Parameters
+    ----------
+    data :
+        input galaxy catalogue
+    mask : array of bool
+        mask according to galaxy selection, e.g. spread_model
+    masking_type : string, optional, default='gal'
+        masking type, one in 'gal', 'gal_mom', 'star'
+    step : float, optional, default=0.01
+        step h in finite differences
+    prefix : string, optional, default='NGMIX'
+        to specify columns in input catalogue
+    snr_min : float, optional, default=10
+        signal-to-noise minimum
+    snr_max; float, optional, default=500
+        signal-to-noise maximum
+    rel_size_min : float, optional, default=0.5
+        relative size minimum
+    rel_size_max : float, optional, default=3.0
+        relative size maximum
+    size_corr_ell : bool, optional, default=True
+    global_R_weight : str, optional,
+        weight column name for global response matrix; default is ``None``
+        (unweighted mean)
+    sigma_eps : float, optional
+        ellipticity dispersion (one component) for computation
+        of weights; default is 0.34
+    col_2d : bool, optional
+        if `True` (default, ellipticity in one 2D column;
+        if `False`, ellipticity in two columns ELL_0, ELL_1
+    verbose : bool, optional, default=False
+        verbose output if True
+
+    """
+
+    def __init__(
+        self,
+        data,
+        mask,
+        masking_type='gal',
+        step=0.01,
+        prefix='NGMIX',
+        snr_min=10,
+        snr_max=500,
+        rel_size_min=0.5,
+        rel_size_max=3.0,
+        size_corr_ell=True,
+        global_R_weight=None,
+        sigma_eps=0.34,
+        col_2d=True,
+        verbose=False,
+    ):
+
+        self._masking_type = masking_type
+        self._step = step
+
+        # Cuts
+        self._snr_min = snr_min
+        self._snr_max = snr_max
+        self._rel_size_min = rel_size_min
+        self._rel_size_max = rel_size_max
+        self._size_corr_ell = size_corr_ell
+        if verbose:
+            print(
+                f'Metacal cuts: {snr_min}<snr<{snr_max}, '
+                + f'rel_size_min={rel_size_min}, '
+                + f'rel_size_max={rel_size_max}, '
+                + f'size_corr_ell={size_corr_ell}'
+            )
+            
+        self._global_R_weight = global_R_weight
+
+        self._sigma_eps = sigma_eps
+        self._col_2d = col_2d
+
+        self._verbose = verbose
+
+        self._prefix = prefix
+
+        self._read_data(data, mask)
+        self._compute_calibration()
+
+    def _read_data(self, data, mask):
+        """Read Data.
+
+        Read relevant data columns.
+        """
+        m1 = {}
+        p1 = {}
+        m2 = {}
+        p2 = {}
+        ns = {}
+
+        masked_data = data[mask]
+        if self._prefix == 'NGMIX':
+            m1, p1, m2, p2, ns = self._read_data_ngmix(
+                masked_data,
+                m1,
+                p1,
+                m2,
+                p2,
+                ns,
+            )
+        elif self._prefix == 'GALSIM':
+            m1, p1, m2, p2, ns = self._read_data_galsim(
+                masked_data,
+                m1,
+                p1,
+                m2,
+                p2,
+                ns,
+            )
+
+        print("FHP/MK hack using p1 PSF for ns in cuts")
+        indices = np.where(mask)[0]
+        col_noshear = f"{self._prefix}_Tpsf_NOSHEAR"
+        col_1p = f"{self._prefix}_Tpsf_1P"
+        new_psf = data[col_1p][indices]
+
+        # Overwriting incorrect no-shear PSF size to the one from 1p
+        ns["Tpsf"] = new_psf
+
+        self.m1 = m1
+        self.p1 = p1
+        self.m2 = m2
+        self.p2 = p2
+        self.ns = ns
+
+    def _read_data_ngmix(self, masked_data, m1, p1, m2, p2, ns):
+        """Read Data Ngmix.
+
+        Read data from ngmix catalogue.
+        
+        """
+
+        for name_shear, dict_tmp in zip(
+            ['1M', '1P', '2M', '2P', 'NOSHEAR'],
+            [m1, p1, m2, p2, ns]
+        ):
+
+            if self._verbose:
+                print('Extracting {}'.format(name_shear))
+
+            dict_tmp['flag'] = (
+                masked_data[f'{self._prefix}_FLAGS_{name_shear}']
+            )
+
+            if self._col_2d:
+                # Ellipticity in one 2D column
+                for comp in (0, 1):
+                    dict_tmp[f"g{comp+1}"] = (
+                        masked_data[f"{self._prefix}_ELL_{name_shear}"][:, comp]
+                    )
+            else:
+                # Ellipcitiy in two different columns
+                for comp in (0, 1):
+                    dict_tmp[f"g{comp+1}"] = (
+                        masked_data[f"{self._prefix}_ELL_{name_shear}_{comp}"]
+                    )
+
+            for key in ("flux", "flux_err", "T", "T_err"):
+                dict_tmp[key] = (
+                    masked_data[f'{self._prefix}_{key.upper()}_{name_shear}']
+                )
+
+            dict_tmp['Tpsf'] = (
+                masked_data[f'{self._prefix}_Tpsf_{name_shear}']
+            )
+
+        ns["C11"], ns["C22"], ns["w"] = self.get_variance_ivweights(
+            masked_data,
+            self._sigma_eps,
+            self._prefix,
+            mask=None,
+            col_2d=self._col_2d,
+        )
+
+        self._n_input = len(masked_data)
+        self._n_after_gal_mask = len(dict_tmp['flag'])
+        if self._verbose:
+            print(f"Number of objects on metacal input = {self._n_input}")
+            print(
+                "Number of objects after galaxy selection masking ="
+                + f" {self._n_after_gal_mask}"
+            )
+        
+        return m1, p1, m2, p2, ns
+
+    @staticmethod
+    def get_variance_ivweights(data, sigma_eps, prefix="NGMIX", mask=None, col_2d=True):
+        """Get Variance IVWEIGHTS.
+
+        Compute variance and inverse-variance weights.
+
+        Parameters
+        ----------
+        data : numpy.ndarray
+            input data
+        sigma_eps : float
+            ellipticity dispersion
+        prefix : str, optional
+            shape measurement identifier; default is "NGMIX"
+        mask : list, optional
+            indicates valid objects with ``True`` values; default is ``None`` = use all objects
+            type has to be bool
+        col_2d : bool, optional
+            if ``True`` (default), ellipticity is given in single 2D column;
+            if ``False``, ellipticity is expected in two 1D columns.
+
+        Returns
+        -------
+        float
+            variance first component
+        float
+            variance second component
+        float
+            weight
+
+        """
+        if mask is not None:
+            if col_2d:
+                C11 = data[f"{prefix}_ELL_ERR_NOSHEAR"][:, 0][mask]
+                C22 = data[f"{prefix}_ELL_ERR_NOSHEAR"][:, 1][mask]
+            else:
+                C11 = data[f"{prefix}_ELL_ERR_NOSHEAR_0"][mask]
+                C22 = data[f"{prefix}_ELL_ERR_NOSHEAR_1"][mask]
+        else:
+            if col_2d:
+                C11 = data[f"{prefix}_ELL_ERR_NOSHEAR"][:, 0]
+                C22 = data[f"{prefix}_ELL_ERR_NOSHEAR"][:, 1]
+            else:
+                C11 = data[f"{prefix}_ELL_ERR_NOSHEAR_0"]
+                C22 = data[f"{prefix}_ELL_ERR_NOSHEAR_1"]
+
+        iv_w = 1 / (2 * sigma_eps ** 2 + C11 + C22)
+
+        return C11, C22, iv_w
+
+    def _read_data_galsim(self, masked_data, m1, p1, m2, p2, ns):
+        """Read Data Galsim.
+
+        Read data from galsim catalogue.
+
+        """
+        prefix_mom = 'GALSIM_GAL'
+
+        for name_shear, dict_tmp in zip(
+            ['1m', '1p', '2m', '2p', 'noshear'],
+            [m1, p1, m2, p2, ns]
+        ):
+
+            if self._verbose:
+                print('Extracting {}'.format(name_shear))
+
+            dict_tmp['flag'] = (
+                masked_data[f'{self._prefix}_FLAGS_{name_shear.upper()}']
+            )
+            dict_tmp['g1'] = masked_data[
+                f'{prefix_mom}_ELL_UNCORR_{name_shear.upper()}'
+            ][:, 0]
+            dict_tmp['g2'] = masked_data[
+                f'{prefix_mom}_ELL_UNCORR_{name_shear.upper()}'
+            ][:, 1]
+
+            dict_tmp['T'] = (
+                masked_data[f'{prefix_mom}_SIGMA_{name_shear.upper()}']
+            )
+            dict_tmp['Tpsf'] = (
+                masked_data[f'{self._prefix}_PSF_SIGMA_{name_shear.upper()}']
+            )
+
+        self.snr_sextractor = masked_data['SNR_WIN']
+        ns['C11'] = masked_data[f'{prefix_mom}_ELL_ERR_NOSHEAR'][:, 0]
+        ns['C22'] = masked_data[f'{prefix_mom}_ELL_ERR_NOSHEAR'][:, 1]
+        ns['w'] = (
+            1. / (2 * self._sigma_eps ** 2 + dict_tmp['C11'] + dict_tmp['C22'])
+        )
+
+        return m1, p1, m2, p2, ns
+
+    def _compute_calibration(self):
+        """Compute Calibration.
+
+        Perform masking and compute calibration.
+        """
+        if self._masking_type == 'gal':
+            self._masking_gal()
+        elif self._masking_type == 'galmom':
+            self._masking_gal_mom()
+        elif self._masking_type == 'star':
+            self._masking_star()
+        else:
+            raise ValueError(f'Invalid masking type \'{self._masking_type}\'')
+
+        self._shear_response()
+        self._selection_response()
+        self._total_response()
+        # self._shear_response_std(stat_operator=lambda x:
+        # jackknif_weighted_average(x, np.ones_like(x)))
+
+    def add_cuts(self, snr_min=10, snr_max=500, rel_size_min=0.5):
+        """Add Cuts.
+
+        Apply additional cuts to metacal galaxy catalogue.
+        """
+        if snr_min < self._snr_min or \
+           snr_max > self._snr_max or \
+           rel_size_min < self._rel_size_min:
+            print(
+                'At least on cut is less stringend than existing one, '
+                + 'skipping...'
+            )
+            return
+
+        self._snr_min = snr_min
+        self._snr_max = snr_max
+        self._rel_size_min = rel_size_min
+        self._rel_size_max = rel_size_max
+        if self._verbose:
+            print(
+                f'Metacal new cuts: {snr_min}<snr<{snr_max}, '
+                + f'rel_size_min={rel_size_min}'
+            )
+
+        self._compute_calibration()
+
+    def _masking_gal(self):
+        """Masking Gal.
+
+        Mask metacal catalogue, i.e. apply cuts.
+        """
+        self.mask_dict = {}
+        for data, name in zip(
+            [self.ns, self.m1, self.p1, self.m2, self.p2],
+            ['ns', 'm1', 'p1', 'm2', 'p2']
+        ):
+            Tr_tmp = data['T']
+            if self._size_corr_ell:
+                Tr_tmp *= (
+                    (1 - (data['g1'] ** 2 + data['g2'] ** 2))
+                    / (1 + (data['g1'] ** 2 + data['g2'] ** 2))
+                )
+            if hasattr(self, 'snr_sextractor'):
+                snr_flux = self.snr_sextractor
+            else:
+                snr_flux = data['flux'] / data['flux_err']
+
+            Tpsf = data['Tpsf']
+
+            mask_tmp = (
+                (data['flag'] == 0)
+                & (Tr_tmp / Tpsf > self._rel_size_min)
+                & (Tr_tmp / Tpsf < self._rel_size_max)
+                & (snr_flux > self._snr_min)
+                & (snr_flux < self._snr_max)
+            )
+
+            # Take care of rotated version
+            ind_masked = np.where(mask_tmp == True)[0]
+
+            self.mask_dict[name] = ind_masked
+
+    def _masking_gal_mom(self):
+        """Add docstring.
+
+        ...
+
+        """
+        self.mask_dict = {}
+        for data, name in zip(
+            [self.ns, self.m1, self.p1, self.m2, self.p2],
+            ['ns', 'm1', 'p1', 'm2', 'p2']
+        ):
+            Tr_tmp = data['T']
+            if self._size_corr_ell:
+                Tr_tmp *= (
+                    (1 - (data['g1'] ** 2 + data['g2'] ** 2))
+                    / (1 + (data['g1'] ** 2 + data['g2'] ** 2))
+                )
+            snr_flux = data['flux'] / data['flux_err']
+
+            mask_tmp = (
+                (data['flag'] == 0)
+                & (1 - data['Tpsf'] / data['T'] > self._rel_size_min)
+                & (data['s2n'] > self._snr_min)
+                & (data['s2n'] < self._snr_max)
+                & (data['g1'] != -10)
+                & (data['g1'] != 0)
+            )
+
+            # Take care of rotated version
+            ind_masked = np.where(mask_tmp == True)[0]
+
+            self.mask_dict[name] = ind_masked
+
+    def _masking_star(self):
+        """Add docstring.
+
+        ...
+
+        """
+        self.mask_dict = {}
+        for data, name in zip(
+            [self.ns, self.m1, self.p1, self.m2, self.p2],
+            ['ns', 'm1', 'p1', 'm2', 'p2']
+        ):
+            if hasattr(self, 'snr_sextractor'):
+                snr_flux = self.snr_sextractor
+            else:
+                snr_flux = data['flux'] / data['flux_err']
+            mask_tmp = (data['flag'] == 0) & (snr_flux > 10) & (snr_flux < 500)
+
+            # Take care of rotated version
+            ind_masked = np.where(mask_tmp == True)[0]
+
+            self.mask_dict[name] = ind_masked
+
+    def _shear_response(self):
+        """Shear Response.
+
+        Compute shear response matrix
+        """
+        sign = 1
+        if self._prefix == 'GALSIM':
+            sign = -1
+
+        ma = self.mask_dict['ns']
+        h2 = 2 * self._step
+
+        self.R11 = (self.p1['g1'][ma] - self.m1['g1'][ma]) / h2
+        self.R22 = sign * (self.p2['g2'][ma] - self.m2['g2'][ma]) / h2
+        self.R12 = (self.p2['g1'][ma] - self.m2['g1'][ma]) / h2
+        self.R21 = (self.p1['g2'][ma] - self.m1['g2'][ma]) / h2
+
+        self.R_shear = np.array([[self.R11, self.R12], [self.R21, self.R22]])
+
+    def _shear_response_std(
+        self,
+        stat_operator=lambda x: jackknif_weighted_average2(x, np.ones_like(x))
+    ):
+        """Shear Response Std.
+
+        Standard deviation of shear response
+        """
+        ma = self.mask_dict['ns']
+        h2 = 2 * self._step
+
+        if len(self.ns['g1'][ma]) == 0:
+            self.R_shear_std = np.array([[np.nan, np.nan], [np.nan, np.nan]])
+        else:
+            self.R11_stds = stat_operator(
+                (self.p1['g1'][ma] - self.m1['g1'][ma]) / h2
+            )[1]
+            self.R22_stds = stat_operator(
+                (self.p2['g2'][ma] - self.m2['g2'][ma]) / h2
+            )[1]
+            self.R12_stds = stat_operator(
+                (self.p2['g1'][ma] - self.m2['g1'][ma]) / h2
+            )[1]
+            self.R21_stds = stat_operator(
+                (self.p1['g2'][ma] - self.m1['g2'][ma]) / h2
+            )[1]
+
+            self.R_shear_std = np.array([
+                [self.R11_stds, self.R12_stds],
+                [self.R21_stds, self.R22_stds]
+            ])
+
+    def _selection_response(self):
+        """Add docstring.
+
+        ...
+
+        """
+        sign = 1
+        if self._prefix == 'GALSIM':
+            sign = -1
+
+        ma_p1 = self.mask_dict['p1']
+        ma_m1 = self.mask_dict['m1']
+        ma_p2 = self.mask_dict['p2']
+        ma_m2 = self.mask_dict['m2']
+        h2 = 2 * self._step
+
+        self.R11_s = (
+            np.mean(self.ns['g1'][ma_p1])
+            - np.mean(self.ns['g1'][ma_m1])
+        ) / h2
+        self.R22_s = sign * (
+            np.mean(self.ns['g2'][ma_p2])
+            - np.mean(self.ns['g2'][ma_m2])
+        ) / h2
+        self.R12_s = (
+            np.mean(self.ns['g1'][ma_p2])
+            - np.mean(self.ns['g1'][ma_m2])
+        ) / h2
+        self.R21_s = (
+            np.mean(self.ns['g2'][ma_p1])
+            - np.mean(self.ns['g2'][ma_m1])
+        ) / h2
+
+        self.R_selection = np.array([
+            [self.R11_s, self.R12_s],
+            [self.R21_s, self.R22_s]
+        ])
+
+    def _total_response(self):
+        """Add docstring.
+
+        ...
+
+        """
+        if self._global_R_weight is None or self._global_R_weight == "None":
+            print("Computing unweighted response")
+            self.R_shear_global = np.mean(self.R_shear, axis=2)
+        else:
+            print("Computing response weighted by", self._global_R_weight)
+            # Get weights of masked no-shear objects
+            weights = self.ns[self._global_R_weight][self.mask_dict['ns']]
+            self.R_shear_global = np.average(self.R_shear, axis=2, weights=weights)
+
+        self.R = self.R_shear_global + self.R_selection
+
+    def _return():
+        """Add docstring.
+
+        ...
+
+        """
+        return (
+            self.m1,
+            self.p1,
+            self.p1,
+            self.p2,
+            self.ns,
+            self.R,
+            self.R_selection_std,
+            self.R_shear_std
+        )
+
+
+
+
+def mask_gal_size(T, Tpsf, rel_size_min, rel_size_max, size_corr_ell=False, g1=None, g2=None):
+
+    Tr_tmp = T
+    if size_corr_ell:
+        Tr_tmp *= (
+            (1 - g1 **2 + g2 ** 2) / (1 + g1 ** 2 + g2 **2)
+        )
+
+    mask = (
+        (Tr_tmp / Tpsf > rel_size_min)
+        & (Tr_tmp / Tpsf < rel_size_max)
+    )
+
+    return mask
+
+
+
+
+def mask_gal_SNR(SNR, snr_min, snr_max):
+
+    mask = (
+        (SNR > snr_min)
+        & (SNR < snr_max)
+    )
+
+    return mask
+
+
diff --git a/src/sp_validation/cat.py b/src/sp_validation/cat.py
index 20342828..30d90289 100644
--- a/src/sp_validation/cat.py
+++ b/src/sp_validation/cat.py
@@ -26,7 +26,6 @@
 
 from sp_validation import util
 from sp_validation import io
-from sp_validation import basic
 from sp_validation.survey import get_footprint
 from sp_validation import __version__, __name__
 
diff --git a/src/sp_validation/statistics.py b/src/sp_validation/statistics.py
new file mode 100644
index 00000000..c8e3d862
--- /dev/null
+++ b/src/sp_validation/statistics.py
@@ -0,0 +1,125 @@
+"""STATISTICS.
+
+:Name: statistics.py
+
+:Description: Cosmology-independent statistical helpers (jackknife resampling,
+              chi2/PTE, covariance<->correlation, OneCovariance reshaping).
+              Extracted verbatim from the former basic.py.
+"""
+
+import numpy as np
+from scipy import stats
+
+
+def jackknif_weighted_average2(
+    data,
+    weights,
+    remove_size=0.1,
+    n_realization=100,
+):
+    """Add docstring.
+
+    ...
+
+    """
+    samp_size = len(data)
+    keep_size_pc = 1 - remove_size
+
+    if keep_size_pc < 0:
+        raise ValueError('remove size should be in [0, 1]')
+
+    subsamp_size = int(samp_size * keep_size_pc)
+
+    all_ind = np.arange(samp_size)
+
+    all_est = []
+    for i in range(n_realization):
+        sub_data_ind = np.random.choice(all_ind, subsamp_size)
+
+        if (sum(data[sub_data_ind]) == 0):
+            all_est.append(np.nan)
+        else:
+            all_est.append(
+                np.average(data[sub_data_ind], weights=weights[sub_data_ind])
+            )
+
+    all_est = np.array(all_est)
+
+    return np.mean(all_est), np.std(all_est)
+
+
+def corr_from_cov(cov):
+    """Correlation matrix from a covariance matrix.
+
+    Parameters
+    ----------
+    cov : numpy.ndarray
+        Covariance matrix.
+
+    Returns
+    -------
+    numpy.ndarray
+        Correlation matrix.
+
+    """
+    std_dev = np.sqrt(np.diag(cov))
+    return cov / np.outer(std_dev, std_dev)
+
+
+def chi2_and_pte(data_vector, cov, verbose=False):
+    """Chi-squared, reduced chi-squared and PTE for a data vector.
+
+    The data vector is assumed to be zero-mean under the null hypothesis,
+    so ``chi2 = d^T C^-1 d``.
+
+    Parameters
+    ----------
+    data_vector : numpy.ndarray
+        Data vector.
+    cov : numpy.ndarray
+        Covariance matrix of the data vector.
+    verbose : bool, optional
+        If ``True``, print the statistics; default is ``False``.
+
+    Returns
+    -------
+    tuple
+        ``(chi2, reduced_chi2, pte)``.
+
+    """
+    chi2 = data_vector @ np.linalg.inv(cov) @ data_vector
+    dof = len(data_vector)
+    reduced_chi2 = chi2 / dof
+    pte = 1 - stats.chi2.cdf(chi2, dof)
+    if verbose:
+        print(f"Chi2: {chi2:.4f}")
+        print(f"Reduced Chi2: {reduced_chi2:.4f}")
+        print(f"PTE: {pte:.4f}")
+    return chi2, reduced_chi2, pte
+
+
+def cov_from_one_covariance(cov_one_cov, gaussian=True):
+    """Reshape a OneCovariance ``covariance_list`` table into a matrix.
+
+    Parameters
+    ----------
+    cov_one_cov : numpy.ndarray
+        Flat OneCovariance output (e.g. from ``covariance_list_..._Cell.dat``),
+        with one row per ``(i, j)`` element pair.
+    gaussian : bool, optional
+        If ``True`` use the Gaussian-only column, otherwise the
+        Gaussian+non-Gaussian column; default is ``True``.
+
+    Returns
+    -------
+    numpy.ndarray
+        Square covariance matrix.
+
+    """
+    n_bins = np.sqrt(cov_one_cov.shape[0]).astype(int)
+    cov = np.zeros((n_bins, n_bins))
+    index_value = 10 if gaussian else 9
+    for i in range(n_bins):
+        for j in range(n_bins):
+            cov[i, j] = cov_one_cov[i * n_bins + j, index_value]
+    return cov
diff --git a/src/sp_validation/tests/test_basic.py b/src/sp_validation/tests/test_basic.py
deleted file mode 100644
index 25260759..00000000
--- a/src/sp_validation/tests/test_basic.py
+++ /dev/null
@@ -1,279 +0,0 @@
-"""VALUE-DRIFT CHARACTERIZATION TESTS FOR basic.py.
-
-This module pins the numeric behaviour of the metacalibration estimator
-and the standalone selection / jackknife helpers in
-:mod:`sp_validation.basic`. Every assertion is a *characterization* test:
-the inputs are fully deterministic (hand-built arrays / seeded RNG, no
-cluster data) and the outputs are committed as literals with a tight
-``rtol``. A refactor that changes the numbers must turn this file red.
-
-Each test documents the perturbation that gives it teeth: a deliberate
-change to the input that is asserted to produce a different result, so a
-reviewer can independently confirm the test bites.
-
-:Author: cdaley
-
-"""
-
-import numpy as np
-import numpy.testing as npt
-from astropy.table import Table
-
-from sp_validation.basic import (
-    metacal,
-    mask_gal_size,
-    mask_gal_SNR,
-    jackknif_weighted_average2,
-)
-
-
-# Metacal sheared-image variants and their NGMIX column suffixes.
-_VARIANTS = ["1M", "1P", "2M", "2P", "NOSHEAR"]
-
-
-def _build_ngmix_catalog(slope_11=2.0, slope_22=3.0, step=0.01):
-    """Build a deterministic NGMIX catalogue with a known metacal response.
-
-    The metacal shear response is a finite difference of the measured
-    ellipticity between the +shear and -shear sheared images, divided by
-    twice the step ``h``:
-
-        R11 = (g1[1P] - g1[1M]) / (2 h)
-        R22 = (g2[2P] - g2[2M]) / (2 h)
-        R12 = (g1[2P] - g1[2M]) / (2 h)
-        R21 = (g2[1P] - g2[1M]) / (2 h)
-
-    We inject a pure diagonal response by making the measured g1 vary
-    linearly with the applied component-1 step (slope ``slope_11``) and g2
-    vary linearly with the applied component-2 step (slope ``slope_22``),
-    with NO cross-response. Concretely, relative to a NOSHEAR baseline of
-    (g1, g2) = (0.05, -0.02):
-
-        1P: g1 += slope_11 * step,   1M: g1 -= slope_11 * step
-        2P: g2 += slope_22 * step,   2M: g2 -= slope_22 * step
-
-    so the finite difference recovers R11 = slope_11, R22 = slope_22, and
-    R12 = R21 = 0 exactly. The selection response is zero because every
-    object passes every cut identically (the selection masks are the same
-    set for 1P/1M/2P/2M), so the total R equals the injected slope matrix.
-
-    All objects are built to pass the galaxy cuts: flag == 0,
-    rel_size = T / Tpsf = 1.0 (inside [0.5, 3.0]), and
-    SNR = flux / flux_err = 50 (inside [10, 500]).
-    """
-    n = 8
-    g1_base = 0.05
-    g2_base = -0.02
-
-    # Per-variant (g1, g2) shifts: only the matching component moves.
-    shifts = {
-        "NOSHEAR": (0.0, 0.0),
-        "1P": (slope_11 * step, 0.0),
-        "1M": (-slope_11 * step, 0.0),
-        "2P": (0.0, slope_22 * step),
-        "2M": (0.0, -slope_22 * step),
-    }
-
-    cols = {}
-    for var in _VARIANTS:
-        dg1, dg2 = shifts[var]
-        ell = np.zeros((n, 2))
-        ell[:, 0] = g1_base + dg1
-        ell[:, 1] = g2_base + dg2
-        cols[f"NGMIX_ELL_{var}"] = ell
-        cols[f"NGMIX_FLAGS_{var}"] = np.zeros(n, dtype=int)
-        cols[f"NGMIX_FLUX_{var}"] = np.full(n, 50.0)
-        cols[f"NGMIX_FLUX_ERR_{var}"] = np.full(n, 1.0)
-        # T / Tpsf = 1.0 -> rel_size inside [0.5, 3.0].
-        cols[f"NGMIX_T_{var}"] = np.full(n, 1.0)
-        cols[f"NGMIX_T_ERR_{var}"] = np.full(n, 0.1)
-        cols[f"NGMIX_Tpsf_{var}"] = np.full(n, 1.0)
-
-    # Per-component ellipticity errors -> inverse-variance weights.
-    cols["NGMIX_ELL_ERR_NOSHEAR"] = np.full((n, 2), 0.25)
-
-    return Table(cols), n
-
-
-def test_metacal_R_matrix_recovers_injected_response():
-    """Pin the metacal total response matrix R for an injected slope.
-
-    WHAT IS PINNED: with a hand-built NGMIX catalogue whose measured
-    ellipticity responds linearly to the applied metacal shear step
-    (slope 2.0 on component 1, slope 3.0 on component 2, no cross term),
-    the estimator's total response ``mcal.R`` is a 2x2 matrix equal to the
-    injected slopes:  R = [[2.0, 0.0], [0.0, 3.0]]. The off-diagonal terms
-    and the selection response are exactly zero because every object passes
-    every cut identically.
-
-    WHY TEETH: the response is the finite difference R11 = slope_11 by
-    construction. If a refactor changed the step normalisation (the 2 h
-    divisor), the +/-/component bookkeeping, the selection-response
-    subtraction, or the column names read by ``_read_data_ngmix``, R would
-    no longer equal the injected matrix. The companion assertion below
-    re-runs with slope 5.0 and confirms R11 tracks it (5.0 != 2.0), so a
-    change that decouples R from the input numbers fails.
-
-    NOTE: the estimator prints an 'FHP/MK hack' line and an unweighted /
-    weighted response line; these are expected stdout, not errors.
-    """
-    data, n = _build_ngmix_catalog(slope_11=2.0, slope_22=3.0, step=0.01)
-    mask = np.ones(n, dtype=bool)
-
-    mcal = metacal(
-        data,
-        mask,
-        masking_type="gal",
-        step=0.01,
-        prefix="NGMIX",
-        size_corr_ell=False,   # avoid the in-place T mutation; clean linear cut
-        global_R_weight=None,  # unweighted mean over objects
-    )
-
-    assert mcal.R.shape == (2, 2)
-    npt.assert_allclose(
-        mcal.R,
-        np.array([[2.0, 0.0], [0.0, 3.0]]),
-        rtol=1e-10,
-        atol=1e-12,
-    )
-
-    # TEETH: a different injected slope yields a different R11.
-    data2, n2 = _build_ngmix_catalog(slope_11=5.0, slope_22=3.0, step=0.01)
-    mcal2 = metacal(
-        data2,
-        np.ones(n2, dtype=bool),
-        masking_type="gal",
-        step=0.01,
-        prefix="NGMIX",
-        size_corr_ell=False,
-        global_R_weight=None,
-    )
-    npt.assert_allclose(mcal2.R[0, 0], 5.0, rtol=1e-10)
-    assert not np.isclose(mcal2.R[0, 0], mcal.R[0, 0])
-
-
-def test_metacal_R_matrix_step_normalization():
-    """Pin that R is independent of the step h for a fixed injected slope.
-
-    WHAT IS PINNED: the response is divided by 2 h, and the injected
-    ellipticity shift is slope * h, so R = slope regardless of h. Building
-    the catalogue with step = 0.02 (and reading it back with step = 0.02)
-    still gives R11 = 2.0, R22 = 3.0.
-
-    WHY TEETH: this isolates the ``h2 = 2 * self._step`` normalisation in
-    ``_shear_response``. If a refactor dropped the factor of 2 or used the
-    wrong step, R would scale and this exact-match assertion would fail.
-    """
-    data, n = _build_ngmix_catalog(slope_11=2.0, slope_22=3.0, step=0.02)
-    mcal = metacal(
-        data,
-        np.ones(n, dtype=bool),
-        masking_type="gal",
-        step=0.02,
-        prefix="NGMIX",
-        size_corr_ell=False,
-        global_R_weight=None,
-    )
-    npt.assert_allclose(
-        mcal.R,
-        np.array([[2.0, 0.0], [0.0, 3.0]]),
-        rtol=1e-10,
-        atol=1e-12,
-    )
-
-
-def test_mask_gal_size_boolean_mask():
-    """Pin the exact boolean size mask spanning below/within/above the cut.
-
-    WHAT IS PINNED: ``mask_gal_size`` keeps objects with
-    rel_size_min < T / Tpsf < rel_size_max (strict on both ends). With
-    Tpsf = 1.0 everywhere and T spanning the bounds [0.5, 3.0], the
-    expected mask is computed element by element below.
-
-    WHY TEETH: T / Tpsf values 0.5 and 3.0 sit exactly on the (strict)
-    bounds and are EXCLUDED; 0.49 and 3.01 are also excluded; the interior
-    values pass. The companion assertion tightens rel_size_min to 1.0 and
-    asserts the 0.75 element flips from True to False.
-    """
-    T = np.array([0.3, 0.49, 0.5, 0.75, 1.0, 2.999, 3.0, 3.01, 5.0])
-    Tpsf = np.ones_like(T)
-
-    mask = mask_gal_size(T, Tpsf, rel_size_min=0.5, rel_size_max=3.0)
-
-    expected = np.array(
-        [False, False, False, True, True, True, False, False, False]
-    )
-    npt.assert_array_equal(mask, expected)
-
-    # TEETH: tightening the lower bound to 1.0 drops the 0.75 element.
-    mask_tight = mask_gal_size(T, Tpsf, rel_size_min=1.0, rel_size_max=3.0)
-    assert mask[3] and not mask_tight[3]
-    assert not np.array_equal(mask, mask_tight)
-
-
-def test_mask_gal_SNR_boolean_mask():
-    """Pin the exact boolean SNR mask spanning below/within/above the cut.
-
-    WHAT IS PINNED: ``mask_gal_SNR`` keeps objects with
-    snr_min < SNR < snr_max (strict on both ends). With bounds [10, 500],
-    the boundary values 10 and 500 are EXCLUDED and the interior passes.
-
-    WHY TEETH: shifting the lower bound from 10 to 12 flips the SNR = 11
-    element from True to False; the companion assertion confirms it.
-    """
-    SNR = np.array([5.0, 10.0, 11.0, 100.0, 499.0, 500.0, 600.0])
-
-    mask = mask_gal_SNR(SNR, snr_min=10.0, snr_max=500.0)
-
-    expected = np.array([False, False, True, True, True, False, False])
-    npt.assert_array_equal(mask, expected)
-
-    # TEETH: raising snr_min to 12 drops the SNR = 11 element.
-    mask_shifted = mask_gal_SNR(SNR, snr_min=12.0, snr_max=500.0)
-    assert mask[2] and not mask_shifted[2]
-    assert not np.array_equal(mask, mask_shifted)
-
-
-def test_jackknif_weighted_average2_mean_and_error():
-    """Pin the jackknife weighted average + error for a seeded RNG draw.
-
-    WHAT IS PINNED: ``jackknif_weighted_average2`` draws ``n_realization``
-    bootstrap-style subsamples (size = (1 - remove_size) * N, sampled WITH
-    replacement via ``np.random.choice``) and returns
-    (mean over realizations, std over realizations) of the per-subsample
-    weighted average. With ``np.random.seed`` fixed before the call, the
-    draws are deterministic, so both numbers are pinned as literals
-    obtained from an actual run.
-
-    WHY TEETH: the function depends on the data values, the weights, and
-    the sampling. The companion assertion changes a single weight (under
-    the same seed and therefore the same index draws) and asserts the mean
-    moves, proving the weighting is load-bearing and not ignored.
-    """
-    data = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0])
-    weights = np.array([1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0])
-
-    np.random.seed(1234)
-    mean, err = jackknif_weighted_average2(
-        data, weights, remove_size=0.1, n_realization=50
-    )
-
-    npt.assert_allclose(mean, _PINNED_JK_MEAN, rtol=1e-10)
-    npt.assert_allclose(err, _PINNED_JK_ERR, rtol=1e-10)
-
-    # TEETH: same seed (same index draws) but a perturbed weight -> the
-    # weighted average changes, so the returned mean must differ.
-    weights_perturbed = weights.copy()
-    weights_perturbed[0] = 100.0
-    np.random.seed(1234)
-    mean_p, _ = jackknif_weighted_average2(
-        data, weights_perturbed, remove_size=0.1, n_realization=50
-    )
-    assert not np.isclose(mean, mean_p)
-
-
-# Literals pinned from an observed run inside the container; see the test
-# docstring for the seed and parameters that reproduce them.
-_PINNED_JK_MEAN = 6.248279984721161
-_PINNED_JK_ERR = 0.971349020459367
diff --git a/src/sp_validation/tests/test_calibration.py b/src/sp_validation/tests/test_calibration.py
index 3ce8a37f..d61b8bdb 100644
--- a/src/sp_validation/tests/test_calibration.py
+++ b/src/sp_validation/tests/test_calibration.py
@@ -1,15 +1,17 @@
 """UNIT TESTS FOR CALIBRATION SUBPACKAGE.
 
-This module contains value-drift CHARACTERIZATION tests for the pure
-calibration linear algebra in ``sp_validation.calibration``:
-``get_calibrated_quantities`` and ``get_calibrated_m_c``.
+This module contains value-drift CHARACTERIZATION tests for
+``sp_validation.calibration``: the m/c bias linear algebra
+(``get_calibrated_quantities``, ``get_calibrated_m_c``), the ``metacal``
+response class, and the galaxy selection masks (``mask_gal_size``,
+``mask_gal_SNR``).
 
-Both functions consume a ``gal_metacal`` object through exactly three
+The m/c functions consume a ``gal_metacal`` object through exactly three
 attributes -- ``.R`` (2x2 response matrix), ``.ns`` (dict of 'g1'/'g2'/'w'
-arrays), and ``.mask_dict`` (dict with key 'ns' -> boolean mask). We build a
-LIGHTWEIGHT FAKE ``gal_metacal`` (a ``SimpleNamespace``) so we pin the
-multiplicative/additive bias correction maths without coupling to the
-metacal-class internals (``test_basic.py`` owns the class itself).
+arrays), and ``.mask_dict`` (dict with key 'ns' -> boolean mask). For those
+we build a LIGHTWEIGHT FAKE ``gal_metacal`` (a ``SimpleNamespace``) so we pin
+the bias-correction maths in isolation; the ``metacal`` tests below exercise
+the real class directly.
 
 The literals below were produced by running the real estimator once in the
 container and copying the observed output; they are pinned with a tight rtol
@@ -24,8 +26,10 @@
 import numpy as np
 import numpy.testing as npt
 import pytest
+from astropy.table import Table
 
 from sp_validation import calibration
+from sp_validation.calibration import metacal, mask_gal_size, mask_gal_SNR
 
 
 # Fixed, compact inputs shared across the tests.
@@ -187,3 +191,211 @@ def test_get_calibrated_m_c_teeth_offset_tracks_in_c(gal_metacal):
     Rinv = np.linalg.inv(R)
     closed_form = Rinv.dot(g_uncorr) - Rinv.dot(c_off)[:, None]
     npt.assert_allclose(g_corr_mc, closed_form, rtol=1e-12)
+
+
+# Metacal sheared-image variants and their NGMIX column suffixes.
+_VARIANTS = ["1M", "1P", "2M", "2P", "NOSHEAR"]
+
+
+def _build_ngmix_catalog(slope_11=2.0, slope_22=3.0, step=0.01):
+    """Build a deterministic NGMIX catalogue with a known metacal response.
+
+    The metacal shear response is a finite difference of the measured
+    ellipticity between the +shear and -shear sheared images, divided by
+    twice the step ``h``:
+
+        R11 = (g1[1P] - g1[1M]) / (2 h)
+        R22 = (g2[2P] - g2[2M]) / (2 h)
+        R12 = (g1[2P] - g1[2M]) / (2 h)
+        R21 = (g2[1P] - g2[1M]) / (2 h)
+
+    We inject a pure diagonal response by making the measured g1 vary
+    linearly with the applied component-1 step (slope ``slope_11``) and g2
+    vary linearly with the applied component-2 step (slope ``slope_22``),
+    with NO cross-response. Concretely, relative to a NOSHEAR baseline of
+    (g1, g2) = (0.05, -0.02):
+
+        1P: g1 += slope_11 * step,   1M: g1 -= slope_11 * step
+        2P: g2 += slope_22 * step,   2M: g2 -= slope_22 * step
+
+    so the finite difference recovers R11 = slope_11, R22 = slope_22, and
+    R12 = R21 = 0 exactly. The selection response is zero because every
+    object passes every cut identically (the selection masks are the same
+    set for 1P/1M/2P/2M), so the total R equals the injected slope matrix.
+
+    All objects are built to pass the galaxy cuts: flag == 0,
+    rel_size = T / Tpsf = 1.0 (inside [0.5, 3.0]), and
+    SNR = flux / flux_err = 50 (inside [10, 500]).
+    """
+    n = 8
+    g1_base = 0.05
+    g2_base = -0.02
+
+    # Per-variant (g1, g2) shifts: only the matching component moves.
+    shifts = {
+        "NOSHEAR": (0.0, 0.0),
+        "1P": (slope_11 * step, 0.0),
+        "1M": (-slope_11 * step, 0.0),
+        "2P": (0.0, slope_22 * step),
+        "2M": (0.0, -slope_22 * step),
+    }
+
+    cols = {}
+    for var in _VARIANTS:
+        dg1, dg2 = shifts[var]
+        ell = np.zeros((n, 2))
+        ell[:, 0] = g1_base + dg1
+        ell[:, 1] = g2_base + dg2
+        cols[f"NGMIX_ELL_{var}"] = ell
+        cols[f"NGMIX_FLAGS_{var}"] = np.zeros(n, dtype=int)
+        cols[f"NGMIX_FLUX_{var}"] = np.full(n, 50.0)
+        cols[f"NGMIX_FLUX_ERR_{var}"] = np.full(n, 1.0)
+        # T / Tpsf = 1.0 -> rel_size inside [0.5, 3.0].
+        cols[f"NGMIX_T_{var}"] = np.full(n, 1.0)
+        cols[f"NGMIX_T_ERR_{var}"] = np.full(n, 0.1)
+        cols[f"NGMIX_Tpsf_{var}"] = np.full(n, 1.0)
+
+    # Per-component ellipticity errors -> inverse-variance weights.
+    cols["NGMIX_ELL_ERR_NOSHEAR"] = np.full((n, 2), 0.25)
+
+    return Table(cols), n
+
+
+def test_metacal_R_matrix_recovers_injected_response():
+    """Pin the metacal total response matrix R for an injected slope.
+
+    WHAT IS PINNED: with a hand-built NGMIX catalogue whose measured
+    ellipticity responds linearly to the applied metacal shear step
+    (slope 2.0 on component 1, slope 3.0 on component 2, no cross term),
+    the estimator's total response ``mcal.R`` is a 2x2 matrix equal to the
+    injected slopes:  R = [[2.0, 0.0], [0.0, 3.0]]. The off-diagonal terms
+    and the selection response are exactly zero because every object passes
+    every cut identically.
+
+    WHY TEETH: the response is the finite difference R11 = slope_11 by
+    construction. If a refactor changed the step normalisation (the 2 h
+    divisor), the +/-/component bookkeeping, the selection-response
+    subtraction, or the column names read by ``_read_data_ngmix``, R would
+    no longer equal the injected matrix. The companion assertion below
+    re-runs with slope 5.0 and confirms R11 tracks it (5.0 != 2.0), so a
+    change that decouples R from the input numbers fails.
+
+    NOTE: the estimator prints an 'FHP/MK hack' line and an unweighted /
+    weighted response line; these are expected stdout, not errors.
+    """
+    data, n = _build_ngmix_catalog(slope_11=2.0, slope_22=3.0, step=0.01)
+    mask = np.ones(n, dtype=bool)
+
+    mcal = metacal(
+        data,
+        mask,
+        masking_type="gal",
+        step=0.01,
+        prefix="NGMIX",
+        size_corr_ell=False,   # avoid the in-place T mutation; clean linear cut
+        global_R_weight=None,  # unweighted mean over objects
+    )
+
+    assert mcal.R.shape == (2, 2)
+    npt.assert_allclose(
+        mcal.R,
+        np.array([[2.0, 0.0], [0.0, 3.0]]),
+        rtol=1e-10,
+        atol=1e-12,
+    )
+
+    # TEETH: a different injected slope yields a different R11.
+    data2, n2 = _build_ngmix_catalog(slope_11=5.0, slope_22=3.0, step=0.01)
+    mcal2 = metacal(
+        data2,
+        np.ones(n2, dtype=bool),
+        masking_type="gal",
+        step=0.01,
+        prefix="NGMIX",
+        size_corr_ell=False,
+        global_R_weight=None,
+    )
+    npt.assert_allclose(mcal2.R[0, 0], 5.0, rtol=1e-10)
+    assert not np.isclose(mcal2.R[0, 0], mcal.R[0, 0])
+
+
+def test_metacal_R_matrix_step_normalization():
+    """Pin that R is independent of the step h for a fixed injected slope.
+
+    WHAT IS PINNED: the response is divided by 2 h, and the injected
+    ellipticity shift is slope * h, so R = slope regardless of h. Building
+    the catalogue with step = 0.02 (and reading it back with step = 0.02)
+    still gives R11 = 2.0, R22 = 3.0.
+
+    WHY TEETH: this isolates the ``h2 = 2 * self._step`` normalisation in
+    ``_shear_response``. If a refactor dropped the factor of 2 or used the
+    wrong step, R would scale and this exact-match assertion would fail.
+    """
+    data, n = _build_ngmix_catalog(slope_11=2.0, slope_22=3.0, step=0.02)
+    mcal = metacal(
+        data,
+        np.ones(n, dtype=bool),
+        masking_type="gal",
+        step=0.02,
+        prefix="NGMIX",
+        size_corr_ell=False,
+        global_R_weight=None,
+    )
+    npt.assert_allclose(
+        mcal.R,
+        np.array([[2.0, 0.0], [0.0, 3.0]]),
+        rtol=1e-10,
+        atol=1e-12,
+    )
+
+
+def test_mask_gal_size_boolean_mask():
+    """Pin the exact boolean size mask spanning below/within/above the cut.
+
+    WHAT IS PINNED: ``mask_gal_size`` keeps objects with
+    rel_size_min < T / Tpsf < rel_size_max (strict on both ends). With
+    Tpsf = 1.0 everywhere and T spanning the bounds [0.5, 3.0], the
+    expected mask is computed element by element below.
+
+    WHY TEETH: T / Tpsf values 0.5 and 3.0 sit exactly on the (strict)
+    bounds and are EXCLUDED; 0.49 and 3.01 are also excluded; the interior
+    values pass. The companion assertion tightens rel_size_min to 1.0 and
+    asserts the 0.75 element flips from True to False.
+    """
+    T = np.array([0.3, 0.49, 0.5, 0.75, 1.0, 2.999, 3.0, 3.01, 5.0])
+    Tpsf = np.ones_like(T)
+
+    mask = mask_gal_size(T, Tpsf, rel_size_min=0.5, rel_size_max=3.0)
+
+    expected = np.array(
+        [False, False, False, True, True, True, False, False, False]
+    )
+    npt.assert_array_equal(mask, expected)
+
+    # TEETH: tightening the lower bound to 1.0 drops the 0.75 element.
+    mask_tight = mask_gal_size(T, Tpsf, rel_size_min=1.0, rel_size_max=3.0)
+    assert mask[3] and not mask_tight[3]
+    assert not np.array_equal(mask, mask_tight)
+
+
+def test_mask_gal_SNR_boolean_mask():
+    """Pin the exact boolean SNR mask spanning below/within/above the cut.
+
+    WHAT IS PINNED: ``mask_gal_SNR`` keeps objects with
+    snr_min < SNR < snr_max (strict on both ends). With bounds [10, 500],
+    the boundary values 10 and 500 are EXCLUDED and the interior passes.
+
+    WHY TEETH: shifting the lower bound from 10 to 12 flips the SNR = 11
+    element from True to False; the companion assertion confirms it.
+    """
+    SNR = np.array([5.0, 10.0, 11.0, 100.0, 499.0, 500.0, 600.0])
+
+    mask = mask_gal_SNR(SNR, snr_min=10.0, snr_max=500.0)
+
+    expected = np.array([False, False, True, True, True, False, False])
+    npt.assert_array_equal(mask, expected)
+
+    # TEETH: raising snr_min to 12 drops the SNR = 11 element.
+    mask_shifted = mask_gal_SNR(SNR, snr_min=12.0, snr_max=500.0)
+    assert mask[2] and not mask_shifted[2]
+    assert not np.array_equal(mask, mask_shifted)
diff --git a/src/sp_validation/tests/test_statistics.py b/src/sp_validation/tests/test_statistics.py
new file mode 100644
index 00000000..507413c2
--- /dev/null
+++ b/src/sp_validation/tests/test_statistics.py
@@ -0,0 +1,59 @@
+"""VALUE-DRIFT CHARACTERIZATION TESTS FOR statistics.py.
+
+This module pins the numeric behaviour of the cosmology-independent
+statistical helpers in :mod:`sp_validation.statistics`. The inputs are
+fully deterministic (seeded RNG, no cluster data) and the outputs are
+committed as literals with a tight ``rtol``. A refactor that changes the
+numbers must turn this file red.
+
+:Author: cdaley
+
+"""
+
+import numpy as np
+import numpy.testing as npt
+
+from sp_validation.statistics import jackknif_weighted_average2
+
+def test_jackknif_weighted_average2_mean_and_error():
+    """Pin the jackknife weighted average + error for a seeded RNG draw.
+
+    WHAT IS PINNED: ``jackknif_weighted_average2`` draws ``n_realization``
+    bootstrap-style subsamples (size = (1 - remove_size) * N, sampled WITH
+    replacement via ``np.random.choice``) and returns
+    (mean over realizations, std over realizations) of the per-subsample
+    weighted average. With ``np.random.seed`` fixed before the call, the
+    draws are deterministic, so both numbers are pinned as literals
+    obtained from an actual run.
+
+    WHY TEETH: the function depends on the data values, the weights, and
+    the sampling. The companion assertion changes a single weight (under
+    the same seed and therefore the same index draws) and asserts the mean
+    moves, proving the weighting is load-bearing and not ignored.
+    """
+    data = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0])
+    weights = np.array([1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0])
+
+    np.random.seed(1234)
+    mean, err = jackknif_weighted_average2(
+        data, weights, remove_size=0.1, n_realization=50
+    )
+
+    npt.assert_allclose(mean, _PINNED_JK_MEAN, rtol=1e-10)
+    npt.assert_allclose(err, _PINNED_JK_ERR, rtol=1e-10)
+
+    # TEETH: same seed (same index draws) but a perturbed weight -> the
+    # weighted average changes, so the returned mean must differ.
+    weights_perturbed = weights.copy()
+    weights_perturbed[0] = 100.0
+    np.random.seed(1234)
+    mean_p, _ = jackknif_weighted_average2(
+        data, weights_perturbed, remove_size=0.1, n_realization=50
+    )
+    assert not np.isclose(mean, mean_p)
+
+
+# Literals pinned from an observed run inside the container; see the test
+# docstring for the seed and parameters that reproduce them.
+_PINNED_JK_MEAN = 6.248279984721161
+_PINNED_JK_ERR = 0.971349020459367

From 1ff913c86737bc813f6c29271828d7be12fb8c70 Mon Sep 17 00:00:00 2001
From: Cail Daley <cail.daley@cea.fr>
Date: Sat, 20 Jun 2026 03:47:54 +0200
Subject: [PATCH 02/16] =?UTF-8?q?felt:=20reserved-sasha=20=E2=80=94=20docu?=
 =?UTF-8?q?ment=20Sasha's=20hands-off=20zones?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

scratch/guerrini/ and the namaster_utils→source / Gaussian-sims work he
reserved for his next PR in the #197 review. So future workers don't touch it.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 .felt/sp_validation/reserved-sasha/reserved-sasha.md | 11 +++++++++++
 1 file changed, 11 insertions(+)
 create mode 100644 .felt/sp_validation/reserved-sasha/reserved-sasha.md

diff --git a/.felt/sp_validation/reserved-sasha/reserved-sasha.md b/.felt/sp_validation/reserved-sasha/reserved-sasha.md
new file mode 100644
index 00000000..014de4b0
--- /dev/null
+++ b/.felt/sp_validation/reserved-sasha/reserved-sasha.md
@@ -0,0 +1,11 @@
+---
+id: 01KVHAG0FSYA0Y6F5JDGTT6VNR
+name: 'Reserved: Sasha''s sp_validation work (don''t touch)'
+tags:
+    - sp-validation
+    - reserved
+    - guerrini
+created-at: 2026-06-20T03:32:01.913796095+02:00
+updated-at: 2026-06-20T03:32:01.913796095+02:00
+outcome: 'Hands-off zones owned by Sasha (Sacha Guerrini) on sp_validation: scratch/guerrini/ entirely, and the namaster_utils → source migration + Gaussian-sims de-duplication, which he reserved for HIS NEXT PR in the #197 review. Future workers/agents: do not refactor these; only mechanical import-path updates forced by a library change are OK (and should be flagged).'
+---

From 322a589033f89162ee0c13721a27bbe5fbea5d2b Mon Sep 17 00:00:00 2001
From: Cail Daley <cail.daley@cea.fr>
Date: Sat, 20 Jun 2026 07:50:18 +0200
Subject: [PATCH 03/16] test(statistics): pin the extracted helpers; drop dead
 code in calibration
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Follow-up polish on the basic.py dissolution (#199).

Tests — characterization (value-drift) coverage for the three statistics.py
helpers that had none, with literals generated by running the real functions
in-container and teeth on each:
- corr_from_cov: unit diagonal + reconstruction from cov/outer(std,std)
- chi2_and_pte: diagonal reduces to sum((d/sigma)^2) with matching scipy PTE,
  plus a non-diagonal case exercising the full d^T C^-1 d path
- cov_from_one_covariance: gaussian(col 10) vs non-gaussian(col 9) selection
  and a row-major-layout check (a transpose would be caught)

Calibration — strictly behavior-preserving dead-code removal:
- 3 unused module imports (util, io, get_footprint — verified unreferenced)
- an unused local (col_noshear) in metacal._read_data
- the uncallable metacal._return method (defined without self, references
  self.* in its body — would NameError if ever invoked; referenced nowhere)

Value pins (metacal R-matrix, m/c bias) stay green; conservatively skipped any
change that would reorder float ops or restructure an estimator. Verified by an
adversarial behavior-preservation review.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 src/sp_validation/calibration.py           |  23 ----
 src/sp_validation/tests/test_statistics.py | 138 ++++++++++++++++++++-
 2 files changed, 137 insertions(+), 24 deletions(-)

diff --git a/src/sp_validation/calibration.py b/src/sp_validation/calibration.py
index 75e9390f..380b7286 100644
--- a/src/sp_validation/calibration.py
+++ b/src/sp_validation/calibration.py
@@ -15,10 +15,7 @@
 import statsmodels.api as sm
 import tqdm
 
-from sp_validation import util
-from sp_validation import io
 from sp_validation.statistics import jackknif_weighted_average2
-from sp_validation.survey import get_footprint
 from sp_validation import cat as sp_cat
 
 from shear_psf_leakage import leakage
@@ -786,7 +783,6 @@ def _read_data(self, data, mask):
 
         print("FHP/MK hack using p1 PSF for ns in cuts")
         indices = np.where(mask)[0]
-        col_noshear = f"{self._prefix}_Tpsf_NOSHEAR"
         col_1p = f"{self._prefix}_Tpsf_1P"
         new_psf = data[col_1p][indices]
 
@@ -1194,25 +1190,6 @@ def _total_response(self):
 
         self.R = self.R_shear_global + self.R_selection
 
-    def _return():
-        """Add docstring.
-
-        ...
-
-        """
-        return (
-            self.m1,
-            self.p1,
-            self.p1,
-            self.p2,
-            self.ns,
-            self.R,
-            self.R_selection_std,
-            self.R_shear_std
-        )
-
-
-
 
 def mask_gal_size(T, Tpsf, rel_size_min, rel_size_max, size_corr_ell=False, g1=None, g2=None):
 
diff --git a/src/sp_validation/tests/test_statistics.py b/src/sp_validation/tests/test_statistics.py
index 507413c2..c681d6ab 100644
--- a/src/sp_validation/tests/test_statistics.py
+++ b/src/sp_validation/tests/test_statistics.py
@@ -12,8 +12,14 @@
 
 import numpy as np
 import numpy.testing as npt
+from scipy import stats
 
-from sp_validation.statistics import jackknif_weighted_average2
+from sp_validation.statistics import (
+    chi2_and_pte,
+    corr_from_cov,
+    cov_from_one_covariance,
+    jackknif_weighted_average2,
+)
 
 def test_jackknif_weighted_average2_mean_and_error():
     """Pin the jackknife weighted average + error for a seeded RNG draw.
@@ -57,3 +63,133 @@ def test_jackknif_weighted_average2_mean_and_error():
 # docstring for the seed and parameters that reproduce them.
 _PINNED_JK_MEAN = 6.248279984721161
 _PINNED_JK_ERR = 0.971349020459367
+
+
+def test_corr_from_cov_pins_correlation_matrix():
+    """Pin the correlation matrix derived from a fixed 3x3 covariance.
+
+    WHAT IS PINNED: ``corr_from_cov`` normalises each entry by the geometric
+    mean of the two diagonal standard deviations,
+    ``corr[i, j] = cov[i, j] / (sigma_i sigma_j)``. With the hand-picked
+    covariance below the off-diagonals are exact rationals
+    (1/sqrt(4*9) = 1/6, -2/sqrt(4*16) = -1/4, 3/sqrt(9*16) = 1/4), pinned as
+    literals from an observed container run.
+
+    WHY TEETH: the defining property of a correlation matrix is a UNIT
+    DIAGONAL, and every off-diagonal must lie in [-1, 1]; both are asserted
+    independently of the literals. A companion check reconstructs the matrix
+    from the closed form ``cov / outer(std, std)`` so a refactor that, e.g.,
+    divided by the variances instead of the standard deviations would break
+    the unit-diagonal property and the closed form simultaneously.
+    """
+    cov = np.array([[4.0, 1.0, -2.0],
+                    [1.0, 9.0, 3.0],
+                    [-2.0, 3.0, 16.0]])
+    corr = corr_from_cov(cov)
+
+    npt.assert_allclose(
+        corr,
+        [[1.0, 1.0 / 6.0, -0.25],
+         [1.0 / 6.0, 1.0, 0.25],
+         [-0.25, 0.25, 1.0]],
+        rtol=1e-12,
+    )
+
+    # TEETH: a correlation matrix has unit diagonal and |off-diagonal| <= 1.
+    npt.assert_allclose(np.diag(corr), 1.0, rtol=1e-12)
+    assert np.all(np.abs(corr) <= 1.0)
+
+    # And independently: the documented closed form cov / outer(std, std).
+    std = np.sqrt(np.diag(cov))
+    npt.assert_allclose(corr, cov / np.outer(std, std), rtol=1e-12)
+
+
+def test_chi2_and_pte_diagonal_reduces_to_sum_of_squares():
+    """Pin chi2/reduced-chi2/PTE and prove the diagonal-cov reduction.
+
+    WHAT IS PINNED: for a diagonal covariance ``diag(sigma^2)`` the quadratic
+    form collapses to ``chi2 = sum((d / sigma)^2)`` with ``dof = len(d)``,
+    ``reduced_chi2 = chi2 / dof`` and ``pte = 1 - chi2.cdf(chi2, dof)``. All
+    three are pinned as literals from an observed container run.
+
+    WHY TEETH: the diagonal case has a closed form, so chi2 is asserted equal
+    to ``sum((d / sigma)^2)`` and the PTE to the matching ``scipy`` survival
+    value -- a refactor that dropped the inverse, used the covariance instead
+    of its inverse, or mis-set the dof would break the reduction. The
+    companion check feeds the same data through a NON-diagonal covariance and
+    asserts the chi2 changes, proving the full quadratic form is exercised.
+    """
+    d = np.array([1.0, -2.0, 0.5, 3.0])
+    sigma = np.array([2.0, 1.0, 0.5, 4.0])
+    cov_diag = np.diag(sigma ** 2)
+
+    chi2, reduced_chi2, pte = chi2_and_pte(d, cov_diag)
+
+    npt.assert_allclose(chi2, 5.8125, rtol=1e-12)
+    npt.assert_allclose(reduced_chi2, 1.453125, rtol=1e-12)
+    npt.assert_allclose(pte, 0.21359530221841705, rtol=1e-12)
+
+    # TEETH: the diagonal quadratic form is exactly sum((d / sigma)^2), and
+    # the PTE is the matching scipy chi2 survival probability on dof = len(d).
+    chi2_closed = np.sum((d / sigma) ** 2)
+    npt.assert_allclose(chi2, chi2_closed, rtol=1e-12)
+    npt.assert_allclose(reduced_chi2, chi2_closed / len(d), rtol=1e-12)
+    npt.assert_allclose(pte, 1 - stats.chi2.cdf(chi2_closed, len(d)), rtol=1e-12)
+
+    # TEETH: off-diagonal covariance terms change the quadratic form, so the
+    # full d^T C^-1 d path (not just the diagonal shortcut) is load-bearing.
+    cov_full = np.array([[4.0, 1.0, 0.0, 0.0],
+                         [1.0, 1.0, 0.2, 0.0],
+                         [0.0, 0.2, 0.25, 0.1],
+                         [0.0, 0.0, 0.1, 16.0]])
+    chi2_full, _, pte_full = chi2_and_pte(d, cov_full)
+    npt.assert_allclose(chi2_full, 13.526036131774706, rtol=1e-12)
+    npt.assert_allclose(pte_full, 0.00897199803545734, rtol=1e-12)
+    assert not np.isclose(chi2_full, chi2)
+
+
+def test_cov_from_one_covariance_selects_gaussian_column():
+    """Pin the reshaped matrix and prove the gaussian column selection.
+
+    WHAT IS PINNED: ``cov_from_one_covariance`` reads a flat OneCovariance
+    table with one row per ``(i, j)`` pair (row-major, ``k = i*n_bins + j``)
+    and lays the chosen column into a square matrix. Column 10 carries the
+    Gaussian-only term (``gaussian=True``); column 9 the Gaussian+non-Gaussian
+    term (``gaussian=False``). With a deterministic table whose entries encode
+    their row and column, both reshaped matrices are pinned as literals.
+
+    WHY TEETH: the only difference between the two calls is the column index
+    (10 vs 9), so the two pinned matrices differ by exactly 1 in every entry,
+    proving the gaussian flag selects the right column. A companion check
+    places a unique tag ``10*i + j`` in column 10 and asserts the reshape is
+    row-major (``cov[i, j]`` lands at row ``i*n_bins + j``), so a transposed
+    or column-major refactor would change the recovered matrix.
+    """
+    # Two-bin table -> 4 rows; entry (row k, col c) = 10*k + c, so the
+    # chosen-column values are distinct and self-documenting. Row k carries
+    # the (i, j) pair with k = i*n_bins + j, so rows 0..3 -> (0,0),(0,1),
+    # (1,0),(1,1). Column 10 holds values 10, 20, 30, 40; column 9 holds
+    # 9, 19, 29, 39.
+    one_cov = np.array([np.arange(11.0) + 10.0 * k for k in range(4)])
+
+    cov_gauss = cov_from_one_covariance(one_cov, gaussian=True)
+    cov_nongauss = cov_from_one_covariance(one_cov, gaussian=False)
+
+    npt.assert_allclose(cov_gauss, [[10.0, 20.0], [30.0, 40.0]], rtol=1e-12)
+    npt.assert_allclose(cov_nongauss, [[9.0, 19.0], [29.0, 39.0]], rtol=1e-12)
+
+    # TEETH: the gaussian flag shifts the column by one, so every entry of the
+    # gaussian matrix exceeds its non-gaussian counterpart by exactly 1.
+    npt.assert_allclose(cov_gauss - cov_nongauss, 1.0, rtol=1e-12)
+
+    # TEETH: the (i, j) -> row k = i*n_bins + j layout is row-major. Tag
+    # column 10 with 10*i + j and check it lands at cov[i, j], not cov[j, i].
+    tagged = np.zeros((4, 11))
+    for i in range(2):
+        for j in range(2):
+            tagged[i * 2 + j, 10] = 10.0 * i + j
+    npt.assert_allclose(
+        cov_from_one_covariance(tagged, gaussian=True),
+        [[0.0, 1.0], [10.0, 11.0]],
+        rtol=1e-12,
+    )

From e8ad1d01cee8bb9813be3b3fd4ccaf83d154872c Mon Sep 17 00:00:00 2001
From: Cail Daley <cail.daley@cea.fr>
Date: Sat, 20 Jun 2026 14:50:03 +0200
Subject: [PATCH 04/16] refactor(src): delete dead info.py, fix cat.py version
 import

info.py had zero importers; its only content was a redundant
__name__ = 'sp_validation'. cat.py imported __version__ and __name__
from the package root but used only __version__ (line 607); the
software name at line 606 is already hardcoded. Repoint to the
canonical home: from sp_validation.version import __version__.

Register the retired import path in the dangling-move guard.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 src/sp_validation/cat.py                                 | 2 +-
 src/sp_validation/info.py                                | 9 ---------
 src/sp_validation/tests/test_dangling_move_references.py | 1 +
 3 files changed, 2 insertions(+), 10 deletions(-)
 delete mode 100644 src/sp_validation/info.py

diff --git a/src/sp_validation/cat.py b/src/sp_validation/cat.py
index 30d90289..4b82ff97 100644
--- a/src/sp_validation/cat.py
+++ b/src/sp_validation/cat.py
@@ -27,7 +27,7 @@
 from sp_validation import util
 from sp_validation import io
 from sp_validation.survey import get_footprint
-from sp_validation import __version__, __name__
+from sp_validation.version import __version__
 
 
 def print_mean_ellipticity(
diff --git a/src/sp_validation/info.py b/src/sp_validation/info.py
deleted file mode 100644
index 1129428c..00000000
--- a/src/sp_validation/info.py
+++ /dev/null
@@ -1,9 +0,0 @@
-"""SP_VALIDATION INFO.
-
-This module provides some basic information about the sp_validation package.
-
-:Author: Martin Kilbinger <martin.kilbingers@cea.fr>
-
-"""
-
-__name__ = 'sp_validation'
diff --git a/src/sp_validation/tests/test_dangling_move_references.py b/src/sp_validation/tests/test_dangling_move_references.py
index a42b69ba..745391ec 100644
--- a/src/sp_validation/tests/test_dangling_move_references.py
+++ b/src/sp_validation/tests/test_dangling_move_references.py
@@ -31,6 +31,7 @@
         "cosmo_inference/notebooks/2D_cosmic_shear_consistency",
         "papers/consistency",
     ),
+    ("sp_validation.info", "sp_validation.version (__version__); __name__ dropped"),
 )
 EXCLUDED_DIRS = {
     ".git",

From 7a5e4883b91fcacfc2ece9ef42c12a2fb493a1bb Mon Sep 17 00:00:00 2001
From: Cail Daley <cail.daley@cea.fr>
Date: Sat, 20 Jun 2026 14:50:38 +0200
Subject: [PATCH 05/16] refactor(src): make package __all__ honest

The old __all__ listed modules nobody imports through the package
(io, plot_style, cosmo_val) and omitted the two genuinely public
diagnostic modules rho_tau and b_modes. Replace it with the real
public surface, alphabetised, and drop the stale commented-out
explicit-import block. Nothing does `from sp_validation import *`,
so this is purely a documentation fix.

(util and run_joint_cat are renamed in the Tier-2 commits.)

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 src/sp_validation/__init__.py | 27 +++++++--------------------
 1 file changed, 7 insertions(+), 20 deletions(-)

diff --git a/src/sp_validation/__init__.py b/src/sp_validation/__init__.py
index f1119d7c..9683397d 100644
--- a/src/sp_validation/__init__.py
+++ b/src/sp_validation/__init__.py
@@ -1,29 +1,16 @@
 from .version import __version__
 
 __all__ = [
-    'util',
-    'io',
-    'cosmo_val',
-    'galaxy',
+    'b_modes',
+    'cat',
+    'calibration',
     'cosmology',
+    'galaxy',
     'glass_mock',
-    'calibration',
-    'statistics',
-    'cat',
-    'plot_style',
     'plots',
+    'rho_tau',
     'run_joint_cat',
+    'statistics',
     'survey',
+    'util',
 ]
-
-# Explicit imports to avoid circular issues
-#from . import util
-#from . import io
-#from . import galaxy
-#from . import cosmology
-#from . import calibration
-#from . import cat
-#from . import plot_style
-#from . import plots
-#from . import run_joint_cat
-#from . import survey

From 9a41f4a1772e56c0ab8fdc8205b972cea9e2cf79 Mon Sep 17 00:00:00 2001
From: Cail Daley <cail.daley@cea.fr>
Date: Sat, 20 Jun 2026 15:00:08 +0200
Subject: [PATCH 06/16] refactor(cosmo_val): hoist b_modes imports to module
 top level

Five methods each imported from .b_modes inside their bodies. b_modes
is import-time side-effect-light (it pulls only .cosmology) and has no
back-edge to cosmo_val, so the locals were defensive, not necessary.
Consolidate the union of imported names into one top-level block next
to the existing cosmology/rho_tau imports and drop the five inline
imports. test_cosmo_val: 11 passed.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 src/sp_validation/cosmo_val.py | 37 +++++++++++++++-------------------
 1 file changed, 16 insertions(+), 21 deletions(-)

diff --git a/src/sp_validation/cosmo_val.py b/src/sp_validation/cosmo_val.py
index f707f600..00b13c3a 100644
--- a/src/sp_validation/cosmo_val.py
+++ b/src/sp_validation/cosmo_val.py
@@ -19,6 +19,22 @@
 from shear_psf_leakage.rho_tau_stat import PSFErrorFit
 from uncertainties import ufloat
 
+from .b_modes import (
+    _get_pte_from_scale_cut,
+    calculate_cosebis,
+    calculate_eb_statistics,
+    calculate_pure_eb_correlation,
+    find_conservative_scale_cut_key,
+    plot_cosebis_covariance_matrix,
+    plot_cosebis_modes,
+    plot_cosebis_scale_cut_heatmap,
+    plot_eb_covariance_matrix,
+    plot_integration_vs_reporting,
+    plot_pte_2d_heatmaps,
+    plot_pure_eb_correlations,
+    save_cosebis_results,
+    save_pure_eb_results,
+)
 from .cosmology import get_cosmo, get_theo_c_ell
 from .rho_tau import (
     get_params_rho_tau,
@@ -2096,8 +2112,6 @@ def calculate_pure_eb(
         - A shared patch file is used for the reporting and integration binning,
           and is created if it does not exist.
         """
-        from .b_modes import calculate_pure_eb_correlation
-
         self.print_start(f"Computing {version} pure E/B")
 
         # Set up parameters with defaults
@@ -2219,15 +2233,6 @@ def plot_pure_eb(
         - Delegates individual plot generation to specialized functions in
           b_modes module
         """
-        from .b_modes import (
-            calculate_eb_statistics,
-            plot_eb_covariance_matrix,
-            plot_integration_vs_reporting,
-            plot_pte_2d_heatmaps,
-            plot_pure_eb_correlations,
-            save_pure_eb_results,
-        )
-
         # Use instance defaults for unspecified parameters
         versions = versions or self.versions
         output_dir = output_dir or self.cc['paths']['output']
@@ -2406,8 +2411,6 @@ def calculate_cosebis(
             When multiple scale cuts: Dictionary with scale cut tuples as
             keys and results dictionaries as values.
         """
-        from .b_modes import calculate_cosebis
-
         self.print_start(f"Computing {version} COSEBIs")
 
         # Set up parameters with defaults
@@ -2513,13 +2516,6 @@ def plot_cosebis(
             Precalculated results to avoid recomputation. If None (default),
             results will be calculated using calculate_cosebis.
         """
-        from .b_modes import (
-            find_conservative_scale_cut_key,
-            plot_cosebis_covariance_matrix,
-            plot_cosebis_modes,
-            plot_cosebis_scale_cut_heatmap,
-            save_cosebis_results,
-        )
 
         # Use instance defaults if not specified
         version = version or self.versions[0]
@@ -3461,7 +3457,6 @@ def summarize_bmodes(self, fiducial_scale_cut=(12, 83), versions=None):
             ``{version: {statistic: pte_value, ...}, ...}``
         """
         from scipy import stats as sp_stats
-        from .b_modes import _get_pte_from_scale_cut, find_conservative_scale_cut_key
 
         versions = versions or self.versions
         summary = {}

From 8da5d8b180e2f8d237753aa15d4163a56ded1730 Mon Sep 17 00:00:00 2001
From: Cail Daley <cail.daley@cea.fr>
Date: Sat, 20 Jun 2026 15:00:59 +0200
Subject: [PATCH 07/16] refactor(run_joint_cat): drop shadowed confusion_matrix
 def
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two module-level defs named confusion_matrix existed; the first
(mask, confidence_level=0.9) was a near-duplicate of correlation_matrix
and was unconditionally shadowed by the second
(prediction, observation) ~40 lines later. Every caller — in
scripts/calibration and scripts/examples — uses the
(prediction, observation) signature. Remove the dead first def.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 src/sp_validation/run_joint_cat.py | 19 -------------------
 1 file changed, 19 deletions(-)

diff --git a/src/sp_validation/run_joint_cat.py b/src/sp_validation/run_joint_cat.py
index 51a4d84e..35b19fea 100644
--- a/src/sp_validation/run_joint_cat.py
+++ b/src/sp_validation/run_joint_cat.py
@@ -1183,25 +1183,6 @@ def run(self):
 
         """
 
-def confusion_matrix(mask, confidence_level=0.9):
-
-    n_key = len(mask)
-
-    cm = np.empty((n_key, n_key))
-    r_val = np.zeros_like(cm)
-    r_cl = np.empty((n_key, n_key, 2))
-
-    for idx, key1 in enumerate(mask):
-        for jdx, key2 in enumerate(mask):
-            res = stats.pearsonr(mask[key1], mask[key2])
-            r_val[idx][jdx] = res.statistic
-            r_cl[idx][jdx] = res.confidence_interval(
-                confidence_level=confidence_level
-            )
-
-    return r_val, r_cl
-
-
 def correlation_matrix(masks, confidence_level=0.9):
 
     n_key = len(masks)

From 7a531f103b78e8ea1cf8c985a566186657451e0e Mon Sep 17 00:00:00 2001
From: Cail Daley <cail.daley@cea.fr>
Date: Sat, 20 Jun 2026 15:05:48 +0200
Subject: [PATCH 08/16] refactor: remove dead cluster/convergence-map helpers

Five functions had zero callers anywhere in the repo (src, scripts,
papers, scratch, notebooks), including across the star-imports of
plots:
  cosmology.py: get_clusters, stack_mm3, gamma_T_tc, xi_gal_gal_tc
  plots.py:     plot_map_stacked
Removing the cosmology block also orphaned the imports that existed
solely for it (treecorr, fits, canfar, radec2xy, cKDTree, tqdm,
get_footprint); drop those too. test_cosmology + test_plots: 29 passed.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 src/sp_validation/cosmology.py | 252 ---------------------------------
 src/sp_validation/plots.py     |  64 ---------
 2 files changed, 316 deletions(-)

diff --git a/src/sp_validation/cosmology.py b/src/sp_validation/cosmology.py
index 701b5c3e..b7614280 100644
--- a/src/sp_validation/cosmology.py
+++ b/src/sp_validation/cosmology.py
@@ -14,17 +14,8 @@
 import pyccl as ccl
 import camb
 
-# For correlation function calculations
-import treecorr
 from astropy import cosmology
 from astropy.cosmology import Planck18
-from astropy.io import fits
-from cs_util import canfar
-from lenspack.geometry.projections.gnom import radec2xy
-from scipy.spatial import cKDTree
-from tqdm import tqdm
-
-from sp_validation.survey import get_footprint
 
 
 # =============================================================================
@@ -520,246 +511,3 @@ def get_theo_xi(
 
     # Convert to xi
     return c_ell_to_xi(cosmo, theta, ell, cl)
-
-
-# Convergence maps
-def stack_mm3(
-    ra,
-    dec,
-    e1,
-    e2,
-    w,
-    cluster_ra,
-    cluster_dec,
-    cluster_z,
-    radius=100,
-    n_match=100000,
-    tree=None,
-):
-    """Add docstring.
-
-    ...
-
-    """
-    # Project data
-    mean_dec = np.mean(dec)
-    mean_ra = np.mean(ra)
-    xx, yy = radec2xy(mean_ra, mean_dec, ra, dec)
-    xx_clust, yy_clust = radec2xy(mean_ra, mean_dec, cluster_ra, cluster_dec)
-
-    # Hardcoded cosmology for angular diameter distance calculation
-    cosmo = cosmology.FlatLambdaCDM(H0=70.0, Om0=0.3)
-
-    if tree is None:
-        tree = cKDTree(np.array([xx, yy]).T)
-
-    k = 0
-    for ra_c, dec_c, z_c in tqdm(
-        zip(xx_clust, yy_clust, cluster_z),
-        total=len(xx_clust),
-    ):
-
-        d_ang = cosmo.angular_diameter_distance(z_c).value  # Rad
-
-        R_max_ang = radius / d_ang  # Rad         / deg_to_rad  # Deg
-
-        res_match = tree.query(np.array([ra_c, dec_c]).T, k=n_match)
-
-        ind_gal = res_match[1][np.where(res_match[0] < R_max_ang)]
-
-        ra_centered = (xx[ind_gal] - ra_c) / R_max_ang
-        dec_centered = (yy[ind_gal] - dec_c) / R_max_ang
-
-        if k == 0:
-            all_ra = ra_centered
-            all_dec = dec_centered
-            all_e1 = e1[ind_gal]
-            all_e2 = e2[ind_gal]
-            all_w = w[ind_gal]
-        else:
-            all_ra = np.concatenate((all_ra, ra_centered))
-            all_dec = np.concatenate((all_dec, dec_centered))
-            all_e1 = np.concatenate((all_e1, e1[ind_gal]))
-            all_e2 = np.concatenate((all_e2, e2[ind_gal]))
-            all_w = np.concatenate((all_w, w[ind_gal]))
-
-        k += 1
-
-    return all_ra, all_dec, all_e1, all_e2, all_w
-
-
-def gamma_T_tc(ra_pos, dec_pos, ra_cat, dec_cat, e1_cat, e2_cat, w_cat=None):
-    """Gamma T tc.
-
-    Compute cross-correlation between positions (forground) and lensing
-    (background) catalogue. Also called galaxy-galaxy lensing or population
-    lensing.
-
-    Parameters
-    ----------
-    ra_pos : array of float
-        RA coordinates of foreground catalogue
-    dec_pos : array of float
-        DEC coordinates of foreground catalogue
-    ra_cat : array of float
-        RA coordinates of background catalogue
-    dec_cat : array of float
-        DEC coordinates of background catalogue
-    e1_cat : array of float
-        ellipticity component 1 of background catalogue
-    e2_cat : array of float
-        ellipticity component 2 of background catalogue
-    w_cat : array of float, optional, default=None
-        weight of background catalogue
-
-    Returns
-    -------
-    meanr : array of float
-        spatial bin centres
-    meanlogr : array of float
-        log of spatial bin centres
-    xi : array of float
-        tangential shear (E-mode)
-    xi_im : array of float
-        cross-component shear (B- or parity mode)
-    rms : array of float
-        R.M.S of both xi and xi_im
-    """
-    cat_pos = treecorr.Catalog(
-        ra=ra_pos,
-        dec=dec_pos,
-        ra_units="degrees",
-        dec_units="degrees",
-    )
-    cat_gal = treecorr.Catalog(
-        ra=ra_cat,
-        dec=dec_cat,
-        g1=e1_cat,
-        g2=e2_cat,
-        w=w_cat,
-        ra_units="degrees",
-        dec_units="degrees",
-    )
-
-    config = {
-        "ra_units": "degrees",
-        "dec_units": "degrees",
-        "max_sep": 60,
-        "min_sep": 0.7,
-        "sep_units": "arcminutes",
-        "nbins": 30,
-    }
-
-    ng = treecorr.NGCorrelation(config)
-
-    ng.process(cat_pos, cat_gal)
-
-    return ng.meanr, ng.meanlogr, ng.xi, ng.xi_im, np.sqrt(ng.varxi)
-
-
-def xi_gal_gal_tc(
-    ra_gal,
-    dec_gal,
-    e1_gal,
-    e2_gal,
-    w_gal,
-    ra_star,
-    dec_star,
-    e1_star,
-    e2_star,
-    w_star=None,
-    theta_min_amin=2,
-    theta_max_amin=200,
-    n_theta=20,
-):
-    """Add docstring.
-
-    ...
-
-    """
-    cat_gal = treecorr.Catalog(
-        ra=ra_gal,
-        dec=dec_gal,
-        g1=e1_gal,
-        g2=e2_gal,
-        w=w_gal,
-        ra_units="degrees",
-        dec_units="degrees",
-    )
-    cat_star = treecorr.Catalog(
-        ra=ra_star,
-        dec=dec_star,
-        g1=e1_star,
-        g2=e2_star,
-        w=w_star,
-        ra_units="degrees",
-        dec_units="degrees",
-    )
-
-    config = {
-        "ra_units": "degrees",
-        "dec_units": "degrees",
-        "sep_units": "arcminutes",
-        "min_sep": theta_min_amin,
-        "max_sep": theta_max_amin,
-        "nbins": n_theta,
-    }
-
-    ng = treecorr.GGCorrelation(config)
-
-    ng.process(cat_gal, cat_star)
-
-    return ng
-
-
-def get_clusters(
-    cluster_cat_name,
-    vos_dir,
-    output_dir,
-    field_name,
-    verbose=False,
-):
-    """Get Clusters.
-
-    Return cluster information from file on VOspace
-
-    Parameters
-    ----------
-    cluster_cat_name : string
-        cluster catalogue file name
-    vos_dir : string
-        directory on VOspace
-    field_name : string
-        survey footprint name
-    verbose : bool, optional, default=False
-        verbose output if True
-
-    Returns
-    -------
-    tuple
-        cluster information (ra, dec, z, SZ-mass)
-    """
-    out_path = f"{output_dir}/{cluster_cat_name}"
-    canfar.download(f"{vos_dir}/{cluster_cat_name}", out_path, verbose=verbose)
-
-    cluster_cat = fits.getdata(out_path)
-    m_good_cluster = (cluster_cat["MSZ"] != 0) & cluster_cat["COSMO"]
-
-    m_cluster_foot = get_footprint(
-        field_name,
-        cluster_cat["RA"][m_good_cluster],
-        cluster_cat["DEC"][m_good_cluster],
-    )
-
-    # Apply both cuts at once
-    final_mask = m_good_cluster.copy()
-    final_mask[m_good_cluster] = m_cluster_foot
-
-    cluster_cut = {
-        "ra": cluster_cat["RA"][final_mask],
-        "dec": cluster_cat["DEC"][final_mask],
-        "z": cluster_cat["REDSHIFT"][final_mask],
-        "M": cluster_cat["MSZ"][final_mask] * 1e14,
-    }
-
-    return cluster_cut
diff --git a/src/sp_validation/plots.py b/src/sp_validation/plots.py
index f5c0cef0..fd350a7a 100644
--- a/src/sp_validation/plots.py
+++ b/src/sp_validation/plots.py
@@ -244,70 +244,6 @@ def plot_map(
     return vlim
 
 
-def plot_map_stacked(kappa, title, radius, output_path, vlim=None):
-    """Plot Map Stacked.
-
-    Plot stacked convergence map.
-
-    Parameters
-    ----------
-    kappa : image
-        map values
-    title : string
-        plot title
-    output_path : string
-        figure output file path
-
-    vlim : array(2) of float, optional, default=None
-        map limits; min and max of kappa if not given
-
-    Returns
-    -------
-    array(2) of float
-        map limits
-
-    """
-    plots.figure(figsize=(10, 10))
-
-    # plot image
-    plt.imshow(kappa)
-
-    # set colorbar
-    if not vlim:
-        vlim = plt.gci().get_clim()
-    else:
-        plt.gci().set_clim(vlim)
-    plt.colorbar()
-
-    npix = kappa.shape[0]
-
-    # mark center
-    plt.plot(npix / 2 - 1, npix / 2 - 1, "+")
-
-    # axes ticks
-    n_ticks = 4
-    loc = np.arange(0, npix + npix / n_ticks, step=npix / n_ticks)
-    lab = np.round(
-        np.arange(
-            -radius,
-            radius + radius * 2 / n_ticks,
-            step=radius * 2 / n_ticks,
-        ),
-        1,
-    )
-    plt.xticks(loc, labels=lab)
-    plt.yticks(loc, labels=lab)
-
-    plt.xlabel(r"separation $R$ [Mpc]")
-    plt.ylabel(r"separation $R$ [Mpc]")
-
-    plt.title(title)
-
-    plots.savefig(output_path)
-
-    return vlim
-
-
 def plot_binned_one(
     ax,
     quantity,

From 3bea49036585b8a7460f32357d0fd9c002a23dc0 Mon Sep 17 00:00:00 2001
From: Cail Daley <cail.daley@cea.fr>
Date: Sat, 20 Jun 2026 15:10:49 +0200
Subject: [PATCH 09/16] refactor(src): rename util -> format; drop dead
 transform_nan import
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

util.py held only millify / print_millified — number formatting, not a
grab-bag. Rename it to format.py and sweep all importers:
  internal: cat.py, run_joint_cat.py (util.millify -> format.millify)
  scripts:  apply_alpha.py, examples/demo_calibrate_minimal_cat.py,
            calibration/extract_info.py (star-import x2)
  papers:   catalog/hist_mag.py
Register the rename in the dangling-move guard; update package __all__.

B1: scripts/plot_leakage.py imported transform_nan from the old util
module — a symbol removed from the library long ago and never used in
the script. Drop the dead import.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 papers/catalog/hist_mag.py                               | 2 +-
 scripts/apply_alpha.py                                   | 2 +-
 scripts/calibration/extract_info.py                      | 4 ++--
 scripts/examples/demo_calibrate_minimal_cat.py           | 2 +-
 scripts/plot_leakage.py                                  | 1 -
 src/sp_validation/__init__.py                            | 2 +-
 src/sp_validation/cat.py                                 | 6 +++---
 src/sp_validation/{util.py => format.py}                 | 4 ++--
 src/sp_validation/run_joint_cat.py                       | 8 ++++----
 src/sp_validation/tests/test_dangling_move_references.py | 1 +
 10 files changed, 16 insertions(+), 16 deletions(-)
 rename src/sp_validation/{util.py => format.py} (91%)

diff --git a/papers/catalog/hist_mag.py b/papers/catalog/hist_mag.py
index edc45fa1..0098cc39 100644
--- a/papers/catalog/hist_mag.py
+++ b/papers/catalog/hist_mag.py
@@ -25,7 +25,7 @@
 from io import StringIO
 
 from sp_validation import run_joint_cat as sp_joint
-from sp_validation import util
+from sp_validation import format
 from sp_validation.calibration import metacal
 from sp_validation import calibration
 import sp_validation.cat as cat
diff --git a/scripts/apply_alpha.py b/scripts/apply_alpha.py
index 1d357481..0769814e 100755
--- a/scripts/apply_alpha.py
+++ b/scripts/apply_alpha.py
@@ -23,7 +23,7 @@
 from shear_psf_leakage.leakage import func_bias_2d
 
 from sp_validation import cat
-from sp_validation import util
+from sp_validation import format
 
 
 def params_default():
diff --git a/scripts/calibration/extract_info.py b/scripts/calibration/extract_info.py
index bad32a08..59c08baa 100644
--- a/scripts/calibration/extract_info.py
+++ b/scripts/calibration/extract_info.py
@@ -326,7 +326,7 @@
 
 # -
 from sp_validation.survey import *
-from sp_validation.util import *
+from sp_validation.format import *
 
 # ## metacalibration for galaxies
 
@@ -941,7 +941,7 @@
 import os
 
 from sp_validation.cat import *
-from sp_validation.util import *
+from sp_validation.format import *
 
 # Shear response per galaxy
 R_shear_ind = gal_metacal.R_shear
diff --git a/scripts/examples/demo_calibrate_minimal_cat.py b/scripts/examples/demo_calibrate_minimal_cat.py
index 0d064585..edc2428f 100644
--- a/scripts/examples/demo_calibrate_minimal_cat.py
+++ b/scripts/examples/demo_calibrate_minimal_cat.py
@@ -24,7 +24,7 @@
 import matplotlib.pylab as plt
 
 from sp_validation import run_joint_cat as sp_joint
-from sp_validation import util
+from sp_validation import format
 from sp_validation.calibration import metacal
 import sp_validation.cat as cat
 
diff --git a/scripts/plot_leakage.py b/scripts/plot_leakage.py
index 354ef858..f37fa4d9 100755
--- a/scripts/plot_leakage.py
+++ b/scripts/plot_leakage.py
@@ -11,7 +11,6 @@
 
 from sp_validation.cat import *
 from sp_validation.plots import *
-from sp_validation.util import transform_nan
 from sp_validation import io
 
 
diff --git a/src/sp_validation/__init__.py b/src/sp_validation/__init__.py
index 9683397d..10f99859 100644
--- a/src/sp_validation/__init__.py
+++ b/src/sp_validation/__init__.py
@@ -5,6 +5,7 @@
     'cat',
     'calibration',
     'cosmology',
+    'format',
     'galaxy',
     'glass_mock',
     'plots',
@@ -12,5 +13,4 @@
     'run_joint_cat',
     'statistics',
     'survey',
-    'util',
 ]
diff --git a/src/sp_validation/cat.py b/src/sp_validation/cat.py
index 4b82ff97..ab3a4a2b 100644
--- a/src/sp_validation/cat.py
+++ b/src/sp_validation/cat.py
@@ -24,7 +24,7 @@
 
 from cs_util import cat
 
-from sp_validation import util
+from sp_validation import format
 from sp_validation import io
 from sp_validation.survey import get_footprint
 from sp_validation.version import __version__
@@ -85,7 +85,7 @@ def print_mean_ellipticity(
     ind_v = ind_val[0] & ind_val[1]
 
     n_tot_val = len(np.where(ind_v)[0])
-    n_tot_mil = util.millify(n_tot_val)
+    n_tot_mil = format.millify(n_tot_val)
     msg = (
         f"Total number of valid objects ({ell_col_name}0,1 != {invalid})"
         + f" = {n_tot_val} = {n_tot_mil}"
@@ -143,7 +143,7 @@ def print_some_quantities(dd, stats_file, verbose=False):
         print("")
 
     n_tot = len(dd)
-    n_mil = util.millify(n_tot)
+    n_mil = format.millify(n_tot)
     msg = f"Total number of objects = {n_tot} = {n_mil}"
     io.print_stats(msg, stats_file, verbose=verbose)
 
diff --git a/src/sp_validation/util.py b/src/sp_validation/format.py
similarity index 91%
rename from src/sp_validation/util.py
rename to src/sp_validation/format.py
index bd179ba0..a7b4cee3 100644
--- a/src/sp_validation/util.py
+++ b/src/sp_validation/format.py
@@ -1,6 +1,6 @@
-"""UTIL.
+"""FORMAT.
 
-:Description: This script contains utility methods.
+:Description: Human-readable formatting of large numbers.
 
 :Author: Martin Kilbinger <martin.kilblinger@cea.fr>
 
diff --git a/src/sp_validation/run_joint_cat.py b/src/sp_validation/run_joint_cat.py
index 35b19fea..66043d09 100644
--- a/src/sp_validation/run_joint_cat.py
+++ b/src/sp_validation/run_joint_cat.py
@@ -31,7 +31,7 @@
 from cs_util import cat
 from cs_util import args as cs_args
 
-from . import util
+from . import format
 from . import calibration
 from . import cat as sp_cat
 
@@ -373,7 +373,7 @@ def get_n_obj(self, patches, base_path, input_sub_path):
             n_obj += this_n
 
         if self._params["verbose"]:
-            print(f"Found a total of {n_obj} (~{util.millify(n_obj)}) objects.")
+            print(f"Found a total of {n_obj} (~{format.millify(n_obj)}) objects.")
 
         return hdu_lists, n_obj_list, n_obj
 
@@ -627,7 +627,7 @@ def merge_catalogues(self, patches, base_path="."):
                 )
             if self._params["verbose"]:
                 print(
-                    f"{patch}: Added {len(dat)} (~{util.millify(len(dat))})"
+                    f"{patch}: Added {len(dat)} (~{format.millify(len(dat))})"
                     + f" objects (from {start} to {end-1})."
                 )
             start = end
@@ -1146,7 +1146,7 @@ def read_cat(self, load_into_memory=False):
 
         if verbose:
             print(
-                f"Found {len(dat)} (~{util.millify(len(dat))}) objects"
+                f"Found {len(dat)} (~{format.millify(len(dat))}) objects"
                 + " in catalogue"
             )
 
diff --git a/src/sp_validation/tests/test_dangling_move_references.py b/src/sp_validation/tests/test_dangling_move_references.py
index 745391ec..d74bf33d 100644
--- a/src/sp_validation/tests/test_dangling_move_references.py
+++ b/src/sp_validation/tests/test_dangling_move_references.py
@@ -32,6 +32,7 @@
         "papers/consistency",
     ),
     ("sp_validation.info", "sp_validation.version (__version__); __name__ dropped"),
+    ("sp_validation/util.py", "sp_validation/format.py"),
 )
 EXCLUDED_DIRS = {
     ".git",

From bd41a133bbb75a21dbd6af48abc12eb8c306c512 Mon Sep 17 00:00:00 2001
From: Cail Daley <cail.daley@cea.fr>
Date: Sat, 20 Jun 2026 15:13:16 +0200
Subject: [PATCH 10/16] refactor(src): move SquareRootScale to plots, re-export
 from rho_tau
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

SquareRootScale is a matplotlib ScaleBase subclass — plotting
infrastructure, not rho/tau logic. Move the class and its
register_scale call into plots.py (which now carries the
matplotlib.scale/ticker/transforms imports it needs). rho_tau.py keeps
a compat re-export so existing
`from sp_validation.rho_tau import SquareRootScale` callers — in
cosmo_inference, scratch/guerrini, papers/harmonic — still resolve.

workflow/scripts/plotting_utils.py holds a near-duplicate that
diverges (ScalarFormatter(useMathText=True); inverted transform method
named transform_non_affine not transform), so it is left in place.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 src/sp_validation/plots.py   | 54 ++++++++++++++++++++++++++++++++++
 src/sp_validation/rho_tau.py | 56 ++----------------------------------
 2 files changed, 57 insertions(+), 53 deletions(-)

diff --git a/src/sp_validation/plots.py b/src/sp_validation/plots.py
index fd350a7a..06fa4f25 100644
--- a/src/sp_validation/plots.py
+++ b/src/sp_validation/plots.py
@@ -14,6 +14,9 @@
 import healpy as hp
 import healsparse as hsp
 import matplotlib.pylab as plt
+import matplotlib.scale as mscale
+import matplotlib.ticker as ticker
+import matplotlib.transforms as mtransforms
 import numpy as np
 import skyproj
 from astropy import units as u
@@ -24,6 +27,57 @@
 from sp_validation.plot_style import *
 
 
+class SquareRootScale(mscale.ScaleBase):
+    """
+    ScaleBase class for generating square root scale.
+
+    Usage example: axis.set_yscale('squareroot')
+
+    """
+
+    name = "squareroot"
+
+    def __init__(self, axis, **kwargs):
+        mscale.ScaleBase.__init__(self, axis, **kwargs)
+
+    def set_default_locators_and_formatters(self, axis):
+        axis.set_major_locator(ticker.AutoLocator())
+        axis.set_major_formatter(ticker.ScalarFormatter())
+        axis.set_minor_locator(ticker.NullLocator())
+        axis.set_minor_formatter(ticker.NullFormatter())
+
+    def limit_range_for_scale(self, vmin, vmax, minpos):
+        return max(0.0, vmin), vmax
+
+    class SquareRootTransform(mtransforms.Transform):
+        input_dims = 1
+        output_dims = 1
+        is_separable = True
+
+        def transform_non_affine(self, a):
+            return np.array(a) ** 0.5
+
+        def inverted(self):
+            return SquareRootScale.InvertedSquareRootTransform()
+
+    class InvertedSquareRootTransform(mtransforms.Transform):
+        input_dims = 1
+        output_dims = 1
+        is_separable = True
+
+        def transform(self, a):
+            return np.array(a) ** 2
+
+        def inverted(self):
+            return SquareRootScale.SquareRootTransform()
+
+    def get_transform(self):
+        return self.SquareRootTransform()
+
+
+mscale.register_scale(SquareRootScale)
+
+
 def plot_spatial_density(
     ra, dec, title, x_label, y_label, cbar_label, out_path, n_grid=1000, verbose=False
 ):
diff --git a/src/sp_validation/rho_tau.py b/src/sp_validation/rho_tau.py
index a7abb93c..3040ef50 100644
--- a/src/sp_validation/rho_tau.py
+++ b/src/sp_validation/rho_tau.py
@@ -2,63 +2,13 @@
 import time
 from pathlib import Path
 
-import matplotlib.scale as mscale
-import matplotlib.ticker as ticker
-import matplotlib.transforms as mtransforms
 import numpy as np
 from shear_psf_leakage.rho_tau_cov import CovTauTh
 from shear_psf_leakage.rho_tau_stat import RhoStat, TauStat
 
-
-class SquareRootScale(mscale.ScaleBase):
-    """
-    ScaleBase class for generating square root scale.
-
-    Usage example: axis.set_yscale('squareroot')
-
-    """
-
-    name = "squareroot"
-
-    def __init__(self, axis, **kwargs):
-        mscale.ScaleBase.__init__(self, axis, **kwargs)
-
-    def set_default_locators_and_formatters(self, axis):
-        axis.set_major_locator(ticker.AutoLocator())
-        axis.set_major_formatter(ticker.ScalarFormatter())
-        axis.set_minor_locator(ticker.NullLocator())
-        axis.set_minor_formatter(ticker.NullFormatter())
-
-    def limit_range_for_scale(self, vmin, vmax, minpos):
-        return max(0.0, vmin), vmax
-
-    class SquareRootTransform(mtransforms.Transform):
-        input_dims = 1
-        output_dims = 1
-        is_separable = True
-
-        def transform_non_affine(self, a):
-            return np.array(a) ** 0.5
-
-        def inverted(self):
-            return SquareRootScale.InvertedSquareRootTransform()
-
-    class InvertedSquareRootTransform(mtransforms.Transform):
-        input_dims = 1
-        output_dims = 1
-        is_separable = True
-
-        def transform(self, a):
-            return np.array(a) ** 2
-
-        def inverted(self):
-            return SquareRootScale.SquareRootTransform()
-
-    def get_transform(self):
-        return self.SquareRootTransform()
-
-
-mscale.register_scale(SquareRootScale)
+# SquareRootScale now lives in sp_validation.plots; re-exported here so that
+# `from sp_validation.rho_tau import SquareRootScale` keeps working.
+from sp_validation.plots import SquareRootScale  # noqa: F401
 
 not_square_size = [
     "DES",

From 0b1d2f59768ca87d71e80c70aa67657433b0b504 Mon Sep 17 00:00:00 2001
From: Cail Daley <cail.daley@cea.fr>
Date: Sat, 20 Jun 2026 15:14:28 +0200
Subject: [PATCH 11/16] fix(papers/harmonic): repoint SquareRootScale off dead
 utils_cosmo_val

Six harmonic-space scripts imported SquareRootScale from
sp_validation.utils_cosmo_val, a module that no longer exists. Repoint
to sp_validation.rho_tau (the compat re-export), matching the sibling
2026_03_17 script. get_params_rho_tau was already correctly imported
from rho_tau. No other utils_cosmo_val imports remain (the two
scratch/guerrini mentions are prose noting the module's removal).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 papers/harmonic/2025_09_11_namaster_covariance.py    | 2 +-
 papers/harmonic/2025_09_11_psf_leakage_cell.py       | 2 +-
 papers/harmonic/2025_09_11_validation_cov_cell.py    | 2 +-
 papers/harmonic/2025_10_08_plot_data_vectors.py      | 2 +-
 papers/harmonic/2026_01_05_get_p_value_glass_mock.py | 2 +-
 papers/harmonic/2026_01_13_check_BB_covariance.py    | 2 +-
 6 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/papers/harmonic/2025_09_11_namaster_covariance.py b/papers/harmonic/2025_09_11_namaster_covariance.py
index dc9360f8..7e3c7693 100644
--- a/papers/harmonic/2025_09_11_namaster_covariance.py
+++ b/papers/harmonic/2025_09_11_namaster_covariance.py
@@ -19,7 +19,7 @@
 import pymaster as nmt
 import camb
 
-from sp_validation.utils_cosmo_val import SquareRootScale
+from sp_validation.rho_tau import SquareRootScale
 from sp_validation.cosmo_val import CosmologyValidation
 from sp_validation.rho_tau import get_params_rho_tau
 
diff --git a/papers/harmonic/2025_09_11_psf_leakage_cell.py b/papers/harmonic/2025_09_11_psf_leakage_cell.py
index 45e2b473..b8456dcc 100644
--- a/papers/harmonic/2025_09_11_psf_leakage_cell.py
+++ b/papers/harmonic/2025_09_11_psf_leakage_cell.py
@@ -24,7 +24,7 @@
 import healpy as hp
 import pymaster as nmt
 
-from sp_validation.utils_cosmo_val import SquareRootScale
+from sp_validation.rho_tau import SquareRootScale
 
 mscale.register_scale(SquareRootScale)
 
diff --git a/papers/harmonic/2025_09_11_validation_cov_cell.py b/papers/harmonic/2025_09_11_validation_cov_cell.py
index 451e1704..25c2856c 100644
--- a/papers/harmonic/2025_09_11_validation_cov_cell.py
+++ b/papers/harmonic/2025_09_11_validation_cov_cell.py
@@ -25,7 +25,7 @@
 from tqdm import tqdm
 import healpy as hp
 
-from sp_validation.utils_cosmo_val import SquareRootScale
+from sp_validation.rho_tau import SquareRootScale
 
 mscale.register_scale(SquareRootScale)
 
diff --git a/papers/harmonic/2025_10_08_plot_data_vectors.py b/papers/harmonic/2025_10_08_plot_data_vectors.py
index 417d55f2..e6f3d9a8 100644
--- a/papers/harmonic/2025_10_08_plot_data_vectors.py
+++ b/papers/harmonic/2025_10_08_plot_data_vectors.py
@@ -21,7 +21,7 @@
 from matplotlib import scale as mscale
 import matplotlib.ticker as mticker
 import seaborn as sns
-from sp_validation.utils_cosmo_val import SquareRootScale
+from sp_validation.rho_tau import SquareRootScale
 
 from utils import get_chi2_and_pte
 
diff --git a/papers/harmonic/2026_01_05_get_p_value_glass_mock.py b/papers/harmonic/2026_01_05_get_p_value_glass_mock.py
index e61e933b..bb14d06e 100644
--- a/papers/harmonic/2026_01_05_get_p_value_glass_mock.py
+++ b/papers/harmonic/2026_01_05_get_p_value_glass_mock.py
@@ -23,7 +23,7 @@
 import seaborn as sns
 from tqdm import tqdm
 
-from sp_validation.utils_cosmo_val import SquareRootScale
+from sp_validation.rho_tau import SquareRootScale
 
 mscale.register_scale(SquareRootScale)
 
diff --git a/papers/harmonic/2026_01_13_check_BB_covariance.py b/papers/harmonic/2026_01_13_check_BB_covariance.py
index 3c7efede..f62d1662 100644
--- a/papers/harmonic/2026_01_13_check_BB_covariance.py
+++ b/papers/harmonic/2026_01_13_check_BB_covariance.py
@@ -26,7 +26,7 @@
 import healpy as hp
 import scipy.stats as stats
 
-from sp_validation.utils_cosmo_val import SquareRootScale
+from sp_validation.rho_tau import SquareRootScale
 
 mscale.register_scale(SquareRootScale)
 

From 02c40503060ec5b4183401ff2c20f37a83e79e30 Mon Sep 17 00:00:00 2001
From: Cail Daley <cail.daley@cea.fr>
Date: Sat, 20 Jun 2026 15:17:58 +0200
Subject: [PATCH 12/16] refactor(src): rename run_joint_cat -> catalog_builders
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The module holds the catalogue-builder runner classes (JointCat,
ApplyHspMasks, CalibrateCat) and their run_* entry-point functions;
catalog_builders names that role. Sweep all importers (the
`as sp_joint` alias is preserved, only the module name changes):
  scripts/apply_hsp_masks.py
  scripts/examples/{demo_check_footprint, create_binned_mask_comprehensive,
    demo_comprehensive_to_minimal_cat, demo_create_footprint_mask,
    demo_calibrate_minimal_cat}.py
  scripts/calibration/{create_joint_comprehensive_cat (direct symbol),
    demo_apply_hsp_masks, calibrate_comprehensive_cat}.py
  scratch/kilbinger/demo_binned_mask.py
  papers/catalog/hist_mag.py
Also update the two prose references in docs and update __all__ and
the dangling-move guard.

The OPTIONAL masks.py extraction (Mask + mask-algebra fns) is deferred:
those symbols are reached externally through the sp_joint.* module
alias (Mask, get_masks_from_config, print_mask_stats across 4 scripts +
papers/catalog), so splitting them out would require either re-exports
or sweeping the public call surface — beyond a behavior-preserving move.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 docs/source/post_processing.md                              | 2 +-
 papers/catalog/hist_mag.py                                  | 2 +-
 scratch/kilbinger/demo_binned_mask.py                       | 2 +-
 scripts/apply_hsp_masks.py                                  | 2 +-
 scripts/calibration/README.md                               | 2 +-
 scripts/calibration/calibrate_comprehensive_cat.py          | 2 +-
 scripts/calibration/create_joint_comprehensive_cat.py       | 2 +-
 scripts/calibration/demo_apply_hsp_masks.py                 | 2 +-
 scripts/examples/create_binned_mask_comprehensive.py        | 2 +-
 scripts/examples/demo_calibrate_minimal_cat.py              | 2 +-
 scripts/examples/demo_check_footprint.py                    | 2 +-
 scripts/examples/demo_comprehensive_to_minimal_cat.py       | 2 +-
 scripts/examples/demo_create_footprint_mask.py              | 2 +-
 src/sp_validation/__init__.py                               | 2 +-
 src/sp_validation/{run_joint_cat.py => catalog_builders.py} | 4 ++--
 src/sp_validation/tests/test_dangling_move_references.py    | 1 +
 16 files changed, 17 insertions(+), 16 deletions(-)
 rename src/sp_validation/{run_joint_cat.py => catalog_builders.py} (99%)

diff --git a/docs/source/post_processing.md b/docs/source/post_processing.md
index 814fb25b..6d2f2423 100644
--- a/docs/source/post_processing.md
+++ b/docs/source/post_processing.md
@@ -32,7 +32,7 @@ This step is carried out per patch. Parameters have to be set via the python con
 ### 2. Merge catalogues
 
 The patch-wise comprehensive catalogues extracted in the previous step are merged using the script `scripts/calibration/create_joint_comprehensive_cat.py`, which is a front-end
-of the `sp_validation` library class `run_joint_cat:JointCat`.
+of the `sp_validation` library class `catalog_builders:JointCat`.
 
 ### 3. Apply external masks
 
diff --git a/papers/catalog/hist_mag.py b/papers/catalog/hist_mag.py
index 0098cc39..bb0e1deb 100644
--- a/papers/catalog/hist_mag.py
+++ b/papers/catalog/hist_mag.py
@@ -24,7 +24,7 @@
 from astropy.io import fits
 from io import StringIO
 
-from sp_validation import run_joint_cat as sp_joint
+from sp_validation import catalog_builders as sp_joint
 from sp_validation import format
 from sp_validation.calibration import metacal
 from sp_validation import calibration
diff --git a/scratch/kilbinger/demo_binned_mask.py b/scratch/kilbinger/demo_binned_mask.py
index 33f02e48..a8f5bae2 100644
--- a/scratch/kilbinger/demo_binned_mask.py
+++ b/scratch/kilbinger/demo_binned_mask.py
@@ -21,7 +21,7 @@
 from cs_util import cat as cs_cat
 from cs_util import plots as cs_plots
 
-from sp_validation import run_joint_cat as sp_joint
+from sp_validation import catalog_builders as sp_joint
 
 # %%
 # Initialize calibration class instance (for config and data)
diff --git a/scripts/apply_hsp_masks.py b/scripts/apply_hsp_masks.py
index 48fc8442..a54e7970 100755
--- a/scripts/apply_hsp_masks.py
+++ b/scripts/apply_hsp_masks.py
@@ -2,7 +2,7 @@
 
 import sys
 
-from sp_validation import run_joint_cat as sp_joint
+from sp_validation import catalog_builders as sp_joint
 
 
 def main(argv=None):
diff --git a/scripts/calibration/README.md b/scripts/calibration/README.md
index c70eb706..870c87ce 100644
--- a/scripts/calibration/README.md
+++ b/scripts/calibration/README.md
@@ -7,7 +7,7 @@ in order. See `docs/source/post_processing.md` for the full prose.
 | Step | Script | Does |
 |------|--------|------|
 | 1 | `extract_info.py` | Extract metacal + diagnostic info per patch; create pre-calibration shear catalogues. Configured via `params.py`. |
-| 2 | `create_joint_comprehensive_cat.py` | Merge the patch-wise comprehensive catalogues into one joint catalogue (front-end of `run_joint_cat.JointCat`). |
+| 2 | `create_joint_comprehensive_cat.py` | Merge the patch-wise comprehensive catalogues into one joint catalogue (front-end of `catalog_builders.JointCat`). |
 | 3 | `demo_apply_hsp_masks.py` | Add the structural and coverage (HealSparse) masks. |
 | 4 | `calibrate_comprehensive_cat.py` | Galaxy selection + metacalibration. Uses the mask configs in `config/calibration/`. |
 
diff --git a/scripts/calibration/calibrate_comprehensive_cat.py b/scripts/calibration/calibrate_comprehensive_cat.py
index 9596523e..84825347 100644
--- a/scripts/calibration/calibrate_comprehensive_cat.py
+++ b/scripts/calibration/calibrate_comprehensive_cat.py
@@ -24,7 +24,7 @@
 
 import sp_validation.cat as cat
 from sp_validation import calibration
-from sp_validation import run_joint_cat as sp_joint
+from sp_validation import catalog_builders as sp_joint
 from sp_validation.calibration import metacal
 
 # %%
diff --git a/scripts/calibration/create_joint_comprehensive_cat.py b/scripts/calibration/create_joint_comprehensive_cat.py
index 14289c67..ab541eec 100755
--- a/scripts/calibration/create_joint_comprehensive_cat.py
+++ b/scripts/calibration/create_joint_comprehensive_cat.py
@@ -2,7 +2,7 @@
 
 import sys
 
-from sp_validation.run_joint_cat import run_joint_comprehensive_cat
+from sp_validation.catalog_builders import run_joint_comprehensive_cat
 
 def main(argv=None):
 
diff --git a/scripts/calibration/demo_apply_hsp_masks.py b/scripts/calibration/demo_apply_hsp_masks.py
index c752e0e7..dc81de74 100644
--- a/scripts/calibration/demo_apply_hsp_masks.py
+++ b/scripts/calibration/demo_apply_hsp_masks.py
@@ -25,7 +25,7 @@
 import re
 import tracemalloc
 
-from sp_validation import run_joint_cat as sp_joint
+from sp_validation import catalog_builders as sp_joint
 
 # +
 # Trace and print used memory if True
diff --git a/scripts/examples/create_binned_mask_comprehensive.py b/scripts/examples/create_binned_mask_comprehensive.py
index a8347d07..2b433359 100644
--- a/scripts/examples/create_binned_mask_comprehensive.py
+++ b/scripts/examples/create_binned_mask_comprehensive.py
@@ -24,7 +24,7 @@
 from astropy.io import fits
 from cs_util import cat as cs_cat
 
-from sp_validation import run_joint_cat as sp_joint
+from sp_validation import catalog_builders as sp_joint
 
 # %%
 # Initialize calibration class instance (for config and data)
diff --git a/scripts/examples/demo_calibrate_minimal_cat.py b/scripts/examples/demo_calibrate_minimal_cat.py
index edc2428f..77d26566 100644
--- a/scripts/examples/demo_calibrate_minimal_cat.py
+++ b/scripts/examples/demo_calibrate_minimal_cat.py
@@ -23,7 +23,7 @@
 from astropy.io import fits
 import matplotlib.pylab as plt
 
-from sp_validation import run_joint_cat as sp_joint
+from sp_validation import catalog_builders as sp_joint
 from sp_validation import format
 from sp_validation.calibration import metacal
 import sp_validation.cat as cat
diff --git a/scripts/examples/demo_check_footprint.py b/scripts/examples/demo_check_footprint.py
index 24f773f1..2c33da59 100644
--- a/scripts/examples/demo_check_footprint.py
+++ b/scripts/examples/demo_check_footprint.py
@@ -24,7 +24,7 @@
 from astropy.io import fits
 from astropy.table import Table
 
-from sp_validation import run_joint_cat as sp_joint
+from sp_validation import catalog_builders as sp_joint
 
 # -
 
diff --git a/scripts/examples/demo_comprehensive_to_minimal_cat.py b/scripts/examples/demo_comprehensive_to_minimal_cat.py
index 553e9ce1..ca5e9aa8 100644
--- a/scripts/examples/demo_comprehensive_to_minimal_cat.py
+++ b/scripts/examples/demo_comprehensive_to_minimal_cat.py
@@ -28,7 +28,7 @@
 from astropy.io import fits
 
 # -
-from sp_validation import run_joint_cat as sp_joint
+from sp_validation import catalog_builders as sp_joint
 
 # Initialize calibration class instance
 obj = sp_joint.CalibrateCat()
diff --git a/scripts/examples/demo_create_footprint_mask.py b/scripts/examples/demo_create_footprint_mask.py
index d9506210..311ec8ed 100644
--- a/scripts/examples/demo_create_footprint_mask.py
+++ b/scripts/examples/demo_create_footprint_mask.py
@@ -28,7 +28,7 @@
 from cs_util.plots import FootprintPlotter
 
 from sp_validation import cosmo_val
-from sp_validation import run_joint_cat as sp_joint
+from sp_validation import catalog_builders as sp_joint
 
 # -
 
diff --git a/src/sp_validation/__init__.py b/src/sp_validation/__init__.py
index 10f99859..4d092306 100644
--- a/src/sp_validation/__init__.py
+++ b/src/sp_validation/__init__.py
@@ -4,13 +4,13 @@
     'b_modes',
     'cat',
     'calibration',
+    'catalog_builders',
     'cosmology',
     'format',
     'galaxy',
     'glass_mock',
     'plots',
     'rho_tau',
-    'run_joint_cat',
     'statistics',
     'survey',
 ]
diff --git a/src/sp_validation/run_joint_cat.py b/src/sp_validation/catalog_builders.py
similarity index 99%
rename from src/sp_validation/run_joint_cat.py
rename to src/sp_validation/catalog_builders.py
index 66043d09..f75da7ae 100644
--- a/src/sp_validation/run_joint_cat.py
+++ b/src/sp_validation/catalog_builders.py
@@ -1,7 +1,7 @@
-"""RUN JOINT CAT.
+"""CATALOG BUILDERS.
 
 This module implements classes to create, mask, and calibrate joint
-comprehensive catalogues.
+comprehensive catalogues, plus the run_* entry-point functions.
 
 :Author: Martin Kilbinger
 """
diff --git a/src/sp_validation/tests/test_dangling_move_references.py b/src/sp_validation/tests/test_dangling_move_references.py
index d74bf33d..a22e29a1 100644
--- a/src/sp_validation/tests/test_dangling_move_references.py
+++ b/src/sp_validation/tests/test_dangling_move_references.py
@@ -33,6 +33,7 @@
     ),
     ("sp_validation.info", "sp_validation.version (__version__); __name__ dropped"),
     ("sp_validation/util.py", "sp_validation/format.py"),
+    ("run_joint_cat", "catalog_builders"),
 )
 EXCLUDED_DIRS = {
     ".git",

From 575c3f4abff5b90deb63e6b71631bbd698f08723 Mon Sep 17 00:00:00 2001
From: Cail Daley <cail.daley@cea.fr>
Date: Sat, 20 Jun 2026 15:59:33 +0200
Subject: [PATCH 13/16] Extract masks.py from catalog_builders.py

Move the healsparse-backed spatial-masking cluster out of
catalog_builders.py into a dedicated masks.py: the Mask class plus
get_masks_from_config, print_mask_stats, correlation_matrix, and
confusion_matrix. Bodies are byte-identical; the move carries the
numexpr/scipy.stats imports those helpers need (now removed from
catalog_builders, which no longer references them).

catalog_builders.py re-exports the five symbols from sp_validation.masks
so external code using `from sp_validation import catalog_builders as
sp_joint` keeps resolving sp_joint.Mask, sp_joint.get_masks_from_config,
etc. The *Cat runner classes (ApplyHspMasks, ReadCat, run_* entry points)
stay; ApplyHspMasks uses healsparse directly, not the Mask class.

masks added to __init__.__all__. No MOVE_MAP entry: this is an
extraction-in-place (catalog_builders survives), not a retired path.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 src/sp_validation/__init__.py         |   1 +
 src/sp_validation/catalog_builders.py | 337 +------------------------
 src/sp_validation/masks.py            | 341 ++++++++++++++++++++++++++
 3 files changed, 353 insertions(+), 326 deletions(-)
 create mode 100644 src/sp_validation/masks.py

diff --git a/src/sp_validation/__init__.py b/src/sp_validation/__init__.py
index 4d092306..fd785f48 100644
--- a/src/sp_validation/__init__.py
+++ b/src/sp_validation/__init__.py
@@ -9,6 +9,7 @@
     'format',
     'galaxy',
     'glass_mock',
+    'masks',
     'plots',
     'rho_tau',
     'statistics',
diff --git a/src/sp_validation/catalog_builders.py b/src/sp_validation/catalog_builders.py
index f75da7ae..d92dc432 100644
--- a/src/sp_validation/catalog_builders.py
+++ b/src/sp_validation/catalog_builders.py
@@ -10,8 +10,6 @@
 import os
 
 import numpy as np
-import numexpr as ne
-from scipy import stats
 import yaml
 
 import datetime
@@ -35,6 +33,17 @@
 from . import calibration
 from . import cat as sp_cat
 
+# Spatial-masking primitives now live in ``masks``; re-exported here so external
+# code using ``from sp_validation import catalog_builders as sp_joint`` keeps
+# resolving ``sp_joint.Mask``, ``sp_joint.get_masks_from_config``, etc.
+from sp_validation.masks import (
+    Mask,
+    get_masks_from_config,
+    print_mask_stats,
+    correlation_matrix,
+    confusion_matrix,
+)
+
 
 class BaseCat(object):
     """Base_Cat.
@@ -1183,249 +1192,6 @@ def run(self):
 
         """
 
-def correlation_matrix(masks, confidence_level=0.9):
-
-    n_key = len(masks)
-    print(n_key)
-
-    cm = np.empty((n_key, n_key))
-    r_val = np.zeros_like(cm)
-    r_cl = np.empty((n_key, n_key, 2))
-
-    for idx, mask_idx in enumerate(masks):
-        for jdx, mask_jdx in enumerate(masks):
-            res = stats.pearsonr(mask_idx._mask, mask_jdx._mask)
-            r_val[idx][jdx] = res.statistic
-            r_cl[idx][jdx] = res.confidence_interval(
-                confidence_level=confidence_level
-            )
-
-    return r_val, r_cl
-
-
-def confusion_matrix(prediction, observation):
-
-    result = {}
-
-    pred_pos = sum(prediction)
-    result["true_pos"] = sum(prediction & observation)
-    result["true_neg"] = sum(
-        np.logical_not(prediction) & np.logical_not(observation)
-    )
-    result["false_neg"] = sum(prediction & np.logical_not(observation))
-    result["false_pos"] = sum(np.logical_not(prediction) & observation)
-    result["false_pos_rate"] = result["false_pos"] / (
-        result["false_pos"] + result["true_neg"]
-    )
-    result["false_neg_rate"] = result["false_neg"] / (
-        result["false_neg"] + result["true_pos"]
-    )
-    result["sensitivity"] = result["true_pos"] / (
-        result["true_pos"] + result["false_neg"]
-    )
-    result["specificity"] = result["true_neg"] / (
-        result["true_neg"] + result["false_pos"]
-    )
-
-    cm = np.zeros((2, 2))
-    cm[0][0] = result["true_pos"]
-    cm[1][1] = result["true_neg"]
-    cm[0][1] = result["false_neg"]
-    cm[1][0] = result["false_pos"]
-    cmn = cm.astype("float") / cm.sum(axis=1)[:, np.newaxis]
-    result["cmn"] = cmn
-
-    return result
-
-
-class Mask():
-    """Mask.
-    
-    Class to handle masking of catalogues.
-    
-    Parameters
-    ----------
-    col_name : str
-        name of column to use for mask
-    label : str
-        mask label
-    kind : str
-        operation type, allowed are "equal", "not_equal, ""greater_equal",
-        "smaller_equal", "range"
-    value : float or list
-        value(s) to be used in mask operation
-    dat : numpy.ndarray, optional
-        input data, default is `None`; apply mask if given
-    verbose : bool, optional
-        verbose output if ``True``; default is ``False``
-
-    """
-
-    def __init__(self, col_name, label, kind=None, value=0, dat=None, verbose=False):
-        
-        self._col_name = col_name
-        self._label = label
-        self._value = value
-        self._kind = kind
-        self._num_ok = None
-        self._verbose = verbose
-
-        if self._verbose:
-            print("Initialising mask:", self)
-
-        if dat is not None:
-            self.apply(dat)
-            
-    def __repr__(self):
-        
-        return (
-            f"Mask(col_name={self._col_name}, label={self._label}, kind={self._kind},"
-            + f" value={self._value})"
-        )
-        
-    @classmethod    
-    def from_list(cls, masks, label="combined", verbose=False):
-
-        if verbose:
-            print(f"Combining {len(masks)} masks")
-        
-        my_mask = cls(label, label, kind="combined", value=None)
-
-        my_mask._mask = np.logical_and.reduce([m._mask for m in masks])
-
-        return my_mask
-
-    def apply(self, dat):
-        
-        # Get column
-        col_data = dat[self._col_name]
-        
-        if self._kind == "equal":
-            self._mask = ne.evaluate("col_data == value", local_dict={"col_data": col_data, "value": self._value})
-        elif self._kind == "not_equal":
-            self._mask = ne.evaluate("col_data != value", local_dict={"col_data": col_data, "value": self._value})
-        elif self._kind == "greater_equal":
-            self._mask = ne.evaluate("col_data >= value", local_dict={"col_data": col_data, "value": self._value})
-        elif self._kind == "smaller_equal":
-            self._mask = ne.evaluate("col_data <= value", local_dict={"col_data": col_data, "value": self._value})
-        elif self._kind == "range":
-            self._mask = ne.evaluate("(col_data >= low) & (col_data <= high)", local_dict={"col_data": col_data, "low": self._value[0], "high": self._value[1]})
-        else:
-            raise ValueError(f"Invalid kind {self._kind}")
-
-    def to_bool(self, hsp_mask):
-
-        if self._verbose:
-            print("to_bool: get valid pixels")
-        valid_pixels = hsp_mask.valid_pixels
-
-        # Abuse of col_name
-        self._col_name = valid_pixels
-
-        if self._verbose:
-            print("to_bool: apply mask")
-        self.apply(hsp_mask)
-        mask_bool = hsp.HealSparseMap.make_empty(
-            hsp_mask.nside_coverage,
-            hsp_mask.nside_sparse,
-            dtype="bool",
-        )
-        mask_bool[valid_pixels] = self._mask
-        return mask_bool
- 
-    @classmethod
-    def print_strings(cls, coln, lab, num, fnum, f_out=None):
-        msg = f"{coln:30s} {lab:30s} {num:10s} {fnum:10s}"
-        print(msg)
-        if f_out:
-            print(msg, file=f_out)
- 
-    def print_stats(self, num_obj, f_out=None):
-        if self._num_ok is None:
-            self._num_ok = sum(self._mask)
-
-        si = f"{self._num_ok:10d}"
-        sf = f"{self._num_ok/num_obj:10.2%}"
-        self.print_strings(self._col_name, self._label, si, sf, f_out=f_out)
-
-    def get_sign(self, latex=False):
-        
-        sign = None
-        if self._kind == "equal":
-            sign = "$=$" if latex else "="
-        elif self._kind == "not_equal":
-            sign = "$\ne$" if latex else "!="
-        elif self._kind in ("greater_equal", "range"):
-            sign = "$\leq$" if latex else ">="
-        elif self._kind == "smaller_equal":
-            sign = "$\geq$" if latex else "<="
-        return sign
-
-    def print_condition(self, f_out, latex=False):
-
-        if self._value is None:
-            return ""
-
-        sign = self.get_sign(latex=latex)
-        
-        name = self._label if latex else self._col_name
-
-        if sign is not None:
-            print(f"{name} {sign} {self._value}", file=f_out)
-
-        if self._kind == "range":
-            print(f"{self._value[0]} {sign} {name} {sign} {self._value[1]}", file=f_out)
-
-    def print_summary(self, f_out):
-        print(f"[{self._label}]\t\t\t", file=f_out, end="")
-        self.print_condition(f_out)
-
-    def create_descr(self):
-        """Create Descr.
-
-        Create description of mask for later use in output file header.
-
-        Returns
-        -------
-        str
-            description
-
-        """
-        sign = self.get_sign()
-        if sign is not None:
-            descr = f"{sign}{self._value}"
-        if self._kind == "range":
-            descr = f"{self._value[0]}<={self._col_name}<={self._value[1]}"
-        self._descr = descr
-    
-        # Create description for FITS header
-    def add_summary_to_FITS_header(self, header):
-
-        header_new = fits.Header()
-        
-        self.create_descr()
-
-        header_new[self._col_name] = (self._descr, self._label)
-        
-        header.update(header_new)
-
-
-def print_mask_stats(num_obj, masks, mask_combined):
-    """Print Mask Stats.
-
-    Print mask statistics.
-
-    Parameters
-    ----------
-    num_obj
-    
-    """
-    Mask.print_strings("flag", "label", f"{'num_ok':>10}", f"{'num_ok[%]':>10}")
-    for my_mask in masks:
-        my_mask.print_stats(num_obj)
-
-    mask_combined.print_stats(num_obj)
-
 
 class ReadCat:
 
@@ -1455,87 +1221,6 @@ def params_default(self):
     def run(self):
 
         pass
-    
-    
-def get_masks_from_config(
-    config,
-    dat,
-    dat_ext,
-    masks_to_apply=None,
-    verbose=False
-):
-    """Get Masks From Config.
-    
-    Return mask information from yaml config structure.
-    
-    Parameters
-    ----------
-    config : dict
-        config information
-    dat : numpy.ndarray
-        input data
-    det_ext : numpy.ndarray
-        input extended data
-    masks_to_apply: list, optional
-        masks to apply exclusively; if `None` (default), use all masks
-    verbose : bool, optional
-        verbose output if ``True``; default is ``False``
-        
-    Returns
-    -------
-    list
-        list of masks
-    dict
-        list of indices for given mask column name (label)
-        
-    """
-    # List to store all mask objects
-    masks = []
-
-    # Dict to associate labels with index in mask list
-    labels = {}
-    
-     # Loop over mask sections from config file
-    config_data = {
-        key: config[key] for key in ["dat", "dat_ext"] if key in config
-    }
-    idx = 0
-    for section, mask_list in config_data.items():
-
-        # Set data source
-        dat_source = dat if section == "dat" else dat_ext
-
-        # Loop over mask information in this section
-        for mask_params in mask_list:
-
-            use_this_mask = False            
-            if masks_to_apply is not None:
-                if mask_params["col_name"] in masks_to_apply:
-                    use_this_mask = True
-            else:
-                use_this_mask = True
-                    
-            if use_this_mask:
-                # Ensure 'range' kind has exactly two values
-                value = mask_params["value"]
-                if mask_params["kind"] == "range" and (
-                    not isinstance(value, list) or len(value) != 2
-                ):
-                    raise ValueError(
-                        f"Range kind requires a list of two values, got {value}"
-                    )
-
-                # Create mask instance and append to list
-                my_mask = Mask(**mask_params, dat=dat_source, verbose=verbose)
-                masks.append(my_mask)
-                labels[my_mask._col_name] = idx
-                idx += 1
-            else:
-                if verbose:
-                    print(f"Skipping mask {mask_params['col_name']}")
-                continue
-            
-    return masks, labels
 
 
 def compute_weights_gatti(
diff --git a/src/sp_validation/masks.py b/src/sp_validation/masks.py
new file mode 100644
index 00000000..f7abcc38
--- /dev/null
+++ b/src/sp_validation/masks.py
@@ -0,0 +1,341 @@
+"""SPATIAL MASKS.
+
+This module implements the healsparse-backed ``Mask`` class and the
+spatial-masking helpers (config-driven mask construction, mask statistics,
+correlation/confusion matrices) used when building catalogues.
+
+:Author: Martin Kilbinger
+"""
+
+import numpy as np
+import numexpr as ne
+from scipy import stats
+
+import healsparse as hsp
+
+from astropy.io import fits
+
+
+def correlation_matrix(masks, confidence_level=0.9):
+
+    n_key = len(masks)
+    print(n_key)
+
+    cm = np.empty((n_key, n_key))
+    r_val = np.zeros_like(cm)
+    r_cl = np.empty((n_key, n_key, 2))
+
+    for idx, mask_idx in enumerate(masks):
+        for jdx, mask_jdx in enumerate(masks):
+            res = stats.pearsonr(mask_idx._mask, mask_jdx._mask)
+            r_val[idx][jdx] = res.statistic
+            r_cl[idx][jdx] = res.confidence_interval(
+                confidence_level=confidence_level
+            )
+
+    return r_val, r_cl
+
+
+def confusion_matrix(prediction, observation):
+
+    result = {}
+
+    pred_pos = sum(prediction)
+    result["true_pos"] = sum(prediction & observation)
+    result["true_neg"] = sum(
+        np.logical_not(prediction) & np.logical_not(observation)
+    )
+    result["false_neg"] = sum(prediction & np.logical_not(observation))
+    result["false_pos"] = sum(np.logical_not(prediction) & observation)
+    result["false_pos_rate"] = result["false_pos"] / (
+        result["false_pos"] + result["true_neg"]
+    )
+    result["false_neg_rate"] = result["false_neg"] / (
+        result["false_neg"] + result["true_pos"]
+    )
+    result["sensitivity"] = result["true_pos"] / (
+        result["true_pos"] + result["false_neg"]
+    )
+    result["specificity"] = result["true_neg"] / (
+        result["true_neg"] + result["false_pos"]
+    )
+
+    cm = np.zeros((2, 2))
+    cm[0][0] = result["true_pos"]
+    cm[1][1] = result["true_neg"]
+    cm[0][1] = result["false_neg"]
+    cm[1][0] = result["false_pos"]
+    cmn = cm.astype("float") / cm.sum(axis=1)[:, np.newaxis]
+    result["cmn"] = cmn
+
+    return result
+
+
+class Mask():
+    """Mask.
+    
+    Class to handle masking of catalogues.
+    
+    Parameters
+    ----------
+    col_name : str
+        name of column to use for mask
+    label : str
+        mask label
+    kind : str
+        operation type, allowed are "equal", "not_equal, ""greater_equal",
+        "smaller_equal", "range"
+    value : float or list
+        value(s) to be used in mask operation
+    dat : numpy.ndarray, optional
+        input data, default is `None`; apply mask if given
+    verbose : bool, optional
+        verbose output if ``True``; default is ``False``
+
+    """
+
+    def __init__(self, col_name, label, kind=None, value=0, dat=None, verbose=False):
+        
+        self._col_name = col_name
+        self._label = label
+        self._value = value
+        self._kind = kind
+        self._num_ok = None
+        self._verbose = verbose
+
+        if self._verbose:
+            print("Initialising mask:", self)
+
+        if dat is not None:
+            self.apply(dat)
+            
+    def __repr__(self):
+        
+        return (
+            f"Mask(col_name={self._col_name}, label={self._label}, kind={self._kind},"
+            + f" value={self._value})"
+        )
+        
+    @classmethod    
+    def from_list(cls, masks, label="combined", verbose=False):
+
+        if verbose:
+            print(f"Combining {len(masks)} masks")
+        
+        my_mask = cls(label, label, kind="combined", value=None)
+
+        my_mask._mask = np.logical_and.reduce([m._mask for m in masks])
+
+        return my_mask
+
+    def apply(self, dat):
+        
+        # Get column
+        col_data = dat[self._col_name]
+        
+        if self._kind == "equal":
+            self._mask = ne.evaluate("col_data == value", local_dict={"col_data": col_data, "value": self._value})
+        elif self._kind == "not_equal":
+            self._mask = ne.evaluate("col_data != value", local_dict={"col_data": col_data, "value": self._value})
+        elif self._kind == "greater_equal":
+            self._mask = ne.evaluate("col_data >= value", local_dict={"col_data": col_data, "value": self._value})
+        elif self._kind == "smaller_equal":
+            self._mask = ne.evaluate("col_data <= value", local_dict={"col_data": col_data, "value": self._value})
+        elif self._kind == "range":
+            self._mask = ne.evaluate("(col_data >= low) & (col_data <= high)", local_dict={"col_data": col_data, "low": self._value[0], "high": self._value[1]})
+        else:
+            raise ValueError(f"Invalid kind {self._kind}")
+
+    def to_bool(self, hsp_mask):
+
+        if self._verbose:
+            print("to_bool: get valid pixels")
+        valid_pixels = hsp_mask.valid_pixels
+
+        # Abuse of col_name
+        self._col_name = valid_pixels
+
+        if self._verbose:
+            print("to_bool: apply mask")
+        self.apply(hsp_mask)
+        mask_bool = hsp.HealSparseMap.make_empty(
+            hsp_mask.nside_coverage,
+            hsp_mask.nside_sparse,
+            dtype="bool",
+        )
+        mask_bool[valid_pixels] = self._mask
+        return mask_bool
+ 
+    @classmethod
+    def print_strings(cls, coln, lab, num, fnum, f_out=None):
+        msg = f"{coln:30s} {lab:30s} {num:10s} {fnum:10s}"
+        print(msg)
+        if f_out:
+            print(msg, file=f_out)
+ 
+    def print_stats(self, num_obj, f_out=None):
+        if self._num_ok is None:
+            self._num_ok = sum(self._mask)
+
+        si = f"{self._num_ok:10d}"
+        sf = f"{self._num_ok/num_obj:10.2%}"
+        self.print_strings(self._col_name, self._label, si, sf, f_out=f_out)
+
+    def get_sign(self, latex=False):
+        
+        sign = None
+        if self._kind == "equal":
+            sign = "$=$" if latex else "="
+        elif self._kind == "not_equal":
+            sign = "$\ne$" if latex else "!="
+        elif self._kind in ("greater_equal", "range"):
+            sign = "$\leq$" if latex else ">="
+        elif self._kind == "smaller_equal":
+            sign = "$\geq$" if latex else "<="
+        return sign
+
+    def print_condition(self, f_out, latex=False):
+
+        if self._value is None:
+            return ""
+
+        sign = self.get_sign(latex=latex)
+        
+        name = self._label if latex else self._col_name
+
+        if sign is not None:
+            print(f"{name} {sign} {self._value}", file=f_out)
+
+        if self._kind == "range":
+            print(f"{self._value[0]} {sign} {name} {sign} {self._value[1]}", file=f_out)
+
+    def print_summary(self, f_out):
+        print(f"[{self._label}]\t\t\t", file=f_out, end="")
+        self.print_condition(f_out)
+
+    def create_descr(self):
+        """Create Descr.
+
+        Create description of mask for later use in output file header.
+
+        Returns
+        -------
+        str
+            description
+
+        """
+        sign = self.get_sign()
+        if sign is not None:
+            descr = f"{sign}{self._value}"
+        if self._kind == "range":
+            descr = f"{self._value[0]}<={self._col_name}<={self._value[1]}"
+        self._descr = descr
+    
+        # Create description for FITS header
+    def add_summary_to_FITS_header(self, header):
+
+        header_new = fits.Header()
+        
+        self.create_descr()
+
+        header_new[self._col_name] = (self._descr, self._label)
+        
+        header.update(header_new)
+
+
+def print_mask_stats(num_obj, masks, mask_combined):
+    """Print Mask Stats.
+
+    Print mask statistics.
+
+    Parameters
+    ----------
+    num_obj
+    
+    """
+    Mask.print_strings("flag", "label", f"{'num_ok':>10}", f"{'num_ok[%]':>10}")
+    for my_mask in masks:
+        my_mask.print_stats(num_obj)
+
+    mask_combined.print_stats(num_obj)
+
+
+def get_masks_from_config(
+    config,
+    dat,
+    dat_ext,
+    masks_to_apply=None,
+    verbose=False
+):
+    """Get Masks From Config.
+    
+    Return mask information from yaml config structure.
+    
+    Parameters
+    ----------
+    config : dict
+        config information
+    dat : numpy.ndarray
+        input data
+    det_ext : numpy.ndarray
+        input extended data
+    masks_to_apply: list, optional
+        masks to apply exclusively; if `None` (default), use all masks
+    verbose : bool, optional
+        verbose output if ``True``; default is ``False``
+        
+    Returns
+    -------
+    list
+        list of masks
+    dict
+        list of indices for given mask column name (label)
+        
+    """
+    # List to store all mask objects
+    masks = []
+
+    # Dict to associate labels with index in mask list
+    labels = {}
+    
+     # Loop over mask sections from config file
+    config_data = {
+        key: config[key] for key in ["dat", "dat_ext"] if key in config
+    }
+    idx = 0
+    for section, mask_list in config_data.items():
+
+        # Set data source
+        dat_source = dat if section == "dat" else dat_ext
+
+        # Loop over mask information in this section
+        for mask_params in mask_list:
+
+            use_this_mask = False            
+            if masks_to_apply is not None:
+                if mask_params["col_name"] in masks_to_apply:
+                    use_this_mask = True
+            else:
+                use_this_mask = True
+                    
+            if use_this_mask:
+                # Ensure 'range' kind has exactly two values
+                value = mask_params["value"]
+                if mask_params["kind"] == "range" and (
+                    not isinstance(value, list) or len(value) != 2
+                ):
+                    raise ValueError(
+                        f"Range kind requires a list of two values, got {value}"
+                    )
+
+                # Create mask instance and append to list
+                my_mask = Mask(**mask_params, dat=dat_source, verbose=verbose)
+                masks.append(my_mask)
+                labels[my_mask._col_name] = idx
+                idx += 1
+            else:
+                if verbose:
+                    print(f"Skipping mask {mask_params['col_name']}")
+                continue
+            
+    return masks, labels

From 9bfee819c2eccb72d8a3aa898a8ddefd474d2d7f Mon Sep 17 00:00:00 2001
From: Cail Daley <cail.daley@cea.fr>
Date: Sat, 20 Jun 2026 16:27:42 +0200
Subject: [PATCH 14/16] Rename cat.py -> catalog.py: catalogue data layer

The catalogue module pair now reads by role: catalog.py is the data
layer (read/write/column-access/matching free functions), catalog_builders.py
is the construction pipeline (runner classes built on it). Module docstrings
state this hierarchy explicitly.

Behaviour-preserving. Every importer of the local sp_validation.cat module is
swept to sp_validation.catalog, each preserving its local binding (bare
`import cat` forms gain `as cat` so function bodies are unchanged). The
cs_util `cat` import is a different module and is left untouched throughout.

The dangling-reference guard registers the retired flat-import form
`sp_validation.cat import` rather than the bare `sp_validation.cat` token,
which would false-positive on the live `sp_validation.catalog` /
`sp_validation.catalog_builders` modules (same prefix trap as glass_mock).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 cosmo_val/match_LF_SP.py                                 | 2 +-
 papers/catalog/hist_mag.py                               | 2 +-
 scripts/apply_alpha.py                                   | 2 +-
 scripts/calibration/calibrate_comprehensive_cat.py       | 2 +-
 scripts/calibration/extract_info.py                      | 6 +++---
 scripts/create_joint_shape_cat.py                        | 2 +-
 scripts/examples/demo_calibrate_minimal_cat.py           | 2 +-
 scripts/plot_leakage.py                                  | 2 +-
 src/sp_validation/__init__.py                            | 2 +-
 src/sp_validation/calibration.py                         | 2 +-
 src/sp_validation/{cat.py => catalog.py}                 | 9 ++++++---
 src/sp_validation/catalog_builders.py                    | 8 +++++---
 src/sp_validation/tests/test_dangling_move_references.py | 8 ++++++++
 13 files changed, 31 insertions(+), 18 deletions(-)
 rename src/sp_validation/{cat.py => catalog.py} (98%)

diff --git a/cosmo_val/match_LF_SP.py b/cosmo_val/match_LF_SP.py
index 130dc9aa..802365e4 100644
--- a/cosmo_val/match_LF_SP.py
+++ b/cosmo_val/match_LF_SP.py
@@ -29,7 +29,7 @@
 from uncertainties import ufloat   
 from sklearn import preprocessing
 
-from sp_validation import cat
+from sp_validation import catalog as cat
 
 # ## Set-up
 
diff --git a/papers/catalog/hist_mag.py b/papers/catalog/hist_mag.py
index bb0e1deb..2f19fe7a 100644
--- a/papers/catalog/hist_mag.py
+++ b/papers/catalog/hist_mag.py
@@ -28,7 +28,7 @@
 from sp_validation import format
 from sp_validation.calibration import metacal
 from sp_validation import calibration
-import sp_validation.cat as cat
+import sp_validation.catalog as cat
 
 # %%
 # Initialize calibration class instance
diff --git a/scripts/apply_alpha.py b/scripts/apply_alpha.py
index 0769814e..fc88ba51 100755
--- a/scripts/apply_alpha.py
+++ b/scripts/apply_alpha.py
@@ -22,7 +22,7 @@
 
 from shear_psf_leakage.leakage import func_bias_2d
 
-from sp_validation import cat
+from sp_validation import catalog as cat
 from sp_validation import format
 
 
diff --git a/scripts/calibration/calibrate_comprehensive_cat.py b/scripts/calibration/calibrate_comprehensive_cat.py
index 84825347..f39dba7c 100644
--- a/scripts/calibration/calibrate_comprehensive_cat.py
+++ b/scripts/calibration/calibrate_comprehensive_cat.py
@@ -22,7 +22,7 @@
 from astropy.io import fits
 from cs_util import cat as cs_cat
 
-import sp_validation.cat as cat
+import sp_validation.catalog as cat
 from sp_validation import calibration
 from sp_validation import catalog_builders as sp_joint
 from sp_validation.calibration import metacal
diff --git a/scripts/calibration/extract_info.py b/scripts/calibration/extract_info.py
index 59c08baa..2b163873 100644
--- a/scripts/calibration/extract_info.py
+++ b/scripts/calibration/extract_info.py
@@ -35,8 +35,8 @@
 import numpy as np
 from astropy.io import fits
 
-#from sp_validation.cat import *
-from sp_validation import cat as spv_cat
+#from sp_validation.catalog import *
+from sp_validation import catalog as spv_cat
 from sp_validation.calibration import metacal
 from sp_validation.calibration import *
 from sp_validation.galaxy import *
@@ -940,7 +940,7 @@
 
 import os
 
-from sp_validation.cat import *
+from sp_validation.catalog import *
 from sp_validation.format import *
 
 # Shear response per galaxy
diff --git a/scripts/create_joint_shape_cat.py b/scripts/create_joint_shape_cat.py
index b1fa3e6d..4423d5cc 100755
--- a/scripts/create_joint_shape_cat.py
+++ b/scripts/create_joint_shape_cat.py
@@ -12,7 +12,7 @@
 from cs_util import cat
 from cs_util import plots
 
-from sp_validation.cat import *
+from sp_validation.catalog import *
 from sp_validation.calibration import *
 
 
diff --git a/scripts/examples/demo_calibrate_minimal_cat.py b/scripts/examples/demo_calibrate_minimal_cat.py
index 77d26566..ccb246f2 100644
--- a/scripts/examples/demo_calibrate_minimal_cat.py
+++ b/scripts/examples/demo_calibrate_minimal_cat.py
@@ -26,7 +26,7 @@
 from sp_validation import catalog_builders as sp_joint
 from sp_validation import format
 from sp_validation.calibration import metacal
-import sp_validation.cat as cat
+import sp_validation.catalog as cat
 
 # Initialize calibration class instance
 obj = sp_joint.CalibrateCat()
diff --git a/scripts/plot_leakage.py b/scripts/plot_leakage.py
index f37fa4d9..a7c27de9 100755
--- a/scripts/plot_leakage.py
+++ b/scripts/plot_leakage.py
@@ -9,7 +9,7 @@
 
 from cs_util import logging
 
-from sp_validation.cat import *
+from sp_validation.catalog import *
 from sp_validation.plots import *
 from sp_validation import io
 
diff --git a/src/sp_validation/__init__.py b/src/sp_validation/__init__.py
index fd785f48..1b955e11 100644
--- a/src/sp_validation/__init__.py
+++ b/src/sp_validation/__init__.py
@@ -2,8 +2,8 @@
 
 __all__ = [
     'b_modes',
-    'cat',
     'calibration',
+    'catalog',
     'catalog_builders',
     'cosmology',
     'format',
diff --git a/src/sp_validation/calibration.py b/src/sp_validation/calibration.py
index 380b7286..9a5fea31 100644
--- a/src/sp_validation/calibration.py
+++ b/src/sp_validation/calibration.py
@@ -16,7 +16,7 @@
 import tqdm
 
 from sp_validation.statistics import jackknif_weighted_average2
-from sp_validation import cat as sp_cat
+from sp_validation import catalog as sp_cat
 
 from shear_psf_leakage import leakage
 from shear_psf_leakage import run_object
diff --git a/src/sp_validation/cat.py b/src/sp_validation/catalog.py
similarity index 98%
rename from src/sp_validation/cat.py
rename to src/sp_validation/catalog.py
index ab3a4a2b..867e4e7c 100644
--- a/src/sp_validation/cat.py
+++ b/src/sp_validation/catalog.py
@@ -1,8 +1,11 @@
-"""CAT.
+"""CATALOG.
 
-:Name: cat.py
+:Name: catalog.py
 
-:Description: This script contains methods to deal with catalogues.
+:Description: Catalogue data layer — the read/write, column-access, and
+    object-matching free functions that operate directly on shape catalogues.
+    These are the low-level primitives; the catalogue *construction pipeline*
+    (runner classes that orchestrate them) lives in ``catalog_builders``.
 
 :Author: Martin Kilbinger
 
diff --git a/src/sp_validation/catalog_builders.py b/src/sp_validation/catalog_builders.py
index d92dc432..12fe0af6 100644
--- a/src/sp_validation/catalog_builders.py
+++ b/src/sp_validation/catalog_builders.py
@@ -1,7 +1,9 @@
 """CATALOG BUILDERS.
 
-This module implements classes to create, mask, and calibrate joint
-comprehensive catalogues, plus the run_* entry-point functions.
+Catalogue construction pipeline — the runner classes that create, mask, and
+calibrate joint comprehensive catalogues, plus the ``run_*`` entry-point
+functions. Built on the catalogue data layer in ``catalog`` (imported here as
+``sp_cat``), which supplies the read/write/column-access/matching primitives.
 
 :Author: Martin Kilbinger
 """
@@ -31,7 +33,7 @@
 
 from . import format
 from . import calibration
-from . import cat as sp_cat
+from . import catalog as sp_cat
 
 # Spatial-masking primitives now live in ``masks``; re-exported here so external
 # code using ``from sp_validation import catalog_builders as sp_joint`` keeps
diff --git a/src/sp_validation/tests/test_dangling_move_references.py b/src/sp_validation/tests/test_dangling_move_references.py
index a22e29a1..e0432623 100644
--- a/src/sp_validation/tests/test_dangling_move_references.py
+++ b/src/sp_validation/tests/test_dangling_move_references.py
@@ -34,6 +34,14 @@
     ("sp_validation.info", "sp_validation.version (__version__); __name__ dropped"),
     ("sp_validation/util.py", "sp_validation/format.py"),
     ("run_joint_cat", "catalog_builders"),
+    # The catalogue data layer ``cat.py`` became ``catalog.py``. The bare token
+    # ``sp_validation.cat`` is a *prefix* of the live ``sp_validation.catalog``
+    # / ``sp_validation.catalog_builders`` modules, so registering it would
+    # false-positive on every catalog reference (the same trap as ``glass_mock``
+    # above). The flat-import form ``sp_validation.cat import`` is unique to the
+    # retired module — its next char is a space, never the ``a`` of ``catalog`` —
+    # so it traps the dangling import without catching the new name.
+    ("sp_validation.cat import", "sp_validation.catalog import"),
 )
 EXCLUDED_DIRS = {
     ".git",

From 16822ddabfa7b828ab9443668cdc598a2fe101a0 Mon Sep 17 00:00:00 2001
From: Martin Kilbinger <martin.kilbinger@cea.fr>
Date: Tue, 23 Jun 2026 15:54:17 +0200
Subject: [PATCH 15/16] Deleted obsolete script cosmo_val/match_LF_SP.py

---
 cosmo_val/match_LF_SP.py | 352 ---------------------------------------
 1 file changed, 352 deletions(-)
 delete mode 100644 cosmo_val/match_LF_SP.py

diff --git a/cosmo_val/match_LF_SP.py b/cosmo_val/match_LF_SP.py
deleted file mode 100644
index 802365e4..00000000
--- a/cosmo_val/match_LF_SP.py
+++ /dev/null
@@ -1,352 +0,0 @@
-# ---
-# jupyter:
-#   jupytext:
-#     formats: ipynb,py
-#     text_representation:
-#       extension: .py
-#       format_name: light
-#       format_version: '1.5'
-#       jupytext_version: 1.14.5
-#   kernelspec:
-#     display_name: Python 3 (ipykernel)
-#     language: python
-#     name: python3
-# ---
-
-# # Match UNIONS ShapePipe and LensFit catalogues
-
-# !pip install -U scikit-learn
-
-import os
-import numpy as np
-from scipy import stats
-import matplotlib.pyplot as plt
-from astropy.io import fits
-from astropy.coordinates import match_coordinates_sky
-import csv
-import astropy.units as u
-from astropy.coordinates import SkyCoord
-from uncertainties import ufloat   
-from sklearn import preprocessing
-
-from sp_validation import catalog as cat
-
-# ## Set-up
-
-# Maximum separation between objects defined as match
-max_sep = 0.5 * u.arcsec
-
-# +
-# Input UNIONS data
-
-sp_version = '1.0'
-lf_version = '1'
-version = sp_version
-
-path_to_unions_data = '/n17data/mkilbing/astro/data/CFIS/v1.0/ShapePipe/'
-#path_to_unions_data = f'{os.environ["HOME"]}/astro/data/UNIONS/v{version}'
-
-# Input catalogues
-sp_cat_name = f'{path_to_unions_data}/ShapePipe/unions_shapepipe_extended_2022_v{sp_version}.fits'
-lf_cat_name = f'{path_to_unions_data}/Lensfit/lensfit_goldshape_2022v{lf_version}.fits'
-
-# Input masks
-lf_mask_name = f'{path_to_unions_data}/Lensfit/masks/CFIS3500_THELI_mask_hp_4096.fits'
-sp_mask_name = f'{path_to_unions_data}/ShapePipe/masks/healpix/nside_1024/mask_all.fits'
-
-# +
-# Output directory
-direc = os.environ['HOME'] + '/astro/data/CFIS/v1.0/matched_LF_SP'
-
-# Masked catalogue file names (here used as in- and out-put)
-path_sp_masked = f'{direc}/masked_{os.path.basename(sp_cat_name)}'
-path_lf_masked = f'{direc}/masked_{os.path.basename(lf_cat_name)}'
-# -
-
-# ## Masking
-
-# !./check_footprint.py -m {lf_mask_name} -i {sp_cat_name} -g 0 --output_path {path_sp_masked}
-
-# !./check_footprint.py -m {sp_mask_name} -i {lf_cat_name} -g 1 --output_path {path_lf_masked}
-
-# ## Read catalogues
-
-hdu_list_sp = fits.open(f'{path_sp_masked}')
-sp_data = hdu_list_sp[1].data
-sp_header = hdu_list_sp[0].header
-
-hdu_list_lf = fits.open(f'{path_lf_masked}')
-lf_data = hdu_list_lf[1].data
-lf_header = hdu_list_lf[0].header
-
-# ## Matching
-
-coord_units = u.degree
-
-# +
-# Restrict SP catalogue to LF tiles.
-# After above masking should have no effect.
-
-w_in_LF = (sp_data['mask_extern'] == 0)
-sp_data_ok = sp_data[w_in_LF]
-#sp_data_ok = sp_data
-# -
-
-print(len(sp_data), len(sp_data_ok), len(lf_data))
-
-sp_sc = SkyCoord(
-    ra=sp_data_ok['RA'] * coord_units,
-    dec=sp_data_ok['Dec'] * coord_units,
-)
-
-lf_sc = SkyCoord(
-    ra=lf_data['RA'] * coord_units,
-    dec=lf_data['Dec'] * coord_units,
-)
-
-# Match SDSS to UNIONS
-idx, d2d, d3d = match_coordinates_sky(lf_sc, sp_sc) 
-
-#test to see if we understand well match_coordinates_sky
-print(idx[0], lf_sc[0], sp_sc[idx[0]])
-
-# +
-sep_constraint = d2d < max_sep
-
-lf_matches = lf_data[sep_constraint]
-sp_matches = sp_data_ok[idx[sep_constraint]]
-# -
-
-print(len(lf_matches), len(sp_matches))
-
-print(len(sp_matches) / len(sp_data_ok))
-
-# #### Plot matching distance histograms
-
-d2d_arcsec = d2d.to('arcsec').value
-plt.hist(d2d_arcsec, bins=500, range=(0, 5), density=True, log=True)
-plt.xlabel('distance [arcsec]')
-_ = plt.ylabel('frequency')
-plt.xlim(1e-3, 5)
-_ = plt.ylim(1e-5, 5e1)
-
-plt.hist(d2d_arcsec, bins=500, range=(0, 3), density=True)
-plt.xlabel('distance [arcsec]')
-_ = plt.ylabel('frequency')
-_ = plt.ylim(0, 1)
-
-# Normalise weights
-lf_w_n = preprocessing.normalize([lf_matches['w']])
-sp_w_n = preprocessing.normalize([sp_matches['w']])
-
-# Pearson correlation coefficient
-r = stats.pearsonr(lf_matches['w'], sp_matches['w'])
-r_n = stats.pearsonr(lf_w_n[0], sp_w_n[0])
-
-print(r, r_n)
-
-# +
-nmax = -1 #10_000_000
-fig, ax = plt.subplots()
-ax.scatter(lf_matches['w'][:nmax], sp_matches['w'][:nmax], s=0.0001)
-
-plt.title('Weights')
-ax.set_xlabel('LensFit $w$')
-ax.set_ylabel('ShapePipe $w$')
-
-#ax.set_xlim(0, w_max)
-#ax.set_ylim(0, w_max)
-ax.set_box_aspect(1)
-
-plt.show()
-
-# +
-w_max = 0.00022
-nmax = -1 #10_000_000
-fig, ax = plt.subplots()
-ax.scatter(lf_w_n[0][:nmax], sp_w_n[0][:nmax], s=0.0001)
-
-ax.set_xlabel(r'LensFit $w_{\rm{n}}$')
-ax.set_ylabel(r'ShapePipe $w_{\rm{n}}$')
-plt.title('Normalised weights')
-
-ax.set_xlim(0, w_max)
-ax.set_ylim(0, w_max)
-ax.set_box_aspect(1)
-
-plt.show()
-
-# +
-# Check additive bias
-cw = {}
-cw_err = {}
-
-cat_names = ['LF', 'LF_matched', 'SP', 'SP_matched']
-
-for cn in cat_names:
-    cw[cn] = np.zeros(2)
-    cw_err[cn] = np.zeros(2)
-
-# LF
-for comp in (0, 1):
-    cw['LF'][comp] = np.average(lf_data[f'e{comp+1}'], weights=lf_data['w'])
-    variance = np.average(
-        lf_data[f'e{comp+1}'] - cw['LF'][comp]**2,
-        weights=lf_data['w'],
-    )
-    cw_err['LF'][comp] = np.sqrt(variance / len(lf_data))
-
-for comp in (0, 1):
-    cw['LF_matched'][comp] = np.average(
-        lf_matches[f'e{comp+1}'],
-        weights=lf_matches['w'],
-    )
-    variance = np.average(
-        lf_matches[f'e{comp+1}'] - cw['LF_matched'][comp]**2,
-        weights=lf_matches['w'],
-    )
-    cw_err['LF_matched'][comp] = np.sqrt(variance / len(lf_matches))
-
-# SP
-for comp in (0, 1):
-    cw['SP'][comp] = np.average(sp_data[f'e{comp+1}_uncal'], weights=sp_data['w'])
-    variance = np.average(
-        (sp_data[f'e{comp+1}_uncal'] - cw['SP'][comp])**2,
-        weights=sp_data['w'],
-    )
-    cw_err['SP'][comp] = np.sqrt(variance / len(sp_data))
-
-for comp in (0, 1):
-    cw['SP_matched'][comp] = np.average(
-        sp_matches[f'e{comp+1}_uncal'],
-        weights=sp_matches['w'],
-    )
-    variance = np.average(
-        (sp_matches[f'e{comp+1}_uncal'] - cw['SP_matched'][comp])**2,
-        weights=sp_matches['w'],
-    )
-    cw_err['SP_matched'][comp] = np.sqrt(variance / len(sp_matches))
-
-# Print results
-for cn in cat_names:
-    for comp in (0, 1):
-        c_dc = ufloat(cw[cn][comp], cw_err[cn][comp])
-        print(f'{cn} c_{comp+1} = {c_dc:.3eP}')
-
-# +
-# Metacal
-
-R_g = {}
-
-# Checkcd v
-for cn in ['SP', 'SP_matched']:
-    R_g[cn] = np.empty(shape=(2, 2))
-
-for idx in (0, 1):
-    for jdx in (0, 1):
-        R_g['SP'][idx, jdx] = np.mean(sp_data[f'R_g{idx+1}{jdx+1}'])
-        R_g['SP_matched'][idx, jdx] = np.mean(sp_matches[f'R_g{idx+1}{jdx+1}'])
-        
-print('Shear response matrices')
-for cn in ['SP', 'SP_matched']:
-    print(cn)
-    print(np.matrix(R_g[cn]))
-
-# +
-# Selection response matrix
-
-hdu_sp_orig = fits.open(sp_cat_name)
-sp_header = hdu_sp_orig[0].header
-# -
-
-R_s = np.empty(shape=(2, 2))
-for idx in (0, 1):
-    for jdx in (0, 1):
-        R_s[idx][jdx] = sp_header[f'R_S{idx+1}{jdx+1}']
-
-# +
-R = R_g['SP_matched'] + R_s
-
-print('Total response matrix for matched SP cat R =')
-print(np.matrix(R))
-# -
-
-# Test corrected additive bias
-R_SP = np.matrix(R_g['SP'] + R_s)
-print(R_SP)
-Rm1_SP = np.linalg.inv(R_SP)
-c_corr = Rm1_SP.dot(cw['SP'])
-print(cw['SP'])
-print(c_corr)
-
-# +
-# Apply metacal
-
-e_uncal_minus_c = np.array([
-    sp_matches['e1_uncal'] - cw['SP_matched'][0],
-    sp_matches['e2_uncal'] - cw['SP_matched'][1]
-])
-Rm1 = np.linalg.inv(R)
-e_cal = Rm1.dot(e_uncal_minus_c)
-
-# +
-# Write matched catalogues
-
-# LF
-out_path_lf_masked_matched = f'{direc}/masked_matched_{os.path.basename(lf_cat_name)}'
-
-ra = lf_matches['ra']
-dec = lf_matches['dec']
-g = [lf_matches['e1'], lf_matches['e2']]
-w = lf_matches['w']
-mag = lf_matches['mag']
-R = np.array([[0, 0], [0, 0]])
-R_shear = R
-R_select = R
-c = cw['LF_matched']
-c_err = cw_err['LF_matched']
-
-cat.write_shape_catalog(
-    out_path_lf_masked_matched,
-    ra,
-    dec,
-    g,
-    w,
-    mag,
-    R,
-    R_shear,
-    R_select,
-    c,
-    c_err,
-)   
-
-# +
-# SP
-out_path_sp_masked_matched = f'{direc}/masked_matched_{os.path.basename(sp_cat_name)}'
-
-ra = sp_matches['ra']
-dec = sp_matches['dec']
-g = e_cal
-w = sp_matches['w']
-mag = sp_matches['mag']
-R_shear = R_g['SP_matched']
-R_select = R
-c = cw['SP_matched']
-c_err = cw_err['SP_matched']
-
-cat.write_shape_catalog(
-    out_path_sp_masked_matched,
-    ra,
-    dec,
-    g,
-    w,
-    mag,
-    R,
-    R_shear,
-    R_select,
-    c,
-    c_err,
-) 
-# -
-
-

From 52c0e5deda92adf79617bed4cdf8541e57ad460d Mon Sep 17 00:00:00 2001
From: Martin Kilbinger <martin.kilbinger@cea.fr>
Date: Tue, 23 Jun 2026 15:59:16 +0200
Subject: [PATCH 16/16] Deleted old scripts/create_joint_shape_cat.py

---
 scripts/create_joint_shape_cat.py | 593 ------------------------------
 1 file changed, 593 deletions(-)
 delete mode 100755 scripts/create_joint_shape_cat.py

diff --git a/scripts/create_joint_shape_cat.py b/scripts/create_joint_shape_cat.py
deleted file mode 100755
index 4423d5cc..00000000
--- a/scripts/create_joint_shape_cat.py
+++ /dev/null
@@ -1,593 +0,0 @@
-#!/usr/bin/env python3
-
-import sys
-
-import numpy as np
-import copy
-from astropy.io import ascii
-from astropy.io import fits
-from optparse import OptionParser
-
-from cs_util import logging
-from cs_util import cat
-from cs_util import plots
-
-from sp_validation.catalog import *
-from sp_validation.calibration import *
-
-
-
-class param:
-    """Param Class.
-
-    General class to store (default) variables.
-
-    """
-
-    def __init__(self, **kwds):
-        self.__dict__.update(kwds)
-
-    def print(self, **kwds):
-        """Print."""
-        print(self.__dict__)
-
-    def var_list(self, **kwds):
-        """Get Variable List."""
-        return vars(self)
-
-
-
-def params_default():
-    """Set default parameter values.
-
-    Parameters
-    ----------
-    None
-
-    Returns
-    -------
-    p_def: class param
-        parameter values
-    """
-
-    p_def = param(
-        survey = 'v1',
-        star_cat_base_path='./star_cat',
-    )
-
-    return p_def
-
-
-def parse_options(p_def):
-    """Parse command line options.
-
-    Parameters
-    ----------
-    p_def : class param
-        parameter values
-
-    Returns
-    -------
-    tuple
-        Command line options
-    str
-        Command line str
-
-    """
-    usage  = "%prog [OPTIONS]"
-    parser = OptionParser(usage=usage)
-
-    parser.add_option(
-        '-s',
-        '--survey',
-        dest='survey',
-        type='string',
-        help='survey, allowed is \'v1\'|\'test\'|\'v1_small\'|\'Pa+Pb+...\'',
-    )
-    parser.add_option(
-        '-S',
-        '--star_cat_base_path',
-        dest='star_cat_base_path',
-        default=p_def.star_cat_base_path,
-        type='string',
-        help=f'star catalogue base path, default is {p_def.star_cat_base_path}',
-    )
-
-    parser.add_option(
-        '-v',
-        '--verbose',
-        dest='verbose',
-        action='store_true',
-        help='verbose output',
-    )
-
-    options, args = parser.parse_args()
-
-    return options, args
-
-
-def check_options(options):
-    """Check command line options.
-
-    Parameters
-    ----------
-    options: tuple
-        Command line options
-
-    Returns
-    -------
-    bool
-        Result of option check. False if invalid option value.
-
-    """
-    return True
-
-
-def update_param(p_def, options):
-    """Return default parameter, updated and complemented according to options.
-    
-    Parameters
-    ----------
-    p_def:  class param
-        parameter values
-    options: tuple
-        command line options
-    
-    Returns
-    -------
-    param: class param
-        updated paramter values
-    """
-
-    param = copy.copy(p_def)
-
-    # Update keys in param according to options values
-    for key in vars(param):
-        if key in vars(options):
-            setattr(param, key, getattr(options, key))
-
-    # Add remaining keys from options to param
-    for key in vars(options):
-        if not key in vars(param):
-            setattr(param, key, getattr(options, key))
-
-    return param
-
-
-
-def get_R(fname_base, key_base=None):
-
-    dat = ascii.read(f'{fname_base}.txt')
-    if dat[-1]['patch'] != 'all':
-        raise ValueError(
-            f'Invalid file {fname_base}, last row does not correspond to patch=\'all\''
-        )
-    R = np.empty(shape=(2, 2))
-
-    if not key_base:
-        key_base = fname_base
-
-    R[0, 0] = dat[-1][f'{key_base}_11']
-    R[0, 1] = dat[-1][f'{key_base}_12']
-    R[1, 0] = dat[-1][f'{key_base}_21']
-    R[1, 1] = dat[-1][f'{key_base}_22']
-
-    return R
-
-
-def merge_catalogues(
-    patches,
-    base_path,
-    input_sub_path,
-    output_path,
-    sh,
-    R_select=None,
-    return_mean_e=False,
-    return_mean_R_shear=False,
-    hdu_in=1,
-    hist_base=None,
-    verbose=False,
-):
-    """Merge Catalogues
-
-    Merge catalogues from sub-patches into one FITS file.
-
-    Parameters
-    ----------
-    patches : list of str
-        list of patches/sub-directories
-    base_path : str
-        base path
-    input_sub_path : str
-        common part of input path (below patch)
-    output_path : str
-        output file path
-    sh : str
-        shape measurement method, for DES weights use 'ngmix'
-    R_select : np.array(2, 2), optional
-        selection matrix
-    return_mean_e : bool, optional
-        return mean ellipticity if `True`; default is `False`
-    return_mean_R_shear : bool, optional
-        return mean response matrix if `True`; default is `False`
-    hdu_in : int, optional
-        input data HD, default is `1` 
-    verbose : bool, optional
-        verbose output if `True`; default is `False`
-
-    Returns
-    -------
-    dict
-        additive bias
-    dict
-        response matrix (multiplicative bias component)
-
-    """
-    dat_all = {}
-    for idx, patch in enumerate(patches):
-
-        if verbose:
-            print(' ', patch)
-
-        input_path = f'{base_path}/{patch}/{input_sub_path}'
-        try:
-            dat = fits.getdata(input_path, hdu_in)
-        except:
-            print(f"No data found in file {input_path} at HDU #{hdu_in}")
-            print(f"Trying at HDU #{hdu_in-1}")
-            dat = fits.getdata(input_path, hdu_in - 1)
-
-        if idx == 0:
-            col_names = dat.dtype.names
-            for name in col_names:
-                if name != 'w':
-                    dat_all[name] = []
-                else:
-                    dat_all[name+'_iv'] = []
-            dat_all['patch'] = []
-        for name in col_names:
-            if name != 'w':
-                dat_all[name] = np.append(dat_all[name], dat[name])
-            else:
-                dat_all[name+'_iv'] = np.append(dat_all[name+'_iv'], dat[name])
-
-        # Add patch number
-        dat_all['patch'] = np.append(dat_all['patch'], [idx + 1] * len(dat))
-
-    col_names = col_names + ('patch',)
-
-    # Compute column for the DES weights (Gatti et al. 2021)
-    if sh == 'ngmix':
-        if verbose:
-            print(f"Compute DES weights for the combined catalogue.")
-        name = 'w_des'
-        num_bins = 20
-        dat_all['w_des'] = get_w_des(dat_all, num_bins)
-        col_names = col_names + ('w_des',)
-
-    column_all = []
-    for name in col_names:
-        if name != 'patch':
-            my_format = 'D'
-        else:
-            my_format = 'I'
-        if name == 'w':
-            name = 'w_iv'
-        column = fits.Column(name=name, array=dat_all[name], format=my_format)
-        column_all.append(column)
-
-    # Compute bias parameters if required
-    c = np.empty(shape=(2))
-    if return_mean_e:
-        for idx in (0, 1):
-            if 'w_des' in col_names:
-                name = 'w_des'
-            else:
-                name = 'w_iv'
-            c[idx] = np.average(
-                dat_all[f'e{idx+1}_uncal'],
-                weights=dat_all[name]
-        )
-
-    R_shear = np.empty(shape=(2, 2))
-    R_shear_all = []
-    labels = []
-    if return_mean_R_shear:
-        for idx in (0, 1):
-            for jdx in (0, 1):
-                R_shear[idx][jdx] = np.mean(dat_all[f'R_g{idx+1}{jdx+1}'])
-
-                R_shear_all.append(dat_all[f'R_g{idx+1}{jdx+1}'])
-                labels.append(f'$R_{{{idx+1}{jdx+1}}}$')
-
-    if hist_base is not None:
-        # Plot histogram of R_shear elements
-        title = 'Shear response'
-        x_label = 'response matrix element'
-        y_label = 'frequency'
-        x_range = (-3, 3)
-        n_bins = 500
-        out_path = f'R_{hist_base}.pdf'
-        colors = ['blue', 'red','blue', 'red']                                          
-        linestyles = ['-', '-', ':', ':']
-
-        plots.plot_histograms(
-            R_shear_all,
-            labels,
-            title,
-            x_label,
-            y_label,
-            x_range,
-            n_bins,
-            out_path,
-            colors=colors,
-            linestyles=linestyles,
-        )
-
-    if R_select is not None:
-        R = R_shear + R_select
-        if verbose:
-            print('Performing calibration on the whole catalog.')
-        # Compute the calibration on the whole catalog for the ellipticities
-        # Find the index of the columns e1 and e2 of calibrated ellipticities
-        index_e1 = next((i for i, col in enumerate(column_all) if col.name == 'e1'), None)
-        index_e2 = next((i for i, col in enumerate(column_all) if col.name == 'e2'), None)
-
-        # Perform the calibration
-        g = np.array([dat_all['e1_uncal'], dat_all['e2_uncal']])
-        Rm1 = np.linalg.inv(R)
-
-        c_corr = Rm1.dot(c)
-        g_corr = Rm1.dot(g)
-        for comp in (0, 1):
-            g_corr[comp] = g_corr[comp] - c_corr[comp]
-
-        # Replace the columns
-        dat_all['e1'] = g_corr[0]
-        dat_all['e2'] = g_corr[1]
-        column_all[index_e1] = fits.Column(name='e1', array=g_corr[0], format='D')
-        column_all[index_e2] = fits.Column(name='e2', array=g_corr[1], format='D')
-
-        # Compute the leakage coefficients for each object
-        if sh == 'ngmix':
-            if verbose:
-                print('Computing leakage coefficients per object.')
-            num_bins = 20
-            weight_type = 'des' if 'w_des' in col_names else 'iv'
-            alpha_1, alpha_2 = get_alpha_leakage_per_object(dat_all, num_bins, weight_type)
-            # Add the columns to the fits file
-            column_all.append(fits.Column(name='alpha_1', array=alpha_1, format='D'))
-            column_all.append(fits.Column(name='alpha_2', array=alpha_2, format='D'))
-
-
-        if verbose:
-            print(f"Wrting file {output_path}")
-        cat.write_fits_BinTable_file(column_all, output_path, R, R_shear, R_select, c)
-    else:
-        if verbose:
-            print('No calibration performed on the whole catalog.')
-        g_corr = None
-        cat.write_fits_BinTable_file(column_all, output_path)
-
-    return c, R_shear, g_corr
-
-
-def main(argv=None):
-    """Main
-
-    Main program
-    """
-
-    # Set default parameters
-    p_def = params_default()
-
-    # Command line options
-    options, args = parse_options(p_def)
-    # Without option parsing, this would be: args = argv[1:]
-
-    if check_options(options) is False:
-        return 1
-
-    param = update_param(p_def, options)
-
-    logging.log_command(argv)
-
-    if param.survey == 'v1':
-        n_patch = 7
-        patches = [f'P{x}' for x in np.arange(n_patch) + 1]
-    elif param.survey == 'test':
-        patches = ['P7', 'W3', 'S4']
-    elif param.survey == 'v1_small':
-        patches = ['W3', 'P7']
-    else:
-        patches = param.survey.split("+")
-    
-    sh = 'ngmix'
-
-    survey = 'unions'
-    pipeline = 'shapepipe'
-    year = 2024
-    version = '1.4.a'
-
-    additive_bias = 'from_extended'
-    shear_response = 'from_extended'
-
-
-    #if additive_bias == 'from_extended':
-        #return_mean_e = True
-    #else:
-        #return_mean_e = False
-    return_mean_e = additive_bias == "from_extended"
-    return_mean_R_shear = shear_response == 'from_extended'
-
-    R_select = get_R('R_select')
-
-    # Extended catalogue
-    if param.verbose:
-        print('Merging extended catalogue')
-    base_path = "."
-    input_sub_path = f'sp_output/shape_catalog_extended_{sh}.fits'
-    output_path = f'{survey}_{pipeline}_extended_{year}_v{version}.fits'
-    c_ext, R_shear_ext, g_corr = merge_catalogues(
-        patches,
-        base_path,
-        input_sub_path,
-        output_path,
-        sh,
-        R_select=R_select,
-        verbose=param.verbose,
-        return_mean_e=return_mean_e,
-        return_mean_R_shear=return_mean_R_shear,
-        hist_base=sh,
-    )
-
-    fname = 'c.txt'
-    dat = ascii.read(fname)
-    if dat[-1]['patch'] != 'all':
-        raise ValueError(
-            f'Invalid file {fname}, last row does not correspond to patch=\'all\''
-        )
-    c_err = np.empty(2)
-    c_err[0] = dat[-1]['dmcw_1']
-    c_err[1] = dat[-1]['dmcw_2']
-
-    c = np.empty(2)
-    if additive_bias == 'from_extended':
-        print('Getting additive bias from extended catalog')
-        c = c_ext
-    else:
-        print('Getting additive bias from combined run')
-        c[0] = dat[-1]['cw_1']
-        c[1] = dat[-1]['cw_2']
-
-    if shear_response == 'from_extended':
-        print('Getting shear response from extended catalog')
-        R_shear = R_shear_ext
-        R = R_shear + R_select
-    else:
-        R_shear = get_R('R_shear', key_base='R_shear')
-
-        # Check of total R
-        R = get_R('R', key_base='R_tot')
-        print('R - R_shear + R_select = 0 ?')
-        print(R - R_shear - R_select)
-
-    # Invert total response matrix
-    Rm1 = np.linalg.inv(R)
-
-    # Base catalogue
-    ra_all = np.array([])
-    dec_all = np.array([])
-    g1_corr_mc_all = np.array([]) if g_corr is None else g_corr[0] #The columns are already computed from extended catalogue
-    g2_corr_mc_all = np.array([]) if g_corr is None else g_corr[1]
-    w_all = np.array([])
-    mag_all = np.array([])
-    snr_all = np.array([])
-    patch_all = np.array([])
-    if param.verbose:
-        print('Merging base catalogue')
-    for idx, patch in enumerate(patches):
-
-        if param.verbose:
-            print(' ', patch)
-
-        input_path = f'{patch}/sp_output/shape_catalog_{sh}.fits'
-        ra, dec, g1, g2, w, mag, _ = read_shape_catalog(input_path, w_name="w_iv")
-
-        ra_all = np.append(ra_all, ra)
-        dec_all = np.append(dec_all, dec)
-        w_all = np.append(w_all, w)
-        mag_all = np.append(mag_all, mag)
-        patch_all = np.append(patch_all, [idx + 1] * len(ra))
-
-        #Only performs the calibration if the calibration is not done on the extended catalog
-        if g_corr is None:
-            g = np.array([g1, g2])
-
-            # Calibrate with global R and c
-            c_corr = Rm1.dot(c)
-
-            g_corr_mc = Rm1.dot(g)
-            for comp in (0, 1):
-                g_corr_mc[comp] = g_corr_mc[comp] - c_corr[comp]
-            g1_corr_mc_all = np.append(g1_corr_mc_all, g_corr_mc[0])
-            g2_corr_mc_all = np.append(g2_corr_mc_all, g_corr_mc[1])
-
-    if sh == 'ngmix':
-        ext_cat = fits.getdata(output_path , 1)
-        w_all = ext_cat['w_des']
-        e1_psf = ext_cat['e1_PSF']
-        e2_psf = ext_cat['e2_PSF']
-        alpha_1 = ext_cat['alpha_1']
-        alpha_2 = ext_cat['alpha_2']
-        e1_noleakage = g1_corr_mc_all - alpha_1 * e1_psf
-        e2_noleakage = g2_corr_mc_all - alpha_2 * e2_psf
-    output_path = f'{survey}_{pipeline}_{year}_v{version}.fits'
-    g_corr_mc_all = np.array([g1_corr_mc_all, g2_corr_mc_all])
-
-    add_col_data = { 'patch' : patch_all, }
-    add_col_format = { 'patch' : 'I' }
-
-    if sh == 'ngmix':
-        add_col_data['e1_noleakage'] = e1_noleakage
-        add_col_format['e1_noleakage'] = 'D'
-        add_col_data['e2_noleakage'] = e2_noleakage
-        add_col_format['e2_noleakage'] = 'D'
-    
-    write_shape_catalog(
-        output_path, 
-        ra_all,
-        dec_all,
-        w_all,
-        mag=mag_all,
-        g=g_corr_mc_all,
-        R=R,
-        R_shear=R_shear, 
-        R_select=R_select,
-        c=c,
-        c_err=c_err,
-        w_type="des",
-        add_cols=add_col_data,
-        add_cols_format=add_col_format, 
-    )
-
-    # PSF catalogue with single-epoch shapes (HSM moments)
-    if param.verbose:
-        print('Merging PSF catalogues (with single-epoch moments shapes)')
-    # Path of ShapePipe run
-    base_path = param.star_cat_base_path
-    input_sub_path = (
-        "output/run_sp_Ms/merge_starcat_runner/output/full_starcat-0000000.fits"
-    )
-
-    output_path = f'{survey}_{pipeline}_psf_{year}_v{version}.fits'
-    merge_catalogues(
-        patches,
-        base_path,
-        input_sub_path,
-        output_path,
-        None,
-        hdu_in=2,
-        verbose=param.verbose,
-    )
-
-    # Star catalogue with multi-epoch shapes (ngmix); from matching with
-    # galaxy sample
-    if param.verbose:
-        print('Merging star catalogues (matched with galaxy catalogue for shapes')
-    base_path = "."
-    input_sub_path = f'sp_output/psf_catalog_{sh}.fits'
-    output_path = f'{survey}_{pipeline}_star_{year}_v{version}.fits'
-    merge_catalogues(
-        patches,
-        base_path,
-        input_sub_path,
-        output_path,
-        None,
-        verbose=param.verbose,
-    )
-
-
-
-if __name__ == "__main__":
-    sys.exit(main(sys.argv))