diff --git a/CLAUDE.md b/CLAUDE.md index c80029b12..aaec79433 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -36,6 +36,30 @@ way to get all of that is the container. sandbox with a host clone of the repo bind-mounted in and `pip install -e` pointed at it, so edits on the host are live inside the container. +**Shadow a library build with `PYTHONPATH` (no image rebuild).** When you need +the container to run a *different* build of a pure-Python library than the one +baked into its venv — a feature worktree, an unreleased branch, this repo's own +`src/` against a stale image — prepend the host checkout to `PYTHONPATH`. Python +resolves the prepended path first, so the on-disk version shadows +`/app/.venv/...` without touching the image. This is the local-testing +counterpart of a git-ref dependency (e.g. `cs_util @ develop` in +`pyproject.toml`): the dep change makes CI build the right version; the shadow +lets you test that version *now*, before any rebuild. The recipe: + +```bash +apptainer exec --bind /n17data,/automnt bash -c \ + "cd && \ + PYTHONPATH=/path/to/libfoo-checkout:/src \ + python -m pytest -o addopts='' -q" +``` + +Notes: the checkout path is the **parent** of the importable package dir (the +dir containing `foo/`, not `foo/` itself); list several `:`-separated to stack +shadows; `-o addopts=''` clears `pyproject.toml`'s pytest defaults when a plugin +they reference (e.g. `pytest-cov`) isn't in the image. Use this for a quick +verify; land the real fix as the `pyproject.toml` / `uv.lock` dep change so CI +and the next image agree. + **Testing container changes: build remotely, pull locally.** Don't `apptainer build` images on a cluster — quotas are tight and the build is slow. The loop for any change to `Dockerfile` / `pyproject.toml` / `uv.lock` is: edit diff --git a/docs/source/pipeline_tutorial.md b/docs/source/pipeline_tutorial.md index 95a0aa94a..5279b5f98 100644 --- a/docs/source/pipeline_tutorial.md +++ b/docs/source/pipeline_tutorial.md @@ -289,16 +289,15 @@ job_sp TILE_ID -j 16 is the selection of galaxies as extended objects compared to the PSF. First, the PSF model is interpolated to galaxy positions, according to the PSF model with `psfex_interp` or `mccd_interp`. Next, postage stamps around galaxies -of the weights maps are created via `vignetmaker`. Then, the spread model -is computed by the `spread_model` module. Finally, postage stamps +of the weights maps are created via `vignetmaker`. Finally, postage stamps around galaxies of single-exposure data is extracted with another call to `vignetmaker`. The output directory is -- `run_sp_MiViSmVi` if the PSF model is `mccd`; -- `run_sp_tile_PsViSmVi` for the `PSFEx` PSF model. +- `run_sp_MiViVi` if the PSF model is `mccd`; +- `run_sp_tile_PsViVi` for the `PSFEx` PSF model. -This corresponds to the MCCD/PSFex interpolation (`Mi`/`Pi`), `vignetmaker` (`Vi`), `spread_model` (`Sm`), and the +This corresponds to the MCCD/PSFex interpolation (`Mi`/`Pi`), `vignetmaker` (`Vi`), and the second call to `vignetmaker` (`Vi`). @@ -325,7 +324,7 @@ job_sp TILE_ID -j 64 This task first merges the `NJOB` parallel `ngmix` output files from the previous step into one output file. Then, previously obtained information are pasted into a _final_ shape catalogue via `make_cat`. Included are galaxy detection and basic measurement parameters, the PSF model at -galaxy positions, the spread-model classification, and the shape measurement. +galaxy positions, and the shape measurement. Two output directories are created. The first one is `run_sp_Ms` for the `merge_sep` run. diff --git a/example/cfis/config_make_cat_mccd.ini b/example/cfis/config_make_cat_mccd.ini index 5aa33d272..4ac61823d 100644 --- a/example/cfis/config_make_cat_mccd.ini +++ b/example/cfis/config_make_cat_mccd.ini @@ -34,7 +34,7 @@ LOG_NAME = log_sp RUN_LOG_NAME = log_run_sp # Input directory, containing input files, single string or list of names with length matching FILE_PATTERN -INPUT_DIR = last:sextractor_runner_run_1, last:spread_model_runner, last:mccd_interp_runner, last:merge_sep_cats_runner +INPUT_DIR = last:sextractor_runner_run_1, last:mccd_interp_runner, last:merge_sep_cats_runner # Output directory OUTPUT_DIR = ./output @@ -56,12 +56,10 @@ TIMEOUT = 96:00:00 # Input file pattern(s), list of strings with length matching number of expected input file types # Cannot contain wild cards -FILE_PATTERN = sexcat, sexcat_sm, galaxy_psf, ngmix -#, galsim +FILE_PATTERN = sexcat, galaxy_psf, ngmix # FILE_EXT (optional) list of string extensions to identify input files -FILE_EXT = .fits, .fits, .sqlite, .fits -#, .fits +FILE_EXT = .fits, .sqlite, .fits # Numbering convention, string that exemplifies a numbering pattern. # Matches input single exposures (with 'p' removed) @@ -69,9 +67,4 @@ FILE_EXT = .fits, .fits, .sqlite, .fits # sections below NUMBERING_SCHEME = -000-000 -SM_DO_CLASSIFICATION = True -SM_STAR_THRESH = 0.003 -SM_GAL_THRESH = 0.01 - SHAPE_MEASUREMENT_TYPE = ngmix -#, galsim diff --git a/example/cfis/config_make_cat_psfex.ini b/example/cfis/config_make_cat_psfex.ini index a7407d990..552c578e6 100644 --- a/example/cfis/config_make_cat_psfex.ini +++ b/example/cfis/config_make_cat_psfex.ini @@ -55,14 +55,14 @@ TIMEOUT = 96:00:00 [MAKE_CAT_RUNNER] # Input directory, containing input files, single string or list of names with length matching FILE_PATTERN -INPUT_DIR = run_sp_tile_Sx:sextractor_runner, last:spread_model_runner, last:psfex_interp_runner, last:merge_sep_cats_runner +INPUT_DIR = run_sp_tile_Sx:sextractor_runner, last:psfex_interp_runner, last:merge_sep_cats_runner # Input file pattern(s), list of strings with length matching number of expected input file types # Cannot contain wild cards -FILE_PATTERN = sexcat, sexcat_sm, galaxy_psf, ngmix +FILE_PATTERN = sexcat, galaxy_psf, ngmix # FILE_EXT (optional) list of string extensions to identify input files -FILE_EXT = .fits, .fits, .sqlite, .fits +FILE_EXT = .fits, .sqlite, .fits # Numbering convention, string that exemplifies a numbering pattern. # Matches input single exposures (with 'p' removed) @@ -70,8 +70,4 @@ FILE_EXT = .fits, .fits, .sqlite, .fits # sections below NUMBERING_SCHEME = -000-000 -SM_DO_CLASSIFICATION = True -SM_STAR_THRESH = 0.003 -SM_GAL_THRESH = 0.01 - SHAPE_MEASUREMENT_TYPE = ngmix diff --git a/example/cfis/config_make_cat_psfex_nosm.ini b/example/cfis/config_make_cat_psfex_nosm.ini index 1983f91c7..fc4582325 100644 --- a/example/cfis/config_make_cat_psfex_nosm.ini +++ b/example/cfis/config_make_cat_psfex_nosm.ini @@ -71,6 +71,4 @@ FILE_EXT = .fits, .sqlite, .fits # sections below NUMBERING_SCHEME = -000-000 -SM_DO_CLASSIFICATION = False - SHAPE_MEASUREMENT_TYPE = ngmix diff --git a/example/cfis/final_cat.param b/example/cfis/final_cat.param index eae7a2c04..d403b034b 100644 --- a/example/cfis/final_cat.param +++ b/example/cfis/final_cat.param @@ -11,15 +11,9 @@ FLAGS IMAFLAGS_ISO NGMIX_MCAL_FLAGS -# PSF ellipticity -NGMIX_ELL_PSFo_NOSHEAR - -# spread class -#SPREAD_CLASS - -# spread model flag and error -#SPREAD_MODEL -#SPREADERR_MODEL +# PSF ellipticity (original image PSF) +NGMIX_G1_PSF_ORIG_NOSHEAR +NGMIX_G2_PSF_ORIG_NOSHEAR # Number of epochs (exposures) N_EPOCH @@ -29,16 +23,26 @@ NGMIX_N_EPOCH ## Ngmix: model fitting # galaxy ellipticity -NGMIX_ELL_1M -NGMIX_ELL_1P -NGMIX_ELL_2M -NGMIX_ELL_2P -NGMIX_ELL_NOSHEAR -#NGMIX_ELL_ERR_1M -#NGMIX_ELL_ERR_1P -#NGMIX_ELL_ERR_2M -#NGMIX_ELL_ERR_2P -NGMIX_ELL_ERR_NOSHEAR +NGMIX_G1_1M +NGMIX_G2_1M +NGMIX_G1_1P +NGMIX_G2_1P +NGMIX_G1_2M +NGMIX_G2_2M +NGMIX_G1_2P +NGMIX_G2_2P +NGMIX_G1_NOSHEAR +NGMIX_G2_NOSHEAR +#NGMIX_G1_ERR_1M +#NGMIX_G2_ERR_1M +#NGMIX_G1_ERR_1P +#NGMIX_G2_ERR_1P +#NGMIX_G1_ERR_2M +#NGMIX_G2_ERR_2M +#NGMIX_G1_ERR_2P +#NGMIX_G2_ERR_2P +NGMIX_G1_ERR_NOSHEAR +NGMIX_G2_ERR_NOSHEAR # flags NGMIX_FLAGS_1M @@ -58,11 +62,11 @@ NGMIX_T_ERR_1P NGMIX_T_ERR_2M NGMIX_T_ERR_2P NGMIX_T_ERR_NOSHEAR -NGMIX_Tpsf_1M -NGMIX_Tpsf_1P -NGMIX_Tpsf_2M -NGMIX_Tpsf_2P -NGMIX_Tpsf_NOSHEAR +NGMIX_T_PSF_RECONV_1M +NGMIX_T_PSF_RECONV_1P +NGMIX_T_PSF_RECONV_2M +NGMIX_T_PSF_RECONV_2P +NGMIX_T_PSF_RECONV_NOSHEAR # flux and error NGMIX_FLUX_1M @@ -94,10 +98,10 @@ FWHM_IMAGE FWHM_WORLD # PSF size measured on original image -NGMIX_T_PSFo_NOSHEAR +NGMIX_T_PSF_ORIG_NOSHEAR # PSF size measured on reconvolved image -# NGMIX_Tpsf_NOSHEAR +# NGMIX_T_PSF_RECONV_NOSHEAR # ngmix moment failure flag NGMIX_MOM_FAIL diff --git a/example/unions_800/cat_matched.param b/example/unions_800/cat_matched.param index 0df134071..bfe7c160b 100644 --- a/example/unions_800/cat_matched.param +++ b/example/unions_800/cat_matched.param @@ -11,15 +11,9 @@ FLAGS IMAFLAGS_ISO NGMIX_MCAL_FLAGS -# PSF ellipticity -NGMIX_ELL_PSFo_NOSHEAR - -# spread class -SPREAD_CLASS - -# spread model flag and error -SPREAD_MODEL -SPREADERR_MODEL +# PSF ellipticity (original image PSF) +NGMIX_G1_PSF_ORIG_NOSHEAR +NGMIX_G2_PSF_ORIG_NOSHEAR # Number of epochs (exposures) N_EPOCH @@ -29,16 +23,26 @@ NGMIX_N_EPOCH ## Ngmix: model fitting # galaxy ellipticity -NGMIX_ELL_1M -NGMIX_ELL_1P -NGMIX_ELL_2M -NGMIX_ELL_2P -NGMIX_ELL_NOSHEAR -#NGMIX_ELL_ERR_1M -#NGMIX_ELL_ERR_1P -#NGMIX_ELL_ERR_2M -#NGMIX_ELL_ERR_2P -NGMIX_ELL_ERR_NOSHEAR +NGMIX_G1_1M +NGMIX_G2_1M +NGMIX_G1_1P +NGMIX_G2_1P +NGMIX_G1_2M +NGMIX_G2_2M +NGMIX_G1_2P +NGMIX_G2_2P +NGMIX_G1_NOSHEAR +NGMIX_G2_NOSHEAR +#NGMIX_G1_ERR_1M +#NGMIX_G2_ERR_1M +#NGMIX_G1_ERR_1P +#NGMIX_G2_ERR_1P +#NGMIX_G1_ERR_2M +#NGMIX_G2_ERR_2M +#NGMIX_G1_ERR_2P +#NGMIX_G2_ERR_2P +NGMIX_G1_ERR_NOSHEAR +NGMIX_G2_ERR_NOSHEAR # flags NGMIX_FLAGS_1M @@ -58,11 +62,11 @@ NGMIX_T_ERR_1P NGMIX_T_ERR_2M NGMIX_T_ERR_2P NGMIX_T_ERR_NOSHEAR -NGMIX_Tpsf_1M -NGMIX_Tpsf_1P -NGMIX_Tpsf_2M -NGMIX_Tpsf_2P -NGMIX_Tpsf_NOSHEAR +NGMIX_T_PSF_RECONV_1M +NGMIX_T_PSF_RECONV_1P +NGMIX_T_PSF_RECONV_2M +NGMIX_T_PSF_RECONV_2P +NGMIX_T_PSF_RECONV_NOSHEAR # flux and error NGMIX_FLUX_1M @@ -83,14 +87,14 @@ FLAG_TILING # magnitude, mainly for plots MAG_AUTO -# SNR from SExtractor, used for cuts on GALSIM shapes +# SNR from SExtractor, used for shape cuts SNR_WIN # PSF size measured on original image -NGMIX_T_PSFo_NOSHEAR +NGMIX_T_PSF_ORIG_NOSHEAR # PSF size measured on reconvolved image -# NGMIX_Tpsf_NOSHEAR +# NGMIX_T_PSF_RECONV_NOSHEAR # ngmix moment failure flag NGMIX_MOM_FAIL diff --git a/pyproject.toml b/pyproject.toml index 9097b3cc8..69c73f067 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,7 +20,7 @@ dependencies = [ "astropy>=7.0", # major 6 → 7 "astroquery", "canfar", - "cs_util>=0.1.9", + "cs_util>=0.2.1", "galsim>=2.8", "h5py", "joblib>=1.4", @@ -90,6 +90,11 @@ environments = ["sys_platform == 'linux'"] [tool.uv.sources] ngmix = { git = "https://github.com/esheldon/ngmix", tag = "v2.4.0" } +# Track cs_util's develop branch (a git ref, not a pinned release) so new +# cs_util.size / cs_util.shape helpers are available without shipping a release +# and rebuilding the image each time. shapepipe's column grammar sources size +# (T = 2*sigma^2) and ellipticity conversions from cs_util. +cs_util = { git = "https://github.com/CosmoStat/cs_util", branch = "develop" } [build-system] # Declaring a build backend makes shapepipe an installed package (not a uv diff --git a/src/shapepipe/modules/make_cat_package/__init__.py b/src/shapepipe/modules/make_cat_package/__init__.py index 1bd1cee5b..22b48d811 100644 --- a/src/shapepipe/modules/make_cat_package/__init__.py +++ b/src/shapepipe/modules/make_cat_package/__init__.py @@ -7,7 +7,6 @@ :Parent modules: - ``sextractor_runner`` -- ``spread_model_runner`` - ``psfex_interp_runner`` or ``mccd_interp_runner`` - ``ngmix_runner`` @@ -21,29 +20,14 @@ This module creates a *final* catalogue combining the output of various previous module runs. This gathers all relevant information on the measured galaxies for weak-lensing post-processing. This includes galaxy detection and -basic measurement parameters, the PSF model at galaxy positions, the -spread-model classification, and the shape measurement. +basic measurement parameters, the PSF model at galaxy positions, and the +shape measurement. Module-specific config file entries =================================== -SM_DO_CLASSIFICATION : bool, optional - Adds spread-model star/galaxy classification flag as the column - ``SPREAD_CLASS`` to the output if ``True`` -SM_STAR_THRESH : float, optional - Threshold :math:`s_{\rm star, thresh}` for star selection; object is - classified as a star if - :math:`|x s + 2 \sigma_s | < s_{\textrm{star, thresh}}` - where :math:`s` is the spread model and :math:`\sigma_s` is the spread - model error; default value is ``0.003`` -SM_GAL_THRESH : float, optional - Threshold :math:`s_{\rm gal, thresh}` for galaxy selection; object is - classified as a galaxy if - :math:`s + 2 \sigma_s > s_{\textrm{gal, thresh}}` where :math:`s` is the - spread model and :math:`\sigma_s` is the spread model error; default value - is ``0.01`` SHAPE_MEASUREMENT_TYPE : list - Shape measurement method, valid options are ``ngmix`` and/or ``galsim`` + Shape measurement method; the only supported option is ``ngmix`` SAVE_PSF_DATA : bool, optional Save PSF information if ``True``; default value is ``False`` TILE_LIST : str, optional diff --git a/src/shapepipe/modules/make_cat_package/make_cat.py b/src/shapepipe/modules/make_cat_package/make_cat.py index 79b0571a3..ff8dd455c 100644 --- a/src/shapepipe/modules/make_cat_package/make_cat.py +++ b/src/shapepipe/modules/make_cat_package/make_cat.py @@ -16,7 +16,6 @@ from sqlitedict import SqliteDict from shapepipe.pipeline import file_io -from shapepipe.utilities import galaxy def get_output_name(output_dir, file_number_string): @@ -139,77 +138,6 @@ def save_sextractor_data(final_cat_file, sexcat_path, remove_vignet=True): return cat_size -def save_sm_data( - final_cat_file, - sexcat_sm_path, - do_classif=True, - star_thresh=0.003, - gal_thresh=0.01, - n_obj=-1, -): - r"""Save Spread-Model Data. - - Save the spread-model data into the final catalogue. - - Parameters - ---------- - final_cat_file : file_io.FITSCatalogue - Final catalogue - sexcat_sm_path : str - Path to spread-model catalogue to save. If ``None``, spread_model is - set to 99 - do_classif : bool - If ``True`` objects will be classified into stars, galaxies, and other, - using the classifier - :math:`{\rm class} = {\rm sm} + 2 * {\rm sm}_{\rm err}` - star_thresh : float - Threshold for star selection; object is classified as star if - :math:`|{\rm class}| <` ``star_thresh`` - gal_thresh : float - Threshold for galaxy selection; object is classified as galaxy if - :math:`{\rm class} >` ``gal_thresh`` - nobj : int, optional - Number of objects, only used if sexcat_sm_path is ``-1`` - - Returns - ------- - int - Number of objects saved - """ - final_cat_file.open() - - if sexcat_sm_path is not None: - sexcat_sm_file = file_io.FITSCatalogue( - sexcat_sm_path, - SEx_catalogue=True, - ) - sexcat_sm_file.open() - - sm = np.copy(sexcat_sm_file.get_data()["SPREAD_MODEL"]) - sm_err = np.copy(sexcat_sm_file.get_data()["SPREADERR_MODEL"]) - - sexcat_sm_file.close() - - else: - sm = np.ones(n_obj) * 99 - sm_err = np.ones(n_obj) * 99 - - final_cat_file.add_col("SPREAD_MODEL", sm) - final_cat_file.add_col("SPREADERR_MODEL", sm_err) - - if do_classif: - obj_flag = np.ones_like(sm, dtype="int16") * 2 - classif = sm + 2.0 * sm_err - obj_flag[np.where(np.abs(classif) < star_thresh)] = 0 - obj_flag[np.where(classif > gal_thresh)] = 1 - - final_cat_file.add_col("SPREAD_CLASS", obj_flag) - - final_cat_file.close() - - return n_obj - - class SaveCatalogue: """Save Catalogue. @@ -243,7 +171,7 @@ def process( Parameters ---------- mode : str - Run mode, options are ``ngmix``, ``galsim`` or ``psf`` + Run mode, options are ``ngmix`` or ``psf`` cat_path : str Path to input catalogue moments : bool @@ -263,15 +191,13 @@ def process( err_msg = None if mode == "ngmix": err_msg = self._save_ngmix_data(cat_path, moments) - elif mode == "galsim": - self._save_galsim_shapes(cat_path) elif mode == "psf": self._save_psf_data(cat_path) else: err_msg = ( f"Invalid process mode ({mode}) for " - + '``make_cat.Savecatalogue``. Options are "ngmix", ' - + '"galsim" or "psf".' + + '``make_cat.Savecatalogue``. Options are "ngmix" ' + + 'or "psf".' ) if err_msg is None: @@ -329,10 +255,26 @@ def _save_ngmix_data(self, ngmix_cat_path, moments=False): Save the NGMIX catalogue into the final one. + Column grammar: ``NGMIX[m]_[_ERR][_]_``, + plus three OBJECT/SHEAR-less per-object metadata columns + (``NGMIX[m]_MCAL_FLAGS``, ``NGMIX_N_EPOCH``, + ``NGMIX_MCAL_TYPES_FAIL``). The galaxy is the implicit default object + and carries NO ``OBJECT`` token (``NGMIX_G1_NOSHEAR``, dropping the + ``GAL`` segment carried by the pre-#761 names). The explicit PSF + objects are ``PSF_ORIG`` + (the original image PSF, fit by + :func:`ngmix.average_original_psf`) and ``PSF_RECONV`` (the metacal + reconvolution kernel, fit by :func:`ngmix.average_multiepoch_psf`); + see those functions for what each PSF family IS. ``PSF_ORIG`` and + ``PSF_RECONV`` are independent fits of different PSFs, no longer the + single aliased value of the pre-fix code (shapepipe#749). + Parameters ---------- ngmix_cat_path : str Path to NGMIX catalogue + moments : bool, optional + If True, write the parallel ``NGMIXm_*`` (moments-branch) columns. """ self._key_ends = ["1M", "1P", "2M", "2P", "NOSHEAR"] @@ -374,29 +316,49 @@ def _save_ngmix_data(self, ngmix_cat_path, moments=False): prefix = f"NGMIX{m}" + # Galaxy (no object token), original image PSF (PSF_ORIG) and metacal + # reconvolution kernel (PSF_RECONV); see ngmix.average_original_psf / + # average_multiepoch_psf for what each PSF family is. G1/G2 are scalar + # reduced-shear components, not a 2-vector. Sentinels: + # sizes/fluxes/mags/flags 0, *_ERR fluxes/mags -1, ellipticities -10, + # *_ERR sizes 1e30. for key_str in ( f"{prefix}_T_", - f"{prefix}_Tpsf_", f"{prefix}_SNR_", f"{prefix}_FLUX_", f"{prefix}_MAG_", f"{prefix}_FLAGS_", - f"{prefix}_T_PSFo_", + f"{prefix}_T_PSF_ORIG_", + f"{prefix}_T_PSF_RECONV_", ): self._update_dict(key_str, np.zeros(n_obj)) - for key_str in (f"NGMIX{m}_FLUX_ERR_", f"NGMIX{m}_MAG_ERR_"): - self._update_dict(key_str, np.ones(len(self._obj_id)) * -1) for key_str in ( - f"NGMIX{m}_ELL_", - f"NGMIX{m}_ELL_ERR_", - f"NGMIX{m}_ELL_PSFo_", + f"{prefix}_FLUX_ERR_", + f"{prefix}_MAG_ERR_", ): - self._update_dict(key_str, np.ones((len(self._obj_id), 2)) * -10.0) - self._update_dict( - f"NGMIX{m}_T_ERR_", - np.ones(len(self._obj_id)) * 1e30, - ) - self._add2dict(f"NGMIX{m}_MCAL_FLAGS", np.zeros(len(self._obj_id))) + self._update_dict(key_str, np.ones(n_obj) * -1) + for key_str in ( + f"{prefix}_G1_", + f"{prefix}_G2_", + f"{prefix}_G1_ERR_", + f"{prefix}_G2_ERR_", + f"{prefix}_G1_PSF_ORIG_", + f"{prefix}_G2_PSF_ORIG_", + f"{prefix}_G1_ERR_PSF_ORIG_", + f"{prefix}_G2_ERR_PSF_ORIG_", + f"{prefix}_G1_PSF_RECONV_", + f"{prefix}_G2_PSF_RECONV_", + f"{prefix}_G1_ERR_PSF_RECONV_", + f"{prefix}_G2_ERR_PSF_RECONV_", + ): + self._update_dict(key_str, np.ones(n_obj) * -10.0) + for key_str in ( + f"{prefix}_T_ERR_", + f"{prefix}_T_ERR_PSF_ORIG_", + f"{prefix}_T_ERR_PSF_RECONV_", + ): + self._update_dict(key_str, np.ones(n_obj) * 1e30) + self._add2dict(f"{prefix}_MCAL_FLAGS", np.zeros(n_obj)) for idx, id_tmp in enumerate(self._obj_id): ind = np.where(id_tmp == ngmix_id)[0] @@ -406,46 +368,84 @@ def _save_ngmix_data(self, ngmix_cat_path, moments=False): ncf_data = ngmix_cat_file.get_data(key) - g = (ncf_data["g1"][ind[0]], ncf_data["g2"][ind[0]]) - g_err = ( - ncf_data["g1_err"][ind[0]], - ncf_data["g2_err"][ind[0]], + # Galaxy shape, size and photometry. + self._add2dict( + f"{prefix}_G1_{key}", ncf_data["g1"][ind[0]], idx ) - - self._add2dict(f"NGMIX{m}_ELL_{key}", g, idx) - self._add2dict(f"NGMIX{m}_ELL_ERR_{key}", g_err, idx) - - t = ncf_data["T"][ind[0]] - t_err = ncf_data["T_err"][ind[0]] - tpsf = ncf_data["Tpsf"][ind[0]] - self._add2dict(f"NGMIX{m}_T_{key}", t, idx) - self._add2dict(f"NGMIX{m}_T_ERR_{key}", t_err, idx) - self._add2dict(f"NGMIX{m}_Tpsf_{key}", tpsf, idx) - - s2n = ncf_data["s2n"][ind[0]] - self._add2dict(f"NGMIX{m}_SNR_{key}", s2n, idx) - - flux = ncf_data["flux"][ind[0]] - flux_err = ncf_data["flux_err"][ind[0]] - self._add2dict(f"NGMIX{m}_FLUX_{key}", flux, idx) - self._add2dict(f"NGMIX{m}_FLUX_ERR_{key}", flux_err, idx) - - mag = ncf_data["mag"][ind[0]] - mag_err = ncf_data["mag_err"][ind[0]] - self._add2dict(f"NGMIX{m}_MAG_{key}", mag, idx) - self._add2dict(f"NGMIX{m}_MAG_ERR_{key}", mag_err, idx) - - flags = ncf_data["flags"][ind[0]] - self._add2dict(f"NGMIX{m}_FLAGS_{key}", flags, idx) - - g_psf = ( - ncf_data["g1_psfo_ngmix"][ind[0]], - ncf_data["g2_psfo_ngmix"][ind[0]], + self._add2dict( + f"{prefix}_G2_{key}", ncf_data["g2"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_G1_ERR_{key}", + ncf_data["g1_err"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_G2_ERR_{key}", + ncf_data["g2_err"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_T_{key}", ncf_data["T"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_T_ERR_{key}", + ncf_data["T_err"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_SNR_{key}", ncf_data["s2n"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_FLUX_{key}", + ncf_data["flux"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_FLUX_ERR_{key}", + ncf_data["flux_err"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_MAG_{key}", ncf_data["mag"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_MAG_ERR_{key}", + ncf_data["mag_err"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_FLAGS_{key}", + ncf_data["flags"][ind[0]], idx ) - self._add2dict(f"NGMIX{m}_ELL_PSFo_{key}", g_psf, idx) - t_psfo = ncf_data["Tpsf"][ind[0]] - self._add2dict(f"NGMIX{m}_T_PSFo_{key}", t_psfo, idx) + # Original image PSF (average_original_psf) and metacal + # reconvolution kernel (average_multiepoch_psf). Both PSF + # families share ONE write template, so the FITS column + # name (``{obj}``) and the res-key it reads (``{family}``) + # are generated from the same pair and cannot drift apart. + for family, obj in ( + ("orig", "PSF_ORIG"), + ("reconv", "PSF_RECONV"), + ): + self._add2dict( + f"{prefix}_G1_{obj}_{key}", + ncf_data[f"g1_psf_{family}"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_G2_{obj}_{key}", + ncf_data[f"g2_psf_{family}"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_G1_ERR_{obj}_{key}", + ncf_data[f"g1_err_psf_{family}"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_G2_ERR_{obj}_{key}", + ncf_data[f"g2_err_psf_{family}"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_T_{obj}_{key}", + ncf_data[f"T_psf_{family}"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_T_ERR_{obj}_{key}", + ncf_data[f"T_err_psf_{family}"][ind[0]], idx + ) self._add2dict( f"NGMIX{m}_MCAL_FLAGS", @@ -469,119 +469,6 @@ def _save_ngmix_data(self, ngmix_cat_path, moments=False): return None - def _save_galsim_shapes(self, galsim_cat_path): - """Save GalSim Shapes. - - Save the GalSim catalogue into the final one. - - Parameters - ---------- - galsim_cat_path : str - Path to GalSim catalogue to save - - """ - galsim_cat_file = file_io.FITSCatalogue(galsim_cat_path) - galsim_cat_file.open() - - self._key_ends = galsim_cat_file.get_ext_name()[1:] - - galsim_id = galsim_cat_file.get_data()["id"] - - for key_str in ( - "GALSIM_GAL_SIGMA_", - "GALSIM_PSF_SIGMA_", - "GALSIM_FLUX_", - "GALSIM_MAG_", - ): - self._update_dict(key_str, np.zeros(len(self._obj_id))) - for key_str in ("GALSIM_FLUX_ERR_", "GALSIM_MAG_ERR_", "GALSIM_RES_"): - self._update_dict(key_str, np.ones(n_obj) * -1) - for key_str in ( - "GALSIM_GAL_ELL_", - "GALSIM_GAL_ELL_ERR_", - "GALSIM_GAL_ELL_UNCORR_", - "GALSIM_PSF_ELL_", - ): - self._update_dict(key_str, np.ones((len(self._obj_id), 2)) * -10.0) - self._update_dict( - "GALSIM_FLAGS_", - np.ones(len(self._obj_id), dtype="int16"), - ) - - for idx, id_tmp in enumerate(self._obj_id): - ind = np.where(id_tmp == galsim_id)[0] - if len(ind) > 0: - - for key in self._key_ends: - - gcf_data = galsim_cat_file.get_data(key) - - if key == "ORIGINAL_PSF": - - uncorr_g = ( - gcf_data["gal_uncorr_g1"][ind[0]], - gcf_data["gal_uncorr_g2"][ind[0]], - ) - psf_sig = gcf_data["gal_sigma"][ind[0]] - self._add2dict(f"GALSIM_PSF_ELL_{key}", uncorr_g, idx) - self._add2dict(f"GALSIM_PSF_SIGMA_{key}", psf_sig, idx) - - else: - - g = ( - gcf_data["gal_g1"][ind[0]], - gcf_data["gal_g2"][ind[0]], - ) - g_err = ( - gcf_data["gal_g1_err"][ind[0]], - gcf_data["gal_g2_err"][ind[0]], - ) - self._add2dict(f"GALSIM_GAL_ELL_{key}", g, idx) - self._add2dict(f"GALSIM_GAL_ELL_ERR_{key}", g_err, idx) - - uncorr_g = ( - gcf_data["gal_uncorr_g1"][ind[0]], - gcf_data["gal_uncorr_g2"][ind[0]], - ) - self._add2dict( - f"GALSIM_GAL_ELL_UNCORR_{key}", - uncorr_g, - idx, - ) - - sigma = gcf_data["gal_sigma"][ind[0]] - self._add2dict(f"GALSIM_GAL_SIGMA_{key}", sigma, idx) - - psf_g = ( - gcf_data["psf_g1"][ind[0]], - gcf_data["psf_g2"][ind[0]], - ) - psf_sigma = gcf_data["psf_sigma"][ind[0]] - self._add2dict(f"GALSIM_PSF_ELL_{key}", psf_g, idx) - self._add2dict( - f"GALSIM_PSF_SIGMA_{key}", - psf_sigma, - idx, - ) - - flux = gcf_data["gal_flux"][ind[0]] - flux_err = gcf_data["gal_flux_err"][ind[0]] - self._add2dict(f"GALSIM_FLUX_{key}", flux, idx) - self._add2dict(f"GALSIM_FLUX_ERR_{key}", flux_err, idx) - - mag = gcf_data["gal_mag"][ind[0]] - mag_err = gcf_data["gal_mag_err"][ind[0]] - self._add2dict(f"GALSIM_MAG_{key}", mag, idx) - self._add2dict(f"GALSIM_MAG_ERR_{key}", mag_err, idx) - - flags = gcf_data["gal_flag"][ind[0]] - self._add2dict(f"GALSIM_FLAGS_{key}", flags, idx) - - res = gcf_data["gal_resolution"][ind[0]] - self._add2dict(f"GALSIM_RES_{key}", res, idx) - - galsim_cat_file.close() - def _save_psf_data(self, galaxy_psf_path): """Save PSF data. @@ -598,20 +485,29 @@ def _save_psf_data(self, galaxy_psf_path): max_epoch = np.max(self._final_cat_file.get_data()["N_EPOCH"]) + 1 self._output_dict = { - f"PSF_ELL_{idx + 1}": np.ones((len(self._obj_id), 2)) * -10.0 + f"HSM_E1_PSF_{idx + 1}": np.ones(len(self._obj_id)) * -10.0 for idx in range(max_epoch) } self._output_dict = { **self._output_dict, **{ - f"PSF_FWHM_{idx + 1}": np.zeros(len(self._obj_id)) + f"HSM_E2_PSF_{idx + 1}": np.ones(len(self._obj_id)) * -10.0 + for idx in range(max_epoch) + }, + } + self._output_dict = { + **self._output_dict, + **{ + f"HSM_T_PSF_{idx + 1}": np.zeros(len(self._obj_id)) for idx in range(max_epoch) }, } self._output_dict = { **self._output_dict, **{ - f"PSF_FLAG_{idx + 1}": np.ones(len(self._obj_id), dtype="int16") + f"HSM_FLAG_PSF_{idx + 1}": np.ones( + len(self._obj_id), dtype="int16" + ) for idx in range(max_epoch) }, } @@ -625,21 +521,28 @@ def _save_psf_data(self, galaxy_psf_path): gpc_data = galaxy_psf_cat[str(id_tmp)][key] - if gpc_data["SHAPES"]["FLAG_PSF_HSM"] != 0: + if gpc_data["SHAPES"]["HSM_FLAG_PSF"] != 0: continue - e_psf = ( - gpc_data["SHAPES"]["E1_PSF_HSM"], - gpc_data["SHAPES"]["E2_PSF_HSM"], + self._add2dict( + f"HSM_E1_PSF_{epoch + 1}", + gpc_data["SHAPES"]["HSM_E1_PSF"], idx + ) + self._add2dict( + f"HSM_E2_PSF_{epoch + 1}", + gpc_data["SHAPES"]["HSM_E2_PSF"], idx ) - self._add2dict(f"PSF_ELL_{epoch + 1}", e_psf, idx) - psf_fwhm = galaxy.sigma_to_fwhm( - gpc_data["SHAPES"]["SIGMA_PSF_HSM"] + # HSM_T_PSF already holds T (sigma_to_T applied at the + # producer's _interpolate_me); read straight through. + self._add2dict( + f"HSM_T_PSF_{epoch + 1}", + gpc_data["SHAPES"]["HSM_T_PSF"], idx ) - self._add2dict(f"PSF_FWHM_{epoch + 1}", psf_fwhm, idx) - flag_psf = gpc_data["SHAPES"]["FLAG_PSF_HSM"] - self._add2dict(f"PSF_FLAG_{epoch + 1}", flag_psf, idx) + self._add2dict( + f"HSM_FLAG_PSF_{epoch + 1}", + gpc_data["SHAPES"]["HSM_FLAG_PSF"], idx + ) galaxy_psf_cat.close() diff --git a/src/shapepipe/modules/make_cat_runner.py b/src/shapepipe/modules/make_cat_runner.py index fa1fc43ff..7bf293cd3 100644 --- a/src/shapepipe/modules/make_cat_runner.py +++ b/src/shapepipe/modules/make_cat_runner.py @@ -16,17 +16,15 @@ version="1.1", input_module=[ "sextractor_runner", - "spread_model_runner", "psfex_interp_runner", "ngmix_runner", ], file_pattern=[ "tile_sexcat", - "sexcat_sm", "galaxy_psf", "ngmix", ], - file_ext=[".fits", ".fits", ".sqlite", ".fits"], + file_ext=[".fits", ".sqlite", ".fits"], depends=["numpy", "sqlitedict"], ) def make_cat_runner( @@ -39,37 +37,11 @@ def make_cat_runner( ): """Define The Make Catalogue Runner.""" # Set input file paths - if len(input_file_list) == 3: - # No spread model input - ( - tile_sexcat_path, - galaxy_psf_path, - shape1_cat_path, - ) = input_file_list - sexcat_sm_path = None - else: - # With spread model input - ( - tile_sexcat_path, - sexcat_sm_path, - galaxy_psf_path, - shape1_cat_path, - ) = input_file_list[0:4] - if len(input_file_list) == 5: - # With second shape catalogue input - shape2_cat_path = input_file_list[4] - - # Fetch classification options - do_classif = config.getboolean( - module_config_sec, - "SM_DO_CLASSIFICATION", - ) - if do_classif: - star_thresh = config.getfloat(module_config_sec, "SM_STAR_THRESH") - gal_thresh = config.getfloat(module_config_sec, "SM_GAL_THRESH") - else: - star_thresh = None - gal_thresh = None + ( + tile_sexcat_path, + galaxy_psf_path, + shape1_cat_path, + ) = input_file_list # Fetch shape measurement type shape_type_list = config.getlist( @@ -77,10 +49,8 @@ def make_cat_runner( "SHAPE_MEASUREMENT_TYPE", ) for shape_type in shape_type_list: - if shape_type.lower() not in ["ngmix", "galsim"]: - raise ValueError( - "SHAPE_MEASUREMENT_TYPE must be in [ngmix, galsim]" - ) + if shape_type.lower() != "ngmix": + raise ValueError("SHAPE_MEASUREMENT_TYPE must be ngmix") # Fetch PSF data option if config.has_option(module_config_sec, "SAVE_PSF_DATA"): @@ -99,37 +69,12 @@ def make_cat_runner( n_obj = make_cat.save_sextractor_data(final_cat_file, tile_sexcat_path) cat_size_sextractor = n_obj - # Save spread-model data - if sexcat_sm_path is None: - w_log.info("No sm cat input, setting spread model to 99") - else: - w_log.info("Save spread-model data") - cat_size_sm = make_cat.save_sm_data( - final_cat_file, - sexcat_sm_path, - do_classif, - star_thresh, - gal_thresh, - n_obj=n_obj - ) - - if cat_size_sextractor != cat_size_sm: - w_log( - f"Warnign: SExtractor catalogue {tile_sexcat_path} has different size" - + f" ({cat_size_sextractor} than spread_model catalogue" - + f" {sexcat_sm_path} ({cat_size_sm})" - ) - # Save shape data sc_inst = make_cat.SaveCatalogue(final_cat_file, cat_size_sextractor, w_log) w_log.info("Save shape measurement data") for shape_type in shape_type_list: w_log.info(f"Save {shape_type.lower()} data") - cat_path = ( - shape2_cat_path if shape_type == "galsim" else shape1_cat_path - ) - err_msg = sc_inst.process(shape_type.lower(), cat_path) - + err_msg = sc_inst.process(shape_type.lower(), shape1_cat_path) # If error message: delete (incomplete) output file and raise error if err_msg is not None: diff --git a/src/shapepipe/modules/mccd_package/mccd_interpolation_script.py b/src/shapepipe/modules/mccd_package/mccd_interpolation_script.py index d6625bc35..bfd9f3417 100644 --- a/src/shapepipe/modules/mccd_package/mccd_interpolation_script.py +++ b/src/shapepipe/modules/mccd_package/mccd_interpolation_script.py @@ -10,6 +10,7 @@ import mccd import numpy as np +from cs_util import size as cs_size from sqlitedict import SqliteDict from shapepipe.pipeline import file_io @@ -243,10 +244,10 @@ def _write_output(self): if self._compute_shape: data = { "VIGNET": self.interp_PSFs, - "E1_PSF_HSM": self.psf_shapes[:, 0], - "E2_PSF_HSM": self.psf_shapes[:, 1], - "SIGMA_PSF_HSM": self.psf_shapes[:, 2], - "FLAG_PSF_HSM": self.psf_shapes[:, 3].astype(int), + "HSM_G1_PSF": self.psf_shapes[:, 0], + "HSM_G2_PSF": self.psf_shapes[:, 1], + "HSM_T_PSF": cs_size.sigma_to_T(self.psf_shapes[:, 2]), + "HSM_FLAG_PSF": self.psf_shapes[:, 3].astype(int), } else: data = {"VIGNET": self.interp_PSFs} @@ -339,14 +340,14 @@ def _write_output_validation(self, star_dict, psfex_cat_dict): ) data = { - "E1_PSF_HSM": self.psf_shapes[:, 0], - "E2_PSF_HSM": self.psf_shapes[:, 1], - "SIGMA_PSF_HSM": self.psf_shapes[:, 2], - "FLAG_PSF_HSM": self.psf_shapes[:, 3].astype(int), - "E1_STAR_HSM": self.star_shapes[:, 0], - "E2_STAR_HSM": self.star_shapes[:, 1], - "SIGMA_STAR_HSM": self.star_shapes[:, 2], - "FLAG_STAR_HSM": self.star_shapes[:, 3].astype(int), + "HSM_G1_PSF": self.psf_shapes[:, 0], + "HSM_G2_PSF": self.psf_shapes[:, 1], + "HSM_T_PSF": cs_size.sigma_to_T(self.psf_shapes[:, 2]), + "HSM_FLAG_PSF": self.psf_shapes[:, 3].astype(int), + "HSM_G1_STAR": self.star_shapes[:, 0], + "HSM_G2_STAR": self.star_shapes[:, 1], + "HSM_T_STAR": cs_size.sigma_to_T(self.star_shapes[:, 2]), + "HSM_FLAG_STAR": self.star_shapes[:, 3].astype(int), } data = {**data, **star_dict} @@ -536,16 +537,16 @@ def _interpolate_me(self): ] = final_list[j][1][where_res[0]] if self._compute_shape: shape_dict = {} - shape_dict["E1_PSF_HSM"] = final_list[j][2][ + shape_dict["HSM_G1_PSF"] = final_list[j][2][ where_res[0] ][0] - shape_dict["E2_PSF_HSM"] = final_list[j][2][ + shape_dict["HSM_G2_PSF"] = final_list[j][2][ where_res[0] ][1] - shape_dict["SIGMA_PSF_HSM"] = final_list[j][2][ - where_res[0] - ][2] - shape_dict["FLAG_PSF_HSM"] = final_list[j][2][ + shape_dict["HSM_T_PSF"] = cs_size.sigma_to_T( + final_list[j][2][where_res[0]][2] + ) + shape_dict["HSM_FLAG_PSF"] = final_list[j][2][ where_res[0] ][3] output_dict[id_tmp][final_list[j][3][where_res[0]]][ diff --git a/src/shapepipe/modules/mccd_package/mccd_plot_utilities.py b/src/shapepipe/modules/mccd_package/mccd_plot_utilities.py index cc20b6516..02bc55a87 100644 --- a/src/shapepipe/modules/mccd_package/mccd_plot_utilities.py +++ b/src/shapepipe/modules/mccd_package/mccd_plot_utilities.py @@ -254,23 +254,23 @@ def plot_meanshapes( ) # Flag mask - star_flags = starcat[hdu_no].data["FLAG_STAR_HSM"] - psf_flags = starcat[hdu_no].data["FLAG_PSF_HSM"] + star_flags = starcat[hdu_no].data["HSM_FLAG_STAR"] + psf_flags = starcat[hdu_no].data["HSM_FLAG_PSF"] flagmask = np.abs(star_flags - 1) * np.abs(psf_flags - 1) - # convert sigma to R^2's + # size column already holds T = 2 sigma^2 all_star_shapes = np.array( [ - starcat[hdu_no].data["E1_STAR_HSM"], - starcat[hdu_no].data["E2_STAR_HSM"], - 2.0 * starcat[hdu_no].data["SIGMA_STAR_HSM"] ** 2, + starcat[hdu_no].data["HSM_G1_STAR"], + starcat[hdu_no].data["HSM_G2_STAR"], + starcat[hdu_no].data["HSM_T_STAR"], ] ) all_psf_shapes = np.array( [ - starcat[hdu_no].data["E1_PSF_HSM"], - starcat[hdu_no].data["E2_PSF_HSM"], - 2.0 * starcat[hdu_no].data["SIGMA_PSF_HSM"] ** 2, + starcat[hdu_no].data["HSM_G1_PSF"], + starcat[hdu_no].data["HSM_G2_PSF"], + starcat[hdu_no].data["HSM_T_PSF"], ] ) all_CCDs = starcat[hdu_no].data["CCD_NB"] diff --git a/src/shapepipe/modules/mccd_package/shapepipe_auxiliary_mccd.py b/src/shapepipe/modules/mccd_package/shapepipe_auxiliary_mccd.py index 44b7cf55b..943433e8f 100644 --- a/src/shapepipe/modules/mccd_package/shapepipe_auxiliary_mccd.py +++ b/src/shapepipe/modules/mccd_package/shapepipe_auxiliary_mccd.py @@ -14,6 +14,7 @@ import mccd import numpy as np from astropy.io import fits +from cs_util import size as cs_size from shapepipe.pipeline import file_io @@ -471,10 +472,10 @@ def shapepipe_write_output( if get_shapes: data = { "VIGNET": interp_PSFs, - "E1_PSF_HSM": PSF_shapes[:, 0], - "E2_PSF_HSM": PSF_shapes[:, 1], - "SIGMA_PSF_HSM": PSF_shapes[:, 2], - "FLAG_PSF_HSM": PSF_shapes[:, 3].astype(int), + "HSM_G1_PSF": PSF_shapes[:, 0], + "HSM_G2_PSF": PSF_shapes[:, 1], + "HSM_T_PSF": cs_size.sigma_to_T(PSF_shapes[:, 2]), + "HSM_FLAG_PSF": PSF_shapes[:, 3].astype(int), } else: data = {"VIGNET": interp_PSFs} diff --git a/src/shapepipe/modules/merge_starcat_package/merge_starcat.py b/src/shapepipe/modules/merge_starcat_package/merge_starcat.py index 3b06cd075..7bc0cb76b 100644 --- a/src/shapepipe/modules/merge_starcat_package/merge_starcat.py +++ b/src/shapepipe/modules/merge_starcat_package/merge_starcat.py @@ -12,6 +12,7 @@ import numpy as np from astropy.io import fits +from cs_util import size as cs_size from shapepipe.pipeline import file_io @@ -344,7 +345,7 @@ def process(self): ) ) - # shapes (convert sigmas to R^2) + # shapes (convert sigmas to T = 2 sigma^2) g1_psf += list( starcat_j[self._hdu_table].data["PSF_MOM_LIST"][:, 0] ) @@ -352,12 +353,16 @@ def process(self): starcat_j[self._hdu_table].data["PSF_MOM_LIST"][:, 1] ) size_psf += list( - starcat_j[self._hdu_table].data["PSF_MOM_LIST"][:, 2] ** 2 + cs_size.sigma_to_T( + starcat_j[self._hdu_table].data["PSF_MOM_LIST"][:, 2] + ) ) g1 += list(starcat_j[self._hdu_table].data["STAR_MOM_LIST"][:, 0]) g2 += list(starcat_j[self._hdu_table].data["STAR_MOM_LIST"][:, 1]) size += list( - starcat_j[self._hdu_table].data["STAR_MOM_LIST"][:, 2] ** 2 + cs_size.sigma_to_T( + starcat_j[self._hdu_table].data["STAR_MOM_LIST"][:, 2] + ) ) # flags @@ -479,21 +484,20 @@ def process(self): SEx_catalogue=True, ) - # Collect columns - # convert back to sigma for consistency + # Collect columns (size stored as T = 2 sigma^2) data = { "X": x, "Y": y, "RA": ra, "DEC": dec, - "E1_PSF_HSM": g1_psf, - "E2_PSF_HSM": g2_psf, - "SIGMA_PSF_HSM": np.sqrt(size_psf), - "E1_STAR_HSM": g1, - "E2_STAR_HSM": g2, - "SIGMA_STAR_HSM": np.sqrt(size), - "FLAG_PSF_HSM": flag_psf, - "FLAG_STAR_HSM": flag_star, + "HSM_G1_PSF": g1_psf, + "HSM_G2_PSF": g2_psf, + "HSM_T_PSF": size_psf, + "HSM_G1_STAR": g1, + "HSM_G2_STAR": g2, + "HSM_T_STAR": size, + "HSM_FLAG_PSF": flag_psf, + "HSM_FLAG_STAR": flag_star, "CCD_NB": ccd_nb, } @@ -580,17 +584,17 @@ def process(self): ra += list(data_j["RA"]) dec += list(data_j["DEC"]) - # shapes (convert sigmas to R^2) - g1_psf += list(data_j["E1_PSF_HSM"]) - g2_psf += list(data_j["E2_PSF_HSM"]) - size_psf += list(data_j["SIGMA_PSF_HSM"] ** 2) - g1 += list(data_j["E1_STAR_HSM"]) - g2 += list(data_j["E2_STAR_HSM"]) - size += list(data_j["SIGMA_STAR_HSM"] ** 2) + # shapes (size column already holds T = 2 sigma^2) + g1_psf += list(data_j["HSM_G1_PSF"]) + g2_psf += list(data_j["HSM_G2_PSF"]) + size_psf += list(data_j["HSM_T_PSF"]) + g1 += list(data_j["HSM_G1_STAR"]) + g2 += list(data_j["HSM_G2_STAR"]) + size += list(data_j["HSM_T_STAR"]) # flags - flag_psf += list(data_j["FLAG_PSF_HSM"]) - flag_star += list(data_j["FLAG_STAR_HSM"]) + flag_psf += list(data_j["HSM_FLAG_PSF"]) + flag_star += list(data_j["HSM_FLAG_STAR"]) # misc @@ -623,21 +627,20 @@ def process(self): SEx_catalogue=False, ) - # Collect columns - # convert back to sigma for consistency + # Collect columns (size stored as T = 2 sigma^2) data = { "X": x, "Y": y, "RA": ra, "DEC": dec, - "E1_PSF_HSM": g1_psf, - "E2_PSF_HSM": g2_psf, - "SIGMA_PSF_HSM": np.sqrt(size_psf), - "E1_STAR_HSM": g1, - "E2_STAR_HSM": g2, - "SIGMA_STAR_HSM": np.sqrt(size), - "FLAG_PSF_HSM": flag_psf, - "FLAG_STAR_HSM": flag_star, + "HSM_G1_PSF": g1_psf, + "HSM_G2_PSF": g2_psf, + "HSM_T_PSF": size_psf, + "HSM_G1_STAR": g1, + "HSM_G2_STAR": g2, + "HSM_T_STAR": size, + "HSM_FLAG_PSF": flag_psf, + "HSM_FLAG_STAR": flag_star, "MAG": mag, "SNR": snr, "ACCEPTED": psfex_acc, diff --git a/src/shapepipe/modules/ngmix_package/ngmix.py b/src/shapepipe/modules/ngmix_package/ngmix.py index 386c25efc..371264e23 100644 --- a/src/shapepipe/modules/ngmix_package/ngmix.py +++ b/src/shapepipe/modules/ngmix_package/ngmix.py @@ -8,23 +8,54 @@ import os import re +from typing import NamedTuple + import ngmix import galsim import numpy as np from astropy.io import fits +from cs_util import size as cs_size from modopt.math.stats import sigma_mad from ngmix.observation import Observation, ObsList from sqlitedict import SqliteDict from shapepipe.pipeline import file_io -# Gaussian half-light radius per unit sigma: r50 = sqrt(2 ln 2) * sigma -# = 1.17741 * sigma. With the ngmix area parameter T = 2 sigma^2 this -# gives r50 = SIGMA_TO_R50 * sqrt(T / 2) = sqrt(ln 2 * T). -SIGMA_TO_R50 = np.sqrt(2.0 * np.log(2.0)) - METACAL_TYPES = ('noshear', '1p', '1m', '2p', '2m') +# Noise budget for the PSF observation's flat weight map (psf_wt = +# 1/PSF_NOISE**2). Mirrors the esheldon/aguinot pattern (sigma ~ 1e-5/1e-6); +# 1e-5 is the value Axel Guinot's #749 reproduction used. The fit is driven +# by the *relative* weighting of the likelihood vs the prior, so the exact +# value is non-critical once it is finite (validated on the digital twin: the +# recovered PSF shape/size are flat across 1e-4..1e-6). See +# make_ngmix_observation. +PSF_NOISE = 1e-5 + + +class MetacalResult(NamedTuple): + """Return of :func:`do_ngmix_metacal`: the metacal fit plus both PSFs. + + A NamedTuple so the two PSF families are named, not positional — guarding + against a reconv/orig transposition at call sites. Still a plain tuple, so + ``resdict, psf_res, psf_orig_res = do_ngmix_metacal(...)`` keeps working. + + Attributes + ---------- + resdict : dict + MetacalBootstrapper result dict (one entry per metacal type). + reconv : dict + Averaged metacal *reconvolution*-kernel PSF (round, enlarged): + :func:`average_multiepoch_psf`. + orig : dict + Averaged *original* image-PSF (psfex/mccd, its true shape and size): + :func:`average_original_psf`. + """ + + resdict: dict + reconv: dict + orig: dict + def get_mcal_flags(res): """Get Metacal Flags. @@ -355,8 +386,16 @@ def compile_results(self, results): Returns ------- dict - Compiled results ready to be written to a file - note: psfo is the original image psf from psfex or mccd + Compiled results ready to be written to a file. + + Two PSF column families — each carrying ellipticity *and* size, + for *different* PSFs (shapepipe#749). See the function docstrings + for what each family IS: + + * ``*_psf_orig`` (``g1``/``g2`` + ``*_err``, ``T``) — the original + image PSF, fit by :func:`average_original_psf`. + * ``*_psf_reconv`` — the metacal reconvolution kernel, fit by + :func:`average_multiepoch_psf`. Raises ------ @@ -378,31 +417,34 @@ def compile_results(self, results): 'n_epoch_model', 'mcal_types_fail', 'nfev_fit', - 'g1_psfo_ngmix', - 'g2_psfo_ngmix', - 'g1_err_psfo_ngmix', - 'g2_err_psfo_ngmix', + # galaxy 'g1', 'g1_err', 'g2', 'g2_err', 'T', 'T_err', - 'Tpsf', - 'Tpsf_err', - 'r50', - 'r50_err', - 'r50psf', - 'r50psf_err', - 'g1_psf', - 'g2_psf', 'flux', 'flux_err', 's2n', 'mag', 'mag_err', 'flags', - 'mcal_flags' + 'mcal_flags', + # original image PSF (psfex/mccd), fit by average_original_psf + 'g1_psf_orig', + 'g2_psf_orig', + 'g1_err_psf_orig', + 'g2_err_psf_orig', + 'T_psf_orig', + 'T_err_psf_orig', + # metacal reconvolution kernel, fit by average_multiepoch_psf + 'g1_psf_reconv', + 'g2_psf_reconv', + 'g1_err_psf_reconv', + 'g2_err_psf_reconv', + 'T_psf_reconv', + 'T_err_psf_reconv', ] output_dict = {k: {kk: [] for kk in names2} for k in names} for idx in range(len(results)): @@ -439,42 +481,28 @@ def compile_results(self, results): output_dict[name]["nfev_fit"].append( fit.get("nfev", np.nan) ) - output_dict[name]["g1_psfo_ngmix"].append( - results[idx]["g_PSFo"][0] - ) - output_dict[name]["g2_psfo_ngmix"].append( - results[idx]["g_PSFo"][1] - ) - output_dict[name]["g1_err_psfo_ngmix"].append( - results[idx]["g_err_PSFo"][0] - ) - output_dict[name]["g2_err_psfo_ngmix"].append( - results[idx]["g_err_PSFo"][1] - ) - output_dict[name]["T"].append(T_gal) - output_dict[name]["T_err"].append(T_gal_err) - output_dict[name]["Tpsf"].append(results[idx]["T_PSFo"]) - output_dict[name]["Tpsf_err"].append(results[idx]["T_err_PSFo"]) - output_dict[name]["g1_psf"].append(results[idx]["g_PSFo"][0]) - output_dict[name]["g2_psf"].append(results[idx]["g_PSFo"][1]) - - # Galaxy half-light radius from the fitted area T = 2 sigma^2: - # r50 = sqrt(ln 2 * T), with d r50 / d T = r50 / (2 T) - if T_gal > 0: - r50_gal = SIGMA_TO_R50 * np.sqrt(T_gal / 2) - r50_gal_err = r50_gal * T_gal_err / (2 * T_gal) - else: - r50_gal = r50_gal_err = np.nan - output_dict[name]['r50'].append(r50_gal) - output_dict[name]['r50_err'].append(r50_gal_err) - output_dict[name]['r50psf'].append(results[idx]["r50_PSFo"]) - output_dict[name]['r50psf_err'].append( - results[idx]["r50_err_PSFo"] - ) + # The two PSF families are object-level (one value per + # object, not per shear type) and self-named: every key + # below is copied straight through from compile-loop input to + # output, so the column name *is* the value's provenance. + # *_psf_orig = original image PSF (average_original_psf) + # *_psf_reconv = reconvolution kernel (average_multiepoch_psf) + for psf_key in ( + 'g1_psf_orig', 'g2_psf_orig', + 'g1_err_psf_orig', 'g2_err_psf_orig', + 'T_psf_orig', 'T_err_psf_orig', + 'g1_psf_reconv', 'g2_psf_reconv', + 'g1_err_psf_reconv', 'g2_err_psf_reconv', + 'T_psf_reconv', 'T_err_psf_reconv', + ): + output_dict[name][psf_key].append(results[idx][psf_key]) + output_dict[name]["g1"].append(g[0]) output_dict[name]["g2"].append(g[1]) output_dict[name]["g1_err"].append(np.sqrt(g_cov[0, 0])) output_dict[name]["g2_err"].append(np.sqrt(g_cov[1, 1])) + output_dict[name]["T"].append(T_gal) + output_dict[name]["T_err"].append(T_gal_err) output_dict[name]["flux"].append(flux) output_dict[name]["flux_err"].append(flux_err) output_dict[name]["mag"].append(mag) @@ -662,7 +690,7 @@ def process(self): if tile_cat.flux is not None else 1.0 ) - res, psf_res = do_ngmix_metacal( + res, psf_res, psf_orig_res = do_ngmix_metacal( stamp, prior, flux_guess, @@ -689,18 +717,19 @@ def process(self): if res.get(k, {}).get('flags', 0) != 0 ) res['mcal_flags'] = get_mcal_flags(res) - # PSF half-light radius r50 = sqrt(2 ln 2) * sigma with - # sigma = sqrt(T / 2); error from d sigma / d T = 1 / (4 sigma) - sigma_psfo = np.sqrt(max(psf_res['T_psf'], 0) / 2) - res['g_PSFo'] = psf_res['g_psf'] - res['g_err_PSFo'] = psf_res['g_psf_err'] - res['T_PSFo'] = psf_res['T_psf'] - res['T_err_PSFo'] = psf_res['T_psf_err'] - res['r50_PSFo'] = SIGMA_TO_R50 * sigma_psfo - res['r50_err_PSFo'] = ( - SIGMA_TO_R50 * psf_res['T_psf_err'] / (4 * sigma_psfo) - if sigma_psfo > 0 else np.nan - ) + # Two distinct PSF families (shapepipe#749), each carrying its own + # ellipticity AND size: the metacal reconvolution kernel (psf_res) + # and the original image PSF (psf_orig_res). Tag both into res from + # ONE template so the families stay symmetric — same quantities, + # parallel ``*_psf_{family}`` names — and cannot drift apart by a + # hand-edit to one and not the other. + for family, psf in (("reconv", psf_res), ("orig", psf_orig_res)): + res[f"g1_psf_{family}"] = psf["g_psf"][0] + res[f"g2_psf_{family}"] = psf["g_psf"][1] + res[f"g1_err_psf_{family}"] = psf["g_psf_err"][0] + res[f"g2_err_psf_{family}"] = psf["g_psf_err"][1] + res[f"T_psf_{family}"] = psf["T_psf"] + res[f"T_err_psf_{family}"] = psf["T_psf_err"] final_res.append(res) n_fitted += 1 count_batch += 1 @@ -945,7 +974,7 @@ def get_noise(gal, weight, guess, pixel_scale, thresh=1.2): sig_tmp = sigma_mad(gal[m_weight]) - gauss_win = galsim.Gaussian(sigma=np.sqrt(guess[4] / 2), flux=guess[5]) + gauss_win = galsim.Gaussian(sigma=cs_size.T_to_sigma(guess[4]), flux=guess[5]) gauss_win = gauss_win.shear(g1=guess[2], g2=guess[3]) gauss_win = gauss_win.drawImage( nx=img_shape[0], ny=img_shape[1], scale=pixel_scale @@ -1071,7 +1100,19 @@ def make_ngmix_observation( col=(psf.shape[1] - 1) / 2, wcs=wcs, ) - psf_obs = Observation(psf, jacobian=psf_jacob) + # A model fit always needs a weight: without one ngmix defaults to unit + # weights on a unit-flux PSF stamp, so the galaxy GPriorBA(0.4) prior + # handed to the diagnostic PSF fitter swamps the (tiny) likelihood and the + # recovered PSF shape collapses toward the prior (#749). PSFEx/MCCD models + # already carry a little noise, so a finite flat weight matching the + # esheldon/aguinot noise budget (psf_noise ~ 1e-5) suffices to restore the + # likelihood — no explicit stamp-noise injection needed (#774). This is the + # PSF analogue of the galaxy weight built just below; force float so an + # integer-typed stamp cannot truncate the weight (matching that build). + # Metacal's own PSF fit is prior-free (AdmomFitter) and so is insensitive + # to this flat weight's scale, leaving the calibration untouched. + psf_wt = np.full_like(psf, 1.0 / PSF_NOISE ** 2, dtype=float) + psf_obs = Observation(psf, weight=psf_wt, jacobian=psf_jacob) gal_masked, weight_map, noise_img = prepare_ngmix_weights( gal, weight, flag, rng, bkg_rms=bkg_rms @@ -1122,55 +1163,140 @@ def make_ngmix_observation( noise=noise_img, ) -def average_multiepoch_psf(obsdict): - """ averages psf information over multiple epochs - we may need to do this for original psf as well +def _average_psf_fits(results_and_weights): + """Weight-average a set of per-epoch ngmix PSF-fit results. + + Shared core for both PSF families this module exports: the metacal + reconvolution kernel (:func:`average_multiepoch_psf`) and the original + image PSF (:func:`average_original_psf`). Epochs whose PSF fit failed + (``flags != 0``, carrying only flags/pars and no T/g) are dropped. + Parameters ---------- - obsdict : dict - Observation dict returned by MetacalBootstrapper.go(). + results_and_weights : iterable of (dict, float) + Per-epoch ``(result, weight)`` pairs, where ``result`` is an ngmix + Fitter result with keys ``flags``, ``g``, ``g_err``, ``T``, + ``T_err`` and ``weight`` is the epoch's averaging weight. Returns ------- dict - Keys: 'g_psf', 'g_psf_err', 'T_psf', 'T_psf_err' (weighted - averages over the epochs whose PSF fit succeeded) and 'n_epoch' - (the number of those surviving epochs). + Keys ``g_psf``, ``g_psf_err``, ``T_psf``, ``T_psf_err`` (weighted + averages over the surviving epochs) and ``n_epoch`` (their count). """ - psf_dict = {} - nepoch = len(obsdict['noshear']) n_epoch_used = 0 wsum = 0 g_psf_sum = np.array([0., 0.]) g_psf_err_sum = np.array([0., 0.]) T_psf_sum = 0 T_psf_err_sum = 0 - for n_e in np.arange(nepoch): - result = obsdict['noshear'][n_e].psf.meta['result'] - # ignore_failed_psf=True drops failed-PSF epochs from the galaxy - # fit but keeps them in obsdict; their result carries only - # flags/pars (no T/g), so skip them here too. + for result, weight in results_and_weights: if result['flags'] != 0: continue - ne_wsum = obsdict['noshear'][n_e].weight.sum() - n_epoch_used += 1 - wsum += ne_wsum - g_psf_sum += result['g'] * ne_wsum - g_psf_err_sum += result['g_err'] * ne_wsum - T_psf_sum += result['T'] * ne_wsum - T_psf_err_sum += result['T_err'] * ne_wsum + wsum += weight + g_psf_sum += result['g'] * weight + g_psf_err_sum += result['g_err'] * weight + T_psf_sum += result['T'] * weight + T_psf_err_sum += result['T_err'] * weight if wsum == 0: raise ZeroDivisionError('Sum of weights = 0, division by zero') - psf_dict['g_psf'] = g_psf_sum / wsum - psf_dict['g_psf_err'] = g_psf_err_sum / wsum - psf_dict['T_psf'] = T_psf_sum / wsum - psf_dict['T_psf_err'] = T_psf_err_sum / wsum - psf_dict['n_epoch'] = n_epoch_used + return { + 'g_psf': g_psf_sum / wsum, + 'g_psf_err': g_psf_err_sum / wsum, + 'T_psf': T_psf_sum / wsum, + 'T_psf_err': T_psf_err_sum / wsum, + 'n_epoch': n_epoch_used, + } + + +def average_multiepoch_psf(obsdict): + """Average the metacal *reconvolution* PSF over epochs. + + The PSF carried by each metacal observation (``obs.psf``) is the + Gaussian reconvolution kernel that metacal fit and convolved back in — + round by construction and slightly enlarged relative to the original + PSF. This is the kernel defining the sheared galaxy images, exported to + the reconvolution-kernel columns + (``NGMIX_G1/G2_PSF_RECONV``, ``NGMIX_T_PSF_RECONV``). The independent fit + of the *original* image PSF is :func:`average_original_psf`. + + Parameters + ---------- + obsdict : dict + Observation dict returned by MetacalBootstrapper.go(). + + Returns + ------- + dict + Keys: 'g_psf', 'g_psf_err', 'T_psf', 'T_psf_err' (weighted + averages over the epochs whose PSF fit succeeded) and 'n_epoch' + (the number of those surviving epochs). + """ + # ignore_failed_psf=True drops failed-PSF epochs from the galaxy fit but + # keeps them in obsdict; _average_psf_fits skips them on flags != 0. + return _average_psf_fits( + (obs.psf.meta['result'], obs.weight.sum()) + for obs in obsdict['noshear'] + ) + + +def average_original_psf(gal_obs_list, psf_runner): + """Fit and average the *original* image PSF over epochs. + + The original PSF is the psfex/mccd model stamp handed to ngmix + (``gal_obs.psf``), fit here with the same ``psf_runner`` (hence the same + fit prior and guesser) used inside metacal, but on the PSF *before* + metacal's reconvolution. Exported to the original-PSF columns + (``NGMIX_G1/G2_PSF_ORIG``, ``NGMIX_T_PSF_ORIG``). Distinct from the + reconvolution-kernel fit (:func:`average_multiepoch_psf`): the original + PSF retains its true ellipticity and size, whereas the reconvolution + kernel is round and enlarged by construction. This is the PSF whose true + shape and size enter object-wise PSF-leakage diagnostics. + + Weighted by the raw galaxy inverse variance (``gal_obs.weight.sum()`` per + epoch); :func:`average_multiepoch_psf` uses the same scheme but the + fixnoise-combined metacal-image weight instead. + + The fit runs on a *copy* of each PSF observation so ``gal_obs.psf`` — + the object metacal later deep-copies and consumes via + ``boot.go(gal_obs_list)`` — is never mutated. ``PSFRunner.go`` sets both + ``.meta['result']`` and, on success, the ``.gmix`` attribute of the + observation it fits; were that ``gal_obs.psf`` itself, the stray gmix + would survive metacal's deep copy and be reused as the + ``MetacalFitGaussPSF`` fallback when its own admom+ML PSF fits both fail, + silently rescuing objects the base branch dropped (``BootPSFFailure``) and + changing the galaxy/shear result set. Fitting a copy closes this + PSF-aliasing channel; combined with :func:`do_ngmix_metacal` seeding this + pre-fit from a *snapshot* of the metacal RNG (so the pre-fit does not + advance it), the add-column refactor stays bit-identical on the galaxy + results. + + Parameters + ---------- + gal_obs_list : ngmix.observation.ObsList + Per-epoch galaxy observations; each ``gal_obs.psf`` is the original + (pre-metacal) PSF observation to fit, with no further ``.psf`` of + its own so the runner fits the stamp itself. Left pristine. + psf_runner : ngmix.runners.PSFRunner + The module's PSF runner (built by :func:`make_runners` from the + shared ``prior``). + + Returns + ------- + dict + Same keys as :func:`average_multiepoch_psf`. + """ + def fit(gal_obs): + # Fit a COPY so gal_obs.psf stays pristine for metacal — see docstring. + # Failed fits keep flags != 0 and are dropped by _average_psf_fits. + psf_obs = gal_obs.psf.copy() + psf_runner.go(psf_obs) + return psf_obs.meta['result'], gal_obs.weight.sum() - return psf_dict + return _average_psf_fits(fit(gal_obs) for gal_obs in gal_obs_list) def make_runners(prior, flux_guess, rng): @@ -1232,9 +1358,13 @@ def do_ngmix_metacal(stamp, prior, flux_guess, rng, centroid_source="hsm"): Returns ------- - tuple - (resdict, psf_res) where resdict is the MetacalBootstrapper result - dict and psf_res is the averaged PSF dict from average_multiepoch_psf. + MetacalResult + Named 3-tuple ``(resdict, reconv, orig)``: the MetacalBootstrapper + result dict, the averaged metacal *reconvolution*-kernel PSF dict + (:func:`average_multiepoch_psf`), and the averaged *original* image-PSF + dict (:func:`average_original_psf`). The two PSF dicts share keys but + describe different PSFs; the named fields guard against transposing + them. Unpacks positionally as ``resdict, psf_res, psf_orig_res``. """ n_epoch = len(stamp.gals) if n_epoch == 0: @@ -1260,6 +1390,16 @@ def do_ngmix_metacal(stamp, prior, flux_guess, rng, centroid_source="hsm"): runner, psf_runner = make_runners(prior, flux_guess, rng) + # Fit the ORIGINAL (psfex/mccd) PSF before metacal reconvolves it. Use a + # psf_runner seeded from a snapshot of rng's state so this prefit does NOT + # advance the rng that boot.go consumes below — keeping the galaxy/shear + # results bit-identical to the no-prefit branch. average_original_psf fits a + # COPY of each gal_obs.psf, so gal_obs_list also reaches boot.go pristine. + prefit_rng = np.random.RandomState() + prefit_rng.set_state(rng.get_state()) + _, prefit_psf_runner = make_runners(prior, flux_guess, prefit_rng) + psf_orig_res = average_original_psf(gal_obs_list, prefit_psf_runner) + metacal_pars = { 'types': ['noshear', '1p', '1m', '2p', '2m'], 'step': 0.01, @@ -1277,4 +1417,4 @@ def do_ngmix_metacal(stamp, prior, flux_guess, rng, centroid_source="hsm"): ) resdict, obsdict = boot.go(gal_obs_list) psf_res = average_multiepoch_psf(obsdict) - return resdict, psf_res + return MetacalResult(resdict=resdict, reconv=psf_res, orig=psf_orig_res) diff --git a/src/shapepipe/modules/psfex_interp_package/psfex_interp.py b/src/shapepipe/modules/psfex_interp_package/psfex_interp.py index 956092f2e..c14aeb136 100644 --- a/src/shapepipe/modules/psfex_interp_package/psfex_interp.py +++ b/src/shapepipe/modules/psfex_interp_package/psfex_interp.py @@ -11,6 +11,7 @@ import numpy as np from astropy.io import fits +from cs_util import size as cs_size from sqlitedict import SqliteDict from shapepipe.pipeline import file_io @@ -346,10 +347,10 @@ def _write_output(self): if self._compute_shape: data = { "VIGNET": self.interp_PSFs, - "E1_PSF_HSM": self.psf_shapes[:, 0], - "E2_PSF_HSM": self.psf_shapes[:, 1], - "SIGMA_PSF_HSM": self.psf_shapes[:, 2], - "FLAG_PSF_HSM": self.psf_shapes[:, 3].astype(int), + "HSM_G1_PSF": self.psf_shapes[:, 0], + "HSM_G2_PSF": self.psf_shapes[:, 1], + "HSM_T_PSF": cs_size.sigma_to_T(self.psf_shapes[:, 2]), + "HSM_FLAG_PSF": self.psf_shapes[:, 3].astype(int), } else: data = {"VIGNET": self.interp_PSFs} @@ -499,14 +500,14 @@ def _write_output_validation(self, star_dict, psfex_cat_dict): ) data = { - "E1_PSF_HSM": self.psf_shapes[:, 0], - "E2_PSF_HSM": self.psf_shapes[:, 1], - "SIGMA_PSF_HSM": self.psf_shapes[:, 2], - "FLAG_PSF_HSM": self.psf_shapes[:, 3].astype(int), - "E1_STAR_HSM": self.star_shapes[:, 0], - "E2_STAR_HSM": self.star_shapes[:, 1], - "SIGMA_STAR_HSM": self.star_shapes[:, 2], - "FLAG_STAR_HSM": self.star_shapes[:, 3].astype(int), + "HSM_G1_PSF": self.psf_shapes[:, 0], + "HSM_G2_PSF": self.psf_shapes[:, 1], + "HSM_T_PSF": cs_size.sigma_to_T(self.psf_shapes[:, 2]), + "HSM_FLAG_PSF": self.psf_shapes[:, 3].astype(int), + "HSM_G1_STAR": self.star_shapes[:, 0], + "HSM_G2_STAR": self.star_shapes[:, 1], + "HSM_T_STAR": cs_size.sigma_to_T(self.star_shapes[:, 2]), + "HSM_FLAG_STAR": self.star_shapes[:, 3].astype(int), } data = {**data, **star_dict} @@ -754,16 +755,16 @@ def _interpolate_me(self): ] = final_list[j][1][where_res[0]] if self._compute_shape: shape_dict = {} - shape_dict["E1_PSF_HSM"] = final_list[j][2][ + shape_dict["HSM_G1_PSF"] = final_list[j][2][ where_res[0] ][0] - shape_dict["E2_PSF_HSM"] = final_list[j][2][ + shape_dict["HSM_G2_PSF"] = final_list[j][2][ where_res[0] ][1] - shape_dict["SIGMA_PSF_HSM"] = final_list[j][2][ - where_res[0] - ][2] - shape_dict["FLAG_PSF_HSM"] = final_list[j][2][ + shape_dict["HSM_T_PSF"] = cs_size.sigma_to_T( + final_list[j][2][where_res[0]][2] + ) + shape_dict["HSM_FLAG_PSF"] = final_list[j][2][ where_res[0] ][3] output_dict[id_tmp][final_list[j][3][where_res[0]]][ diff --git a/src/shapepipe/modules/spread_model_package/__init__.py b/src/shapepipe/modules/spread_model_package/__init__.py deleted file mode 100644 index 336283e78..000000000 --- a/src/shapepipe/modules/spread_model_package/__init__.py +++ /dev/null @@ -1,36 +0,0 @@ -"""SPREAD MODEL PACKAGE. - -This package contains the module for ``spread_model``. - -:Author: Axel Guinot - -:Parent modules: - -- ``sextractor_runner`` -- ``psfex_interp_runner`` or ``mccd_interp_runner`` - and ``vignetmaker_runner`` - -:Input: SExtractor output catalogue, PSFEx output catalogue and vignet files - -:Output: Updated SExtractor or new FITS file - -Description -=========== - -This module refines the galaxy sample that will be used for shape measurement -using the spread model method of :cite:`desai:12` and :cite:`mohr:12`. - -Module-specific config file entries -=================================== - -PREFIX : str, optional - Ouput file prefix -PIXEL_SCALE : float - Pixel scale in arcseconds -OUTPUT_MODE : str - Output mode, options are ``add`` to add the outputs to the input - SExtractor catalogue or ``new`` to generte a new output catalogue - -""" - -__all__ = ["spread_model"] diff --git a/src/shapepipe/modules/spread_model_package/spread_model.py b/src/shapepipe/modules/spread_model_package/spread_model.py deleted file mode 100644 index 431405984..000000000 --- a/src/shapepipe/modules/spread_model_package/spread_model.py +++ /dev/null @@ -1,309 +0,0 @@ -"""SPREAD MODEL. - -Class to compute the spread model, criterion to select galaxies - -:Author: Axel Guinot - -""" - -import galsim -import numpy as np -from sqlitedict import SqliteDict - -from shapepipe.pipeline import file_io -from shapepipe.utilities import galaxy - - -def get_sm(obj_vign, psf_vign, model_vign, weight_vign): - """Get Spread Model. - - This method compute the spread moel for an object. - - Parameters - ---------- - obj_vign : numpy.ndarray - Vignet of the object - psf_vign : numpy.ndarray - Vignet of the gaussian model of the PSF - model_vign : numpy.ndarray - Vignet of the galaxy model - weight_vign : numpy.ndarray - Vignet of the weight at the object position - - Returns - ------- - tuple - Spread model and corresponding error values - - """ - # Mask invalid pixels - m = (obj_vign > -1e29) & (weight_vign > 0) - w = m.astype(float) - - # Set noise as inverse weight - noise_v = (1 / weight_vign).ravel() - - # Remove infinite noise pixels - noise_v[np.isinf(noise_v)] = 0 - - # Transform 2D vignets to 1D vectors - t_v = model_vign.ravel() - g_v = obj_vign.ravel() - psf_v = psf_vign.ravel() - w_v = w.ravel() - - # Compute scalar products used in spread model - tg = np.sum(t_v * w_v * g_v) - pg = np.sum(psf_v * w_v * g_v) - tp = np.sum(t_v * w_v * psf_v) - pp = np.sum(psf_v * w_v * psf_v) - - tnt = np.sum(t_v * noise_v * t_v * w_v) - pnp = np.sum(psf_v * noise_v * psf_v * w_v) - tnp = np.sum(t_v * noise_v * psf_v * w_v) - err = tnt * pg**2 + pnp * tg**2 - 2 * tnp * pg * tg - - # Compute spread model - if pg > 0: - sm = (tg / pg) - (tp / pp) - else: - sm = 1 - - if (pg > 0) & (err > 0): - sm_err = np.sqrt(err) / pg**2 - else: - sm_err = 1 - - return sm, sm_err - - -def get_model(sigma, flux, img_shape, pixel_scale=0.186): - """Get Model. - - This method computes - - an exponential galaxy model with scale radius = 1/16 FWHM - - a Gaussian model for the PSF - - Parameters - ---------- - sigma : float - Sigma of the PSF (in pixel units) - flux : float - Flux of the galaxy for the model - img_shape : list - Size of the output vignet ``[xsize, ysize]`` - pixel_scale : float, optional - Pixel scale to use for the model (in arcsec); default is ``0.186`` - - Returns - ------- - tuple - Vignet of the galaxy model and of the PSF model - - """ - # Get scale radius - scale_radius = 1 / 16 * galaxy.sigma_to_fwhm(sigma, pixel_scale=pixel_scale) - - # Get galaxy model - gal_obj = galsim.Exponential(scale_radius=scale_radius, flux=flux) - - # Get PSF - psf_obj = galsim.Gaussian(sigma=sigma * pixel_scale) - - # Convolve both - gal_obj = galsim.Convolve(gal_obj, psf_obj) - - # Draw galaxy and PSF on vignets - gal_vign = gal_obj.drawImage( - nx=img_shape[0], ny=img_shape[1], scale=pixel_scale - ).array - - psf_vign = psf_obj.drawImage( - nx=img_shape[0], ny=img_shape[1], scale=pixel_scale - ).array - - return gal_vign, psf_vign - - -class SpreadModel(object): - """The Spread Model Class. - - Parameters - ---------- - sex_cat_path : str - Path to SExtractor catalogue - psf_cat_path : str - Path to PSF catalogue - weight_cat_path : str - Path to weight catalogue - output_path : str - Output file path of pasted catalog - pixel_scale : float - Pixel scale in arcsec - output_mode : str - Options are ``new`` or ``add`` - - Notes - ----- - For the ``output_mode``: - - - ``new`` will create a new catalogue with - ``[number, mag, sm, sm_err]`` - - ``add`` will output a copy of the input SExtractor with the columns - ``sm`` and ``sm_err`` - - """ - - def __init__( - self, - sex_cat_path, - psf_cat_path, - weight_cat_path, - output_path, - pixel_scale, - output_mode, - ): - - self._sex_cat_path = sex_cat_path - self._psf_cat_path = psf_cat_path - self._weight_cat_path = weight_cat_path - self._output_path = output_path - self._pixel_scale = pixel_scale - self._output_mode = output_mode - - def process(self): - """Process. - - Process the spread model computation - - """ - # Get data - sex_cat = file_io.FITSCatalogue(self._sex_cat_path, SEx_catalogue=True) - sex_cat.open() - obj_id = np.copy(sex_cat.get_data()["NUMBER"]) - obj_vign = np.copy(sex_cat.get_data()["VIGNET"]) - obj_mag = None - if self._output_mode == "new": - obj_mag = np.copy(sex_cat.get_data()["MAG_AUTO"]) - sex_cat.close() - - psf_cat = SqliteDict(self._psf_cat_path) - - weight_cat = file_io.FITSCatalogue( - self._weight_cat_path, - SEx_catalogue=True, - ) - weight_cat.open() - weigh_vign = weight_cat.get_data()["VIGNET"] - weight_cat.close() - - # Get spread model - skip_obj = False - spread_model_final = [] - spread_model_err_final = [] - for idx, id_tmp in enumerate(obj_id): - sigma_list = [] - - if psf_cat[str(id_tmp)] == "empty": - spread_model_final.append(-1) - spread_model_err_final.append(1) - continue - - psf_expccd_name = list(psf_cat[str(id_tmp)].keys()) - - for expccd_name_tmp in psf_expccd_name: - psf_cat_id_ccd = psf_cat[str(id_tmp)][expccd_name_tmp] - sigma_list.append(psf_cat_id_ccd["SHAPES"]["SIGMA_PSF_HSM"]) - - obj_sigma_tmp = np.mean(sigma_list) - if obj_sigma_tmp > 0: - obj_vign_tmp = obj_vign[idx] - obj_flux_tmp = 1.0 - obj_weight_tmp = weigh_vign[idx] - obj_model_tmp, obj_psf_tmp = get_model( - obj_sigma_tmp, - obj_flux_tmp, - obj_vign_tmp.shape, - self._pixel_scale, - ) - - obj_sm, obj_sm_err = get_sm( - obj_vign_tmp, obj_psf_tmp, obj_model_tmp, obj_weight_tmp - ) - else: - # size < 0, something is not right with this object - obj_sm, obj_sm_err = -1.0, -1.0 - - spread_model_final.append(obj_sm) - spread_model_err_final.append(obj_sm_err) - - spread_model_final = np.array(spread_model_final, dtype="float64") - spread_model_err_final = np.array( - spread_model_err_final, - dtype="float64", - ) - - psf_cat.close() - - self.save_results( - spread_model_final, spread_model_err_final, obj_mag, obj_id - ) - - def save_results(self, sm, sm_err, mag, number): - """Save Results. - - Save output catalogue with spread model and errors. - - Parameters - ---------- - sm : numpy.ndarray - Value of the spread model for all objects - sm_err : numpy.ndarray - Value of the spread model error for all objects - mag : numpy.ndarray - Magnitude of all objects (only for a new catalogue) - number : numpy.ndarray - ID of all objects (only for a new catalogue) - - Raises - ------ - ValueError - For incorrect output mode - - """ - if self._output_mode == "new": - new_cat = file_io.FITSCatalogue( - self._output_path, - SEx_catalogue=True, - open_mode=file_io.BaseCatalogue.OpenMode.ReadWrite, - ) - dict_data = { - "NUMBER": number, - "MAG": mag, - "SPREAD_MODEL": sm, - "SPREADERR_MODEL": sm_err, - } - new_cat.save_as_fits( - data=dict_data, sex_cat_path=self._sex_cat_path - ) - elif self._output_mode == "add": - ori_cat = file_io.FITSCatalogue( - self._sex_cat_path, - SEx_catalogue=True, - ) - ori_cat.open() - new_cat = file_io.FITSCatalogue( - self._output_path, - SEx_catalogue=True, - open_mode=file_io.BaseCatalogue.OpenMode.ReadWrite, - ) - ori_cat.add_col( - "SPREAD_MODEL", sm, new_cat=True, new_cat_inst=new_cat - ) - ori_cat.add_col("SPREAD_MODEL", sm, new_cat=True, new_cat_inst=new_cat) - ori_cat.close() - new_cat.open() - new_cat.add_col("SPREADERR_MODEL", sm_err) - new_cat.close() - else: - raise ValueError("Mode must be in [new, add].") diff --git a/src/shapepipe/modules/spread_model_runner.py b/src/shapepipe/modules/spread_model_runner.py deleted file mode 100644 index 16c9a2d3b..000000000 --- a/src/shapepipe/modules/spread_model_runner.py +++ /dev/null @@ -1,69 +0,0 @@ -"""SPREAD MODEL RUNNER. - -Module runner for ``spread_model``. - -:Author: Axel Guinot - -""" - -from shapepipe.modules.module_decorator import module_runner -from shapepipe.modules.spread_model_package.spread_model import SpreadModel - - -@module_runner( - version="1.1", - input_module=[ - "sextractor_runner", - "psfex_interp_runner", - "vignetmaker_runner", - ], - file_pattern=["sexcat", "galaxy_psf", "weight_vign"], - file_ext=[".fits", ".sqlite", ".fits"], - depends=["numpy", "galsim"], - run_method="parallel", -) -def spread_model_runner( - input_file_list, - run_dirs, - file_number_string, - config, - module_config_sec, - w_log, -): - """Define The Spread Model Runner.""" - # Get input files - sex_cat_path, psf_cat_path, weight_cat_path = input_file_list - - # Get file prefix (optional) - if config.has_option(module_config_sec, "PREFIX"): - prefix = config.get(module_config_sec, "PREFIX") - if (prefix.lower() != "none") & (prefix != ""): - prefix = prefix + "_" - else: - prefix = "" - else: - prefix = "" - - # Get pixel scale and output mode - pixel_scale = config.getfloat(module_config_sec, "PIXEL_SCALE") - output_mode = config.get(module_config_sec, "OUTPUT_MODE") - - # Set output file path - file_name = f"{prefix}sexcat_sm{file_number_string}.fits" - output_path = f'{run_dirs["output"]}/{file_name}' - - # Create spread model class instance - sm_inst = SpreadModel( - sex_cat_path, - psf_cat_path, - weight_cat_path, - output_path, - pixel_scale, - output_mode, - ) - - # Process spread model computation - sm_inst.process() - - # No return objects - return None, None diff --git a/src/shapepipe/testing/simulate.py b/src/shapepipe/testing/simulate.py index 81b9b6ab0..8ab5a06f0 100644 --- a/src/shapepipe/testing/simulate.py +++ b/src/shapepipe/testing/simulate.py @@ -21,6 +21,7 @@ def make_data( psf_fwhm=0.55, pixel_scale=0.1857, img_size=201, + psf_shear=(0.0, 0.0), ): """Simulate an exponential galaxy with Moffat PSF. @@ -48,6 +49,12 @@ def make_data( Pixel scale in arcsec/pixel. Default 0.1857. img_size : int, optional Stamp size in pixels (square). Default 201. + psf_shear : tuple of float, optional + Ellipticity (g1, g2) applied to the Moffat PSF. Default (0, 0), a + round PSF. A non-round PSF is what the PSF-leakage / additive-bias + guardrail needs: the PSF carries a shape that an unbiased deconvolution + must remove from a round galaxy. The same sheared PSF is used both to + convolve the object and as the PSF model stamp. Returns ------- @@ -71,7 +78,9 @@ def make_data( if not share_shift: dy, dx = rng.uniform(low=-scale / 2, high=scale / 2, size=2) - psf = galsim.Moffat(beta=2.5, fwhm=psf_fwhm) + psf = galsim.Moffat(beta=2.5, fwhm=psf_fwhm).shear( + g1=psf_shear[0], g2=psf_shear[1] + ) obj = galsim.Convolve( psf, galsim.Exponential(half_light_radius=gal_hlr, flux=gal_flux).shear( diff --git a/src/shapepipe/utilities/galaxy.py b/src/shapepipe/utilities/galaxy.py index 40948a8f1..c533eac5c 100644 --- a/src/shapepipe/utilities/galaxy.py +++ b/src/shapepipe/utilities/galaxy.py @@ -5,90 +5,3 @@ :Author: Martin Kilbinger """ - -import numpy as np - - -def sigma_to_fwhm(sigma, pixel_scale=1.0): - r"""Convert Sigma to FWHM. - - Transform standard deviation of a 1D Gaussian, sigma, to FWHM - (Full Width Half Maximum). - - Parameters - ---------- - sigma : numpy.ndarray - input standard deviation(s) - pixel_scale : float, optional, default=1 - pixel size in arcsec, set to 1 if no scaling - required - - Returns - ------- - fwhm : (array of) float - output fwhm(s) - - Raises - ------ - TypeError - If ``sigma`` is not of type numpy array or float - TypeError - If ``sigma`` array values are not of type float - TypeError - If ``pixel_scale`` is not of type float - ValueError - If ``sigma`` array values are not greater than 0.0 - ValueError - If ``sigma`` is not greater than 0.0 - ValueError - If ``pixel_scale`` is not greater than 0.0 - - Notes - ----- - To compute the FWHMh for a 1D Gaussian N(x), solve the equation - - .. math:: - - N(x) = (\sigma \sqrt{2\pi})^{-1} \exp[x^2/2\sigma^2] = \frac 1 2 N(x) - - - for :math:`x`. The FWHM is :math:`x + (-x) = 2x`. The solution is - - .. math:: - - \textrm{FWHM} = 2 \sqrt(2 \ln 2) \sigma \approx 2.355 \sigma - - """ - if not isinstance(sigma, (np.ndarray, float)): - raise TypeError( - f"Sigma must be of type numpy array or float, not {type(sigma)}." - ) - elif isinstance(sigma, np.ndarray) and sigma.dtype != np.float64: - raise TypeError( - f"Sigma array values must be of type float, not {sigma.dtype}." - ) - - if not isinstance(pixel_scale, float): - raise TypeError( - f"The pixel scale must of type float, not {type(pixel_scale)}." - ) - - if isinstance(sigma, np.ndarray) and np.any(sigma <= 0.0): - raise ValueError( - f"Found {sigma[sigma <=0].size} invalid standard deviation array " - + "values, all elements must to be greater than 0.0." - ) - elif isinstance(sigma, float) and sigma <= 0.0: - raise ValueError( - f"Invalid standard deviation {sigma}, needs to be greater than " - + "0.0." - ) - - if pixel_scale <= 0.0: - raise ValueError( - f"Invalid pixel scale {pixel_scale}, needs to be greater than 0.0." - ) - - cst = 2.35482004503 - - return sigma * cst * pixel_scale diff --git a/tests/module/test_make_cat.py b/tests/module/test_make_cat.py new file mode 100644 index 000000000..d5d249429 --- /dev/null +++ b/tests/module/test_make_cat.py @@ -0,0 +1,302 @@ +"""UNIT TESTS FOR MODULE PACKAGE: MAKE_CAT. + +Drives ``SaveCatalogue._save_ngmix_data`` against a synthetic ngmix catalogue +to lock in the column grammar ``ESTIMATOR_COMPONENT[_ERR]_OBJECT[_SHEAR]`` +(shapepipe#749, #761): galaxy is the implicit default object and carries no +``GAL`` token (``NGMIX_G1_NOSHEAR``, never ``NGMIX_G1_GAL_NOSHEAR``), while +the PSF families keep an explicit object token — +``NGMIX_[_ERR]__`` for ``PSF_ORIG``/``PSF_RECONV`` — +plus the three OBJECT/SHEAR-less metadata columns (``NGMIX[m]_MCAL_FLAGS``, +``NGMIX_N_EPOCH``, ``NGMIX_MCAL_TYPES_FAIL``). The original image PSF +(``PSF_ORIG``) and the metacal reconvolution kernel (``PSF_RECONV``) are +independent fits of *different* PSFs, no longer the single aliased value of +the pre-#749 code. +""" + +import numpy as np +import numpy.testing as npt +from astropy.io import fits + +from shapepipe.modules.make_cat_package.make_cat import SaveCatalogue +from shapepipe.modules.ngmix_package.ngmix import Ngmix + + +class _NullLogger: + def info(self, *_args, **_kwargs): + pass + + +# Per-object key set that compile_results emits into every shear-type +# extension (mirrors ngmix.compile_results ``names2``). Sentinel values are +# distinct per key so a mis-routed column is caught by value, and the orig vs +# reconv PSF families carry *different* sentinels so any aliasing surfaces. +NGMIX_KEYS = [ + "id", + "n_epoch_model", + "mcal_types_fail", + "nfev_fit", + "g1", "g1_err", "g2", "g2_err", + "T", "T_err", + "flux", "flux_err", "s2n", "mag", "mag_err", + "flags", "mcal_flags", + "g1_psf_orig", "g2_psf_orig", + "g1_err_psf_orig", "g2_err_psf_orig", + "T_psf_orig", "T_err_psf_orig", + "g1_psf_reconv", "g2_psf_reconv", + "g1_err_psf_reconv", "g2_err_psf_reconv", + "T_psf_reconv", "T_err_psf_reconv", +] + +SHEAR_EXTS = ["1M", "1P", "2M", "2P", "NOSHEAR"] +SHEAR_EXTS_LOWER = ["1m", "1p", "2m", "2p", "noshear"] + + +def _ngmix_row(obj_id): + """One object's per-key values, distinct for orig vs reconv PSF families. + + Built so every PSF column carries a sentinel that names its source: the + orig family from ``*_orig`` integers, the reconv family from ``*_reconv`` + integers, deliberately disjoint. + """ + return { + "id": obj_id, + "n_epoch_model": 3, + "mcal_types_fail": 0, + "nfev_fit": 7, + "g1": 0.10, "g1_err": 0.011, "g2": -0.20, "g2_err": 0.022, + "T": 0.30, "T_err": 0.033, + "flux": 100.0, "flux_err": 1.0, "s2n": 55.0, + "mag": 21.0, "mag_err": 0.05, + "flags": 0, "mcal_flags": 0, + # original image PSF — its own ellipticity and size + "g1_psf_orig": 0.0041, "g2_psf_orig": -0.0031, + "g1_err_psf_orig": 2e-5, "g2_err_psf_orig": 3e-5, + "T_psf_orig": 0.071, "T_err_psf_orig": 8e-4, + # metacal reconvolution kernel — round and enlarged, distinct values + "g1_psf_reconv": 0.0012, "g2_psf_reconv": 0.0022, + "g1_err_psf_reconv": 1e-5, "g2_err_psf_reconv": 1e-5, + "T_psf_reconv": 0.092, "T_err_psf_reconv": 1e-3, + } + + +def _write_ngmix_cat(path, obj_ids): + """Write a synthetic ngmix FITS with the five shear-type extensions. + + Each extension carries the exact ``compile_results`` key set, with the + same per-object values across extensions (the PSF families are + object-level, not per-shear). + """ + rows = [_ngmix_row(oid) for oid in obj_ids] + hdus = [fits.PrimaryHDU()] + for ext in SHEAR_EXTS: + cols = [ + fits.Column( + name=key, + format="K" if key in ("id", "n_epoch_model", "mcal_types_fail", + "nfev_fit", "flags", "mcal_flags") else "D", + array=np.array([row[key] for row in rows]), + ) + for key in NGMIX_KEYS + ] + hdus.append(fits.BinTableHDU.from_columns(cols, name=ext)) + fits.HDUList(hdus).writeto(path, overwrite=True) + + +def _run_save_ngmix(ngmix_path, obj_id, cat_size_target=None): + """Drive ``_save_ngmix_data`` and return its populated output dict.""" + inst = object.__new__(SaveCatalogue) + inst._obj_id = np.asarray(obj_id) + inst._output_dict = {} + inst._cat_size_target = ( + len(inst._obj_id) if cat_size_target is None else cat_size_target + ) + inst._w_log = _NullLogger() + + err_msg = inst._save_ngmix_data(str(ngmix_path)) + assert err_msg is None + return inst._output_dict + + +def test_save_ngmix_data_uses_new_grammar_and_no_old_names(tmp_path): + """Produced columns follow the new grammar; no old token survives. + + The renamed grammar drops ``_PSFo``, ``_Tpsf`` and the ellipticity + 2-vector ``NGMIX_ELL_*`` entirely (shapepipe#749). A regression to any of + those would be a silent break for the shipped param-file consumer + (``create_final_cat.py``), so the absence is asserted directly. + """ + ngmix_path = tmp_path / "ngmix-0.fits" + obj_ids = [11, 22, 33] + _write_ngmix_cat(ngmix_path, obj_ids) + + out = _run_save_ngmix(ngmix_path, obj_ids) + + # Every per-shear family is present under the new grammar. + for shear in SHEAR_EXTS: + for col in ( + f"NGMIX_G1_{shear}", f"NGMIX_G2_{shear}", + f"NGMIX_G1_ERR_{shear}", f"NGMIX_G2_ERR_{shear}", + f"NGMIX_T_{shear}", f"NGMIX_T_ERR_{shear}", + f"NGMIX_SNR_{shear}", + f"NGMIX_FLUX_{shear}", f"NGMIX_FLUX_ERR_{shear}", + f"NGMIX_MAG_{shear}", f"NGMIX_MAG_ERR_{shear}", + f"NGMIX_FLAGS_{shear}", + f"NGMIX_G1_PSF_ORIG_{shear}", f"NGMIX_G2_PSF_ORIG_{shear}", + f"NGMIX_T_PSF_ORIG_{shear}", + f"NGMIX_G1_PSF_RECONV_{shear}", f"NGMIX_G2_PSF_RECONV_{shear}", + f"NGMIX_T_PSF_RECONV_{shear}", + ): + assert col in out, f"missing {col}" + + # Object-level metadata columns carry no OBJECT/SHEAR token. + for col in ("NGMIX_MCAL_FLAGS", "NGMIX_N_EPOCH", "NGMIX_MCAL_TYPES_FAIL"): + assert col in out, f"missing {col}" + + # No old name survives anywhere in the produced columns. + for col in out: + assert "_PSFo" not in col, col + assert "_Tpsf" not in col and "TPSF" not in col, col + assert not col.startswith("NGMIX_ELL_"), col + assert "NGMIXm" not in col, col # moments branch off by default + # Galaxy is the implicit default object — no GAL token (shapepipe#761). + assert "_GAL_" not in col and not col.endswith("_GAL"), col + + +def test_save_ngmix_data_psf_families_trace_distinct_sources(tmp_path): + """The two PSF families read from their own ngmix columns, un-aliased. + + ``NGMIX_G1_PSF_ORIG_*`` must equal the orig source and + ``NGMIX_G1_PSF_RECONV_*`` the reconv source, and the two must differ — + the un-aliasing that shapepipe#749 restores. Likewise the sizes: + ``NGMIX_T_PSF_ORIG_*`` != ``NGMIX_T_PSF_RECONV_*``. + """ + ngmix_path = tmp_path / "ngmix-1.fits" + obj_ids = [11, 22, 33] + _write_ngmix_cat(ngmix_path, obj_ids) + row = _ngmix_row(11) + + out = _run_save_ngmix(ngmix_path, obj_ids) + + for shear in SHEAR_EXTS: + npt.assert_allclose( + out[f"NGMIX_G1_PSF_ORIG_{shear}"], + [row["g1_psf_orig"]] * len(obj_ids), + ) + npt.assert_allclose( + out[f"NGMIX_G1_PSF_RECONV_{shear}"], + [row["g1_psf_reconv"]] * len(obj_ids), + ) + # the un-aliasing: orig and reconv ellipticities differ + assert not np.allclose( + out[f"NGMIX_G1_PSF_ORIG_{shear}"], + out[f"NGMIX_G1_PSF_RECONV_{shear}"], + ) + # sizes are independent too + npt.assert_allclose( + out[f"NGMIX_T_PSF_ORIG_{shear}"], [row["T_psf_orig"]] * len(obj_ids) + ) + npt.assert_allclose( + out[f"NGMIX_T_PSF_RECONV_{shear}"], + [row["T_psf_reconv"]] * len(obj_ids), + ) + assert not np.allclose( + out[f"NGMIX_T_PSF_ORIG_{shear}"], + out[f"NGMIX_T_PSF_RECONV_{shear}"], + ) + + +def test_save_ngmix_data_fills_sentinels_for_absent_objects(tmp_path): + """An obj_id absent from the ngmix cat keeps its sentinel fill. + + make_cat pre-fills every column with a type-specific sentinel and only + overwrites the rows whose ``NUMBER`` matches an ngmix ``id``. An object + SExtractor saw but ngmix never fit (no matching id) must therefore keep + the sentinels: 0 for sizes/flags, -10 for ellipticities, 1e30 for + ``T_ERR``, -1 for flux/mag errors. + """ + ngmix_path = tmp_path / "ngmix-2.fits" + # ngmix fit only object 22; the final cat also carries 11 and 99. + _write_ngmix_cat(ngmix_path, [22]) + obj_ids = [11, 22, 99] + + out = _run_save_ngmix(ngmix_path, obj_ids, cat_size_target=3) + + present = obj_ids.index(22) + absent = [obj_ids.index(11), obj_ids.index(99)] + + # The matched object got its measured value; the others keep sentinels. + row = _ngmix_row(22) + g1_orig = np.asarray(out["NGMIX_G1_PSF_ORIG_NOSHEAR"]) + npt.assert_allclose(g1_orig[present], row["g1_psf_orig"]) + npt.assert_allclose(g1_orig[absent], [-10.0, -10.0]) + + t_orig = np.asarray(out["NGMIX_T_PSF_ORIG_NOSHEAR"]) + npt.assert_allclose(t_orig[present], row["T_psf_orig"]) + npt.assert_allclose(t_orig[absent], [0.0, 0.0]) + + t_err_gal = np.asarray(out["NGMIX_T_ERR_NOSHEAR"]) + npt.assert_allclose(t_err_gal[present], row["T_err"]) + npt.assert_allclose(t_err_gal[absent], [1e30, 1e30]) + + flux_err = np.asarray(out["NGMIX_FLUX_ERR_NOSHEAR"]) + npt.assert_allclose(flux_err[present], row["flux_err"]) + npt.assert_allclose(flux_err[absent], [-1.0, -1.0]) + + n_epoch = np.asarray(out["NGMIX_N_EPOCH"]) + npt.assert_allclose(n_epoch[present], row["n_epoch_model"]) + npt.assert_allclose(n_epoch[absent], [0.0, 0.0]) + + +def test_save_ngmix_data_matches_module_serialised_catalogue(tmp_path): + """End-to-end: make_cat reads a catalogue ngmix itself serialised. + + Rather than hand-rolling the FITS, drive ngmix's own + ``compile_results`` + ``save_results`` (the real write path), then read it + back through ``_save_ngmix_data``. This guards the make_cat reader against + drift in the ngmix key set: the keys must line up end to end. + """ + obj_ids = [5, 7] + results = [] + for oid in obj_ids: + row = _ngmix_row(oid) + per_type = { + "nfev": row["nfev_fit"], + "g": [row["g1"], row["g2"]], + "g_cov": np.diag([row["g1_err"] ** 2, row["g2_err"] ** 2]), + "T": row["T"], "T_err": row["T_err"], + "flux": row["flux"], "flux_err": row["flux_err"], + "s2n": row["s2n"], "flags": 0, + } + res = { + "obj_id": oid, + "n_epoch_model": row["n_epoch_model"], + "mcal_types_fail": row["mcal_types_fail"], + "mcal_flags": row["mcal_flags"], + } + for key in NGMIX_KEYS: + if key.endswith("_psf_orig") or key.endswith("_psf_reconv"): + res[key] = row[key] + res.update({name: dict(per_type) for name in SHEAR_EXTS_LOWER}) + results.append(res) + + ngmix_inst = object.__new__(Ngmix) + ngmix_inst._zero_point = 30.0 + ngmix_inst._output_dir = str(tmp_path) + ngmix_inst._file_number_string = "-0" + out_dict = ngmix_inst.compile_results(results) + ngmix_inst.save_results(out_dict) + + ngmix_path = ngmix_inst.get_output_path(str(tmp_path)) + out = _run_save_ngmix(ngmix_path, obj_ids) + + # Both PSF families survive the round trip, distinct, on every object. + npt.assert_allclose( + out["NGMIX_G1_PSF_ORIG_NOSHEAR"], [_ngmix_row(o)["g1_psf_orig"] for o in obj_ids] + ) + npt.assert_allclose( + out["NGMIX_G1_PSF_RECONV_NOSHEAR"], + [_ngmix_row(o)["g1_psf_reconv"] for o in obj_ids], + ) + assert not np.allclose( + out["NGMIX_G1_PSF_ORIG_NOSHEAR"], out["NGMIX_G1_PSF_RECONV_NOSHEAR"] + ) diff --git a/tests/module/test_ngmix.py b/tests/module/test_ngmix.py index dcdcb2d24..37db4e3fc 100644 --- a/tests/module/test_ngmix.py +++ b/tests/module/test_ngmix.py @@ -119,7 +119,7 @@ def _metacal_noshear_g(seed): flags, jacobs, ) - res, _ = do_ngmix_metacal(stamp, prior, 1.0, rng) + res, _, _ = do_ngmix_metacal(stamp, prior, 1.0, rng) return np.asarray(res["noshear"]["g"]) @@ -135,15 +135,30 @@ def test_metacal_is_reproducible_with_fixed_seed(): npt.assert_array_equal(_metacal_noshear_g(42), _metacal_noshear_g(42)) +# The two PSF families carry distinct ellipticity AND size (shapepipe#749): +# the original image PSF (-> *_psf_orig) and the metacal reconvolution kernel +# (-> *_psf_reconv) are independent fits, so a regression to the old aliasing +# (both from one source) would surface here. The original PSF is elliptical +# and smaller than the round, enlarged reconvolution kernel. +RECONV_PSF_G = [0.001, 0.002] +RECONV_PSF_G_ERR = [1e-5, 1e-5] +ORIG_PSF_G = [0.004, -0.003] +ORIG_PSF_G_ERR = [2e-5, 3e-5] +# original-PSF size, deliberately != the reconvolution T_psf the builder is +# handed, so the size un-aliasing is exercised end to end. +ORIG_PSF_T = 0.07 +ORIG_PSF_T_ERR = 0.0008 + + def _fake_metacal_result(T, T_err, T_psf, T_psf_err): """Build one minimal metacal result as produced by the process loop. - The PSF size entries mirror the conversion done at the source: - ``r50_PSFo = sqrt(2 ln 2) * sigma`` with ``sigma = sqrt(T_psf / 2)``. + Both PSF families are filled with self-named scalar keys, as the process + loop back-fills them: ``*_psf_orig`` (original image PSF) and + ``*_psf_reconv`` (metacal reconvolution kernel). The reconvolution-kernel + size is the builder's ``T_psf`` argument; the original-PSF size is the + fixed ``ORIG_PSF_T``, deliberately different so the un-aliasing is tested. """ - from shapepipe.modules.ngmix_package.ngmix import SIGMA_TO_R50 - - sigma_psf = np.sqrt(max(T_psf, 0) / 2) per_type = { "nfev": 1, "g": [0.01, -0.02], @@ -159,16 +174,20 @@ def _fake_metacal_result(T, T_err, T_psf, T_psf_err): "obj_id": 1, "n_epoch_model": 1, "mcal_types_fail": 0, - "g_PSFo": [0.001, 0.002], - "g_err_PSFo": [1e-5, 1e-5], - "T_PSFo": T_psf, - "T_err_PSFo": T_psf_err, - "r50_PSFo": SIGMA_TO_R50 * sigma_psf, - "r50_err_PSFo": ( - SIGMA_TO_R50 * T_psf_err / (4 * sigma_psf) - if sigma_psf > 0 - else np.nan - ), + # original image PSF (psfex/mccd) family + "g1_psf_orig": ORIG_PSF_G[0], + "g2_psf_orig": ORIG_PSF_G[1], + "g1_err_psf_orig": ORIG_PSF_G_ERR[0], + "g2_err_psf_orig": ORIG_PSF_G_ERR[1], + "T_psf_orig": ORIG_PSF_T, + "T_err_psf_orig": ORIG_PSF_T_ERR, + # metacal reconvolution-kernel family + "g1_psf_reconv": RECONV_PSF_G[0], + "g2_psf_reconv": RECONV_PSF_G[1], + "g1_err_psf_reconv": RECONV_PSF_G_ERR[0], + "g2_err_psf_reconv": RECONV_PSF_G_ERR[1], + "T_psf_reconv": T_psf, + "T_err_psf_reconv": T_psf_err, "mcal_flags": 0, } res.update( @@ -177,14 +196,16 @@ def _fake_metacal_result(T, T_err, T_psf, T_psf_err): return res -def test_compile_results_size_columns_are_half_light_radii(): - """Every r50 column is a true half-light radius, on both sides. +def test_compile_results_size_columns_are_unaliased(): + """Each PSF family carries its OWN fitted area ``T``, un-aliased (#749). - Galaxy ``r50 = sqrt(ln 2 * T)`` (not the raw area ``pars[4]``), PSF - ``r50psf = sqrt(2 ln 2) * sigma_psf`` (not bare sigma), and the - redundant ``*_psfo_ngmix`` size duplicates are gone. + The galaxy area ``T`` is the fitted ``pars[4]``; the reconvolution kernel + (``*_psf_reconv``) and the original image PSF (``*_psf_orig``) are + independent fits with their own, different sizes. Half-light radius is no + longer carried in the catalogue — it is derivable from ``T`` downstream + via :func:`cs_util.size.T_to_r50` — so only the ``T`` columns are checked. """ - from shapepipe.modules.ngmix_package.ngmix import Ngmix, SIGMA_TO_R50 + from shapepipe.modules.ngmix_package.ngmix import Ngmix T, T_err = 0.18, 0.02 T_psf, T_psf_err = 0.09, 0.001 @@ -195,36 +216,51 @@ def test_compile_results_size_columns_are_half_light_radii(): noshear = out["noshear"] - # galaxy: r50 = sqrt(ln2 * T) = 1.17741 * sqrt(T / 2), error dT * r50/(2T) - r50_expected = np.sqrt(np.log(2) * T) - npt.assert_allclose(noshear["r50"], [r50_expected]) - npt.assert_allclose(noshear["r50_err"], [r50_expected * T_err / (2 * T)]) + # galaxy area is the fitted T + npt.assert_allclose(noshear["T"], [T]) + npt.assert_allclose(noshear["T_err"], [T_err]) - # PSF: r50psf = sqrt(2 ln2) * sigma, sigma = sqrt(T_psf / 2) - sigma_psf = np.sqrt(T_psf / 2) - npt.assert_allclose(noshear["r50psf"], [SIGMA_TO_R50 * sigma_psf]) - npt.assert_allclose( - noshear["r50psf_err"], [SIGMA_TO_R50 * T_psf_err / (4 * sigma_psf)] - ) + # reconvolution kernel + npt.assert_allclose(noshear["T_psf_reconv"], [T_psf]) + npt.assert_allclose(noshear["T_err_psf_reconv"], [T_psf_err]) - # galaxy/PSF r50 are now commensurable: same convention on both sides - npt.assert_allclose( - np.array(noshear["r50"]) / np.array(noshear["r50psf"]), - [np.sqrt(T / T_psf)], - ) + # original image PSF carries its OWN size, un-aliased from the kernel + npt.assert_allclose(noshear["T_psf_orig"], [ORIG_PSF_T]) + npt.assert_allclose(noshear["T_err_psf_orig"], [ORIG_PSF_T_ERR]) + assert noshear["T_psf_orig"] != noshear["T_psf_reconv"] - # areas pass through untouched, with the err asymmetry fixed - npt.assert_allclose(noshear["Tpsf"], [T_psf]) - npt.assert_allclose(noshear["Tpsf_err"], [T_psf_err]) - # the *_psfo_ngmix size duplicates are retired - for retired in ( - "T_psfo_ngmix", - "T_err_psfo_ngmix", - "r50_psfo_ngmix", - "r50_err_psfo_ngmix", - ): - assert retired not in noshear +def test_compile_results_psf_families_are_unaliased(): + """The two PSF-ellipticity families document *different* PSFs (#749). + + ``*_psf_reconv`` carries the metacal RECONVOLUTION kernel; ``*_psf_orig`` + carries the ORIGINAL image PSF, fit independently. Before the fix both + came from one ``average_multiepoch_psf`` result and so were byte- + identical; here they must differ, each tracing its own source. The + companion size un-aliasing is checked in + ``test_compile_results_size_columns_are_unaliased``. + """ + from shapepipe.modules.ngmix_package.ngmix import Ngmix + + inst = object.__new__(Ngmix) + inst._zero_point = 30.0 + noshear = inst.compile_results( + [_fake_metacal_result(0.18, 0.02, 0.09, 0.001)] + )["noshear"] + + # reconvolution kernel + npt.assert_allclose(noshear["g1_psf_reconv"], [RECONV_PSF_G[0]]) + npt.assert_allclose(noshear["g2_psf_reconv"], [RECONV_PSF_G[1]]) + npt.assert_allclose(noshear["g1_err_psf_reconv"], [RECONV_PSF_G_ERR[0]]) + npt.assert_allclose(noshear["g2_err_psf_reconv"], [RECONV_PSF_G_ERR[1]]) + # original image PSF + npt.assert_allclose(noshear["g1_psf_orig"], [ORIG_PSF_G[0]]) + npt.assert_allclose(noshear["g2_psf_orig"], [ORIG_PSF_G[1]]) + npt.assert_allclose(noshear["g1_err_psf_orig"], [ORIG_PSF_G_ERR[0]]) + npt.assert_allclose(noshear["g2_err_psf_orig"], [ORIG_PSF_G_ERR[1]]) + # the un-aliasing: the two families are no longer the same value + assert noshear["g1_psf_orig"] != noshear["g1_psf_reconv"] + assert noshear["g2_psf_orig"] != noshear["g2_psf_reconv"] def test_compile_results_nan_fills_failed_fit_types(): @@ -254,12 +290,20 @@ def test_compile_results_nan_fills_failed_fit_types(): failed = out["1p"] for col in ( "g1", "g2", "g1_err", "g2_err", "T", "T_err", "flux", "flux_err", - "s2n", "mag", "mag_err", "r50", "r50_err", + "s2n", "mag", "mag_err", ): assert np.isnan(failed[col]).all(), col assert failed["flags"] == [0x8] assert failed["mcal_flags"] == [0x8] and failed["mcal_flags"][0] != 0 + # The object-level PSF columns survive on a failed fit type: they are copied + # outside the flags==0 guard, so the failed type keeps a full-length PSF row + # (gating that copy on the galaxy flags would shorten it -> save crash). + npt.assert_allclose(failed["T_psf_orig"], [ORIG_PSF_T]) + npt.assert_allclose(failed["T_psf_reconv"], [0.09]) + npt.assert_allclose(failed["g1_psf_orig"], [ORIG_PSF_G[0]]) + npt.assert_allclose(failed["g1_psf_reconv"], [RECONV_PSF_G[0]]) + # successful types are untouched, but share the object's mcal_flags npt.assert_allclose(out["noshear"]["flux"], [100.0]) assert out["noshear"]["flags"] == [0] @@ -283,18 +327,6 @@ def test_get_mcal_flags_ors_per_type_fit_flags(): assert get_mcal_flags(res) == 0xA -def test_compile_results_nonpositive_T_gives_nan_r50(): - """A non-positive fitted area cannot yield a real half-light radius.""" - from shapepipe.modules.ngmix_package.ngmix import Ngmix - - inst = object.__new__(Ngmix) - inst._zero_point = 30.0 - out = inst.compile_results([_fake_metacal_result(-0.05, 0.02, 0.09, 0.001)]) - - assert np.isnan(out["noshear"]["r50"]).all() - assert np.isnan(out["noshear"]["r50_err"]).all() - - def test_average_multiepoch_psf_skips_failed_psf_epochs(): """A failed-PSF epoch must be skipped, not KeyError the whole object. @@ -333,6 +365,181 @@ def epoch(result, weight_value): assert psf_res["n_epoch"] == 1 +def test_average_original_psf_fits_each_gal_psf_with_galaxy_weight(): + """The original-PSF average fits ``gal_obs.psf`` and weights by gal S/N. + + Restores the independent original-image-PSF fit (shapepipe#749): each + epoch's psfex/mccd PSF stamp (``gal_obs.psf``) is fit by the shared + ``psf_runner``, then weight-averaged by the *galaxy* inverse-variance + weight (matching :func:`average_multiepoch_psf`). A pre-set + ``psf.meta['result']`` per epoch stands in for the runner's fit so the + averaging math is checked in isolation. + """ + from types import SimpleNamespace + from shapepipe.modules.ngmix_package.ngmix import average_original_psf + + def gal_epoch(psf_result, weight_value): + # gal_obs carries a .psf whose meta['result'] is what the runner + # would set; .weight is the galaxy inverse-variance map. The PSF + # observation must expose .copy() (average_original_psf fits a copy + # so gal_obs.psf is never mutated); the copy carries the same + # pre-populated meta so the averaging math is unchanged. + psf_obs = SimpleNamespace(meta={"result": psf_result}) + psf_obs.copy = lambda _self=psf_obs: SimpleNamespace( + meta=dict(_self.meta) + ) + return SimpleNamespace( + psf=psf_obs, + weight=np.full((2, 2), weight_value), + ) + + # runner stub: a no-op .go, since meta['result'] is pre-populated. It + # must be *called* (the fit happens), so record that it was. + calls = [] + runner = SimpleNamespace(go=lambda obs: calls.append(obs)) + + good_a = { + "flags": 0, "T": 0.30, "T_err": 0.02, + "g": np.array([0.04, -0.03]), "g_err": np.array([1e-3, 1e-3]), + } + good_b = { + "flags": 0, "T": 0.20, "T_err": 0.01, + "g": np.array([0.06, -0.01]), "g_err": np.array([2e-3, 2e-3]), + } + failed = {"flags": 3, "nfev": 5, "pars": np.zeros(6)} # no T/g keys + + gal_obs_list = [ + gal_epoch(good_a, 1.0), # galaxy weight-sum = 4 + gal_epoch(good_b, 2.0), # galaxy weight-sum = 8 + gal_epoch(failed, 4.0), # dropped on flags != 0 + ] + psf_orig_res = average_original_psf(gal_obs_list, runner) + + # the runner fit every epoch's PSF + assert len(calls) == 3 + # weighted over the two surviving epochs (weights 4 and 8) + w = np.array([4.0, 8.0]) + npt.assert_allclose( + psf_orig_res["g_psf"], + (good_a["g"] * w[0] + good_b["g"] * w[1]) / w.sum(), + ) + npt.assert_allclose( + psf_orig_res["T_psf"], + (good_a["T"] * w[0] + good_b["T"] * w[1]) / w.sum(), + ) + assert psf_orig_res["n_epoch"] == 2 + # original PSF is elliptical — not the round reconvolution kernel + assert abs(psf_orig_res["g_psf"][0]) > 1e-3 + + +def _do_ngmix_metacal_on_psf(psf_shear, seed=7, n_epochs=2): + """Run do_ngmix_metacal on a simulated stamp with a given PSF shape. + + Returns ``(resdict, psf_reconv, psf_orig, gal_obs_list)``, where the last + is the list metacal consumed — so a caller can assert that the original-PSF + pre-fit left it pristine. + """ + from shapepipe.modules.ngmix_package.ngmix import ( + Postage_stamp, + do_ngmix_metacal, + get_prior, + ) + from shapepipe.testing.simulate import make_data + + rng = np.random.RandomState(seed) + prior = get_prior(0.1857, rng) + gals, psfs, _, weights, flags, jacobs = make_data( + rng=np.random.RandomState(123), + shear=(0.0, 0.0), + noise=1e-5, + n_epochs=n_epochs, + img_size=51, + psf_shear=psf_shear, + ) + stamp = Postage_stamp(bkg_sub=False, megacam_flip=False) + stamp.gals, stamp.psfs, stamp.weights, stamp.flags, stamp.jacobs = ( + gals, + psfs, + weights, + flags, + jacobs, + ) + resdict, psf_reconv, psf_orig = do_ngmix_metacal(stamp, prior, 1.0, rng) + return resdict, psf_reconv, psf_orig + + +def test_original_psf_prefit_leaves_gal_obs_psf_pristine(): + """The original-PSF pre-fit must not mutate the PSF metacal consumes (#749). + + ``average_original_psf`` fits each epoch's psfex/mccd PSF with the shared + ``psf_runner``. ``PSFRunner.go`` sets ``.gmix`` (and ``.meta['result']``) + on the observation it fits; if that were ``gal_obs.psf`` itself, the gmix + would survive metacal's deep copy and be reused as the + ``MetacalFitGaussPSF`` fallback when admom+ML both fail — silently + rescuing objects the base branch dropped (``BootPSFFailure``). The fix + fits a *copy*, so ``gal_obs.psf`` carries NO gmix afterward, matching the + base-branch state that makes this add-column refactor bit-identical on the + galaxy results. + """ + from ngmix.observation import ObsList + from shapepipe.modules.ngmix_package.ngmix import ( + average_original_psf, + get_prior, + make_ngmix_observation, + make_runners, + ) + from shapepipe.testing.simulate import make_data + + rng = np.random.RandomState(7) + prior = get_prior(0.1857, rng) + gals, psfs, _, weights, flags, jacobs = make_data( + rng=np.random.RandomState(123), + shear=(0.0, 0.0), + noise=1e-5, + n_epochs=2, + img_size=51, + psf_shear=(0.05, -0.04), + ) + gal_obs_list = ObsList() + for n_e in range(2): + gal_obs_list.append( + make_ngmix_observation( + gals[n_e], weights[n_e], flags[n_e], psfs[n_e], jacobs[n_e], + rng, + ) + ) + + # Pre-state: a freshly built PSF observation carries no gmix. + assert not any(obs.psf.has_gmix() for obs in gal_obs_list) + + runner, psf_runner = make_runners(prior, 1.0, rng) + average_original_psf(gal_obs_list, psf_runner) + + # The pre-fit fit a copy, so the originals are untouched: no gmix and no + # leaked fit result in meta. + assert not any(obs.psf.has_gmix() for obs in gal_obs_list), ( + "original-PSF pre-fit leaked a gmix into gal_obs.psf" + ) + assert not any("result" in obs.psf.meta for obs in gal_obs_list), ( + "original-PSF pre-fit leaked a fit result into gal_obs.psf.meta" + ) + + +def test_reconv_psf_is_rounder_and_larger_than_orig_on_elliptical_psf(): + """Reconvolution kernel is round + dilated relative to the original PSF. + + On an ELLIPTICAL PSF stamp the metacal reconvolution kernel is round and + enlarged by construction, so ``T_psf_reconv > T_psf_orig`` and + ``|g_psf_reconv| < |g_psf_orig|``. This end-to-end guard catches a + ``psf_res`` / ``psf_orig_res`` transposition in :func:`do_ngmix_metacal` + (the two families share keys, so a swap is otherwise silent). + """ + _, psf_reconv, psf_orig = _do_ngmix_metacal_on_psf((0.05, -0.04)) + + assert psf_reconv["T_psf"] > psf_orig["T_psf"] + assert np.hypot(*psf_reconv["g_psf"]) < np.hypot(*psf_orig["g_psf"]) + + def test_weight_map_recovers_injected_inverse_variance(): """A supplied inverse-variance map must not be renormalized twice.""" from shapepipe.modules.ngmix_package.ngmix import prepare_ngmix_weights diff --git a/tests/module/test_ngmix_weight_validation.py b/tests/module/test_ngmix_weight_validation.py index 243cbe837..abc908fd2 100644 --- a/tests/module/test_ngmix_weight_validation.py +++ b/tests/module/test_ngmix_weight_validation.py @@ -41,6 +41,7 @@ import pytest from shapepipe.modules.ngmix_package.ngmix import ( + PSF_NOISE, Postage_stamp, do_ngmix_metacal, get_prior, @@ -145,7 +146,9 @@ def fit_metacal(gal, psf_im, bkg_rms, seed): stamp.jacobs = [WCS.jacobian()] if bkg_rms is not None: stamp.bkg_rms = [bkg_rms] - res, _ = do_ngmix_metacal(stamp, get_prior(PIX_SCALE, rng), GAL_FLUX, rng) + res, _, _ = do_ngmix_metacal( + stamp, get_prior(PIX_SCALE, rng), GAL_FLUX, rng + ) return res["noshear"] @@ -316,3 +319,114 @@ def test_metacal_ivar_beats_binary_under_noise_gradient( f"metacal ivar/binary shape-scatter ratio {ratio:.3f} >= 0.9: the" " rms-weight advantage does not survive the metacal/fixnoise path" ) + + +# --------------------------------------------------------------------------- +# PSF-observation weight (#749 / #774) +# +# make_ngmix_observation built the PSF observation weightless, so ngmix +# defaulted to unit weight on the unit-flux PSF stamp and the galaxy +# GPriorBA(0.4) prior shared with the PSF fitter swamped the likelihood. The +# recovered PSF *shape* then collapsed toward the prior (zero ellipticity). +# With the column grammar (#761) this corrupts the *_psf_orig diagnostic +# columns, fit from the original image PSF by average_original_psf. A finite +# flat weight = 1/PSF_NOISE**2 restores the likelihood. The fix touches only +# the diagnostic original-PSF fit (run with the galaxy prior); metacal fits the +# same psf_obs with a prior-free AdmomFitter, which is insensitive to a flat +# weight's scale, so the calibrated shear is unchanged. +# --------------------------------------------------------------------------- + +PSF_SHEAR = (0.05, -0.03) # injected PSF ellipticity for the recovery test + + +def _psf_orig_via_metacal(psf_noise, psf_shear=PSF_SHEAR, seed=7): + """Run the real production path and return (HSM truth, psf_orig, resdict). + + Builds a sheared-PSF stamp with the module's own ``make_data`` simulator, + pushes it through ``do_ngmix_metacal`` (which fits the original image PSF + via ``average_original_psf`` into the ``*_psf_orig`` columns), under a + given ``PSF_NOISE``. ``psf_noise=1.0`` reproduces the pre-fix unit weight. + """ + import shapepipe.modules.ngmix_package.ngmix as ngm + from shapepipe.testing.simulate import make_data + + old = ngm.PSF_NOISE + ngm.PSF_NOISE = psf_noise + try: + rng = np.random.RandomState(seed) + prior = get_prior(PIX_SCALE, rng) + gals, psfs, _, weights, flags, jacobs = make_data( + rng=np.random.RandomState(123), shear=(0.0, 0.0), noise=1e-5, + n_epochs=2, img_size=51, psf_shear=psf_shear, + ) + hsm = galsim.hsm.FindAdaptiveMom(galsim.Image(psfs[0], scale=PIX_SCALE)) + g_truth = np.array([hsm.observed_shape.g1, hsm.observed_shape.g2]) + stamp = Postage_stamp(bkg_sub=False, megacam_flip=False) + (stamp.gals, stamp.psfs, stamp.weights, stamp.flags, stamp.jacobs) = ( + gals, psfs, weights, flags, jacobs, + ) + resdict, _psf_reconv, psf_orig = do_ngmix_metacal( + stamp, prior, 1.0, rng, + ) + return g_truth, psf_orig, resdict + finally: + ngm.PSF_NOISE = old + + +def test_make_ngmix_observation_psf_obs_is_weighted(): + """Structural guard: psf_obs carries the finite 1/PSF_NOISE**2 weight. + + The whole #749 fix is this weight; a regression to the weightless build + (unit weight) is what we must never ship again. + """ + from shapepipe.testing.simulate import make_data + + gals, psfs, _, weights, flags, jacobs = make_data( + rng=np.random.RandomState(123), shear=(0.0, 0.0), noise=1e-5, + n_epochs=1, img_size=51, psf_shear=PSF_SHEAR, + ) + obs = make_ngmix_observation( + gals[0], weights[0], flags[0], psfs[0], jacobs[0], + np.random.RandomState(0), + ) + npt.assert_allclose(obs.psf.weight, 1.0 / PSF_NOISE ** 2) + assert (obs.psf.weight > 1.0).all(), "PSF obs fell back to unit weight" + + +def test_psf_weight_recovers_original_psf_ellipticity(): + """#749 done-condition, end to end: *_psf_orig recovers the PSF shape. + + Through the real ``do_ngmix_metacal`` / ``average_original_psf`` path, the + original-PSF ellipticity matches the injected PSF moment shape with the + weight, and collapses toward the prior (zero) without it — the contrast + that proves the weight is load-bearing for the diagnostic columns. + """ + g_truth, psf_orig_w, _ = _psf_orig_via_metacal(PSF_NOISE) + _, psf_orig_u, _ = _psf_orig_via_metacal(1.0) # pre-fix unit weight + + # The fix: original-PSF column recovers the PSF moment shape. + npt.assert_allclose(psf_orig_w["g_psf"], g_truth, atol=1e-3) + + # The bug it cures: weightless collapses the shape toward the prior. + assert np.linalg.norm(psf_orig_u["g_psf"]) < 0.1 * np.linalg.norm(g_truth), ( + "weightless original-PSF fit did not collapse -- the #749 contrast is" + " gone, so this test no longer proves the weight is load-bearing" + ) + + +def test_psf_weight_leaves_metacal_shear_invariant(): + """The fix must not move the calibration: metacal shear flat in PSF_NOISE. + + Metacal fits the same psf_obs with a prior-free AdmomFitter (then dilates + to a round reconvolution kernel); a prior-free moment fit is insensitive to + a flat weight's absolute scale, so every metacal-type shear is invariant to + the diagnostic weight. Asserted bit-for-bit between the unit weight + (pre-fix) and the shipped PSF_NOISE. + """ + _, _, res_w = _psf_orig_via_metacal(PSF_NOISE) + _, _, res_u = _psf_orig_via_metacal(1.0) + for mtype in ("noshear", "1p", "1m", "2p", "2m"): + npt.assert_allclose( + res_w[mtype]["g"], res_u[mtype]["g"], rtol=0, atol=1e-6, + err_msg=f"PSF-obs weight moved the metacal {mtype} shear", + ) diff --git a/tests/module/test_psf_averaging_properties.py b/tests/module/test_psf_averaging_properties.py new file mode 100644 index 000000000..3afaa7bbe --- /dev/null +++ b/tests/module/test_psf_averaging_properties.py @@ -0,0 +1,355 @@ +"""PROPERTY-BASED TESTS FOR PSF EPOCH-AVERAGING + make_cat SENTINELS. + +The two PSF families this module exports (the metacal reconvolution kernel via +``average_multiepoch_psf`` and the original image PSF via +``average_original_psf``) share a single averaging core, +:func:`shapepipe.modules.ngmix_package.ngmix._average_psf_fits`. These +hypothesis properties pin the contract of that core directly — a weighted mean +over the surviving (``flags == 0``) epochs — and the companion sentinel-fill +contract on the make_cat reader, which must leave an obj_id absent from the +ngmix catalogue at its type-specific sentinel. + +Strategies are kept in physically sensible ranges (positive epoch weights, +positive sizes ``T``, ``|g| < 1``) so every generated input is a valid PSF-fit +result the averaging core would actually be handed in production. +""" + +import os +import tempfile +from itertools import zip_longest + +import numpy as np +import numpy.testing as npt +import pytest +from astropy.io import fits +from hypothesis import given, settings +from hypothesis import strategies as st + +from shapepipe.modules.make_cat_package.make_cat import SaveCatalogue +from shapepipe.modules.ngmix_package.ngmix import _average_psf_fits + + +# --------------------------------------------------------------------------- # +# Strategies for one valid per-epoch PSF-fit result. +# --------------------------------------------------------------------------- # + +# Bounded, non-degenerate floats: positive sizes, |g| < 1, strictly positive +# weights. min_magnitude keeps weights away from 0 so wsum can never vanish on +# the surviving epochs (the core raises ZeroDivisionError there by contract). +_g_component = st.floats(min_value=-0.9, max_value=0.9, allow_nan=False) +_positive_T = st.floats(min_value=1e-3, max_value=10.0, allow_nan=False) +_positive_err = st.floats(min_value=1e-6, max_value=1.0, allow_nan=False) +_weight = st.floats( + min_value=1e-3, max_value=1e3, allow_nan=False, allow_infinity=False +) + + +def _result(g1, g2, t, g1_err, g2_err, t_err, flags=0): + """One ngmix PSF-fit result dict, as the averaging core consumes it.""" + return { + "flags": flags, + "g": np.array([g1, g2]), + "g_err": np.array([g1_err, g2_err]), + "T": t, + "T_err": t_err, + } + + +@st.composite +def _good_epoch(draw): + """A surviving (flags == 0) epoch paired with its positive weight.""" + return ( + _result( + draw(_g_component), draw(_g_component), draw(_positive_T), + draw(_positive_err), draw(_positive_err), draw(_positive_err), + ), + draw(_weight), + ) + + +# --------------------------------------------------------------------------- # +# (a) weighted average lies within [min, max] of the per-epoch values. +# --------------------------------------------------------------------------- # + +@settings(deadline=None, max_examples=50) +@given(st.lists(_good_epoch(), min_size=1, max_size=8)) +def test_average_lies_within_per_epoch_range(epochs): + """A positive-weight weighted mean is a convex combination of its inputs. + + For every averaged component (g1, g2, T) the result must sit within the + [min, max] envelope of the per-epoch values. This is the defining property + of a weighted mean with strictly positive weights; it fails immediately if + the core ever divided by the wrong weight sum, summed unweighted, or let a + value escape the convex hull. + """ + out = _average_psf_fits(epochs) + + g1_vals = np.array([r["g"][0] for r, _ in epochs]) + g2_vals = np.array([r["g"][1] for r, _ in epochs]) + t_vals = np.array([r["T"] for r, _ in epochs]) + + # rtol absorbs only floating-point round-off, not a real bound violation. + npt.assert_array_less(out["g_psf"][0], g1_vals.max() + 1e-9) + npt.assert_array_less(g1_vals.min() - 1e-9, out["g_psf"][0]) + npt.assert_array_less(out["g_psf"][1], g2_vals.max() + 1e-9) + npt.assert_array_less(g2_vals.min() - 1e-9, out["g_psf"][1]) + npt.assert_array_less(out["T_psf"], t_vals.max() + 1e-9) + npt.assert_array_less(t_vals.min() - 1e-9, out["T_psf"]) + + assert out["n_epoch"] == len(epochs) + + +@settings(deadline=None, max_examples=50) +@given(st.lists(_good_epoch(), min_size=1, max_size=8)) +def test_average_matches_explicit_weighted_mean(epochs): + """The core's output equals the textbook weighted mean of the survivors. + + Stronger than the [min, max] envelope: pins the exact value, so a swap of + weighting factor or an off-by-one in the accumulation is caught. + """ + out = _average_psf_fits(epochs) + w = np.array([weight for _, weight in epochs]) + + g = np.array([r["g"] for r, _ in epochs]) + t = np.array([r["T"] for r, _ in epochs]) + npt.assert_allclose(out["g_psf"], (g * w[:, None]).sum(0) / w.sum()) + npt.assert_allclose(out["T_psf"], (t * w).sum() / w.sum()) + + +# --------------------------------------------------------------------------- # +# (b) epochs with flags != 0 are excluded from the average. +# --------------------------------------------------------------------------- # + +@settings(deadline=None, max_examples=50) +@given( + st.lists(_good_epoch(), min_size=1, max_size=6), + st.lists( + st.tuples(_weight, st.integers(min_value=1, max_value=255)), + min_size=1, + max_size=6, + ), +) +def test_flagged_epochs_are_excluded(good_epochs, flagged_specs): + """A failed-PSF epoch (flags != 0) must not enter the average at all. + + Each flagged epoch carries poisoned NaN measurement fields and a huge, + out-of-range T — values that would wreck the mean (NaN-poison it, or shove + it far outside the good-epoch envelope) if they leaked in. The averaged + result and n_epoch must match a clean average over the good epochs only, + proving the flagged ones were dropped, not merely down-weighted. + """ + flagged = [ + ( + _result( + np.nan, np.nan, 1e6, np.nan, np.nan, np.nan, flags=flags + ), + weight, + ) + for weight, flags in flagged_specs + ] + # Interleave good and flagged epochs WITHOUT duplicating any good epoch, so + # exclusion can't be a happy accident of ordering. itertools.zip_longest + # threads them together; the leftover tail of the longer list follows. + mixed = [ + e + for pair in zip_longest(good_epochs, flagged) + for e in pair + if e is not None + ] + + out = _average_psf_fits(mixed) + expected = _average_psf_fits(good_epochs) + + npt.assert_allclose(out["g_psf"], expected["g_psf"]) + npt.assert_allclose(out["T_psf"], expected["T_psf"]) + npt.assert_allclose(out["T_psf_err"], expected["T_psf_err"]) + assert out["n_epoch"] == len(good_epochs) + assert np.isfinite(out["g_psf"]).all() and np.isfinite(out["T_psf"]) + + +# --------------------------------------------------------------------------- # +# (c) a single surviving epoch returns that epoch's value. +# --------------------------------------------------------------------------- # + +@settings(deadline=None, max_examples=50) +@given( + _good_epoch(), + st.lists( + st.tuples(_weight, st.integers(min_value=1, max_value=255)), + max_size=5, + ), +) +def test_single_survivor_returns_its_own_value(survivor, flagged_specs): + """When exactly one epoch survives, its values pass through untouched. + + The lone survivor's weight cancels in mean = (v*w)/w, so the result must be + the survivor's value exactly, regardless of how many flagged epochs (with + arbitrary weights) surround it. n_epoch must be 1. + """ + result, weight = survivor + flagged = [ + (_result(np.nan, np.nan, 1e6, np.nan, np.nan, np.nan, flags=f), w) + for w, f in flagged_specs + ] + out = _average_psf_fits(flagged + [survivor] + flagged) + + npt.assert_allclose(out["g_psf"], result["g"]) + npt.assert_allclose(out["g_psf_err"], result["g_err"]) + npt.assert_allclose(out["T_psf"], result["T"]) + npt.assert_allclose(out["T_psf_err"], result["T_err"]) + assert out["n_epoch"] == 1 + + +@given( + st.lists( + st.tuples(_weight, st.integers(min_value=1, max_value=255)), + min_size=1, max_size=5, + ) +) +@settings(deadline=None, max_examples=25) +def test_all_epochs_failed_raises_zero_division(flagged_specs): + """All-epochs-failed is the contract that wsum == 0 raises, not returns 0/NaN. + + An object whose PSF fit failed on every epoch reaches both PSF-family + wrappers (reconv and orig) with zero survivors, so ``wsum`` stays 0. The + core must raise ``ZeroDivisionError`` rather than silently divide — a + regression returning NaN/0 would be caught here. Every input epoch is + flagged (``flags != 0``) with an arbitrary positive weight, so no survivor + can rescue the sum. + """ + flagged = [ + (_result(np.nan, np.nan, 1e6, np.nan, np.nan, np.nan, flags=f), w) + for w, f in flagged_specs + ] + with pytest.raises(ZeroDivisionError): + _average_psf_fits(flagged) + + +# --------------------------------------------------------------------------- # +# (d) make_cat: an obj_id absent from the ngmix cat keeps its sentinel fill. +# --------------------------------------------------------------------------- # + +# The sentinel value each column family is pre-filled with before the matched +# rows are overwritten (mirrors make_cat._save_ngmix_data). The property: a row +# whose obj_id never appears among the ngmix ids keeps exactly these. +_SENTINELS = { + "NGMIX_T_NOSHEAR": 0.0, + "NGMIX_SNR_NOSHEAR": 0.0, + "NGMIX_FLAGS_NOSHEAR": 0.0, + "NGMIX_T_PSF_ORIG_NOSHEAR": 0.0, + "NGMIX_T_PSF_RECONV_NOSHEAR": 0.0, + "NGMIX_FLUX_ERR_NOSHEAR": -1.0, + "NGMIX_MAG_ERR_NOSHEAR": -1.0, + "NGMIX_G1_NOSHEAR": -10.0, + "NGMIX_G2_NOSHEAR": -10.0, + "NGMIX_G1_PSF_ORIG_NOSHEAR": -10.0, + "NGMIX_G1_PSF_RECONV_NOSHEAR": -10.0, + "NGMIX_T_ERR_NOSHEAR": 1e30, + "NGMIX_T_ERR_PSF_ORIG_NOSHEAR": 1e30, + "NGMIX_T_ERR_PSF_RECONV_NOSHEAR": 1e30, + "NGMIX_N_EPOCH": 0.0, + "NGMIX_MCAL_FLAGS": 0.0, + "NGMIX_MCAL_TYPES_FAIL": 0.0, +} + +# Per-key write format and a measured value distinct from every sentinel, so a +# matched row is unmistakably "overwritten" and an absent row unmistakably not. +_NGMIX_KEYS = [ + "id", "n_epoch_model", "mcal_types_fail", "nfev_fit", + "g1", "g1_err", "g2", "g2_err", "T", "T_err", + "flux", "flux_err", "s2n", "mag", "mag_err", "flags", "mcal_flags", + "g1_psf_orig", "g2_psf_orig", "g1_err_psf_orig", "g2_err_psf_orig", + "T_psf_orig", "T_err_psf_orig", + "g1_psf_reconv", "g2_psf_reconv", "g1_err_psf_reconv", "g2_err_psf_reconv", + "T_psf_reconv", "T_err_psf_reconv", +] +_INT_KEYS = { + "id", "n_epoch_model", "mcal_types_fail", "nfev_fit", "flags", "mcal_flags" +} +_SHEAR_EXTS = ["1M", "1P", "2M", "2P", "NOSHEAR"] + + +class _NullLogger: + def info(self, *_args, **_kwargs): + pass + + +def _measured_row(obj_id): + """One fit object whose every value is far from any sentinel (5 / 0.5).""" + row = {key: (5 if key in _INT_KEYS else 0.5) for key in _NGMIX_KEYS} + row["id"] = obj_id + return row + + +def _write_ngmix_cat(path, obj_ids): + rows = [_measured_row(oid) for oid in obj_ids] + hdus = [fits.PrimaryHDU()] + for ext in _SHEAR_EXTS: + cols = [ + fits.Column( + name=key, + format="K" if key in _INT_KEYS else "D", + array=np.array([row[key] for row in rows]), + ) + for key in _NGMIX_KEYS + ] + hdus.append(fits.BinTableHDU.from_columns(cols, name=ext)) + fits.HDUList(hdus).writeto(path, overwrite=True) + + +def _run_save_ngmix(ngmix_path, obj_id): + inst = object.__new__(SaveCatalogue) + inst._obj_id = np.asarray(obj_id) + inst._output_dict = {} + inst._cat_size_target = len(inst._obj_id) + inst._w_log = _NullLogger() + err_msg = inst._save_ngmix_data(str(ngmix_path)) + assert err_msg is None + return inst._output_dict + + +# Distinct positive integer obj_ids; split into "fit by ngmix" vs "absent". +_distinct_ids = st.lists( + st.integers(min_value=1, max_value=10_000), + min_size=2, + max_size=8, + unique=True, +) + + +@settings(deadline=None, max_examples=25) +@given(_distinct_ids, st.data()) +def test_absent_obj_id_keeps_sentinel_fill(all_ids, data): + """An obj_id SExtractor saw but ngmix never fit keeps every sentinel. + + The final catalogue carries all of ``all_ids``; ngmix fit only a non-empty + proper subset. The unfit rows must retain the exact per-column sentinel + (0 / -10 / 1e30 / -1), while the fit rows must have been overwritten to the + measured value — so this is not vacuously satisfied by an all-sentinel cat. + """ + fit_ids = data.draw( + st.lists(st.sampled_from(all_ids), min_size=1, unique=True).filter( + lambda s: 0 < len(s) < len(all_ids) + ) + ) + absent_ids = [oid for oid in all_ids if oid not in fit_ids] + + with tempfile.TemporaryDirectory() as tmp: + ngmix_path = os.path.join(tmp, "ngmix.fits") + _write_ngmix_cat(ngmix_path, fit_ids) + out = _run_save_ngmix(ngmix_path, all_ids) + + absent_idx = [all_ids.index(oid) for oid in absent_ids] + fit_idx = [all_ids.index(oid) for oid in fit_ids] + + for col, sentinel in _SENTINELS.items(): + arr = np.asarray(out[col]) + npt.assert_allclose( + arr[absent_idx], sentinel, + err_msg=f"{col}: absent rows lost their sentinel {sentinel}", + ) + # The fit rows were overwritten — measured value 5 (int cols) or 0.5, + # both distinct from every sentinel, so the fill wasn't global. + assert not np.any(np.isclose(arr[fit_idx], sentinel)), ( + f"{col}: a fit row still carries the sentinel {sentinel}" + ) diff --git a/tests/module/test_psf_grammar_properties.py b/tests/module/test_psf_grammar_properties.py new file mode 100644 index 000000000..593614a1c --- /dev/null +++ b/tests/module/test_psf_grammar_properties.py @@ -0,0 +1,461 @@ +"""PROPERTY-BASED TESTS FOR THE NGMIX PSF COLUMN GRAMMAR. + +Drives ``SaveCatalogue._save_ngmix_data`` (the make_cat writer) under +hypothesis to pin three invariants of the renamed grammar +``NGMIX[m]_[_ERR][_]_`` (shapepipe#749, #761): the +galaxy is the implicit default object and carries NO ``OBJECT`` token +(``NGMIX_G1_NOSHEAR``, never ``NGMIX_G1_GAL_NOSHEAR``), plus the three +OBJECT/SHEAR-less metadata columns (``NGMIX[m]_MCAL_FLAGS``, ``NGMIX_N_EPOCH``, +``NGMIX_MCAL_TYPES_FAIL``), where the ORIGINAL image PSF (``PSF_ORIG``) and the +metacal RECONVOLUTION kernel (``PSF_RECONV``) are independent fits of +*different* PSFs: + +(a) un-aliasing holds for ALL inputs: for arbitrary distinct values fed as the + orig vs reconv ngmix source columns, every emitted ``*_PSF_ORIG_*`` column + traces to its orig source and every ``*_PSF_RECONV_*`` to its reconv + source, and the two families are never crossed; +(b) every column name the writer emits matches the grammar regex (the OBJECT + group is optional, so a regressed ``GAL`` token is rejected, not + absorbed); +(c) every ``NGMIX_*`` token the shipped param file + (``example/cfis/final_cat.param``) names is a column the writer can + produce — writer/param-file consistency; +(d) the FULL frozen grammar (shapepipe#761) — ``ESTIMATOR_COMPONENT[_ERR]_ + OBJECT[_metacaltype]`` — holds across both estimator families: ngmix + splits ellipticity into ``G1``/``G2`` (g-type), HSM into ``E1``/``E2`` + (e-type); every family stores exactly one size, ``T``; HSM keeps the + singular ``FLAG`` token, ngmix keeps plural ``FLAGS``. This part is a + static example-based check (not writer-driven), so it covers HSM without + depending on its producer module — see ``test_psfex_interp.py`` for the + writer-driven coverage of that family. + +Setup for (a)-(c) mirrors ``tests/module/test_make_cat.py``: a synthetic +ngmix FITS with the five shear-type extensions carrying the exact +``compile_results`` key set, read back through ``_save_ngmix_data``. +""" + +import re +from pathlib import Path + +import numpy as np +import numpy.testing as npt +import pytest +from astropy.io import fits +from hypothesis import given, settings +from hypothesis import strategies as st + +from shapepipe.modules.make_cat_package.make_cat import SaveCatalogue + + +# --------------------------------------------------------------------------- +# Harness (mirrors tests/module/test_make_cat.py) +# --------------------------------------------------------------------------- + +class _NullLogger: + def info(self, *_args, **_kwargs): + pass + + +SHEAR_EXTS = ["1M", "1P", "2M", "2P", "NOSHEAR"] + +# The full per-object key set ngmix.compile_results emits into every +# shear-type extension. Integer-typed keys carry FITS format "K", the rest +# "D"; everything else mirrors test_make_cat._ngmix_row. +INT_KEYS = { + "id", "n_epoch_model", "mcal_types_fail", "nfev_fit", "flags", "mcal_flags", +} +NGMIX_KEYS = [ + "id", "n_epoch_model", "mcal_types_fail", "nfev_fit", + "g1", "g1_err", "g2", "g2_err", + "T", "T_err", + "flux", "flux_err", "s2n", "mag", "mag_err", + "flags", "mcal_flags", + "g1_psf_orig", "g2_psf_orig", + "g1_err_psf_orig", "g2_err_psf_orig", + "T_psf_orig", "T_err_psf_orig", + "g1_psf_reconv", "g2_psf_reconv", + "g1_err_psf_reconv", "g2_err_psf_reconv", + "T_psf_reconv", "T_err_psf_reconv", +] + +# The six orig-PSF source keys and the six reconv-PSF source keys, paired by +# the output column they feed. Each pair is (output-column infix, source key); +# the infix slots into f"NGMIX_{infix}_{shear}". Holding this mapping +# explicitly is what makes the un-aliasing property check each column against +# its OWN source rather than against a single family-wide value, so a within- +# family swap (e.g. G1 <-> T, or value <-> error) is caught too. +ORIG_COLS = { + "G1_PSF_ORIG": "g1_psf_orig", + "G2_PSF_ORIG": "g2_psf_orig", + "G1_ERR_PSF_ORIG": "g1_err_psf_orig", + "G2_ERR_PSF_ORIG": "g2_err_psf_orig", + "T_PSF_ORIG": "T_psf_orig", + "T_ERR_PSF_ORIG": "T_err_psf_orig", +} +RECONV_COLS = { + "G1_PSF_RECONV": "g1_psf_reconv", + "G2_PSF_RECONV": "g2_psf_reconv", + "G1_ERR_PSF_RECONV": "g1_err_psf_reconv", + "G2_ERR_PSF_RECONV": "g2_err_psf_reconv", + "T_PSF_RECONV": "T_psf_reconv", + "T_ERR_PSF_RECONV": "T_err_psf_reconv", +} + + +def _base_row(obj_id): + """One object's per-key values; PSF keys overwritten by the caller.""" + return { + "id": obj_id, + "n_epoch_model": 3, "mcal_types_fail": 0, "nfev_fit": 7, + "g1": 0.10, "g1_err": 0.011, "g2": -0.20, "g2_err": 0.022, + "T": 0.30, "T_err": 0.033, + "flux": 100.0, "flux_err": 1.0, "s2n": 55.0, + "mag": 21.0, "mag_err": 0.05, + "flags": 0, "mcal_flags": 0, + "g1_psf_orig": 0.0041, "g2_psf_orig": -0.0031, + "g1_err_psf_orig": 2e-5, "g2_err_psf_orig": 3e-5, + "T_psf_orig": 0.071, "T_err_psf_orig": 8e-4, + "g1_psf_reconv": 0.0012, "g2_psf_reconv": 0.0022, + "g1_err_psf_reconv": 1e-5, "g2_err_psf_reconv": 1e-5, + "T_psf_reconv": 0.092, "T_err_psf_reconv": 1e-3, + } + + +def _write_ngmix_cat(path, rows): + """Write a synthetic ngmix FITS with the five shear-type extensions. + + Each extension carries the exact compile_results key set; PSF families are + object-level, so the same per-object values are written across extensions. + """ + hdus = [fits.PrimaryHDU()] + for ext in SHEAR_EXTS: + cols = [ + fits.Column( + name=key, + format="K" if key in INT_KEYS else "D", + array=np.array([row[key] for row in rows]), + ) + for key in NGMIX_KEYS + ] + hdus.append(fits.BinTableHDU.from_columns(cols, name=ext)) + fits.HDUList(hdus).writeto(path, overwrite=True) + + +def _run_save_ngmix(ngmix_path, obj_ids, cat_size_target=None): + """Drive _save_ngmix_data and return its populated output dict.""" + inst = object.__new__(SaveCatalogue) + inst._obj_id = np.asarray(obj_ids) + inst._output_dict = {} + inst._cat_size_target = ( + len(inst._obj_id) if cat_size_target is None else cat_size_target + ) + inst._w_log = _NullLogger() + + err_msg = inst._save_ngmix_data(str(ngmix_path)) + assert err_msg is None + return inst._output_dict + + +# --------------------------------------------------------------------------- +# Grammar regex + producible-column model +# --------------------------------------------------------------------------- + +# NGMIX[m]_[_ERR]__, plus the three per-object +# metadata columns that carry neither OBJECT nor SHEAR. Anchored so a stray +# legacy token (_PSFo, _Tpsf, NGMIX_ELL_*) fails to match. +# Galaxy is the implicit default object — no token (shapepipe#761), so the +# OBJECT group is OPTIONAL: ``NGMIX_G1_NOSHEAR`` (galaxy, no object token) +# matches alongside ``NGMIX_G1_PSF_ORIG_NOSHEAR`` (explicit PSF object). This +# also makes the regex auto-reject any future ``GAL``-token regression: a +# stray ``_GAL_`` between COMPONENT and SHEAR cannot be absorbed by the +# optional group (``GAL`` is not one of its alternatives) and there is no +# other slot for it to land in. +_COMPONENT = "G1|G2|T|SNR|FLUX|MAG|FLAGS" +_OBJECT = "PSF_ORIG|PSF_RECONV" +_SHEAR = "NOSHEAR|1P|1M|2P|2M" +GRAMMAR_RE = re.compile( + rf"^NGMIXm?_(?:{_COMPONENT})(?:_ERR)?(?:_(?:{_OBJECT}))?_(?:{_SHEAR})$" + rf"|^NGMIXm?_(?:MCAL_FLAGS|MCAL_TYPES_FAIL|N_EPOCH)$" +) + +# The shipped final-catalogue param files, two levels up from tests/module/. +# Both are consumer contracts updated to the new grammar, so both are checked. +_EXAMPLE = Path(__file__).resolve().parents[2] / "example" +PARAM_PATHS = [ + _EXAMPLE / "cfis" / "final_cat.param", + _EXAMPLE / "unions_800" / "cat_matched.param", +] + +# The one param-file NGMIX token outside the _save_ngmix_data grammar: the +# moments-failure flag, set by a different code path (not the metacal writer). +# Named explicitly so the consistency check stays honest — it is excluded by +# name, not silently dropped, and the test asserts it really is absent from the +# producible set so this carve-out can never mask a real grammar regression. +NON_WRITER_PARAM_TOKENS = {"NGMIX_MOM_FAIL"} + + +def _param_ngmix_tokens(param_path): + """Every distinct NGMIX_* token named in a shipped param file.""" + text = param_path.read_text() + return set(re.findall(r"\bNGMIX[A-Za-z0-9_]*", text)) + + +def _produced_columns(obj_ids): + """The set of column names the writer emits for these obj_ids.""" + rows = [_base_row(oid) for oid in obj_ids] + import tempfile + with tempfile.NamedTemporaryFile(suffix=".fits", delete=False) as fh: + path = fh.name + _write_ngmix_cat(path, rows) + out = _run_save_ngmix(path, obj_ids) + Path(path).unlink() + return set(out) + + +# --------------------------------------------------------------------------- +# Strategies (physically-sensible ranges so generated inputs are valid) +# --------------------------------------------------------------------------- + +# Distinct, finite object NUMBERs. +obj_ids_strategy = st.lists( + st.integers(min_value=1, max_value=10_000), + min_size=1, max_size=5, unique=True, +) + +# A finite, well-separated value: ellipticity-/size-scale magnitudes the writer +# copies verbatim. Bounded away from zero and from each other (see below). +_finite = st.floats( + min_value=-1.0, max_value=1.0, + allow_nan=False, allow_infinity=False, +).filter(lambda x: abs(x) > 1e-6) + + +# --------------------------------------------------------------------------- +# (a) Un-aliasing for ALL inputs +# --------------------------------------------------------------------------- + +@settings(deadline=None, max_examples=25) +@given( + orig_seed=_finite, + reconv_seed=_finite, + obj_ids=obj_ids_strategy, +) +def test_psf_orig_reconv_unaliased_for_all_distinct_sources( + orig_seed, reconv_seed, obj_ids, tmp_path_factory +): + """Orig/reconv PSF columns trace to their OWN source, never crossed (#749). + + For arbitrary distinct seeds, fill the six orig-PSF source keys with values + derived from ``orig_seed`` and the six reconv-PSF source keys from + ``reconv_seed`` — each of the twelve keys a DISTINCT value (seed times a + per-key multiplier). Then assert every emitted ``*_PSF_ORIG_*`` / + ``*_PSF_RECONV_*`` column equals the value of the exact source key it must + read. A family swap (orig<->reconv) OR a within-family swap (G1<->T, + value<->error) breaks at least one equality, so the property is non-vacuous. + """ + # Multipliers so each of the six keys per family gets its own value; the + # two families also never collide because orig_seed != reconv_seed is + # enforced below and the multiplier set is shared. + mult = { + "g1": 1.0, "g2": 1.3, "g1_err": 1.7, "g2_err": 2.1, + "T": 2.6, "T_err": 3.1, + } + # Guard against orig_seed and reconv_seed mapping any key to equal values + # (would make a cross undetectable for that key). assume() would discard; + # instead nudge reconv so the families are provably disjoint per key. + if abs(orig_seed - reconv_seed) < 1e-3: + reconv_seed = reconv_seed + (0.5 if reconv_seed < 0 else -0.5) + + def _src_val(suffix, family_seed): + # suffix e.g. "g1", "g1_err", "T"; matched to a multiplier by stripping + # the "_psf_orig"/"_psf_reconv" tail the caller already removed. + return family_seed * mult[suffix] + + rows = [] + for oid in obj_ids: + row = _base_row(oid) + for col_infix, src_key in ORIG_COLS.items(): + suffix = src_key.replace("_psf_orig", "") + row[src_key] = _src_val(suffix, orig_seed) + for col_infix, src_key in RECONV_COLS.items(): + suffix = src_key.replace("_psf_reconv", "") + row[src_key] = _src_val(suffix, reconv_seed) + rows.append(row) + + ngmix_path = tmp_path_factory.mktemp("ng") / "ngmix.fits" + _write_ngmix_cat(ngmix_path, rows) + out = _run_save_ngmix(ngmix_path, obj_ids) + + n = len(obj_ids) + for shear in SHEAR_EXTS: + for col_infix, src_key in ORIG_COLS.items(): + suffix = src_key.replace("_psf_orig", "") + npt.assert_allclose( + out[f"NGMIX_{col_infix}_{shear}"], + [_src_val(suffix, orig_seed)] * n, + err_msg=f"NGMIX_{col_infix}_{shear} not from {src_key}", + ) + for col_infix, src_key in RECONV_COLS.items(): + suffix = src_key.replace("_psf_reconv", "") + npt.assert_allclose( + out[f"NGMIX_{col_infix}_{shear}"], + [_src_val(suffix, reconv_seed)] * n, + err_msg=f"NGMIX_{col_infix}_{shear} not from {src_key}", + ) + # The two families are genuinely distinct everywhere — the un-aliasing. + assert not np.allclose( + out[f"NGMIX_G1_PSF_ORIG_{shear}"], + out[f"NGMIX_G1_PSF_RECONV_{shear}"], + ) + assert not np.allclose( + out[f"NGMIX_T_PSF_ORIG_{shear}"], + out[f"NGMIX_T_PSF_RECONV_{shear}"], + ) + + +# --------------------------------------------------------------------------- +# (b) Every emitted column name matches the grammar +# --------------------------------------------------------------------------- + +@settings(deadline=None, max_examples=25) +@given(obj_ids=obj_ids_strategy) +def test_emitted_column_names_match_grammar(obj_ids, tmp_path_factory): + """Every column _save_ngmix_data emits matches the grammar regex. + + The grammar is ``NGMIX[m]_[_ERR]__`` plus the + three metadata columns. The regex is anchored, so a legacy token (``_PSFo``, + ``_Tpsf``, ``NGMIX_ELL_*``) or a malformed name would fail to match. Run + over hypothesis-varied object sets to confirm the emitted name set is + input-independent and always well-formed. + """ + rows = [_base_row(oid) for oid in obj_ids] + ngmix_path = tmp_path_factory.mktemp("ng") / "ngmix.fits" + _write_ngmix_cat(ngmix_path, rows) + out = _run_save_ngmix(ngmix_path, obj_ids) + + assert out, "writer produced no columns" + for col in out: + assert GRAMMAR_RE.match(col), f"off-grammar column emitted: {col!r}" + # Galaxy is the implicit default object — a regressed GAL token would + # match neither the optional OBJECT group nor any other slot, but + # assert it directly so the failure message names the real defect. + assert "_GAL_" not in col and not col.endswith("_GAL"), col + + +# --------------------------------------------------------------------------- +# (c) Param-file tokens are all producible by the writer +# --------------------------------------------------------------------------- + +@pytest.mark.parametrize( + "param_path", PARAM_PATHS, ids=lambda p: f"{p.parent.name}/{p.name}" +) +@settings(deadline=None, max_examples=15) +@given(obj_ids=obj_ids_strategy) +def test_param_file_ngmix_tokens_are_producible(param_path, obj_ids): + """Every NGMIX_* token the param file names is a column the writer produces. + + Each shipped final-catalogue param file (``example/cfis/final_cat.param`` + and ``example/unions_800/cat_matched.param``) is a consumer contract for the + final catalogue; ``create_final_cat`` keeps only the listed columns, so a + token it names that the writer cannot emit is a silent, empty column + downstream. Both files are checked so a future divergence in either (a + typo'd or stale NGMIX token) cannot escape the consistency check. The one + known exception — ``NGMIX_MOM_FAIL``, the moments-failure flag set by a + different path — is excluded BY NAME, and we assert it is genuinely outside + the producible set so the carve-out cannot hide a real grammar regression. + + Driven over varied object sets to confirm the producible set is stable + across inputs (the contract is structural, not data-dependent). + """ + produced = _produced_columns(obj_ids) + param_tokens = _param_ngmix_tokens(param_path) + + # The carve-out is real: the excluded token is NOT producible. (If a future + # change made the writer emit it, this fails and we drop the exclusion.) + for tok in NON_WRITER_PARAM_TOKENS: + assert tok not in produced, ( + f"{tok} is now producible — remove it from NON_WRITER_PARAM_TOKENS" + ) + + missing = (param_tokens - NON_WRITER_PARAM_TOKENS) - produced + assert not missing, ( + f"param file names NGMIX columns the writer cannot produce: " + f"{sorted(missing)}" + ) + + # Non-vacuous: the writer DOES cover real grammar tokens from the param + # file (so the assertion above is exercised against a non-empty set). + assert (param_tokens - NON_WRITER_PARAM_TOKENS) & produced + + +# --------------------------------------------------------------------------- +# (d) The FULL frozen grammar, across both estimator families +# --------------------------------------------------------------------------- +# +# ``ESTIMATOR_COMPONENT[_ERR]_OBJECT[_metacaltype]`` (shapepipe#761). This +# section encodes the grammar itself as a static property — example column +# names, not a driven writer — so it covers ngmix and HSM together without +# depending on the HSM producer module (covered by its own writer-driving +# test, ``test_psfex_interp.py``). What it pins: +# +# - galaxy is the implicit default object, no token, for the g-type ngmix +# writer; +# - ellipticity is split into named scalar components, never a packed +# 2-vector: ``G1``/``G2`` for g-type (ngmix), ``E1``/``E2`` for e-type +# (HSM); +# - exactly one stored size, ``T`` (no ``SIGMA``/``FWHM``/``r50``); +# - HSM keeps the singular ``FLAG`` token; ngmix keeps plural ``FLAGS``. + +FROZEN_GRAMMAR_RE = re.compile( + r"^(?:" + # ngmix: galaxy (no object token) or explicit PSF_ORIG/PSF_RECONV. + r"NGMIXm?_(?:G1|G2|T|SNR|FLUX|MAG|FLAGS)(?:_ERR)?" + r"(?:_(?:PSF_ORIG|PSF_RECONV))?_(?:NOSHEAR|1P|1M|2P|2M)" + r"|NGMIXm?_(?:MCAL_FLAGS|MCAL_TYPES_FAIL|N_EPOCH)" + # HSM: e-type, explicit PSF/STAR object, singular FLAG; the multi-epoch + # sink in make_cat._save_psf_data appends a bare epoch index. + r"|HSM_(?:E1|E2|T)_(?:PSF|STAR)(?:_\d+)?" + r"|HSM_FLAG_(?:PSF|STAR)(?:_\d+)?" + r")$" +) + +# Representative columns the new grammar PRODUCES — one or more per family, +# chosen to exercise every rule above (GAL-drop, G1/G2 vs E1/E2, single T, +# FLAG vs FLAGS). +_GRAMMAR_VALID_EXAMPLES = [ + "NGMIX_G1_NOSHEAR", # galaxy, no object token + "NGMIX_T_ERR_PSF_ORIG_1M", + "NGMIX_FLAGS_2P", + "NGMIXm_G1_PSF_RECONV_NOSHEAR", + "NGMIX_MCAL_FLAGS", + "HSM_E1_PSF", + "HSM_T_STAR", + "HSM_FLAG_PSF", + "HSM_E1_PSF_3", # make_cat multi-epoch sink + "HSM_T_PSF_2", + "HSM_FLAG_STAR_1", +] + +# Pre-#761 / off-grammar columns the rename REMOVES — each violates exactly +# one rule, named so a failure here points at the specific regression. +_GRAMMAR_INVALID_EXAMPLES = [ + "NGMIX_G1_GAL_NOSHEAR", # GAL token regression + "NGMIX_ELL_NOSHEAR", # packed ellipticity + "E1_PSF_HSM", # pre-rename HSM naming (not HSM_-prefixed) + "SIGMA_PSF_HSM", # raw sigma, not T + "HSM_SIGMA_PSF", # stored sigma instead of T + "HSM_FLAGS_PSF", # plural — HSM is singular FLAG + "SPREAD_MODEL", # removed entirely, not renamed +] + + +@pytest.mark.parametrize("col", _GRAMMAR_VALID_EXAMPLES) +def test_frozen_grammar_accepts_new_grammar_examples(col): + """One representative column per family/rule matches the frozen grammar.""" + assert FROZEN_GRAMMAR_RE.match(col), f"should match grammar: {col!r}" + + +@pytest.mark.parametrize("col", _GRAMMAR_INVALID_EXAMPLES) +def test_frozen_grammar_rejects_legacy_examples(col): + """Each pre-#761 / removed column is rejected, not silently absorbed.""" + assert not FROZEN_GRAMMAR_RE.match(col), f"should NOT match grammar: {col!r}" diff --git a/tests/module/test_psf_noop_property.py b/tests/module/test_psf_noop_property.py new file mode 100644 index 000000000..98f47b2bf --- /dev/null +++ b/tests/module/test_psf_noop_property.py @@ -0,0 +1,183 @@ +"""PROPERTY-BASED TESTS: the original-PSF pre-fit is a science no-op. + +``average_original_psf`` fits each epoch's psfex/mccd PSF stamp +(``gal_obs.psf``) with the shared ``psf_runner`` to populate the independent +original-PSF columns (``NGMIX_*_PSF_ORIG``; shapepipe#749). It fits a *copy* of +each PSF observation, so the observation metacal later deep-copies and consumes +(``boot.go(gal_obs_list)``) is left untouched. + +Pristine ``gal_obs.psf`` is *one* of the two conditions for the add-column +refactor to be bit-identical to a no-original-fit branch on the galaxy/shear +results: it closes the PSF-aliasing channel, which this test pins. +``PSFRunner.go`` sets ``.gmix`` (and ``.meta['result']``) on the observation it +fits; were that ``gal_obs.psf`` itself, a stray gmix would survive metacal's +deep copy and be reused as the ``MetacalFitGaussPSF`` fallback when admom+ML +both fail, silently rescuing objects the base branch dropped +(``BootPSFFailure``). The *second* condition — the pre-fit not advancing the +RNG that metacal consumes — is ensured in ``do_ngmix_metacal`` by seeding the +pre-fit from a snapshot of the RNG state; together they leave the galaxy +results unchanged. + +The unit guard ``test_original_psf_prefit_leaves_gal_obs_psf_pristine`` in +``tests/module/test_ngmix.py`` pins this on one fixed input; here it is +generalised over varied PSF shapes, noise, epoch counts, stamp sizes, and RNG +seeds with hypothesis, so the no-op holds across the input space rather than at +a single point. The harness (``make_data`` -> ``make_ngmix_observation`` -> +``make_runners`` -> ``average_original_psf``) mirrors that unit test exactly. +""" + +import copy + +from hypothesis import given, settings +from hypothesis import strategies as st +import numpy as np +import numpy.testing as npt +from ngmix.observation import ObsList + +from shapepipe.modules.ngmix_package.ngmix import ( + average_original_psf, + get_prior, + make_ngmix_observation, + make_runners, +) +from shapepipe.testing.simulate import make_data + +PIXEL_SCALE = 0.1857 + + +def _jacobian_snapshot(jacobian): + """The six floats fully defining an ngmix Jacobian (affine + origin).""" + return ( + jacobian.dvdrow, jacobian.dvdcol, + jacobian.dudrow, jacobian.dudcol, + jacobian.row0, jacobian.col0, + ) + + +def _build_gal_obs_list(psf_shear, noise, n_epochs, img_size, data_seed, obs_seed): + """Build the per-epoch galaxy ObsList metacal would consume. + + Mirrors the construction in + ``test_original_psf_prefit_leaves_gal_obs_psf_pristine``: simulate epochs + with ``make_data`` (here over hypothesis-varied PSF shape, noise, epoch + count, stamp size, and seed), then wrap each through + ``make_ngmix_observation`` so each ``gal_obs.psf`` is a real, freshly built + PSF observation carrying no gmix. + """ + rng = np.random.RandomState(obs_seed) + gals, psfs, _, weights, flags, jacobs = make_data( + rng=np.random.RandomState(data_seed), + shear=(0.0, 0.0), + noise=noise, + n_epochs=n_epochs, + img_size=img_size, + psf_shear=psf_shear, + ) + gal_obs_list = ObsList() + for n_e in range(n_epochs): + gal_obs_list.append( + make_ngmix_observation( + gals[n_e], weights[n_e], flags[n_e], psfs[n_e], jacobs[n_e], + rng, + ) + ) + return gal_obs_list + + +# Physically sensible strategies: |g| well below 1 (Moffat shear must stay +# realisable), positive noise, >=1 epoch, odd stamp large enough to host the +# 0.55" FWHM PSF, and bounded RNG seeds. +psf_g_component = st.floats( + min_value=-0.3, max_value=0.3, allow_nan=False, allow_infinity=False +) +noise_strategy = st.floats( + min_value=1e-6, max_value=1e-3, allow_nan=False, allow_infinity=False +) +n_epochs_strategy = st.integers(min_value=1, max_value=3) +img_size_strategy = st.sampled_from([41, 51, 65]) +seed_strategy = st.integers(min_value=0, max_value=2**31 - 1) + + +@given( + g1=psf_g_component, + g2=psf_g_component, + noise=noise_strategy, + n_epochs=n_epochs_strategy, + img_size=img_size_strategy, + data_seed=seed_strategy, + obs_seed=seed_strategy, +) +@settings(deadline=None, max_examples=25) +def test_average_original_psf_leaves_gal_obs_psf_pristine_property( + g1, g2, noise, n_epochs, img_size, data_seed, obs_seed +): + """For arbitrary inputs, the original-PSF pre-fit leaves gal_obs.psf pristine. + + The no-op invariant that guarantees bit-identical galaxy/shear results: + after ``average_original_psf``, every epoch's ``gal_obs.psf`` is unchanged + in every channel metacal reads — + + * ``has_gmix()`` is still False (no fitted gmix leaked in), and no fit + ``result`` leaked into ``.meta``; + * the PSF ``image``, ``weight``, and ``jacobian`` are bit-for-bit what they + were before the call. + + Each assertion is load-bearing: were the pre-fit to fit ``gal_obs.psf`` + itself instead of a copy, ``PSFRunner.go`` would set ``.gmix`` / + ``.meta['result']`` (first two assertions catch that), and centroid/weight + handling inside the fit could perturb the arrays (the array/jacobian + snapshots catch that). The fit still *runs* on every epoch — this pins that + it runs harmlessly on a copy, not that it is skipped. + """ + psf_shear = (g1, g2) + rng = np.random.RandomState(obs_seed) + prior = get_prior(PIXEL_SCALE, rng) + gal_obs_list = _build_gal_obs_list( + psf_shear, noise, n_epochs, img_size, data_seed, obs_seed + ) + + # Pre-state: a freshly built PSF observation carries no gmix or result, and + # we snapshot every array channel metacal will consume. + assert not any(obs.psf.has_gmix() for obs in gal_obs_list) + pre = [ + { + "image": obs.psf.image.copy(), + "weight": obs.psf.weight.copy(), + "jacobian": _jacobian_snapshot(obs.psf.jacobian), + "meta": copy.deepcopy(dict(obs.psf.meta)), + } + for obs in gal_obs_list + ] + + runner, psf_runner = make_runners(prior, 1.0, rng) + psf_orig_res = average_original_psf(gal_obs_list, psf_runner) + + # The pre-fit actually did work (so the no-op is non-vacuous): at least one + # epoch's PSF was fit and averaged in. + assert psf_orig_res["n_epoch"] >= 1 + + for obs, snapshot in zip(gal_obs_list, pre): + psf = obs.psf + # No fitted gmix leaked into the observation metacal consumes. + assert not psf.has_gmix(), ( + "original-PSF pre-fit leaked a gmix into gal_obs.psf" + ) + # No fit result leaked into its metadata. + assert "result" not in psf.meta, ( + "original-PSF pre-fit leaked a fit result into gal_obs.psf.meta" + ) + assert dict(psf.meta) == snapshot["meta"], ( + "original-PSF pre-fit mutated gal_obs.psf.meta" + ) + # The image, weight, and jacobian metacal reads are bit-for-bit intact. + npt.assert_array_equal( + psf.image, snapshot["image"], + err_msg="original-PSF pre-fit mutated gal_obs.psf.image", + ) + npt.assert_array_equal( + psf.weight, snapshot["weight"], + err_msg="original-PSF pre-fit mutated gal_obs.psf.weight", + ) + assert _jacobian_snapshot(psf.jacobian) == snapshot["jacobian"], ( + "original-PSF pre-fit mutated gal_obs.psf.jacobian" + ) diff --git a/tests/module/test_psf_physics_properties.py b/tests/module/test_psf_physics_properties.py new file mode 100644 index 000000000..b0feeab51 --- /dev/null +++ b/tests/module/test_psf_physics_properties.py @@ -0,0 +1,118 @@ +"""PROPERTY-BASED TESTS FOR NGMIX METACAL PSF PHYSICS. + +Hypothesis tests over *random elliptical PSF stamps*. The metacal +reconvolution kernel is round and dilated by construction, so for ANY input +PSF ellipticity / size it must come back rounder and larger than the original +image-PSF fit: + + T_reconv >= T_orig and |g_reconv| <= |g_orig|. + +This is a genuine metacal physics invariant (shapepipe#749) and, because the +two PSF families (``reconv`` -> ``average_multiepoch_psf``, ``orig`` -> +``average_original_psf``) share key names, it also catches any orig/reconv +routing transposition in :func:`do_ngmix_metacal` — a swap that is otherwise +silent. The single-example sibling lives in +``test_ngmix.test_reconv_psf_is_rounder_and_larger_than_orig_on_elliptical_psf``; +this widens it to a swept family of PSFs. +""" + +import numpy as np +from hypothesis import given, settings +from hypothesis import strategies as st + + +def _do_ngmix_metacal_on_psf(psf_shear, psf_fwhm=0.55, img_size=51, seed=7, + n_epochs=2): + """Run do_ngmix_metacal on a simulated stamp with a given PSF shape. + + Mirrors ``test_ngmix._do_ngmix_metacal_on_psf`` (the harness this file is + asked to reuse), exposing ``psf_fwhm`` so the PSF *size* can be swept too. + The galaxy stamp is drawn from a fixed seed (123); only the metacal RNG + varies with ``seed``. Returns ``(resdict, reconv, orig)`` — the + :class:`MetacalResult` NamedTuple, still unpacking positionally. + """ + from shapepipe.modules.ngmix_package.ngmix import ( + Postage_stamp, + do_ngmix_metacal, + get_prior, + ) + from shapepipe.testing.simulate import make_data + + rng = np.random.RandomState(seed) + prior = get_prior(0.1857, rng) + gals, psfs, _, weights, flags, jacobs = make_data( + rng=np.random.RandomState(123), + shear=(0.0, 0.0), + noise=1e-5, + n_epochs=n_epochs, + img_size=img_size, + psf_fwhm=psf_fwhm, + psf_shear=psf_shear, + ) + stamp = Postage_stamp(bkg_sub=False, megacam_flip=False) + stamp.gals, stamp.psfs, stamp.weights, stamp.flags, stamp.jacobs = ( + gals, + psfs, + weights, + flags, + jacobs, + ) + return do_ngmix_metacal(stamp, prior, 1.0, rng) + + +# Physically-sensible PSF strategies: ellipticity well inside |g| < 1 (real +# PSFs are mildly elliptical; a magnitude up to ~0.18 keeps the Moffat shear +# valid and the fit well-behaved), and a Moffat FWHM in arcsec spanning the +# realistic ground-based range. NB: the original-PSF fit uses the same +# (galaxy-shaped) prior as the galaxy fit, which shrinks the *recovered* +# |g_orig| to O(1e-4) even for input |g|~0.1 (the shapepipe#749 +# prior-domination) -- so the dilation (T) inequality, not the rounding (|g|) +# one, is the primary guard. +_g_complex = st.complex_numbers( + min_magnitude=0.02, max_magnitude=0.18, allow_nan=False, allow_infinity=False +) +_psf_fwhm = st.floats(min_value=0.5, max_value=0.7, allow_nan=False, + allow_infinity=False) + + +@settings(deadline=None, max_examples=25) +@given(g=_g_complex, psf_fwhm=_psf_fwhm) +def test_reconv_psf_rounder_and_larger_than_orig_over_random_psfs(g, psf_fwhm): + """For ANY elliptical PSF, the reconv kernel is rounder and larger. + + Sweeps the input PSF ellipticity ``(g1, g2)`` (magnitude 0.02-0.18, any + orientation) and the Moffat FWHM, runs the full :func:`do_ngmix_metacal`, + and asserts the metacal reconvolution kernel is round + dilated relative to + the independently-fit original image PSF: + + ``T_reconv >= T_orig`` and ``|g_reconv| <= |g_orig|``. + + The ``T`` inequality is the robust transposition catcher: the kernel is + dilated, so ``T_reconv`` exceeds ``T_orig`` by a clear margin (~0.01-0.03) + and a strict ``>`` is asserted below -- a transposition makes ``T_reconv`` + the smaller value and fails it hard. The ``|g|`` inequality corroborates but + with a thin margin: under the same (galaxy-shaped) prior as the galaxy fit + the recovered ``|g_orig|`` is only O(1e-4) (shapepipe#749 prior-domination), + so it is a secondary check. + """ + _, reconv, orig = _do_ngmix_metacal_on_psf( + (g.real, g.imag), psf_fwhm=psf_fwhm + ) + + g_reconv = np.hypot(*reconv["g_psf"]) + g_orig = np.hypot(*orig["g_psf"]) + + # Round: the reconvolution kernel carries less ellipticity than the + # original PSF it was built to dominate (tiny tolerance for fit noise). + assert g_reconv <= g_orig + 1e-6, ( + f"reconv |g|={g_reconv:.5f} not <= orig |g|={g_orig:.5f} " + f"(input PSF g=({g.real:.3f},{g.imag:.3f}), fwhm={psf_fwhm:.3f})" + ) + + # Larger: the reconvolution kernel is strictly dilated relative to the + # original PSF (observed dT ~0.01-0.03, far above fit noise); a transposition + # makes T_reconv the smaller value and fails this hard. + assert reconv["T_psf"] > orig["T_psf"], ( + f"reconv T={reconv['T_psf']:.5f} not dilated above orig T={orig['T_psf']:.5f} " + f"(input PSF g=({g.real:.3f},{g.imag:.3f}), fwhm={psf_fwhm:.3f})" + ) diff --git a/tests/module/test_psfex_interp.py b/tests/module/test_psfex_interp.py index 34da52e00..5e133fda0 100644 --- a/tests/module/test_psfex_interp.py +++ b/tests/module/test_psfex_interp.py @@ -2,11 +2,15 @@ from types import SimpleNamespace +import galsim from astropy.io import fits import numpy as np import numpy.testing as npt import pytest +from hypothesis import assume, given, settings +from hypothesis import strategies as st +from shapepipe.pipeline import file_io from shapepipe.modules.psfex_interp_package.psfex_interp import ( PSFExInterpolator, ) @@ -32,6 +36,45 @@ def _write_galaxy_catalogue(path, **columns): ).writeto(path) +def _elliptical_psf_stamps(shears, sigma=3.0, stamp=51, scale=1.0): + """Draw elliptical Gaussian PSF stamps with given e-type distortions. + + Each ``(e1, e2)`` is applied as a galsim *distortion* (``shear(e1=, e2=)``) + so the stamp is genuinely non-round; round stamps make ``e == g == 0`` and + the e/g grammar fix becomes invisible. + """ + return np.array( + [ + galsim.Gaussian(sigma=sigma) + .shear(e1=e1, e2=e2) + .drawImage(nx=stamp, ny=stamp, scale=scale, method="no_pixel") + .array + for e1, e2 in shears + ] + ) + + +@pytest.fixture(scope="module") +def psfex_interpolator(tmp_path_factory): + """A shape-computing interpolator backed by a minimal galaxy catalogue. + + Module-scoped so it can be driven by the hypothesis property test without + tripping the function-scoped-fixture health check; ``_get_psfshapes`` only + reads ``interp_PSFs`` and overwrites ``psf_shapes``, so reuse is safe. + """ + cat_path = tmp_path_factory.mktemp("psfex") / "galaxies.fits" + _write_galaxy_catalogue(cat_path, X_IMAGE=[1.5, 2.5], Y_IMAGE=[3.5, 4.5]) + return PSFExInterpolator( + dotpsf_path=None, + galcat_path=str(cat_path), + output_path=str(cat_path.parent), + img_number="001", + w_log=SimpleNamespace(info=lambda message: None), + pos_params=["X_IMAGE", "Y_IMAGE"], + get_shapes=True, + ) + + def test_get_galaxy_positions_reads_requested_columns(tmp_path): cat_path = tmp_path / "galaxies.fits" @@ -66,3 +109,100 @@ def test_get_galaxy_positions_raises_for_missing_column(tmp_path): with pytest.raises(KeyError, match="Y_IMAGE"): interp._get_galaxy_positions() + + +# Distortion magnitude well inside |e| < 1, large enough that the e-type and +# g-type representations of the same shape differ by far more than fit noise +# (so the property has teeth), any orientation. +_e_complex = st.complex_numbers( + min_magnitude=0.05, max_magnitude=0.6, allow_nan=False, allow_infinity=False +) + + +@settings(deadline=None, max_examples=25) +@given(e=_e_complex) +def test_get_psfshapes_stores_e_type_distortion(psfex_interpolator, e): + """The stored HSM PSF shapes are e-type distortion, not g-type shear. + + ``_get_psfshapes`` reads ``moms.observed_shape.e1/.e2``; this pins it to the + galsim *distortion* accessor and away from the reduced-shear ``.g1/.g2``. + The check is exact against the same image's ``observed_shape`` (independent + of how well HSM recovers the input), and the magnitude inequality is the + tooth: for any non-round shape ``|e| > |g|`` strictly, so storing ``.g`` + instead (the pre-fix bug) would fail it. + """ + psfex_interpolator.interp_PSFs = _elliptical_psf_stamps([(e.real, e.imag)]) + psfex_interpolator._get_psfshapes() + + moms = galsim.hsm.FindAdaptiveMom( + galsim.Image(psfex_interpolator.interp_PSFs[0]), strict=False + ) + e1, e2 = moms.observed_shape.e1, moms.observed_shape.e2 + g1, g2 = moms.observed_shape.g1, moms.observed_shape.g2 + + mag_e = np.hypot(e1, e2) + mag_g = np.hypot(g1, g2) + assume(mag_e - mag_g > 1e-3) # ensure a non-degenerate e-vs-g distinction + + # Stored values are exactly the g-type distortion components ... + npt.assert_array_equal(psfex_interpolator.psf_shapes[0, 0], g1) + npt.assert_array_equal(psfex_interpolator.psf_shapes[0, 1], g2) + + # ... whose magnitude strictly subceeds the e-type shear. + mag_stored = np.hypot( + psfex_interpolator.psf_shapes[0, 0], psfex_interpolator.psf_shapes[0, 1] + ) + npt.assert_allclose(mag_stored, mag_g) + assert mag_e > mag_stored + 1e-4 + + +def test_write_output_uses_hsm_grammar_and_routes_size_to_T( + tmp_path, monkeypatch +): + """``_write_output`` writes the HSM grammar names and stores T, not sigma. + + Captures the dict handed to ``save_as_fits`` and asserts the frozen + grammar (``HSM_G1_PSF`` / ``HSM_G2_PSF`` / ``HSM_T_PSF`` / ``HSM_FLAG_PSF``, + no legacy ``*_PSF_HSM``) and that the size column is routed through + ``cs_util.size.sigma_to_T`` (``T = 2 sigma^2``) at the producer. + """ + cat_path = tmp_path / "galaxies.fits" + _write_galaxy_catalogue(cat_path, X_IMAGE=[1.5, 2.5], Y_IMAGE=[3.5, 4.5]) + interp = PSFExInterpolator( + dotpsf_path=None, + galcat_path=str(cat_path), + output_path=str(tmp_path), + img_number="001", + w_log=SimpleNamespace(info=lambda message: None), + pos_params=["X_IMAGE", "Y_IMAGE"], + get_shapes=True, + ) + interp.interp_PSFs = _elliptical_psf_stamps([(0.3, 0.1), (-0.2, 0.25)]) + interp._get_psfshapes() + + captured = {} + + def _capture(self, data=None, **kwargs): + captured["data"] = data + + monkeypatch.setattr(file_io.FITSCatalogue, "save_as_fits", _capture) + + interp._write_output() + data = captured["data"] + + assert { + "VIGNET", + "HSM_G1_PSF", + "HSM_G2_PSF", + "HSM_T_PSF", + "HSM_FLAG_PSF", + } <= set(data) + for legacy in ("E1_PSF_HSM", "E2_PSF_HSM", "SIGMA_PSF_HSM", "FLAG_PSF_HSM"): + assert legacy not in data + + # g-type distortion stored straight through from _get_psfshapes ... + npt.assert_array_equal(data["HSM_G1_PSF"], interp.psf_shapes[:, 0]) + npt.assert_array_equal(data["HSM_G2_PSF"], interp.psf_shapes[:, 1]) + # ... and the size column is T = 2 sigma^2 (sigma_to_T), not raw sigma. + npt.assert_allclose(data["HSM_T_PSF"], 2.0 * interp.psf_shapes[:, 2] ** 2) + assert not np.allclose(data["HSM_T_PSF"], interp.psf_shapes[:, 2]) diff --git a/tests/module/test_utilities.py b/tests/module/test_utilities.py index da098d33f..453455cba 100644 --- a/tests/module/test_utilities.py +++ b/tests/module/test_utilities.py @@ -5,126 +5,3 @@ :Author: Samuel Farrens """ - -from unittest import TestCase - -from hypothesis import given -from hypothesis import strategies as st -import numpy as np -import numpy.testing as npt - -from shapepipe.utilities import galaxy - -positive_floats = st.floats( - min_value=1.0e-100, - max_value=1.0e100, - allow_nan=False, - allow_infinity=False, -) - - -class GalaxyTestCase(TestCase): - - def setUp(self): - - self.sigma_float = 5.5 - self.sigma_array = np.arange(1, 4) * 0.1 - self.sigma_int = 1 - self.sigma_array_int = np.arange(3) - self.pixel_scale = 2.0 - self.pixel_int = 1 - self.sigma_float_exp = 12.9515102 - self.sigma_float_ps_exp = 25.9030204 - self.sigma_array_exp = np.array([0.235482, 0.47096401, 0.70644601]) - - def tearDown(self): - - self.sigma_float = None - self.sigma_array = None - self.sigma_int = None - self.sigma_array_int = None - self.pixel_scale = None - self.pixel_int = None - self.sigma_float_exp = None - self.sigma_float_ps_exp = None - self.sigma_array_exp = None - - def test_sigma_to_fwhm(self): - - npt.assert_almost_equal( - galaxy.sigma_to_fwhm(self.sigma_float), - self.sigma_float_exp, - err_msg="sigma_to_fwhm gave invalid result for float input", - ) - - npt.assert_almost_equal( - galaxy.sigma_to_fwhm(self.sigma_float, self.pixel_scale), - self.sigma_float_ps_exp, - err_msg=( - "sigma_to_fwhm gave invalid result for float input with " - + "non-default pixel scale" - ), - ) - - npt.assert_allclose( - galaxy.sigma_to_fwhm(self.sigma_array), - self.sigma_array_exp, - err_msg="sigma_to_fwhm gave invalid result for array input", - ) - - npt.assert_raises(TypeError, galaxy.sigma_to_fwhm, self.sigma_int) - - npt.assert_raises( - TypeError, - galaxy.sigma_to_fwhm, - self.sigma_array_int, - ) - - npt.assert_raises( - TypeError, - galaxy.sigma_to_fwhm, - self.sigma_float, - self.pixel_int, - ) - - npt.assert_raises(ValueError, galaxy.sigma_to_fwhm, -self.sigma_float) - - npt.assert_raises( - ValueError, - galaxy.sigma_to_fwhm, - np.negative(self.sigma_array), - ) - - npt.assert_raises( - ValueError, - galaxy.sigma_to_fwhm, - self.sigma_float, - -self.pixel_scale, - ) - - -@given(positive_floats, positive_floats) -def test_sigma_to_fwhm_is_linear_in_sigma_and_pixel_scale(sigma, pixel_scale): - - fwhm = galaxy.sigma_to_fwhm(sigma, pixel_scale) - - assert fwhm > 0.0 - npt.assert_allclose( - galaxy.sigma_to_fwhm(sigma) * pixel_scale, - fwhm, - rtol=1.0e-12, - ) - - -@given( - st.lists(positive_floats, min_size=1, max_size=20), - positive_floats, -) -def test_sigma_to_fwhm_preserves_array_shape_and_scale(sigmas, pixel_scale): - - sigma_array = np.array(sigmas, dtype=np.float64) - fwhm = galaxy.sigma_to_fwhm(sigma_array, pixel_scale) - - assert fwhm.shape == sigma_array.shape - assert np.all(fwhm > 0.0) - npt.assert_allclose(fwhm / sigma_array, fwhm[0] / sigma_array[0]) diff --git a/tests/science/test_mbias.py b/tests/science/test_mbias.py index 347bb5be0..d7a0ebec8 100644 --- a/tests/science/test_mbias.py +++ b/tests/science/test_mbias.py @@ -56,7 +56,7 @@ def _recover_g1_with_response(seed=42): stamp.gals, stamp.psfs, stamp.weights, stamp.flags, stamp.jacobs = ( gals, psfs, weights, flags, jacobs, ) - res, _ = do_ngmix_metacal(stamp, prior, 1.0, rng) + res, _, _ = do_ngmix_metacal(stamp, prior, 1.0, rng) g1_noshear = res["noshear"]["g"][0] R11 = (res["1p"]["g"][0] - res["1m"]["g"][0]) / (2 * METACAL_STEP) diff --git a/uv.lock b/uv.lock index 7360bb6c2..f5fe9c705 100644 --- a/uv.lock +++ b/uv.lock @@ -654,8 +654,8 @@ wheels = [ [[package]] name = "cs-util" -version = "0.2" -source = { registry = "https://pypi.org/simple" } +version = "0.2.1" +source = { git = "https://github.com/CosmoStat/cs_util?branch=develop#ff8fa2b2168737353ac867cc21a8e95be082f17b" } dependencies = [ { name = "astropy", marker = "sys_platform == 'linux'" }, { name = "camb", marker = "sys_platform == 'linux'" }, @@ -670,10 +670,6 @@ dependencies = [ { name = "swig", marker = "sys_platform == 'linux'" }, { name = "vos", marker = "sys_platform == 'linux'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/5a/1d/068d7522652fed361d80502c958f37080a9f0cfee5ad201b9dea0197b562/cs_util-0.2.tar.gz", hash = "sha256:4cbe01556fb47e832b5d517af812a6cf4a94856ad134be8c21e74874b4d56de6", size = 24932, upload-time = "2026-04-07T07:59:21.174Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/5f/63/9dcd114438db8b7be74dee98dc3841e43335b1c0cc6aba64ec4bd1de3c01/cs_util-0.2-py3-none-any.whl", hash = "sha256:e329ea5f2a4f3ea9b8f0649f5c9321bc18ddf4ad76bffc8152fb22e3f4168ef4", size = 29723, upload-time = "2026-04-07T07:59:19.764Z" }, -] [[package]] name = "cycler" @@ -3515,7 +3511,7 @@ requires-dist = [ { name = "black", marker = "extra == 'lint'" }, { name = "build", marker = "extra == 'release'" }, { name = "canfar" }, - { name = "cs-util", specifier = ">=0.1.9" }, + { name = "cs-util", git = "https://github.com/CosmoStat/cs_util?branch=develop" }, { name = "fitsio", marker = "extra == 'fitsio'" }, { name = "galsim", specifier = ">=2.8" }, { name = "h5py" },