Skip to content

Commit 6b2de10

Browse files
committed
Add testthat coverage for trait meta-analysis pipeline functions
Add comprehensive test coverage for the core meta-analysis pipeline functions as preparation for modularity improvements planned in the GSoC 'Refactoring the PEcAn trait meta-analysis workflow' project. New test files: - helper-test-data.R: Shared fixtures - test-check_consistent.R: Unit tests for check_consistent() - test-run.meta.analysis.pft.R: Tests for workflow wrapper - test-meta_analysis_standalone.R: Tests for core analysis function - test-get.parameter.samples.R: Documents invisible(NULL) return problem These tests lock in current behavior before any refactoring begins. No functional changes to existing code.
1 parent fe4a948 commit 6b2de10

5 files changed

Lines changed: 577 additions & 0 deletions

File tree

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
# helper-test-data.R
2+
# Shared test fixtures for meta-analysis pipeline tests
3+
#
4+
# These helpers create minimal but realistic test data structures
5+
# that mirror what the actual pipeline produces and consumes.
6+
7+
# Create minimal trait data matching the structure expected by
8+
# meta_analysis_standalone() and run.meta.analysis.pft()
9+
#
10+
# The columns here match the @param documentation in run.meta.analysis.pft.R:
11+
# name, mean, statname, stat, greenhouse, n,
12+
# site_id, specie_id, citation_id, cultivar_id,
13+
# date, time, control
14+
create_test_trait_data <- function(n_obs = 10, trait_mean = 20, trait_sd = 2, seed = 42) {
15+
set.seed(seed)
16+
test_trait <- data.frame(
17+
citation_id = rep(1L, n_obs),
18+
site_id = rep(1:2, length.out = n_obs),
19+
name = rep(paste0("species_", 1:2), length.out = n_obs),
20+
trt_id = rep("control", n_obs),
21+
control = rep(1L, n_obs),
22+
greenhouse = rep(0L, n_obs),
23+
date = rep(1, n_obs),
24+
time = rep(NA, n_obs),
25+
cultivar_id = rep(1L, n_obs),
26+
specie_id = rep(1L, n_obs),
27+
n = rep(5L, n_obs),
28+
mean = rnorm(n_obs, mean = trait_mean, sd = trait_sd),
29+
stat = rep(trait_sd / sqrt(5), n_obs),
30+
statname = rep("SE", n_obs),
31+
treatment_id = seq_len(n_obs),
32+
stringsAsFactors = FALSE
33+
)
34+
return(test_trait)
35+
}
36+
37+
# Create a minimal prior distributions data.frame
38+
# matching the structure returned by PEcAn.DB::query.priors()
39+
#
40+
# Uses normal distribution so that check_consistent() and
41+
# p.point.in.prior() work without exotic distribution functions.
42+
create_test_priors <- function(trait_names = "SLA",
43+
distn = "norm",
44+
parama = 20,
45+
paramb = 5) {
46+
prior.distns <- data.frame(
47+
distn = rep(distn, length(trait_names)),
48+
parama = rep(parama, length(trait_names)),
49+
paramb = rep(paramb, length(trait_names)),
50+
n = rep(NA_real_, length(trait_names)),
51+
stringsAsFactors = FALSE
52+
)
53+
rownames(prior.distns) <- trait_names
54+
return(prior.distns)
55+
}
56+
57+
# Create a minimal pft list matching what run.meta.analysis.pft() expects
58+
#
59+
# This mirrors the structure from settings$pfts[[i]] after get.trait.data.pft()
60+
# has been run, which adds posteriorid to the pft object.
61+
create_test_pft <- function(outdir = file.path(tempdir(), "test-pft"),
62+
pft_name = "temperate.deciduous",
63+
posteriorid = 9999L) {
64+
dir.create(outdir, showWarnings = FALSE, recursive = TRUE)
65+
list(
66+
name = pft_name,
67+
outdir = outdir,
68+
posteriorid = posteriorid
69+
)
70+
}
71+
72+
# Write trait.data.Rdata and prior.distns.Rdata files to a directory
73+
# so that run.meta.analysis.pft() can load them
74+
#
75+
# This simulates what get.trait.data.pft() produces as output.
76+
setup_trait_files <- function(outdir,
77+
trait_names = "SLA",
78+
trait_mean = 20,
79+
trait_sd = 2,
80+
n_obs = 10,
81+
prior_parama = 20,
82+
prior_paramb = 5) {
83+
dir.create(outdir, showWarnings = FALSE, recursive = TRUE)
84+
85+
# Build trait.data as a named list of data.frames (one per trait)
86+
trait.data <- stats::setNames(
87+
lapply(trait_names, function(tn) {
88+
create_test_trait_data(
89+
n_obs = n_obs,
90+
trait_mean = trait_mean,
91+
trait_sd = trait_sd
92+
)
93+
}),
94+
trait_names
95+
)
96+
save(trait.data, file = file.path(outdir, "trait.data.Rdata"))
97+
98+
# Build prior.distns data.frame
99+
prior.distns <- create_test_priors(
100+
trait_names = trait_names,
101+
parama = prior_parama,
102+
paramb = prior_paramb
103+
)
104+
save(prior.distns, file = file.path(outdir, "prior.distns.Rdata"))
105+
106+
invisible(list(trait.data = trait.data, prior.distns = prior.distns))
107+
}
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
# test-check_consistent.R
2+
# Tests for check_consistent() — the prior-data consistency checker
3+
#
4+
# check_consistent() is a pure function with no side effects or DB dependency,
5+
# making it ideal for unit testing. It is called twice inside
6+
# meta_analysis_standalone(): once to check data vs prior, and again to check
7+
# posterior vs prior.
8+
9+
10+
11+
test_that("check_consistent returns no_error=TRUE, no_warning=TRUE when point is well within prior", {
12+
# A normal(0, 10) prior — point at the mean should be perfectly consistent
13+
14+
prior <- data.frame(distn = "norm", parama = 0, paramb = 10,
15+
stringsAsFactors = FALSE)
16+
result <- PEcAn.MA:::check_consistent(point = 0, prior = prior)
17+
18+
expect_type(result, "logical")
19+
expect_named(result, c("no_error", "no_warning"))
20+
expect_true(result[["no_error"]])
21+
expect_true(result[["no_warning"]])
22+
})
23+
24+
test_that("check_consistent returns warning (but not error) for moderately extreme points", {
25+
26+
# normal(0, 1): p(2.1) ≈ 0.982 which is > 0.975 (warning threshold)
27+
28+
# but < 0.9995 (error threshold)
29+
prior <- data.frame(distn = "norm", parama = 0, paramb = 1,
30+
stringsAsFactors = FALSE)
31+
result <- PEcAn.MA:::check_consistent(point = 2.1, prior = prior)
32+
33+
expect_true(result[["no_error"]])
34+
expect_false(result[["no_warning"]])
35+
})
36+
37+
test_that("check_consistent returns error for extremely inconsistent points", {
38+
# normal(0, 1): p(5) ≈ 1.0 which exceeds both thresholds
39+
prior <- data.frame(distn = "norm", parama = 0, paramb = 1,
40+
stringsAsFactors = FALSE)
41+
result <- PEcAn.MA:::check_consistent(point = 5, prior = prior)
42+
43+
expect_false(result[["no_error"]])
44+
expect_false(result[["no_warning"]])
45+
})
46+
47+
test_that("check_consistent works symmetrically for low-tail extremes", {
48+
prior <- data.frame(distn = "norm", parama = 0, paramb = 1,
49+
stringsAsFactors = FALSE)
50+
result <- PEcAn.MA:::check_consistent(point = -5, prior = prior)
51+
52+
expect_false(result[["no_error"]])
53+
expect_false(result[["no_warning"]])
54+
})
55+
56+
test_that("check_consistent respects custom p_error and p_warning thresholds", {
57+
prior <- data.frame(distn = "norm", parama = 0, paramb = 1,
58+
stringsAsFactors = FALSE)
59+
60+
# With very permissive thresholds, even extreme points pass
61+
result <- PEcAn.MA:::check_consistent(
62+
point = 3, prior = prior,
63+
p_error = 1e-10, p_warning = 1e-5
64+
)
65+
expect_true(result[["no_error"]])
66+
expect_true(result[["no_warning"]])
67+
})
68+
69+
test_that("check_consistent validates that p_warning >= p_error", {
70+
prior <- data.frame(distn = "norm", parama = 0, paramb = 1,
71+
stringsAsFactors = FALSE)
72+
expect_error(
73+
PEcAn.MA:::check_consistent(point = 0, prior = prior,
74+
p_error = 0.05, p_warning = 0.01)
75+
)
76+
})
77+
78+
test_that("check_consistent works with non-normal prior distributions", {
79+
# Gamma distribution: dgamma with shape=2, rate=1
80+
# Median of gamma(2,1) ≈ 1.678
81+
prior_gamma <- data.frame(distn = "gamma", parama = 2, paramb = 1,
82+
stringsAsFactors = FALSE)
83+
84+
# Point near the mode should be consistent
85+
result <- PEcAn.MA:::check_consistent(point = 1.5, prior = prior_gamma)
86+
expect_true(result[["no_error"]])
87+
expect_true(result[["no_warning"]])
88+
89+
# Point far in the tail should trigger error
90+
result_extreme <- PEcAn.MA:::check_consistent(point = 50, prior = prior_gamma)
91+
expect_false(result_extreme[["no_error"]])
92+
})
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
# test-get.parameter.samples.R
2+
# Documents the invisible(NULL) return value problem in get.parameter.samples()
3+
#
4+
# get.parameter.samples() ends with save() and has no return() statement,
5+
# so it returns invisible(NULL). This means callers cannot programmatically
6+
# access results without loading the saved file.
7+
# The GSoC project "Refactoring the PEcAn trait meta-analysis workflow"
8+
# aims to fix this by returning a named list instead.
9+
10+
11+
12+
test_that("get.parameter.samples returns invisible(NULL) — documenting the problem", {
13+
skip(paste0(
14+
"Requires full PEcAn settings + database connection. ",
15+
"Current function ends with save() and no return(), ",
16+
"so it returns invisible(NULL). ",
17+
"GSoC refactoring will make it return a named list."
18+
))
19+
})
20+
21+
# ---------------------------------------------------------------------------
22+
# p.point.in.prior (helper used throughout the pipeline)
23+
# ---------------------------------------------------------------------------
24+
25+
26+
27+
test_that("p.point.in.prior returns correct quantile for normal distribution", {
28+
prior <- data.frame(distn = "norm", parama = 0, paramb = 1)
29+
result <- PEcAn.MA:::p.point.in.prior(point = 0, prior = prior)
30+
expect_equal(result, 0.5)
31+
})
32+
33+
test_that("p.point.in.prior returns correct quantile for extreme values", {
34+
prior <- data.frame(distn = "norm", parama = 0, paramb = 1)
35+
result_low <- PEcAn.MA:::p.point.in.prior(point = -5, prior = prior)
36+
expect_true(result_low < 0.001)
37+
result_high <- PEcAn.MA:::p.point.in.prior(point = 5, prior = prior)
38+
expect_true(result_high > 0.999)
39+
})
40+
41+
test_that("p.point.in.prior works with gamma distribution", {
42+
prior <- data.frame(distn = "gamma", parama = 2, paramb = 1)
43+
result <- PEcAn.MA:::p.point.in.prior(point = 2, prior = prior)
44+
expected <- pgamma(2, shape = 2, rate = 1)
45+
expect_equal(result, expected)
46+
})
47+
48+
test_that("p.point.in.prior returns numeric of length 1", {
49+
prior <- data.frame(distn = "norm", parama = 0, paramb = 1)
50+
result <- PEcAn.MA:::p.point.in.prior(point = 1.5, prior = prior)
51+
expect_type(result, "double")
52+
expect_length(result, 1)
53+
expect_true(result >= 0 && result <= 1)
54+
})
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
# test-meta_analysis_standalone.R
2+
# Tests for meta_analysis_standalone() — the pure analysis function
3+
#
4+
# meta_analysis_standalone() is the core analysis function that:
5+
# 1. Calls jagify() to prepare data for JAGS
6+
# 2. Checks data-prior consistency via check_consistent()
7+
# 3. Runs pecan.ma() (JAGS MCMC)
8+
# 4. Summarizes results via pecan.ma.summary()
9+
# 5. Fits approximate posteriors via approx.posterior()
10+
# 6. Returns list(trait.mcmc, post.distns, jagged.data)
11+
#
12+
# NOTE: meta_analysis_standalone() does not exist yet in the package.
13+
# It will be created as part of GSoC refactoring of meta.analysis.R.
14+
# These tests document the expected behavior and return structure
15+
# so that when the function is implemented, it has a ready safety net.
16+
17+
# ---------------------------------------------------------------------------
18+
# Greenhouse data filtering
19+
# ---------------------------------------------------------------------------
20+
21+
test_that("meta_analysis_standalone filters greenhouse data when use_ghs=FALSE", {
22+
skip(paste0(
23+
"meta_analysis_standalone() does not exist yet — ",
24+
"it will be created as part of GSoC refactoring of meta.analysis.R. ",
25+
"This test documents the expected behavior when use_ghs=FALSE filters ",
26+
"all data: the function should handle empty trait data gracefully."
27+
))
28+
})
29+
30+
# ---------------------------------------------------------------------------
31+
# Full integration test (requires JAGS)
32+
# ---------------------------------------------------------------------------
33+
34+
test_that("meta_analysis_standalone returns correct structure with valid inputs", {
35+
skip(paste0(
36+
"meta_analysis_standalone() does not exist yet — ",
37+
"it will be created as part of GSoC refactoring of meta.analysis.R. ",
38+
"This test documents the expected return structure: ",
39+
"list(trait.mcmc, post.distns, jagged.data)."
40+
))
41+
})

0 commit comments

Comments
 (0)