Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
de6a562
ENH: add parameter finder for degrees or freedom
dschmitz89 Apr 12, 2026
7b015ec
Add early exit if approximation is precise to machine precision
dschmitz89 Apr 12, 2026
a1609a9
Loosen tolerance and add tests in the tails
dschmitz89 Apr 12, 2026
1cf9ea2
Modularize and simplify
dschmitz89 Apr 16, 2026
1132241
Use overflow error instead of inf
dschmitz89 Apr 16, 2026
123fdd1
Relax all test tolerances
dschmitz89 Apr 16, 2026
24c88ba
Should use a linter ..
dschmitz89 Apr 16, 2026
0c8365a
Relax test tolerance for float precision and remove ill conditioned t…
dschmitz89 Apr 16, 2026
1868814
Fix indentation
dschmitz89 Apr 16, 2026
b97dceb
Loosen tolerance for very high degrees of freedom
dschmitz89 Apr 16, 2026
9a26afc
Test if approximation is a good hint, otherwise fall back to 0.01
dschmitz89 Apr 18, 2026
eecf3d0
Add test coverage of all cases for approximation
dschmitz89 Apr 18, 2026
0308d3c
Do not use symmetry for better numerical accuracy in the tails
dschmitz89 Apr 18, 2026
4957f2e
Add edge case tests
dschmitz89 Apr 18, 2026
9c865a3
Exclude float from testing for extreme values
dschmitz89 Apr 19, 2026
80de922
Update test comment
dschmitz89 Apr 19, 2026
2540e5f
Replace non ASCII character..
dschmitz89 Apr 19, 2026
739df4b
Simplify using boost's own relative error function
dschmitz89 Apr 20, 2026
ef2e0d2
Add early exits for df=1 and df=inf
dschmitz89 May 2, 2026
a16d4bd
Try to fix CI carnage
dschmitz89 May 2, 2026
04b0577
Simplify analytical cases and raise overflow error for normal case
dschmitz89 May 14, 2026
8e9c3e3
Rename to find_degrees_of_freedom
dschmitz89 May 14, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions doc/distributions/students_t.qbk
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,11 @@
RealType beta,
RealType sd,
RealType hint = 100);

// degrees of freedom inversion from a quantile and probability:
BOOST_MATH_GPU_ENABLED static RealType find_degrees_of_freedom(
RealType t,
RealType p);
};

}} // namespaces
Expand Down Expand Up @@ -106,6 +111,13 @@ For more information on this function see the
[@http://www.itl.nist.gov/div898/handbook/prc/section2/prc222.htm
NIST Engineering Statistics Handbook].

BOOST_MATH_GPU_ENABLED static RealType find_degrees_of_freedom(
RealType t,
RealType p);

Returns the degrees of freedom [nu] such that CDF(/x/; [nu]) = /p/.
Requires 0 < /p/ < 1 and /x/ [ne] 0, otherwise calls __domain_error.

[h4 Non-member Accessors]

All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions] that are generic to all
Expand Down Expand Up @@ -164,6 +176,7 @@ without the subtraction implied above.]]
[[skewness][if (v > 3) 0 else NaN ]]
[[kurtosis][if (v > 4) 3 * (v - 2) \/ (v - 4) else NaN]]
[[kurtosis excess][if (v > 4) 6 \/ (df - 4) else NaN]]
[[find_degrees_of_freedom(t, p)][Uses a 2nd-order Edgeworth expansion as initial guess for a numerical root finder]]
]

If the moment index /k/ is less than /v/, then the moment is undefined.
Expand Down
168 changes: 159 additions & 9 deletions include/boost/math/distributions/students_t.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,11 @@
#include <boost/math/distributions/fwd.hpp>
#include <boost/math/special_functions/beta.hpp> // for ibeta(a, b, x).
#include <boost/math/special_functions/digamma.hpp>
#include <boost/math/special_functions/relative_difference.hpp>
#include <boost/math/distributions/complement.hpp>
#include <boost/math/distributions/detail/common_error_handling.hpp>
#include <boost/math/distributions/normal.hpp>
#include <boost/math/distributions/normal.hpp>
#include <boost/math/distributions/cauchy.hpp>
#include <boost/math/policies/policy.hpp>

#ifdef _MSC_VER
Expand Down Expand Up @@ -58,6 +60,10 @@ class students_t_distribution
RealType sd,
RealType hint = 100);

BOOST_MATH_GPU_ENABLED static RealType find_degrees_of_freedom(
RealType t,
RealType p);

private:
// Data member:
RealType df_; // degrees of freedom is a real number > 0 or +infinity.
Expand Down Expand Up @@ -281,6 +287,18 @@ BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type<student
// Parameter estimation follows:
//
namespace detail{

template <class Distribution, class RealType>
BOOST_MATH_GPU_ENABLED inline bool analytical_df_if_cdf_matches(const Distribution& dist, RealType x, RealType p)
{
RealType cdf_val = cdf(dist, x);
return epsilon_difference(cdf_val, p) < 4;
}

// Minimum degrees-of-freedom used as the warm-start fallback when the
// Edgeworth approximation yields no valid positive root or is inaccurate
constexpr double df_hint_fallback = 0.01;

//
// Functors for finding degrees of freedom:
//
Expand Down Expand Up @@ -308,6 +326,93 @@ struct sample_size_func
RealType alpha, beta, ratio;
};

template <class RealType, class Policy>
struct cdf_to_df_func
{
BOOST_MATH_GPU_ENABLED cdf_to_df_func(RealType x_val, RealType p_val, bool c)
: x(x_val), p(p_val), comp(c) {}

BOOST_MATH_GPU_ENABLED RealType operator()(const RealType& df)
{
students_t_distribution<RealType, Policy> t(df);
return comp ?
RealType(p - cdf(complement(t, x)))
: RealType(cdf(t, x) - p);
}
RealType x, p;
bool comp;
};

//
// Shared root-finding helper used by find_degrees_of_freedom.
//
template <class RealType, class Policy, class Func>
BOOST_MATH_GPU_ENABLED RealType solve_for_degrees_of_freedom(
Func f,
RealType hint,
bool rising,
const char* function,
Policy const&)
{
tools::eps_tolerance<RealType> tol(policies::digits<RealType, Policy>());
boost::math::uintmax_t max_iter = policies::get_max_root_iterations<Policy>();
boost::math::pair<RealType, RealType> r = tools::bracket_and_solve_root(
f, hint, RealType(2), rising, tol, max_iter, Policy());
RealType result = r.first + (r.second - r.first) / 2;
if (max_iter >= policies::get_max_root_iterations<Policy>())
{
return policies::raise_evaluation_error<RealType>(function, // LCOV_EXCL_LINE
"Unable to locate solution in a reasonable time: either there is no answer to " // LCOV_EXCL_LINE
"the degrees of freedom or the answer is infinite. Current best guess is %1%", // LCOV_EXCL_LINE
result, Policy()); // LCOV_EXCL_LINE
}
return result;
}

//
// Edgeworth warm-start for find_degrees_of_freedom(t, p).
// On success writes a df estimate into 'result' and returns true.
// Returns false when no positive root is found; 'result' is left unchanged.
//
template <class RealType, class Policy>
BOOST_MATH_GPU_ENABLED bool approximate_df_with_edgeworth_expansion(RealType x, RealType p, RealType& result)
{
BOOST_MATH_STD_USING
// F(x; nu) ~ cdf_normal(x) - pdf_normal(x)*(x + x^3)/(4*nu) + pdf_normal(x)*(3x + 5x^3 + 7x^5 - 3x^7)/(96*nu^2)
// Substituting u = 1/nu and setting F(x; nu) = p gives b*u^2 - a*u + c = 0, where:
// a = pdf_normal(x) * (x + x^3) / 4
// b = pdf_normal(x) * (3x + 5x^3 + 7x^5 - 3x^7) / 96
// c = cdf_normal(x) - p
normal_distribution<RealType, Policy> std_normal(0, 1);
RealType pdf_val = pdf(std_normal, x);
RealType cdf_val = cdf(std_normal, x);

RealType x2 = x * x;
RealType x3 = x2 * x;
RealType x5 = x3 * x2;
RealType x7 = x5 * x2;

RealType a = pdf_val * (x + x3) / 4;
RealType b = pdf_val * (3 * x + 5 * x3 + 7 * x5 - 3 * x7) / 96;
RealType c = cdf_val - p;

RealType discriminant = a * a - 4 * b * c;
if (discriminant >= 0 && b != 0)
{
RealType sqrt_disc = sqrt(discriminant);
// The two roots of b*u^2 - a*u + c = 0. Pick the smallest positive u (= largest df = 1/u).
RealType u = (a - sqrt_disc) / (2 * b);
if (u <= 0)
u = (a + sqrt_disc) / (2 * b);
if (u > 0)
{
result = 1 / u;
return true;
}
}
return false;
}

} // namespace detail

template <class RealType, class Policy>
Expand All @@ -332,16 +437,61 @@ BOOST_MATH_GPU_ENABLED RealType students_t_distribution<RealType, Policy>::find_
hint = 1;

detail::sample_size_func<RealType, Policy> f(alpha, beta, sd, difference_from_mean);
tools::eps_tolerance<RealType> tol(policies::digits<RealType, Policy>());
boost::math::uintmax_t max_iter = policies::get_max_root_iterations<Policy>();
boost::math::pair<RealType, RealType> r = tools::bracket_and_solve_root(f, hint, RealType(2), false, tol, max_iter, Policy());
RealType result = r.first + (r.second - r.first) / 2;
if(max_iter >= policies::get_max_root_iterations<Policy>())
return detail::solve_for_degrees_of_freedom(f, hint, false, function, Policy());
}

template <class RealType, class Policy>
BOOST_MATH_GPU_ENABLED RealType students_t_distribution<RealType, Policy>::find_degrees_of_freedom(
RealType t,
RealType p)
{
BOOST_MATH_STD_USING // for ADL of std functions
constexpr auto function = "boost::math::students_t_distribution<%1%>::find_degrees_of_freedom";
//
// Check for domain errors:
//
RealType error_result;
if (false == detail::check_probability(function, p, &error_result, Policy()))
return error_result;

if (t == 0)
{
return policies::raise_evaluation_error<RealType>(function, "Unable to locate solution in a reasonable time: either there is no answer to how many degrees of freedom are required" // LCOV_EXCL_LINE
" or the answer is infinite. Current best guess is %1%", result, Policy()); // LCOV_EXCL_LINE
// CDF(0; df) = 0.5 for all df; only solvable when p == 0.5.
if (p == static_cast<RealType>(0.5))
return policies::raise_overflow_error<RealType>(function, nullptr, Policy());
return policies::raise_domain_error<RealType>(
function,
"No degrees of freedom can satisfy CDF(0; df) == %1% (must be 0.5).",
p, Policy());
}
return result;

// Analytical cases: df = infinity (normal), df = 1 (cauchy)
boost::math::normal_distribution<RealType, Policy> norm(0, 1);
if (detail::analytical_df_if_cdf_matches(norm, t, p))
return policies::raise_overflow_error<RealType>(function, nullptr, Policy());
boost::math::cauchy_distribution<RealType, Policy> cauchy(0, 1);
if (detail::analytical_df_if_cdf_matches(cauchy, t, p))
return RealType(1);

// Edgeworth warm start: compute a df estimate; fall back to df_hint_fallback if it fails
// or is too inaccurate
RealType hint = static_cast<RealType>(detail::df_hint_fallback);
if (detail::approximate_df_with_edgeworth_expansion<RealType, Policy>(t, p, hint))
{
// Check that approximation is at least somewhat close;
// for small degrees of freedom it does not fail but is very inaccurate.
RealType relative_error_threshold = static_cast<RealType>(0.1);
students_t_distribution<RealType, Policy> t_approx(hint);
RealType p_approx = cdf(t_approx, t);
if (relative_difference(p_approx, p) > relative_error_threshold)
hint = static_cast<RealType>(detail::df_hint_fallback);
}
// Root-find on f(df) = CDF(t; df) - p.
// CDF(t; df) is strictly increasing in df for t > 0, decreasing for t < 0.
// Use the smaller of p and q=1-p to avoid cancellation near 1.
RealType q = 1 - p;
detail::cdf_to_df_func<RealType, Policy> f(t, p < q ? p : q, p < q ? false : true);
return detail::solve_for_degrees_of_freedom(f, hint, t > 0, function, Policy());
}

template <class RealType, class Policy>
Expand Down
144 changes: 144 additions & 0 deletions test/test_students_t.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -548,6 +548,150 @@ void test_spots(RealType)
static_cast<RealType>(1.0))),
9);

// Tests for find_degrees_of_freedom(t, p) overload.
// Each case is derived from the CDF spot tests above: the exact df is known,
// so we verify that inverting CDF(x; df) = p recovers df to tight tolerance.
// float has ~7 significant digits; large-df inversion is ill-conditioned at that precision,
// so use a looser tolerance for single precision.
RealType tol_inv_df = std::is_same<RealType, float>::value
? static_cast<RealType>(0.1) // float: 0.1%
: static_cast<RealType>(0.01); // double+: 0.01%
BOOST_CHECK_CLOSE(
students_t_distribution<RealType>::find_degrees_of_freedom(
static_cast<RealType>(-6.96455673428326),
static_cast<RealType>(0.01)),
static_cast<RealType>(2),
tol_inv_df);
BOOST_CHECK_CLOSE(
students_t_distribution<RealType>::find_degrees_of_freedom(
static_cast<RealType>(-3.36492999890721),
static_cast<RealType>(0.01)),
static_cast<RealType>(5),
tol_inv_df);
BOOST_CHECK_CLOSE(
students_t_distribution<RealType>::find_degrees_of_freedom(
static_cast<RealType>(-0.559429644),
static_cast<RealType>(0.3)),
static_cast<RealType>(5),
tol_inv_df);
BOOST_CHECK_CLOSE(
students_t_distribution<RealType>::find_degrees_of_freedom(
static_cast<RealType>(1.475884049),
static_cast<RealType>(0.9)),
static_cast<RealType>(5),
tol_inv_df);
BOOST_CHECK_CLOSE(
students_t_distribution<RealType>::find_degrees_of_freedom(
static_cast<RealType>(-1.475884049),
static_cast<RealType>(0.1)),
static_cast<RealType>(5),
tol_inv_df);
BOOST_CHECK_CLOSE(
students_t_distribution<RealType>::find_degrees_of_freedom(
static_cast<RealType>(-5.2410429995425),
static_cast<RealType>(0.00001)),
static_cast<RealType>(25),
tol_inv_df);
BOOST_CHECK_CLOSE(
students_t_distribution<RealType>::find_degrees_of_freedom(
static_cast<RealType>(6.96455673428326),
static_cast<RealType>(0.99)),
static_cast<RealType>(2),
tol_inv_df);
BOOST_CHECK_CLOSE(
students_t_distribution<RealType>::find_degrees_of_freedom(
static_cast<RealType>(2),
static_cast<RealType>(0.610822886098362)),
static_cast<RealType>(0.1),
tol_inv_df);
BOOST_CHECK_CLOSE(
students_t_distribution<RealType>::find_degrees_of_freedom(
static_cast<RealType>(2),
static_cast<RealType>(0.777242554908434)),
static_cast<RealType>(0.5),
tol_inv_df);
BOOST_CHECK_CLOSE(
students_t_distribution<RealType>::find_degrees_of_freedom(
static_cast<RealType>(2),
static_cast<RealType>(0.822925875908677)),
static_cast<RealType>(0.75),
tol_inv_df);
BOOST_CHECK_CLOSE(
students_t_distribution<RealType>::find_degrees_of_freedom(
static_cast<RealType>(2),
static_cast<RealType>(0.977114826753374)),
static_cast<RealType>(1000),
tol_inv_df);
BOOST_CHECK_CLOSE(
students_t_distribution<RealType>::find_degrees_of_freedom(
static_cast<RealType>(1),
static_cast<RealType>(0.8413326478347855)),
static_cast<RealType>(1e4),
tol_inv_df * static_cast<RealType>(5)); // Looser tolerance for ill-conditioned problem with very large df
// Small df edge cases where approximation becomes problematic: would need different values for float
// For now just test double and long double to have test overage of these code paths
if (!std::is_same<RealType, float>::value)
{
// Small df case 1: Edgeworth expansion breaks down for small degrees of freedom, use fallback
BOOST_CHECK_CLOSE(
students_t_distribution<RealType>::find_degrees_of_freedom(
static_cast<RealType>(1e20),
static_cast<RealType>(0.5244796002843015)),
static_cast<RealType>(1e-3),
tol_inv_df);
// Small df case 2: Edgeworth expansion succeeds but is inaccurate, use fallback
BOOST_CHECK_CLOSE(
students_t_distribution<RealType>::find_degrees_of_freedom(
static_cast<RealType>(2.0),
static_cast<RealType>(0.500000000644961)),
static_cast<RealType>(1e-10),
tol_inv_df);
}
// Analytical test: df=1 (Cauchy) case
{
boost::math::cauchy_distribution<RealType> cauchy(0, 1);
RealType x = static_cast<RealType>(1.0);
RealType p = cdf(cauchy, x);
RealType df_result = students_t_distribution<RealType>::find_degrees_of_freedom(x, p);
BOOST_CHECK_EQUAL(df_result, static_cast<RealType>(1.0));
}


// Domain error: p outside (0,1)
#ifndef BOOST_NO_EXCEPTIONS
BOOST_MATH_CHECK_THROW(
students_t_distribution<RealType>::find_degrees_of_freedom(
static_cast<RealType>(2),
static_cast<RealType>(-0.1)),
std::domain_error);
BOOST_MATH_CHECK_THROW(
students_t_distribution<RealType>::find_degrees_of_freedom(
static_cast<RealType>(2),
static_cast<RealType>(1.1)),
std::domain_error);
// x == 0, p != 0.5: no df can satisfy CDF(0; df) == p -> domain error
BOOST_MATH_CHECK_THROW(
students_t_distribution<RealType>::find_degrees_of_freedom(
static_cast<RealType>(0),
static_cast<RealType>(0.3)),
std::domain_error);
// x == 0, p == 0.5: CDF(0; df) == 0.5 for all df -> overflow (infinite solutions)
BOOST_MATH_CHECK_THROW(
students_t_distribution<RealType>::find_degrees_of_freedom(
static_cast<RealType>(0),
static_cast<RealType>(0.5)),
std::overflow_error);
{
// Analytical test: df=infinity (Normal) case returns overflow error
boost::math::normal_distribution<RealType> norm(0, 1);
RealType x = static_cast<RealType>(1.0);
RealType p = cdf(norm, x);
BOOST_MATH_CHECK_THROW(
students_t_distribution<RealType>::find_degrees_of_freedom(x, p),
std::overflow_error);
}
#endif

// Test for large degrees of freedom when should be same as normal.
RealType inf = std::numeric_limits<RealType>::infinity();
RealType nan = std::numeric_limits<RealType>::quiet_NaN();
Expand Down
Loading