Skip to content

Commit 6be67e0

Browse files
authored
feat(app/inbound): introduce response duration metrics (#4420)
* feat(app/inbound): introduce response duration metrics this commit introduces a new middleware layer to the inbound proxy. this instruments inbound traffic with Prometheus telemetry that records response body latency, and emits a histogram of response body durations. the buckets are chosen to mimic the request and response buckets emitted by the outbound proxy, with their granularity flipped. in other words, the inbound proxy is more interested in fine-grained request body metrics than response body metrics, while the outbound proxy is more interested in fine-grained response body metrics than request body metrics. * #4418 * #4419 Signed-off-by: katelyn martin <kate@buoyant.io> * docs(app): document latency metrics' histogram buckets this addresses review feedback, and introduces further documentation to the inbound proxy's response latency metrics. this commit also further polishes the outbound proxy's language around these constants, and adds mention to each half of the proxy regarding the other corresponding pair of metrics, to prevent drift in the future. to avoid churn, this commit phrases the inbound proxy's comments and language in terms that expect the forthcoming addition of a request latency layer to the inbound proxy, though that has not yet been implemented in this branch at time of writing. a todo comment is left in place to indicate this. Signed-off-by: katelyn martin <kate@buoyant.io> --------- Signed-off-by: katelyn martin <kate@buoyant.io>
1 parent b6353d8 commit 6be67e0

4 files changed

Lines changed: 157 additions & 15 deletions

File tree

linkerd/app/inbound/src/http/router/metrics.rs

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,23 @@
11
use crate::InboundMetrics;
22
use linkerd_app_core::svc;
33

4-
pub use self::{count_reqs::*, labels::RouteLabels, req_body::*, rsp_body::*, status::*};
4+
pub use self::{
5+
count_reqs::*, labels::RouteLabels, req_body::*, rsp_body::*, rsp_duration::*, status::*,
6+
};
57

68
mod count_reqs;
79
mod labels;
810
mod req_body;
911
mod rsp_body;
12+
mod rsp_duration;
1013
mod status;
1114

1215
pub(super) fn layer<N>(
1316
InboundMetrics {
1417
request_count,
1518
request_body_data,
1619
response_body_data,
20+
response_duration,
1721
status_codes,
1822
..
1923
}: &InboundMetrics,
@@ -25,6 +29,11 @@ pub(super) fn layer<N>(
2529
count_reqs::NewCountRequests::layer_via(extract)
2630
};
2731

32+
let response_duration = {
33+
let extract = ExtractResponseDurationMetrics(response_duration.clone());
34+
NewResponseDuration::layer_via(extract)
35+
};
36+
2837
let response_body = {
2938
let extract = ExtractResponseBodyDataMetrics::new(response_body_data.clone());
3039
NewRecordResponseBodyData::layer_via(extract)
@@ -41,10 +50,15 @@ pub(super) fn layer<N>(
4150
};
4251

4352
svc::layer::mk(move |inner| {
44-
count.layer(response_body.layer(request_body.layer(status.layer(inner))))
53+
count.layer(
54+
response_duration.layer(response_body.layer(request_body.layer(status.layer(inner)))),
55+
)
4556
})
4657
}
4758

4859
/// An `N`-typed service instrumented with metrics middleware.
49-
type Instrumented<N> =
50-
NewCountRequests<NewRecordResponseBodyData<NewRecordRequestBodyData<NewRecordStatusCode<N>>>>;
60+
type Instrumented<N> = NewCountRequests<
61+
NewResponseDuration<
62+
NewRecordResponseBodyData<NewRecordRequestBodyData<NewRecordStatusCode<N>>>,
63+
>,
64+
>;
Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
use super::RouteLabels;
2+
use crate::policy::PermitVariant;
3+
use linkerd_app_core::{
4+
metrics::prom::{self, EncodeLabelSetMut},
5+
svc,
6+
};
7+
use linkerd_http_prom::{
8+
record_response::{self, Params},
9+
stream_label::with::MkWithLabels,
10+
};
11+
12+
/// Inbound alias for [`record_response::NewResponseDuration`], fixing its first two type
/// parameters to [`MkLabelDuration`] and [`ExtractResponseDurationMetrics`].
pub type NewResponseDuration<N> =
13+
record_response::NewResponseDuration<MkLabelDuration, ExtractResponseDurationMetrics, N>;
14+
15+
/// The [`Params`] returned by [`ExtractResponseDurationMetrics`]: a [`MkLabelDuration`] labeler
/// together with a [`record_response::ResponseMetrics`] over [`ResponseDurationLabels`].
pub type ResponseDurationParams =
16+
Params<MkLabelDuration, record_response::ResponseMetrics<ResponseDurationLabels>>;
17+
18+
/// Extracts [`ResponseDurationParams`] from a target, using the wrapped
/// [`ResponseDurationFamilies`] as the source of metrics.
#[derive(Clone, Debug)]
19+
pub struct ExtractResponseDurationMetrics(pub ResponseDurationFamilies);
20+
21+
/// Response duration metrics for the inbound proxy, held separately for gRPC and HTTP traffic.
#[derive(Clone, Debug)]
22+
pub struct ResponseDurationFamilies {
23+
/// Metrics registered under the `grpc` prefix; used for [`PermitVariant::Grpc`] targets.
grpc: record_response::ResponseMetrics<ResponseDurationLabels>,
24+
/// Metrics registered under the `http` prefix; used for [`PermitVariant::Http`] targets.
http: record_response::ResponseMetrics<ResponseDurationLabels>,
25+
}
26+
27+
/// The label set attached to response duration metrics.
///
/// This currently consists solely of the route's labels.
#[derive(Clone, Debug, Hash, PartialEq, Eq)]
28+
pub struct ResponseDurationLabels {
29+
/// Labels describing the route, taken from the target.
route: RouteLabels,
30+
}
31+
32+
/// The [`MkWithLabels`] labeler specialized to [`ResponseDurationLabels`].
pub type MkLabelDuration = MkWithLabels<ResponseDurationLabels>;
33+
34+
// === impl ResponseDurationFamilies ===
35+
36+
impl ResponseDurationFamilies {
37+
/// Registers a new [`ResponseDurationFamilies`] with the given registry.
///
/// The gRPC and HTTP metrics are each registered in a sub-registry of `reg`, prefixed with
/// `grpc` and `http` respectively. `histo` is forwarded to both registrations.
38+
pub fn register(
39+
reg: &mut prom::Registry,
40+
histo: impl Clone + IntoIterator<Item = f64>,
41+
) -> Self {
42+
let grpc = {
43+
let reg = reg.sub_registry_with_prefix("grpc");
44+
// `histo` is cloned here because it is passed by value again for the HTTP metrics below.
record_response::ResponseMetrics::register(reg, histo.clone())
45+
};
46+
47+
let http = {
48+
let reg = reg.sub_registry_with_prefix("http");
49+
record_response::ResponseMetrics::register(reg, histo)
50+
};
51+
52+
Self { grpc, http }
53+
}
54+
}
55+
56+
// === impl ExtractResponseDurationMetrics ===
57+
58+
/// Builds [`ResponseDurationParams`] for any target that provides both a [`PermitVariant`] and
/// [`RouteLabels`].
impl<T> svc::ExtractParam<ResponseDurationParams, T> for ExtractResponseDurationMetrics
59+
where
60+
T: svc::Param<PermitVariant> + svc::Param<RouteLabels>,
61+
{
62+
fn extract_param(&self, target: &T) -> ResponseDurationParams {
63+
let Self(families) = self;
64+
65+
// The labeler is built from the target's route labels.
let labeler = {
66+
let route: RouteLabels = target.param();
67+
let labels = ResponseDurationLabels { route };
68+
MkLabelDuration::new(labels)
69+
};
70+
71+
// Select the gRPC or HTTP metrics according to the target's permit variant, cloning the
// selected metrics so that the returned parameters own them.
let metric = {
72+
let variant: PermitVariant = target.param();
73+
let ResponseDurationFamilies { grpc, http } = families;
74+
match variant {
75+
PermitVariant::Grpc => grpc,
76+
PermitVariant::Http => http,
77+
}
78+
.clone()
79+
};
80+
81+
ResponseDurationParams { labeler, metric }
82+
}
83+
}
84+
85+
// === impl ResponseDurationLabels ===
86+
87+
/// Encodes this label set by encoding its route labels into the given encoder.
impl prom::EncodeLabelSetMut for ResponseDurationLabels {
88+
fn encode_label_set(
89+
&self,
90+
encoder: &mut prom::encoding::LabelSetEncoder<'_>,
91+
) -> std::fmt::Result {
92+
// Destructure exhaustively so that adding a field forces this encoder to be revisited.
let Self { route } = self;
93+
route.encode_label_set(encoder)?;
94+
Ok(())
95+
}
96+
}
97+
98+
/// Encodes this label set by delegating to [`EncodeLabelSetMut::encode_label_set`].
impl prom::encoding::EncodeLabelSet for ResponseDurationLabels {
99+
fn encode(&self, mut encoder: prom::encoding::LabelSetEncoder<'_>) -> std::fmt::Result {
100+
self.encode_label_set(&mut encoder)
101+
}
102+
}

linkerd/app/inbound/src/metrics.rs

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,8 @@ pub(crate) mod authz;
1212
pub(crate) mod error;
1313

1414
use crate::http::router::{
15-
RequestBodyFamilies, RequestCountFamilies, ResponseBodyFamilies, StatusCodeFamilies,
15+
RequestBodyFamilies, RequestCountFamilies, ResponseBodyFamilies, ResponseDurationFamilies,
16+
StatusCodeFamilies,
1617
};
1718
pub use linkerd_app_core::metrics::*;
1819

@@ -34,6 +35,7 @@ pub struct InboundMetrics {
3435
pub request_count: RequestCountFamilies,
3536
pub request_body_data: RequestBodyFamilies,
3637
pub response_body_data: ResponseBodyFamilies,
38+
pub response_duration: ResponseDurationFamilies,
3739
pub status_codes: StatusCodeFamilies,
3840
}
3941

@@ -47,6 +49,8 @@ impl InboundMetrics {
4749
let request_count = RequestCountFamilies::register(reg);
4850
let request_body_data = RequestBodyFamilies::register(reg);
4951
let response_body_data = ResponseBodyFamilies::register(reg);
52+
let response_duration =
53+
ResponseDurationFamilies::register(reg, Self::RESPONSE_BUCKETS.iter().copied());
5054
let status_codes = StatusCodeFamilies::register(reg);
5155

5256
Self {
@@ -60,9 +64,24 @@ impl InboundMetrics {
6064
request_count,
6165
request_body_data,
6266
response_body_data,
67+
response_duration,
6368
status_codes,
6469
}
6570
}
71+
72+
// There are two histograms for which we need to register metrics:
73+
// (1) request durations, which are measured on routes. TODO(kate): forthcoming.
74+
// (2) response durations, which are measured on route-backends.
75+
//
76+
// Should these change in the future, be sure to consider the outbound proxy's corresponding
77+
// constants measuring request and response latency for *outgoing* traffic.
78+
79+
/// Histogram buckets for response latency.
80+
///
81+
/// The buckets for this histogram are coarse, eliding several buckets for short response
82+
/// durations to be conservative about the costs of tracking two histograms' respective time
83+
/// series.
84+
const RESPONSE_BUCKETS: &'static [f64] = &[0.05, 0.5, 1.0, 10.0];
6685
}
6786

6887
impl legacy::FmtMetrics for InboundMetrics {

linkerd/app/outbound/src/http/logical/policy/route/metrics.rs

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -142,18 +142,25 @@ where
142142
B::DurationLabels: LabelSet,
143143
B::StatusLabels: LabelSet,
144144
{
145-
// There are two histograms for which we need to register metrics: request
146-
// durations, measured on routes, and response durations, measured on
147-
// route-backends.
145+
// There are two histograms for which we need to register metrics:
146+
// (1) request durations, which are measured on routes.
147+
// (2) response durations, which are measured on route-backends.
148148
//
149-
// Response duration is probably the more meaninful metric
150-
// operationally--and it includes more backend metadata--so we opt to
151-
// preserve higher fidelity for response durations (especially for lower
152-
// values).
153-
//
154-
// We elide several buckets for request durations to be conservative about
155-
// the costs of tracking these two largely overlapping histograms
149+
// Should these change in the future, be sure to consider the inbound proxy's corresponding
150+
// constants measuring request and response latency for *incoming* traffic.
151+
152+
/// Histogram buckets for request latency.
153+
///
154+
/// The buckets for this histogram are coarser than those of [`Self::RESPONSE_BUCKETS`],
155+
/// eliding several buckets for short request durations to be conservative about the costs of
156+
/// tracking these two histograms' respective time series.
156157
const REQUEST_BUCKETS: &'static [f64] = &[0.05, 0.5, 1.0, 10.0];
158+
159+
/// Histogram buckets for response latency.
160+
///
161+
/// Because response duration is the more meaningful metric operationally for the outbound
162+
/// proxy, and because backend metrics include additional metadata, we opt to preserve higher
163+
/// fidelity for response durations (especially for lower values).
157164
const RESPONSE_BUCKETS: &'static [f64] = &[0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 10.0];
158165
}
159166

0 commit comments

Comments
 (0)