Skip to content

Commit ff2c591

Browse files
committed
Merge branch 'mptcp-misc-fixes-for-v7-0-rc2'
Matthieu Baerts says: ==================== mptcp: misc fixes for v7.0-rc2 Here are various unrelated fixes: - Patch 1: avoid bufferbloat in simult_flows selftest which can cause instabilities. A fix for v5.10. - Patches 2-3: reduce RM_ADDR lost by not sending it over the same subflow as the one being removed, if possible. A fix for v5.13. - Patches 4-5: avoid a WARN when using signal + subflow endpoints with a subflow limit of 0, and removing such endpoints during an active connection. A fix for v5.17. ==================== Link: https://patch.msgid.link/20260303-net-mptcp-misc-fixes-7-0-rc2-v1-0-4b5462b6f016@kernel.org Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2 parents f43ed0c + 1777f34 commit ff2c591

4 files changed

Lines changed: 108 additions & 16 deletions

File tree

net/mptcp/pm.c

Lines changed: 43 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -212,9 +212,24 @@ void mptcp_pm_send_ack(struct mptcp_sock *msk,
212212
spin_lock_bh(&msk->pm.lock);
213213
}
214214

215-
void mptcp_pm_addr_send_ack(struct mptcp_sock *msk)
215+
static bool subflow_in_rm_list(const struct mptcp_subflow_context *subflow,
216+
const struct mptcp_rm_list *rm_list)
217+
{
218+
u8 i, id = subflow_get_local_id(subflow);
219+
220+
for (i = 0; i < rm_list->nr; i++) {
221+
if (rm_list->ids[i] == id)
222+
return true;
223+
}
224+
225+
return false;
226+
}
227+
228+
static void
229+
mptcp_pm_addr_send_ack_avoid_list(struct mptcp_sock *msk,
230+
const struct mptcp_rm_list *rm_list)
216231
{
217-
struct mptcp_subflow_context *subflow, *alt = NULL;
232+
struct mptcp_subflow_context *subflow, *stale = NULL, *same_id = NULL;
218233

219234
msk_owned_by_me(msk);
220235
lockdep_assert_held(&msk->pm.lock);
@@ -224,19 +239,35 @@ void mptcp_pm_addr_send_ack(struct mptcp_sock *msk)
224239
return;
225240

226241
mptcp_for_each_subflow(msk, subflow) {
227-
if (__mptcp_subflow_active(subflow)) {
228-
if (!subflow->stale) {
229-
mptcp_pm_send_ack(msk, subflow, false, false);
230-
return;
231-
}
242+
if (!__mptcp_subflow_active(subflow))
243+
continue;
232244

233-
if (!alt)
234-
alt = subflow;
245+
if (unlikely(subflow->stale)) {
246+
if (!stale)
247+
stale = subflow;
248+
} else if (unlikely(rm_list &&
249+
subflow_in_rm_list(subflow, rm_list))) {
250+
if (!same_id)
251+
same_id = subflow;
252+
} else {
253+
goto send_ack;
235254
}
236255
}
237256

238-
if (alt)
239-
mptcp_pm_send_ack(msk, alt, false, false);
257+
if (same_id)
258+
subflow = same_id;
259+
else if (stale)
260+
subflow = stale;
261+
else
262+
return;
263+
264+
send_ack:
265+
mptcp_pm_send_ack(msk, subflow, false, false);
266+
}
267+
268+
void mptcp_pm_addr_send_ack(struct mptcp_sock *msk)
269+
{
270+
mptcp_pm_addr_send_ack_avoid_list(msk, NULL);
240271
}
241272

242273
int mptcp_pm_mp_prio_send_ack(struct mptcp_sock *msk,
@@ -470,7 +501,7 @@ int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_
470501
msk->pm.rm_list_tx = *rm_list;
471502
rm_addr |= BIT(MPTCP_RM_ADDR_SIGNAL);
472503
WRITE_ONCE(msk->pm.addr_signal, rm_addr);
473-
mptcp_pm_addr_send_ack(msk);
504+
mptcp_pm_addr_send_ack_avoid_list(msk, rm_list);
474505
return 0;
475506
}
476507

net/mptcp/pm_kernel.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -418,6 +418,15 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
418418
}
419419

420420
exit:
421+
/* If an endpoint has both the signal and subflow flags, but it is not
422+
* possible to create subflows -- the 'while' loop body above never
423+
* executed -- then still mark the endp as used, which is somehow the
424+
* case. This avoids issues later when removing the endpoint and calling
425+
* __mark_subflow_endp_available(), which expects the increment here.
426+
*/
427+
if (signal_and_subflow && local.addr.id != msk->mpc_endpoint_id)
428+
msk->pm.local_addr_used++;
429+
421430
mptcp_pm_nl_check_work_pending(msk);
422431
}
423432

tools/testing/selftests/net/mptcp/mptcp_join.sh

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,24 @@ CBPF_MPTCP_SUBOPTION_ADD_ADDR="14,
104104
6 0 0 65535,
105105
6 0 0 0"
106106

107+
# IPv4: TCP hdr of 48B, a first suboption of 12B (DACK8), the RM_ADDR suboption
108+
# generated using "nfbpf_compile '(ip[32] & 0xf0) == 0xc0 && ip[53] == 0x0c &&
109+
# (ip[66] & 0xf0) == 0x40'"
110+
CBPF_MPTCP_SUBOPTION_RM_ADDR="13,
111+
48 0 0 0,
112+
84 0 0 240,
113+
21 0 9 64,
114+
48 0 0 32,
115+
84 0 0 240,
116+
21 0 6 192,
117+
48 0 0 53,
118+
21 0 4 12,
119+
48 0 0 66,
120+
84 0 0 240,
121+
21 0 1 64,
122+
6 0 0 65535,
123+
6 0 0 0"
124+
107125
init_partial()
108126
{
109127
capout=$(mktemp)
@@ -2608,6 +2626,19 @@ remove_tests()
26082626
chk_rst_nr 0 0
26092627
fi
26102628

2629+
# signal+subflow with limits, remove
2630+
if reset "remove signal+subflow with limits"; then
2631+
pm_nl_set_limits $ns1 0 0
2632+
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,subflow
2633+
pm_nl_set_limits $ns2 0 0
2634+
addr_nr_ns1=-1 speed=slow \
2635+
run_tests $ns1 $ns2 10.0.1.1
2636+
chk_join_nr 0 0 0
2637+
chk_add_nr 1 1
2638+
chk_rm_nr 1 0 invert
2639+
chk_rst_nr 0 0
2640+
fi
2641+
26112642
# addresses remove
26122643
if reset "remove addresses"; then
26132644
pm_nl_set_limits $ns1 3 3
@@ -4217,6 +4248,14 @@ endpoint_tests()
42174248
chk_subflow_nr "after no reject" 3
42184249
chk_mptcp_info subflows 2 subflows 2
42194250

4251+
# To make sure RM_ADDR are sent over a different subflow, but
4252+
# allow the rest to quickly and cleanly close the subflow
4253+
local ipt=1
4254+
ip netns exec "${ns2}" ${iptables} -I OUTPUT -s "10.0.1.2" \
4255+
-p tcp -m tcp --tcp-option 30 \
4256+
-m bpf --bytecode \
4257+
"$CBPF_MPTCP_SUBOPTION_RM_ADDR" \
4258+
-j DROP || ipt=0
42204259
local i
42214260
for i in $(seq 3); do
42224261
pm_nl_del_endpoint $ns2 1 10.0.1.2
@@ -4229,6 +4268,7 @@ endpoint_tests()
42294268
chk_subflow_nr "after re-add id 0 ($i)" 3
42304269
chk_mptcp_info subflows 3 subflows 3
42314270
done
4271+
[ ${ipt} = 1 ] && ip netns exec "${ns2}" ${iptables} -D OUTPUT 1
42324272

42334273
mptcp_lib_kill_group_wait $tests_pid
42344274

@@ -4288,11 +4328,20 @@ endpoint_tests()
42884328
chk_mptcp_info subflows 2 subflows 2
42894329
chk_mptcp_info add_addr_signal 2 add_addr_accepted 2
42904330

4331+
# To make sure RM_ADDR are sent over a different subflow, but
4332+
# allow the rest to quickly and cleanly close the subflow
4333+
local ipt=1
4334+
ip netns exec "${ns1}" ${iptables} -I OUTPUT -s "10.0.1.1" \
4335+
-p tcp -m tcp --tcp-option 30 \
4336+
-m bpf --bytecode \
4337+
"$CBPF_MPTCP_SUBOPTION_RM_ADDR" \
4338+
-j DROP || ipt=0
42914339
pm_nl_del_endpoint $ns1 42 10.0.1.1
42924340
sleep 0.5
42934341
chk_subflow_nr "after delete ID 0" 2
42944342
chk_mptcp_info subflows 2 subflows 2
42954343
chk_mptcp_info add_addr_signal 2 add_addr_accepted 2
4344+
[ ${ipt} = 1 ] && ip netns exec "${ns1}" ${iptables} -D OUTPUT 1
42964345

42974346
pm_nl_add_endpoint $ns1 10.0.1.1 id 99 flags signal
42984347
wait_mpj 4

tools/testing/selftests/net/mptcp/simult_flows.sh

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -237,10 +237,13 @@ run_test()
237237
for dev in ns2eth1 ns2eth2; do
238238
tc -n $ns2 qdisc del dev $dev root >/dev/null 2>&1
239239
done
240-
tc -n $ns1 qdisc add dev ns1eth1 root netem rate ${rate1}mbit $delay1
241-
tc -n $ns1 qdisc add dev ns1eth2 root netem rate ${rate2}mbit $delay2
242-
tc -n $ns2 qdisc add dev ns2eth1 root netem rate ${rate1}mbit $delay1
243-
tc -n $ns2 qdisc add dev ns2eth2 root netem rate ${rate2}mbit $delay2
240+
241+
# keep the queued pkts number low, or the RTT estimator will see
242+
# increasing latency over time.
243+
tc -n $ns1 qdisc add dev ns1eth1 root netem rate ${rate1}mbit $delay1 limit 50
244+
tc -n $ns1 qdisc add dev ns1eth2 root netem rate ${rate2}mbit $delay2 limit 50
245+
tc -n $ns2 qdisc add dev ns2eth1 root netem rate ${rate1}mbit $delay1 limit 50
246+
tc -n $ns2 qdisc add dev ns2eth2 root netem rate ${rate2}mbit $delay2 limit 50
244247

245248
# time is measured in ms, account for transfer size, aggregated link speed
246249
# and header overhead (10%)

0 commit comments

Comments
 (0)