Skip to content

Commit 6edd85a

Browse files
mjruhlgregkh
authored andcommitted
IB/hfi1: Fix destroy_qp hang after a link down
commit b4a4957 upstream. rvt_destroy_qp() cannot complete until all in process packets have been released from the underlying hardware. If a link down event occurs, an application can hang with a kernel stack similar to: cat /proc/<app PID>/stack quiesce_qp+0x178/0x250 [hfi1] rvt_reset_qp+0x23d/0x400 [rdmavt] rvt_destroy_qp+0x69/0x210 [rdmavt] ib_destroy_qp+0xba/0x1c0 [ib_core] nvme_rdma_destroy_queue_ib+0x46/0x80 [nvme_rdma] nvme_rdma_free_queue+0x3c/0xd0 [nvme_rdma] nvme_rdma_destroy_io_queues+0x88/0xd0 [nvme_rdma] nvme_rdma_error_recovery_work+0x52/0xf0 [nvme_rdma] process_one_work+0x17a/0x440 worker_thread+0x126/0x3c0 kthread+0xcf/0xe0 ret_from_fork+0x58/0x90 0xffffffffffffffff quiesce_qp() waits until all outstanding packets have been freed. This wait should be momentary. During a link down event, the cleanup handling does not ensure that all packets caught by the link down are flushed properly. This is caused by the fact that the freeze path and the link down event is handled the same. This is not correct. The freeze path waits until the HFI is unfrozen and then restarts PIO. A link down is not a freeze event. The link down path cannot restart the PIO until link is restored. If the PIO path is restarted before the link comes up, the application (QP) using the PIO path will hang (until link is restored). Fix by separating the linkdown path from the freeze path and use the link down path for link down events. Close a race condition sc_disable() by acquiring both the progress and release locks. Close a race condition in sc_stop() by moving the setting of the flag bits under the alloc lock. Cc: <stable@vger.kernel.org> # 4.9.x+ Fixes: 7724105 ("IB/hfi1: add driver files") Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com> Signed-off-by: Michael J. Ruhl <michael.j.ruhl@intel.com> Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com> Signed-off-by: Jason Gunthorpe <jgg@mellanox.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
1 parent 4d5af83 commit 6edd85a

3 files changed

Lines changed: 42 additions & 9 deletions

File tree

drivers/infiniband/hw/hfi1/chip.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6722,6 +6722,7 @@ void start_freeze_handling(struct hfi1_pportdata *ppd, int flags)
67226722
struct hfi1_devdata *dd = ppd->dd;
67236723
struct send_context *sc;
67246724
int i;
6725+
int sc_flags;
67256726

67266727
if (flags & FREEZE_SELF)
67276728
write_csr(dd, CCE_CTRL, CCE_CTRL_SPC_FREEZE_SMASK);
@@ -6732,11 +6733,13 @@ void start_freeze_handling(struct hfi1_pportdata *ppd, int flags)
67326733
/* notify all SDMA engines that they are going into a freeze */
67336734
sdma_freeze_notify(dd, !!(flags & FREEZE_LINK_DOWN));
67346735

6736+
sc_flags = SCF_FROZEN | SCF_HALTED | (flags & FREEZE_LINK_DOWN ?
6737+
SCF_LINK_DOWN : 0);
67356738
/* do halt pre-handling on all enabled send contexts */
67366739
for (i = 0; i < dd->num_send_contexts; i++) {
67376740
sc = dd->send_contexts[i].sc;
67386741
if (sc && (sc->flags & SCF_ENABLED))
6739-
sc_stop(sc, SCF_FROZEN | SCF_HALTED);
6742+
sc_stop(sc, sc_flags);
67406743
}
67416744

67426745
/* Send context are frozen. Notify user space */
@@ -10646,6 +10649,8 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
1064610649
add_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
1064710650

1064810651
handle_linkup_change(dd, 1);
10652+
pio_kernel_linkup(dd);
10653+
1064910654
ppd->host_link_state = HLS_UP_INIT;
1065010655
break;
1065110656
case HLS_UP_ARMED:

drivers/infiniband/hw/hfi1/pio.c

Lines changed: 34 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -942,20 +942,18 @@ void sc_free(struct send_context *sc)
942942
void sc_disable(struct send_context *sc)
943943
{
944944
u64 reg;
945-
unsigned long flags;
946945
struct pio_buf *pbuf;
947946

948947
if (!sc)
949948
return;
950949

951950
/* do all steps, even if already disabled */
952-
spin_lock_irqsave(&sc->alloc_lock, flags);
951+
spin_lock_irq(&sc->alloc_lock);
953952
reg = read_kctxt_csr(sc->dd, sc->hw_context, SC(CTRL));
954953
reg &= ~SC(CTRL_CTXT_ENABLE_SMASK);
955954
sc->flags &= ~SCF_ENABLED;
956955
sc_wait_for_packet_egress(sc, 1);
957956
write_kctxt_csr(sc->dd, sc->hw_context, SC(CTRL), reg);
958-
spin_unlock_irqrestore(&sc->alloc_lock, flags);
959957

960958
/*
961959
* Flush any waiters. Once the context is disabled,
@@ -965,7 +963,7 @@ void sc_disable(struct send_context *sc)
965963
* proceed with the flush.
966964
*/
967965
udelay(1);
968-
spin_lock_irqsave(&sc->release_lock, flags);
966+
spin_lock(&sc->release_lock);
969967
if (sc->sr) { /* this context has a shadow ring */
970968
while (sc->sr_tail != sc->sr_head) {
971969
pbuf = &sc->sr[sc->sr_tail].pbuf;
@@ -976,7 +974,8 @@ void sc_disable(struct send_context *sc)
976974
sc->sr_tail = 0;
977975
}
978976
}
979-
spin_unlock_irqrestore(&sc->release_lock, flags);
977+
spin_unlock(&sc->release_lock);
978+
spin_unlock_irq(&sc->alloc_lock);
980979
}
981980

982981
/* return SendEgressCtxtStatus.PacketOccupancy */
@@ -1199,11 +1198,39 @@ void pio_kernel_unfreeze(struct hfi1_devdata *dd)
11991198
sc = dd->send_contexts[i].sc;
12001199
if (!sc || !(sc->flags & SCF_FROZEN) || sc->type == SC_USER)
12011200
continue;
1201+
if (sc->flags & SCF_LINK_DOWN)
1202+
continue;
12021203

12031204
sc_enable(sc); /* will clear the sc frozen flag */
12041205
}
12051206
}
12061207

1208+
/**
1209+
* pio_kernel_linkup() - Re-enable send contexts after linkup event
1210+
* @dd: valid devive data
1211+
*
1212+
* When the link goes down, the freeze path is taken. However, a link down
1213+
* event is different from a freeze because if the send context is re-enabled
1214+
* whowever is sending data will start sending data again, which will hang
1215+
* any QP that is sending data.
1216+
*
1217+
* The freeze path now looks at the type of event that occurs and takes this
1218+
* path for link down event.
1219+
*/
1220+
void pio_kernel_linkup(struct hfi1_devdata *dd)
1221+
{
1222+
struct send_context *sc;
1223+
int i;
1224+
1225+
for (i = 0; i < dd->num_send_contexts; i++) {
1226+
sc = dd->send_contexts[i].sc;
1227+
if (!sc || !(sc->flags & SCF_LINK_DOWN) || sc->type == SC_USER)
1228+
continue;
1229+
1230+
sc_enable(sc); /* will clear the sc link down flag */
1231+
}
1232+
}
1233+
12071234
/*
12081235
* Wait for the SendPioInitCtxt.PioInitInProgress bit to clear.
12091236
* Returns:
@@ -1403,11 +1430,10 @@ void sc_stop(struct send_context *sc, int flag)
14031430
{
14041431
unsigned long flags;
14051432

1406-
/* mark the context */
1407-
sc->flags |= flag;
1408-
14091433
/* stop buffer allocations */
14101434
spin_lock_irqsave(&sc->alloc_lock, flags);
1435+
/* mark the context */
1436+
sc->flags |= flag;
14111437
sc->flags &= ~SCF_ENABLED;
14121438
spin_unlock_irqrestore(&sc->alloc_lock, flags);
14131439
wake_up(&sc->halt_wait);

drivers/infiniband/hw/hfi1/pio.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,7 @@ struct send_context {
145145
#define SCF_IN_FREE 0x02
146146
#define SCF_HALTED 0x04
147147
#define SCF_FROZEN 0x08
148+
#define SCF_LINK_DOWN 0x10
148149

149150
struct send_context_info {
150151
struct send_context *sc; /* allocated working context */
@@ -312,6 +313,7 @@ void set_pio_integrity(struct send_context *sc);
312313
void pio_reset_all(struct hfi1_devdata *dd);
313314
void pio_freeze(struct hfi1_devdata *dd);
314315
void pio_kernel_unfreeze(struct hfi1_devdata *dd);
316+
void pio_kernel_linkup(struct hfi1_devdata *dd);
315317

316318
/* global PIO send control operations */
317319
#define PSC_GLOBAL_ENABLE 0

0 commit comments

Comments
 (0)