Skip to content

Commit f36f3eb

Browse files
sagigrimberg authored and gregkh committed
nvmet-rdma: fix possible bogus dereference under heavy load
[ Upstream commit 8407879 ] Currently we always repost the recv buffer before we send a response capsule back to the host. Since ordering is not guaranteed for send and recv completions, it is possible that we will receive a new request from the host before we got a send completion for the response capsule. Today, we pre-allocate 2x rsps the length of the queue, but in reality, under heavy load there is nothing that is really preventing the gap to expand until we exhaust all our rsps. To fix this, if we don't have any pre-allocated rsps left, we dynamically allocate a rsp and make sure to free it when we are done. If under memory pressure we fail to allocate a rsp, we silently drop the command and wait for the host to retry. Reported-by: Steve Wise <swise@opengridcomputing.com> Tested-by: Steve Wise <swise@opengridcomputing.com> Signed-off-by: Sagi Grimberg <sagi@grimberg.me> [hch: dropped a superfluous assignment] Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Sasha Levin <alexander.levin@microsoft.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
1 parent a90a52c commit f36f3eb

1 file changed

Lines changed: 25 additions & 2 deletions

File tree

drivers/nvme/target/rdma.c

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ struct nvmet_rdma_rsp {
6565

6666
struct nvmet_req req;
6767

68+
bool allocated;
6869
u8 n_rdma;
6970
u32 flags;
7071
u32 invalidate_rkey;
@@ -167,11 +168,19 @@ nvmet_rdma_get_rsp(struct nvmet_rdma_queue *queue)
167168
unsigned long flags;
168169

169170
spin_lock_irqsave(&queue->rsps_lock, flags);
170-
rsp = list_first_entry(&queue->free_rsps,
171+
rsp = list_first_entry_or_null(&queue->free_rsps,
171172
struct nvmet_rdma_rsp, free_list);
172-
list_del(&rsp->free_list);
173+
if (likely(rsp))
174+
list_del(&rsp->free_list);
173175
spin_unlock_irqrestore(&queue->rsps_lock, flags);
174176

177+
if (unlikely(!rsp)) {
178+
rsp = kmalloc(sizeof(*rsp), GFP_KERNEL);
179+
if (unlikely(!rsp))
180+
return NULL;
181+
rsp->allocated = true;
182+
}
183+
175184
return rsp;
176185
}
177186

@@ -180,6 +189,11 @@ nvmet_rdma_put_rsp(struct nvmet_rdma_rsp *rsp)
180189
{
181190
unsigned long flags;
182191

192+
if (rsp->allocated) {
193+
kfree(rsp);
194+
return;
195+
}
196+
183197
spin_lock_irqsave(&rsp->queue->rsps_lock, flags);
184198
list_add_tail(&rsp->free_list, &rsp->queue->free_rsps);
185199
spin_unlock_irqrestore(&rsp->queue->rsps_lock, flags);
@@ -756,6 +770,15 @@ static void nvmet_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc)
756770

757771
cmd->queue = queue;
758772
rsp = nvmet_rdma_get_rsp(queue);
773+
if (unlikely(!rsp)) {
774+
/*
775+
* we get here only under memory pressure,
776+
* silently drop and have the host retry
777+
* as we can't even fail it.
778+
*/
779+
nvmet_rdma_post_recv(queue->dev, cmd);
780+
return;
781+
}
759782
rsp->queue = queue;
760783
rsp->cmd = cmd;
761784
rsp->flags = 0;

0 commit comments

Comments (0)