Skip to content

Commit 7db82a5

Browse files
Eran Ben Elisha authored and gregkh committed
net/mlx5: Fix a race when moving command interface to events mode
commit d43b700 upstream. After the driver creates (via FW command) an EQ for commands, the driver will be informed of new command completions by EQE. However, due to a race in the driver's internal command mode metadata update, some new commands will still be mishandled by the driver as if we were in polling mode. Such commands can get two non-forced completions, leading to access of an already freed command entry. The CREATE_EQ command, which maps the EQ to the command queue, must be posted to the command queue while it is empty, and no other command should be posted. Add a SW mechanism so that once the CREATE_EQ command is about to be executed, all other commands will return an error without being sent to the FW. Allow sending other commands only after successfully changing the driver's internal command mode metadata. We can safely return an error to all other commands while creating the command EQ, as all other commands might be sent from the user/application during driver load. The application can rerun them later, after the driver's load has finished. Fixes: e126ba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com> Signed-off-by: Moshe Shemesh <moshe@mellanox.com> Signed-off-by: Saeed Mahameed <saeedm@mellanox.com> Cc: Timo Rothenpieler <timo@rothenpieler.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
1 parent 3fa9daa commit 7db82a5

3 files changed

Lines changed: 40 additions & 4 deletions

File tree

drivers/net/ethernet/mellanox/mlx5/core/cmd.c

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -875,6 +875,14 @@ static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg);
875875
static void mlx5_free_cmd_msg(struct mlx5_core_dev *dev,
876876
struct mlx5_cmd_msg *msg);
877877

878+
/*
 * opcode_allowed - check whether @opcode may currently be issued.
 * @cmd:    command interface state holding the allowed_opcode filter
 * @opcode: opcode of the command about to be executed
 *
 * Returns true when no restriction is in place (allowed_opcode equals
 * CMD_ALLOWED_OPCODE_ALL) or when @opcode is exactly the one opcode
 * currently allowed; returns false otherwise.
 */
static bool opcode_allowed(struct mlx5_cmd *cmd, u16 opcode)
879+
{
880+
if (cmd->allowed_opcode == CMD_ALLOWED_OPCODE_ALL)
881+
return true;
882+
883+
return cmd->allowed_opcode == opcode;
884+
}
885+
878886
static void cmd_work_handler(struct work_struct *work)
879887
{
880888
struct mlx5_cmd_work_ent *ent = container_of(work, struct mlx5_cmd_work_ent, work);
@@ -941,7 +949,8 @@ static void cmd_work_handler(struct work_struct *work)
941949

942950
/* Skip sending command to fw if internal error */
943951
if (pci_channel_offline(dev->pdev) ||
944-
dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
952+
dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR ||
953+
!opcode_allowed(&dev->cmd, ent->op)) {
945954
u8 status = 0;
946955
u32 drv_synd;
947956

@@ -1459,6 +1468,22 @@ static void create_debugfs_files(struct mlx5_core_dev *dev)
14591468
mlx5_cmdif_debugfs_init(dev);
14601469
}
14611470

1471+
/*
 * mlx5_cmd_allowed_opcode - set the opcode filter of the command interface.
 * @dev:    device whose dev->cmd.allowed_opcode is updated
 * @opcode: the single opcode to allow, or CMD_ALLOWED_OPCODE_ALL to lift
 *          the restriction
 *
 * Takes cmd->sem max_reg_cmds times and then cmd->pages_sem before writing
 * cmd->allowed_opcode, and releases them in the reverse order afterwards.
 *
 * NOTE(review): presumably holding every count of both semaphores ensures
 * that no other command is in flight while the filter changes -- confirm
 * against the users of cmd->sem / cmd->pages_sem, which are not visible here.
 */
void mlx5_cmd_allowed_opcode(struct mlx5_core_dev *dev, u16 opcode)
1472+
{
1473+
struct mlx5_cmd *cmd = &dev->cmd;
1474+
int i;
1475+
1476+
for (i = 0; i < cmd->max_reg_cmds; i++)
1477+
down(&cmd->sem);
1478+
down(&cmd->pages_sem);
1479+
1480+
cmd->allowed_opcode = opcode;
1481+
1482+
up(&cmd->pages_sem);
1483+
for (i = 0; i < cmd->max_reg_cmds; i++)
1484+
up(&cmd->sem);
1485+
}
1486+
14621487
static void mlx5_cmd_change_mod(struct mlx5_core_dev *dev, int mode)
14631488
{
14641489
struct mlx5_cmd *cmd = &dev->cmd;
@@ -1751,12 +1776,13 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
17511776
int err;
17521777
u8 status = 0;
17531778
u32 drv_synd;
1779+
u16 opcode;
17541780
u8 token;
17551781

1782+
opcode = MLX5_GET(mbox_in, in, opcode);
17561783
if (pci_channel_offline(dev->pdev) ||
1757-
dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
1758-
u16 opcode = MLX5_GET(mbox_in, in, opcode);
1759-
1784+
dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR ||
1785+
!opcode_allowed(&dev->cmd, opcode)) {
17601786
err = mlx5_internal_err_ret_value(dev, opcode, &drv_synd, &status);
17611787
MLX5_SET(mbox_out, out, status, status);
17621788
MLX5_SET(mbox_out, out, syndrome, drv_synd);
@@ -2058,6 +2084,7 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev)
20582084
mlx5_core_dbg(dev, "descriptor at dma 0x%llx\n", (unsigned long long)(cmd->dma));
20592085

20602086
cmd->mode = CMD_MODE_POLLING;
2087+
cmd->allowed_opcode = CMD_ALLOWED_OPCODE_ALL;
20612088

20622089
create_msg_cache(dev);
20632090

drivers/net/ethernet/mellanox/mlx5/core/eq.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -648,11 +648,13 @@ static int create_async_eqs(struct mlx5_core_dev *dev)
648648
.nent = MLX5_NUM_CMD_EQE,
649649
.mask[0] = 1ull << MLX5_EVENT_TYPE_CMD,
650650
};
651+
mlx5_cmd_allowed_opcode(dev, MLX5_CMD_OP_CREATE_EQ);
651652
err = setup_async_eq(dev, &table->cmd_eq, &param, "cmd");
652653
if (err)
653654
goto err1;
654655

655656
mlx5_cmd_use_events(dev);
657+
mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL);
656658

657659
param = (struct mlx5_eq_param) {
658660
.irq_index = 0,
@@ -682,6 +684,7 @@ static int create_async_eqs(struct mlx5_core_dev *dev)
682684
mlx5_cmd_use_polling(dev);
683685
cleanup_async_eq(dev, &table->cmd_eq, "cmd");
684686
err1:
687+
mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL);
685688
mlx5_eq_notifier_unregister(dev, &table->cq_err_nb);
686689
return err;
687690
}

include/linux/mlx5/driver.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -299,6 +299,7 @@ struct mlx5_cmd {
299299
struct semaphore sem;
300300
struct semaphore pages_sem;
301301
int mode;
302+
u16 allowed_opcode;
302303
struct mlx5_cmd_work_ent *ent_arr[MLX5_MAX_COMMANDS];
303304
struct dma_pool *pool;
304305
struct mlx5_cmd_debug dbg;
@@ -890,10 +891,15 @@ mlx5_frag_buf_get_idx_last_contig_stride(struct mlx5_frag_buf_ctrl *fbc, u32 ix)
890891
return min_t(u32, last_frag_stride_idx - fbc->strides_offset, fbc->sz_m1);
891892
}
892893

894+
/*
 * Sentinel value for mlx5_cmd.allowed_opcode: when the field holds
 * CMD_ALLOWED_OPCODE_ALL, opcode_allowed() accepts every opcode.
 * As the first enumerator with no initializer, its value is 0.
 */
enum {
895+
CMD_ALLOWED_OPCODE_ALL,
896+
};
897+
893898
int mlx5_cmd_init(struct mlx5_core_dev *dev);
894899
void mlx5_cmd_cleanup(struct mlx5_core_dev *dev);
895900
void mlx5_cmd_use_events(struct mlx5_core_dev *dev);
896901
void mlx5_cmd_use_polling(struct mlx5_core_dev *dev);
902+
void mlx5_cmd_allowed_opcode(struct mlx5_core_dev *dev, u16 opcode);
897903

898904
struct mlx5_async_ctx {
899905
struct mlx5_core_dev *dev;

0 commit comments

Comments
 (0)