Skip to content

Commit d98995b

Browse files
Jianbo Liu authored and rleon committed
net/mlx5: Reimplement write combining test
The write combining test was previously added to the mlx5_ib driver. It opens a UD QP, posts NOP WQEs, and uses the BlueFlame doorbell. When BlueFlame is used, WQEs get written directly to a PCI BAR of the device (in addition to memory) so that the device handles them without having to access memory. In this test, the WQEs written to memory differ from the ones written to the BlueFlame register, which request a CQE update. By checking the completion reports posted on the CQ, we can tell whether BlueFlame succeeded or not. Write combining must be supported if BlueFlame succeeds, as its register is written using write combining. This patch reimplements the test in the same way, but using only a pair of SQ and CQ. It is moved to mlx5_core as a general feature used by both mlx5_core and mlx5_ib. Besides, save the write combining test result of the PCI function, so that its thousands of child functions, such as SFs, can query it without each paying the time and resource penalty themselves. The test function is called only after failing to get the cached result. With this enhancement, the thousands of SFs of a PF attached to the same driver no longer need to perform the WC check explicitly, since it has already been done in the system. This saves several commands per SF, thereby speeding up SF creation, and also saves a completion EQ creation. Signed-off-by: Jianbo Liu <[email protected]> Reviewed-by: Tariq Toukan <[email protected]> Link: https://lore.kernel.org/r/4ff5a8cc4c5b5b0d98397baa45a5019bcdbf096e.1717409369.git.leon@kernel.org Signed-off-by: Leon Romanovsky <[email protected]>
1 parent 83a7eef commit d98995b

File tree

8 files changed

+451
-234
lines changed

8 files changed

+451
-234
lines changed

drivers/infiniband/hw/mlx5/main.c

+3-16
Original file line number | Diff line number | Diff line change
@@ -1810,7 +1810,7 @@ static int set_ucontext_resp(struct ib_ucontext *uctx,
18101810
}
18111811

18121812
resp->qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp);
1813-
if (dev->wc_support)
1813+
if (mlx5_wc_support_get(dev->mdev))
18141814
resp->bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev,
18151815
log_bf_reg_size);
18161816
resp->cache_line_size = cache_line_size();
@@ -2337,7 +2337,7 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm
23372337
switch (command) {
23382338
case MLX5_IB_MMAP_WC_PAGE:
23392339
case MLX5_IB_MMAP_ALLOC_WC:
2340-
if (!dev->wc_support)
2340+
if (!mlx5_wc_support_get(dev->mdev))
23412341
return -EPERM;
23422342
fallthrough;
23432343
case MLX5_IB_MMAP_NC_PAGE:
@@ -3612,7 +3612,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_UAR_OBJ_ALLOC)(
36123612
alloc_type != MLX5_IB_UAPI_UAR_ALLOC_TYPE_NC)
36133613
return -EOPNOTSUPP;
36143614

3615-
if (!to_mdev(c->ibucontext.device)->wc_support &&
3615+
if (!mlx5_wc_support_get(to_mdev(c->ibucontext.device)->mdev) &&
36163616
alloc_type == MLX5_IB_UAPI_UAR_ALLOC_TYPE_BF)
36173617
return -EOPNOTSUPP;
36183618

@@ -3766,18 +3766,6 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
37663766
return err;
37673767
}
37683768

3769-
static int mlx5_ib_enable_driver(struct ib_device *dev)
3770-
{
3771-
struct mlx5_ib_dev *mdev = to_mdev(dev);
3772-
int ret;
3773-
3774-
ret = mlx5_ib_test_wc(mdev);
3775-
mlx5_ib_dbg(mdev, "Write-Combining %s",
3776-
mdev->wc_support ? "supported" : "not supported");
3777-
3778-
return ret;
3779-
}
3780-
37813769
static const struct ib_device_ops mlx5_ib_dev_ops = {
37823770
.owner = THIS_MODULE,
37833771
.driver_id = RDMA_DRIVER_MLX5,
@@ -3808,7 +3796,6 @@ static const struct ib_device_ops mlx5_ib_dev_ops = {
38083796
.drain_rq = mlx5_ib_drain_rq,
38093797
.drain_sq = mlx5_ib_drain_sq,
38103798
.device_group = &mlx5_attr_group,
3811-
.enable_driver = mlx5_ib_enable_driver,
38123799
.get_dev_fw_str = get_dev_fw_str,
38133800
.get_dma_mr = mlx5_ib_get_dma_mr,
38143801
.get_link_layer = mlx5_ib_port_link_layer,

drivers/infiniband/hw/mlx5/mem.c

-198
Original file line number | Diff line number | Diff line change
@@ -30,10 +30,8 @@
3030
* SOFTWARE.
3131
*/
3232

33-
#include <linux/io.h>
3433
#include <rdma/ib_umem_odp.h>
3534
#include "mlx5_ib.h"
36-
#include <linux/jiffies.h>
3735

3836
/*
3937
* Fill in a physical address list. ib_umem_num_dma_blocks() entries will be
@@ -95,199 +93,3 @@ unsigned long __mlx5_umem_find_best_quantized_pgoff(
9593
return 0;
9694
return page_size;
9795
}
98-
99-
#define WR_ID_BF 0xBF
100-
#define WR_ID_END 0xBAD
101-
#define TEST_WC_NUM_WQES 255
102-
#define TEST_WC_POLLING_MAX_TIME_JIFFIES msecs_to_jiffies(100)
103-
static int post_send_nop(struct mlx5_ib_dev *dev, struct ib_qp *ibqp, u64 wr_id,
104-
bool signaled)
105-
{
106-
struct mlx5_ib_qp *qp = to_mqp(ibqp);
107-
struct mlx5_wqe_ctrl_seg *ctrl;
108-
struct mlx5_bf *bf = &qp->bf;
109-
__be32 mmio_wqe[16] = {};
110-
unsigned long flags;
111-
unsigned int idx;
112-
113-
if (unlikely(dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR))
114-
return -EIO;
115-
116-
spin_lock_irqsave(&qp->sq.lock, flags);
117-
118-
idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1);
119-
ctrl = mlx5_frag_buf_get_wqe(&qp->sq.fbc, idx);
120-
121-
memset(ctrl, 0, sizeof(struct mlx5_wqe_ctrl_seg));
122-
ctrl->fm_ce_se = signaled ? MLX5_WQE_CTRL_CQ_UPDATE : 0;
123-
ctrl->opmod_idx_opcode =
124-
cpu_to_be32(((u32)(qp->sq.cur_post) << 8) | MLX5_OPCODE_NOP);
125-
ctrl->qpn_ds = cpu_to_be32((sizeof(struct mlx5_wqe_ctrl_seg) / 16) |
126-
(qp->trans_qp.base.mqp.qpn << 8));
127-
128-
qp->sq.wrid[idx] = wr_id;
129-
qp->sq.w_list[idx].opcode = MLX5_OPCODE_NOP;
130-
qp->sq.wqe_head[idx] = qp->sq.head + 1;
131-
qp->sq.cur_post += DIV_ROUND_UP(sizeof(struct mlx5_wqe_ctrl_seg),
132-
MLX5_SEND_WQE_BB);
133-
qp->sq.w_list[idx].next = qp->sq.cur_post;
134-
qp->sq.head++;
135-
136-
memcpy(mmio_wqe, ctrl, sizeof(*ctrl));
137-
((struct mlx5_wqe_ctrl_seg *)&mmio_wqe)->fm_ce_se |=
138-
MLX5_WQE_CTRL_CQ_UPDATE;
139-
140-
/* Make sure that descriptors are written before
141-
* updating doorbell record and ringing the doorbell
142-
*/
143-
wmb();
144-
145-
qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post);
146-
147-
/* Make sure doorbell record is visible to the HCA before
148-
* we hit doorbell
149-
*/
150-
wmb();
151-
__iowrite64_copy(bf->bfreg->map + bf->offset, mmio_wqe,
152-
sizeof(mmio_wqe) / 8);
153-
154-
bf->offset ^= bf->buf_size;
155-
156-
spin_unlock_irqrestore(&qp->sq.lock, flags);
157-
158-
return 0;
159-
}
160-
161-
static int test_wc_poll_cq_result(struct mlx5_ib_dev *dev, struct ib_cq *cq)
162-
{
163-
int ret;
164-
struct ib_wc wc = {};
165-
unsigned long end = jiffies + TEST_WC_POLLING_MAX_TIME_JIFFIES;
166-
167-
do {
168-
ret = ib_poll_cq(cq, 1, &wc);
169-
if (ret < 0 || wc.status)
170-
return ret < 0 ? ret : -EINVAL;
171-
if (ret)
172-
break;
173-
} while (!time_after(jiffies, end));
174-
175-
if (!ret)
176-
return -ETIMEDOUT;
177-
178-
if (wc.wr_id != WR_ID_BF)
179-
ret = 0;
180-
181-
return ret;
182-
}
183-
184-
static int test_wc_do_send(struct mlx5_ib_dev *dev, struct ib_qp *qp)
185-
{
186-
int err, i;
187-
188-
for (i = 0; i < TEST_WC_NUM_WQES; i++) {
189-
err = post_send_nop(dev, qp, WR_ID_BF, false);
190-
if (err)
191-
return err;
192-
}
193-
194-
return post_send_nop(dev, qp, WR_ID_END, true);
195-
}
196-
197-
int mlx5_ib_test_wc(struct mlx5_ib_dev *dev)
198-
{
199-
struct ib_cq_init_attr cq_attr = { .cqe = TEST_WC_NUM_WQES + 1 };
200-
int port_type_cap = MLX5_CAP_GEN(dev->mdev, port_type);
201-
struct ib_qp_init_attr qp_init_attr = {
202-
.cap = { .max_send_wr = TEST_WC_NUM_WQES },
203-
.qp_type = IB_QPT_UD,
204-
.sq_sig_type = IB_SIGNAL_REQ_WR,
205-
.create_flags = MLX5_IB_QP_CREATE_WC_TEST,
206-
};
207-
struct ib_qp_attr qp_attr = { .port_num = 1 };
208-
struct ib_device *ibdev = &dev->ib_dev;
209-
struct ib_qp *qp;
210-
struct ib_cq *cq;
211-
struct ib_pd *pd;
212-
int ret;
213-
214-
if (!MLX5_CAP_GEN(dev->mdev, bf))
215-
return 0;
216-
217-
if (!dev->mdev->roce.roce_en &&
218-
port_type_cap == MLX5_CAP_PORT_TYPE_ETH) {
219-
if (mlx5_core_is_pf(dev->mdev))
220-
dev->wc_support = arch_can_pci_mmap_wc();
221-
return 0;
222-
}
223-
224-
ret = mlx5_alloc_bfreg(dev->mdev, &dev->wc_bfreg, true, false);
225-
if (ret)
226-
goto print_err;
227-
228-
if (!dev->wc_bfreg.wc)
229-
goto out1;
230-
231-
pd = ib_alloc_pd(ibdev, 0);
232-
if (IS_ERR(pd)) {
233-
ret = PTR_ERR(pd);
234-
goto out1;
235-
}
236-
237-
cq = ib_create_cq(ibdev, NULL, NULL, NULL, &cq_attr);
238-
if (IS_ERR(cq)) {
239-
ret = PTR_ERR(cq);
240-
goto out2;
241-
}
242-
243-
qp_init_attr.recv_cq = cq;
244-
qp_init_attr.send_cq = cq;
245-
qp = ib_create_qp(pd, &qp_init_attr);
246-
if (IS_ERR(qp)) {
247-
ret = PTR_ERR(qp);
248-
goto out3;
249-
}
250-
251-
qp_attr.qp_state = IB_QPS_INIT;
252-
ret = ib_modify_qp(qp, &qp_attr,
253-
IB_QP_STATE | IB_QP_PORT | IB_QP_PKEY_INDEX |
254-
IB_QP_QKEY);
255-
if (ret)
256-
goto out4;
257-
258-
qp_attr.qp_state = IB_QPS_RTR;
259-
ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
260-
if (ret)
261-
goto out4;
262-
263-
qp_attr.qp_state = IB_QPS_RTS;
264-
ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN);
265-
if (ret)
266-
goto out4;
267-
268-
ret = test_wc_do_send(dev, qp);
269-
if (ret < 0)
270-
goto out4;
271-
272-
ret = test_wc_poll_cq_result(dev, cq);
273-
if (ret > 0) {
274-
dev->wc_support = true;
275-
ret = 0;
276-
}
277-
278-
out4:
279-
ib_destroy_qp(qp);
280-
out3:
281-
ib_destroy_cq(cq);
282-
out2:
283-
ib_dealloc_pd(pd);
284-
out1:
285-
mlx5_free_bfreg(dev->mdev, &dev->wc_bfreg);
286-
print_err:
287-
if (ret)
288-
mlx5_ib_err(
289-
dev,
290-
"Error %d while trying to test write-combining support\n",
291-
ret);
292-
return ret;
293-
}

drivers/infiniband/hw/mlx5/mlx5_ib.h

-3
Original file line number | Diff line number | Diff line change
@@ -341,7 +341,6 @@ struct mlx5_ib_flow_db {
341341
* rely on the range reserved for that use in the ib_qp_create_flags enum.
342342
*/
343343
#define MLX5_IB_QP_CREATE_SQPN_QP1 IB_QP_CREATE_RESERVED_START
344-
#define MLX5_IB_QP_CREATE_WC_TEST (IB_QP_CREATE_RESERVED_START << 1)
345344

346345
struct wr_list {
347346
u16 opcode;
@@ -1123,7 +1122,6 @@ struct mlx5_ib_dev {
11231122
u8 ib_active:1;
11241123
u8 is_rep:1;
11251124
u8 lag_active:1;
1126-
u8 wc_support:1;
11271125
u8 fill_delay;
11281126
struct umr_common umrc;
11291127
/* sync used page count stats
@@ -1149,7 +1147,6 @@ struct mlx5_ib_dev {
11491147
/* Array with num_ports elements */
11501148
struct mlx5_ib_port *port;
11511149
struct mlx5_sq_bfreg bfreg;
1152-
struct mlx5_sq_bfreg wc_bfreg;
11531150
struct mlx5_sq_bfreg fp_bfreg;
11541151
struct mlx5_ib_delay_drop delay_drop;
11551152
const struct mlx5_ib_profile *profile;

drivers/infiniband/hw/mlx5/qp.c

-16
Original file line number | Diff line number | Diff line change
@@ -1107,8 +1107,6 @@ static int _create_kernel_qp(struct mlx5_ib_dev *dev,
11071107

11081108
if (init_attr->qp_type == MLX5_IB_QPT_REG_UMR)
11091109
qp->bf.bfreg = &dev->fp_bfreg;
1110-
else if (qp->flags & MLX5_IB_QP_CREATE_WC_TEST)
1111-
qp->bf.bfreg = &dev->wc_bfreg;
11121110
else
11131111
qp->bf.bfreg = &dev->bfreg;
11141112

@@ -2959,14 +2957,6 @@ static void process_create_flag(struct mlx5_ib_dev *dev, int *flags, int flag,
29592957
return;
29602958
}
29612959

2962-
if (flag == MLX5_IB_QP_CREATE_WC_TEST) {
2963-
/*
2964-
* Special case, if condition didn't meet, it won't be error,
2965-
* just different in-kernel flow.
2966-
*/
2967-
*flags &= ~MLX5_IB_QP_CREATE_WC_TEST;
2968-
return;
2969-
}
29702960
mlx5_ib_dbg(dev, "Verbs create QP flag 0x%X is not supported\n", flag);
29712961
}
29722962

@@ -3027,8 +3017,6 @@ static int process_create_flags(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
30273017
IB_QP_CREATE_PCI_WRITE_END_PADDING,
30283018
MLX5_CAP_GEN(mdev, end_pad), qp);
30293019

3030-
process_create_flag(dev, &create_flags, MLX5_IB_QP_CREATE_WC_TEST,
3031-
qp_type != MLX5_IB_QPT_REG_UMR, qp);
30323020
process_create_flag(dev, &create_flags, MLX5_IB_QP_CREATE_SQPN_QP1,
30333021
true, qp);
30343022

@@ -4609,10 +4597,6 @@ static bool mlx5_ib_modify_qp_allowed(struct mlx5_ib_dev *dev,
46094597
if (qp->type == IB_QPT_RAW_PACKET || qp->type == MLX5_IB_QPT_REG_UMR)
46104598
return true;
46114599

4612-
/* Internal QP used for wc testing, with NOPs in wq */
4613-
if (qp->flags & MLX5_IB_QP_CREATE_WC_TEST)
4614-
return true;
4615-
46164600
return false;
46174601
}
46184602

drivers/net/ethernet/mellanox/mlx5/core/Makefile

+1-1
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
1717
fs_counters.o fs_ft_pool.o rl.o lag/debugfs.o lag/lag.o dev.o events.o wq.o lib/gid.o \
1818
lib/devcom.o lib/pci_vsc.o lib/dm.o lib/fs_ttc.o diag/fs_tracepoint.o \
1919
diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o diag/reporter_vnic.o \
20-
fw_reset.o qos.o lib/tout.o lib/aso.o
20+
fw_reset.o qos.o lib/tout.o lib/aso.o wc.o
2121

2222
#
2323
# Netdev basic

drivers/net/ethernet/mellanox/mlx5/core/main.c

+2
Original file line number | Diff line number | Diff line change
@@ -1819,6 +1819,7 @@ int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx)
18191819
mutex_init(&dev->intf_state_mutex);
18201820
lockdep_set_class(&dev->intf_state_mutex, &dev->lock_key);
18211821
mutex_init(&dev->mlx5e_res.uplink_netdev_lock);
1822+
mutex_init(&dev->wc_state_lock);
18221823

18231824
mutex_init(&priv->bfregs.reg_head.lock);
18241825
mutex_init(&priv->bfregs.wc_head.lock);
@@ -1916,6 +1917,7 @@ void mlx5_mdev_uninit(struct mlx5_core_dev *dev)
19161917
mutex_destroy(&priv->alloc_mutex);
19171918
mutex_destroy(&priv->bfregs.wc_head.lock);
19181919
mutex_destroy(&priv->bfregs.reg_head.lock);
1920+
mutex_destroy(&dev->wc_state_lock);
19191921
mutex_destroy(&dev->mlx5e_res.uplink_netdev_lock);
19201922
mutex_destroy(&dev->intf_state_mutex);
19211923
lockdep_unregister_key(&dev->lock_key);

0 commit comments

Comments
 (0)