Skip to content

Commit dc68989

Browse files
cazou authored and Hans Verkuil committed
media: rkvdec: Switch to using structs instead of writel
In an effort to merge the rkvdec2 driver [1] with this one, switch from writel() calls to using structs to represent the register mappings. This is done in order to have all supported decoders use the same format in the future and ease reading of the code. Using structs also improves stability as the hardware is tested and validated downstream using a similar method. It was noticed, on decoders, that: - Some registers need to be written in increasing order [2] - Some registers, even if unrelated, need to be written to their reset values (it was the case here for axi_ddr_[rw]data). Using structs can also help improve performance later when, e.g. multicore support is added on RK3588. Performance seems to be slightly improved or, at the very least, not made worse. Running fluster's JVT-AVC_V1 test suite with GStreamer on the Radxa ROCK PI 4 SE gives the following times: Before this patch: - --jobs 1: Ran 129/135 tests successfully in 77.167 secs - --jobs 6: Ran 129/135 tests successfully in 23.046 secs With this patch: - --jobs 1: Ran 129/135 tests successfully in 70.698 secs - --jobs 6: Ran 129/135 tests successfully in 22.917 secs This also shows that the fluster score hasn't changed. [1]: https://lore.kernel.org/all/20250325213303.826925-1-detlev.casanova@collabora.com/ [2]: https://lore.kernel.org/all/20200127143009.15677-5-andrzej.p@collabora.com/ Tested-by: Diederik de Haas <didi.debian@cknow.org> # Rock 5B Reviewed-by: Nicolas Dufresne <nicolas.dufresne@collabora.com> Signed-off-by: Detlev Casanova <detlev.casanova@collabora.com> Signed-off-by: Nicolas Dufresne <nicolas.dufresne@collabora.com> Signed-off-by: Hans Verkuil <hverkuil+cisco@kernel.org>
1 parent 4cb9cd8 commit dc68989

6 files changed

Lines changed: 604 additions & 455 deletions

File tree

drivers/media/platform/rockchip/rkvdec/rkvdec-h264.c

Lines changed: 71 additions & 95 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ struct rkvdec_h264_run {
115115
/*
 * State kept by the H.264 backend.
 *
 * priv_tbl: auxiliary buffer; presumably backs struct rkvdec_h264_priv_tbl
 *           (CABAC table, parameter set, RPS, error info) - TODO confirm.
 * reflists: reference lists (struct rkvdec_h264_reflists).
 * regs:     CPU-side image of the decoder registers. config_registers()
 *           zeroes it, fills it in field by field and then copies it to
 *           the device with rkvdec_memcpy_toio().
 */
struct rkvdec_h264_ctx {
116116
struct rkvdec_aux_buf priv_tbl;
117117
struct rkvdec_h264_reflists reflists;
118+
struct rkvdec_regs regs;
118119
};
119120

120121
#define CABAC_ENTRY(ctxidx, idc0_m, idc0_n, idc1_m, idc1_n, \
@@ -841,45 +842,41 @@ static void assemble_hw_scaling_list(struct rkvdec_ctx *ctx,
841842
}
842843

843844
/*
844-
* dpb poc related registers table
845+
* Set the ref POC in the correct register.
846+
*
847+
* The 32 registers are spread across 3 regions, each alternating top and bottom ref POCs:
848+
* - 1: ref 0 to 14 contain top 0 to 7 and bottoms 0 to 6
849+
* - 2: ref 15 to 29 contain top 8 to 14 and bottoms 7 to 14
850+
* - 3: ref 30 and 31 which correspond to top 15 and bottom 15 respectively.
845851
*/
846-
static const u32 poc_reg_tbl_top_field[16] = {
847-
RKVDEC_REG_H264_POC_REFER0(0),
848-
RKVDEC_REG_H264_POC_REFER0(2),
849-
RKVDEC_REG_H264_POC_REFER0(4),
850-
RKVDEC_REG_H264_POC_REFER0(6),
851-
RKVDEC_REG_H264_POC_REFER0(8),
852-
RKVDEC_REG_H264_POC_REFER0(10),
853-
RKVDEC_REG_H264_POC_REFER0(12),
854-
RKVDEC_REG_H264_POC_REFER0(14),
855-
RKVDEC_REG_H264_POC_REFER1(1),
856-
RKVDEC_REG_H264_POC_REFER1(3),
857-
RKVDEC_REG_H264_POC_REFER1(5),
858-
RKVDEC_REG_H264_POC_REFER1(7),
859-
RKVDEC_REG_H264_POC_REFER1(9),
860-
RKVDEC_REG_H264_POC_REFER1(11),
861-
RKVDEC_REG_H264_POC_REFER1(13),
862-
RKVDEC_REG_H264_POC_REFER2(0)
863-
};
864-
865-
static const u32 poc_reg_tbl_bottom_field[16] = {
866-
RKVDEC_REG_H264_POC_REFER0(1),
867-
RKVDEC_REG_H264_POC_REFER0(3),
868-
RKVDEC_REG_H264_POC_REFER0(5),
869-
RKVDEC_REG_H264_POC_REFER0(7),
870-
RKVDEC_REG_H264_POC_REFER0(9),
871-
RKVDEC_REG_H264_POC_REFER0(11),
872-
RKVDEC_REG_H264_POC_REFER0(13),
873-
RKVDEC_REG_H264_POC_REFER1(0),
874-
RKVDEC_REG_H264_POC_REFER1(2),
875-
RKVDEC_REG_H264_POC_REFER1(4),
876-
RKVDEC_REG_H264_POC_REFER1(6),
877-
RKVDEC_REG_H264_POC_REFER1(8),
878-
RKVDEC_REG_H264_POC_REFER1(10),
879-
RKVDEC_REG_H264_POC_REFER1(12),
880-
RKVDEC_REG_H264_POC_REFER1(14),
881-
RKVDEC_REG_H264_POC_REFER2(1)
882-
};
852+
static void set_poc_reg(struct rkvdec_regs *regs, uint32_t poc, int id, bool bottom)
853+
{
854+
if (!bottom) {
855+
switch (id) {
856+
case 0 ... 7:
857+
regs->h26x.ref0_14_poc[id * 2] = poc;
858+
break;
859+
case 8 ... 14:
860+
regs->h26x.ref15_29_poc[(id - 8) * 2 + 1] = poc;
861+
break;
862+
case 15:
863+
regs->h26x.ref30_poc = poc;
864+
break;
865+
}
866+
} else {
867+
switch (id) {
868+
case 0 ... 6:
869+
regs->h26x.ref0_14_poc[id * 2 + 1] = poc;
870+
break;
871+
case 7 ... 14:
872+
regs->h26x.ref15_29_poc[(id - 7) * 2] = poc;
873+
break;
874+
case 15:
875+
regs->h26x.ref31_poc = poc;
876+
break;
877+
}
878+
}
879+
}
883880

884881
static void config_registers(struct rkvdec_ctx *ctx,
885882
struct rkvdec_h264_run *run)
@@ -894,6 +891,7 @@ static void config_registers(struct rkvdec_ctx *ctx,
894891
struct vb2_v4l2_buffer *src_buf = run->base.bufs.src;
895892
struct vb2_v4l2_buffer *dst_buf = run->base.bufs.dst;
896893
const struct v4l2_format *f;
894+
struct rkvdec_regs *regs = &h264_ctx->regs;
897895
dma_addr_t rlc_addr;
898896
dma_addr_t refer_addr;
899897
u32 rlc_len;
@@ -903,10 +901,11 @@ static void config_registers(struct rkvdec_ctx *ctx,
903901
u32 yuv_virstride = 0;
904902
u32 offset;
905903
dma_addr_t dst_addr;
906-
u32 reg, i;
904+
u32 i;
907905

908-
reg = RKVDEC_MODE(RKVDEC_MODE_H264);
909-
writel_relaxed(reg, rkvdec->regs + RKVDEC_REG_SYSCTRL);
906+
memset(regs, 0, sizeof(*regs));
907+
908+
regs->common.reg02.dec_mode = RKVDEC_MODE_H264;
910909

911910
f = &ctx->decoded_fmt;
912911
dst_fmt = &f->fmt.pix_mp;
@@ -921,39 +920,35 @@ static void config_registers(struct rkvdec_ctx *ctx,
921920
else if (sps->chroma_format_idc == 2)
922921
yuv_virstride = 2 * y_virstride;
923922

924-
reg = RKVDEC_Y_HOR_VIRSTRIDE(hor_virstride / 16) |
925-
RKVDEC_UV_HOR_VIRSTRIDE(hor_virstride / 16) |
926-
RKVDEC_SLICE_NUM_HIGHBIT |
927-
RKVDEC_SLICE_NUM_LOWBITS(0x7ff);
928-
writel_relaxed(reg, rkvdec->regs + RKVDEC_REG_PICPAR);
923+
regs->common.reg03.uv_hor_virstride = hor_virstride / 16;
924+
regs->common.reg03.y_hor_virstride = hor_virstride / 16;
925+
regs->common.reg03.slice_num_highbit = 1;
926+
regs->common.reg03.slice_num_lowbits = 0x7ff;
929927

930928
/* config rlc base address */
931929
rlc_addr = vb2_dma_contig_plane_dma_addr(&src_buf->vb2_buf, 0);
932-
writel_relaxed(rlc_addr, rkvdec->regs + RKVDEC_REG_STRM_RLC_BASE);
933-
writel_relaxed(rlc_addr, rkvdec->regs + RKVDEC_REG_RLCWRITE_BASE);
930+
regs->common.strm_rlc_base = rlc_addr;
931+
regs->h26x.rlcwrite_base = rlc_addr;
934932

935933
rlc_len = vb2_get_plane_payload(&src_buf->vb2_buf, 0);
936-
reg = RKVDEC_STRM_LEN(rlc_len);
937-
writel_relaxed(reg, rkvdec->regs + RKVDEC_REG_STRM_LEN);
934+
regs->common.stream_len = rlc_len;
938935

939936
/* config cabac table */
940937
offset = offsetof(struct rkvdec_h264_priv_tbl, cabac_table);
941-
writel_relaxed(priv_start_addr + offset,
942-
rkvdec->regs + RKVDEC_REG_CABACTBL_PROB_BASE);
938+
regs->common.cabactbl_base = priv_start_addr + offset;
943939

944940
/* config output base address */
945941
dst_addr = vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0);
946-
writel_relaxed(dst_addr, rkvdec->regs + RKVDEC_REG_DECOUT_BASE);
942+
regs->common.decout_base = dst_addr;
947943

948-
reg = RKVDEC_Y_VIRSTRIDE(y_virstride / 16);
949-
writel_relaxed(reg, rkvdec->regs + RKVDEC_REG_Y_VIRSTRIDE);
944+
regs->common.reg08.y_virstride = y_virstride / 16;
950945

951-
reg = RKVDEC_YUV_VIRSTRIDE(yuv_virstride / 16);
952-
writel_relaxed(reg, rkvdec->regs + RKVDEC_REG_YUV_VIRSTRIDE);
946+
regs->common.reg09.yuv_virstride = yuv_virstride / 16;
953947

954948
/* config ref pic address & poc */
955949
for (i = 0; i < ARRAY_SIZE(dec_params->dpb); i++) {
956950
struct vb2_buffer *vb_buf = run->ref_buf[i];
951+
struct ref_base *base;
957952

958953
/*
959954
* If a DPB entry is unused or invalid, address of current destination
@@ -963,54 +958,37 @@ static void config_registers(struct rkvdec_ctx *ctx,
963958
vb_buf = &dst_buf->vb2_buf;
964959
refer_addr = vb2_dma_contig_plane_dma_addr(vb_buf, 0);
965960

966-
if (dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE)
967-
refer_addr |= RKVDEC_COLMV_USED_FLAG_REF;
968-
if (dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_FIELD)
969-
refer_addr |= RKVDEC_FIELD_REF;
970-
971-
if (dpb[i].fields & V4L2_H264_TOP_FIELD_REF)
972-
refer_addr |= RKVDEC_TOPFIELD_USED_REF;
973-
if (dpb[i].fields & V4L2_H264_BOTTOM_FIELD_REF)
974-
refer_addr |= RKVDEC_BOTFIELD_USED_REF;
975-
976-
writel_relaxed(dpb[i].top_field_order_cnt,
977-
rkvdec->regs + poc_reg_tbl_top_field[i]);
978-
writel_relaxed(dpb[i].bottom_field_order_cnt,
979-
rkvdec->regs + poc_reg_tbl_bottom_field[i]);
980-
981961
if (i < V4L2_H264_NUM_DPB_ENTRIES - 1)
982-
writel_relaxed(refer_addr,
983-
rkvdec->regs + RKVDEC_REG_H264_BASE_REFER(i));
962+
base = &regs->h26x.ref0_14_base[i];
984963
else
985-
writel_relaxed(refer_addr,
986-
rkvdec->regs + RKVDEC_REG_H264_BASE_REFER15);
987-
}
964+
base = &regs->h26x.ref15_base;
988965

989-
reg = RKVDEC_CUR_POC(dec_params->top_field_order_cnt);
990-
writel_relaxed(reg, rkvdec->regs + RKVDEC_REG_CUR_POC0);
966+
base->base_addr = refer_addr >> 4;
967+
base->field_ref = !!(dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_FIELD);
968+
base->colmv_use_flag_ref = !!(dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE);
969+
base->topfield_used_ref = !!(dpb[i].fields & V4L2_H264_TOP_FIELD_REF);
970+
base->botfield_used_ref = !!(dpb[i].fields & V4L2_H264_BOTTOM_FIELD_REF);
991971

992-
reg = RKVDEC_CUR_POC(dec_params->bottom_field_order_cnt);
993-
writel_relaxed(reg, rkvdec->regs + RKVDEC_REG_CUR_POC1);
972+
set_poc_reg(regs, dpb[i].top_field_order_cnt, i, false);
973+
set_poc_reg(regs, dpb[i].bottom_field_order_cnt, i, true);
974+
}
975+
976+
regs->h26x.cur_poc = dec_params->top_field_order_cnt;
977+
regs->h26x.cur_poc1 = dec_params->bottom_field_order_cnt;
994978

995979
/* config hw pps address */
996980
offset = offsetof(struct rkvdec_h264_priv_tbl, param_set);
997-
writel_relaxed(priv_start_addr + offset,
998-
rkvdec->regs + RKVDEC_REG_PPS_BASE);
981+
regs->h26x.pps_base = priv_start_addr + offset;
999982

1000983
/* config hw rps address */
1001984
offset = offsetof(struct rkvdec_h264_priv_tbl, rps);
1002-
writel_relaxed(priv_start_addr + offset,
1003-
rkvdec->regs + RKVDEC_REG_RPS_BASE);
1004-
1005-
reg = RKVDEC_AXI_DDR_RDATA(0);
1006-
writel_relaxed(reg, rkvdec->regs + RKVDEC_REG_AXI_DDR_RDATA);
1007-
1008-
reg = RKVDEC_AXI_DDR_WDATA(0);
1009-
writel_relaxed(reg, rkvdec->regs + RKVDEC_REG_AXI_DDR_WDATA);
985+
regs->h26x.rps_base = priv_start_addr + offset;
1010986

1011987
offset = offsetof(struct rkvdec_h264_priv_tbl, err_info);
1012-
writel_relaxed(priv_start_addr + offset,
1013-
rkvdec->regs + RKVDEC_REG_H264_ERRINFO_BASE);
988+
regs->h26x.errorinfo_base = priv_start_addr + offset;
989+
990+
rkvdec_memcpy_toio(rkvdec->regs, regs,
991+
MIN(sizeof(*regs), sizeof(u32) * rkvdec->variant->num_regs));
1014992
}
1015993

1016994
#define RKVDEC_H264_MAX_DEPTH_IN_BYTES 2
@@ -1181,8 +1159,6 @@ static int rkvdec_h264_run(struct rkvdec_ctx *ctx)
11811159

11821160
schedule_delayed_work(&rkvdec->watchdog_work, msecs_to_jiffies(2000));
11831161

1184-
writel(0, rkvdec->regs + RKVDEC_REG_STRMD_ERR_EN);
1185-
writel(0, rkvdec->regs + RKVDEC_REG_H264_ERR_E);
11861162
writel(1, rkvdec->regs + RKVDEC_REG_PREF_LUMA_CACHE_COMMAND);
11871163
writel(1, rkvdec->regs + RKVDEC_REG_PREF_CHR_CACHE_COMMAND);
11881164

drivers/media/platform/rockchip/rkvdec/rkvdec-hevc.c

Lines changed: 28 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,7 @@ struct rkvdec_hevc_run {
129129
/*
 * State kept by the HEVC backend.
 *
 * priv_tbl:             auxiliary buffer; config_registers() uses its DMA
 *                       address as the base for the CABAC table, PPS and
 *                       RPS addresses (offsets into
 *                       struct rkvdec_hevc_priv_tbl).
 * scaling_matrix_cache: presumably the last scaling matrix that was
 *                       processed - TODO confirm against its users.
 * regs:                 CPU-side image of the decoder registers.
 *                       config_registers() zeroes it, fills it in and then
 *                       copies it to the device with rkvdec_memcpy_toio().
 */
struct rkvdec_hevc_ctx {
130130
struct rkvdec_aux_buf priv_tbl;
131131
struct v4l2_ctrl_hevc_scaling_matrix scaling_matrix_cache;
132+
struct rkvdec_regs regs;
132133
};
133134

134135
struct scaling_factor {
@@ -548,6 +549,7 @@ static void config_registers(struct rkvdec_ctx *ctx,
548549
const struct v4l2_ctrl_hevc_slice_params *sl_params = &run->slices_params[0];
549550
const struct v4l2_hevc_dpb_entry *dpb = decode_params->dpb;
550551
struct rkvdec_hevc_ctx *hevc_ctx = ctx->priv;
552+
struct rkvdec_regs *regs = &hevc_ctx->regs;
551553
dma_addr_t priv_start_addr = hevc_ctx->priv_tbl.dma;
552554
const struct v4l2_pix_format_mplane *dst_fmt;
553555
struct vb2_v4l2_buffer *src_buf = run->base.bufs.src;
@@ -564,8 +566,9 @@ static void config_registers(struct rkvdec_ctx *ctx,
564566
dma_addr_t dst_addr;
565567
u32 reg, i;
566568

567-
reg = RKVDEC_MODE(RKVDEC_MODE_HEVC);
568-
writel_relaxed(reg, rkvdec->regs + RKVDEC_REG_SYSCTRL);
569+
memset(regs, 0, sizeof(*regs));
570+
571+
regs->common.reg02.dec_mode = RKVDEC_MODE_HEVC;
569572

570573
f = &ctx->decoded_fmt;
571574
dst_fmt = &f->fmt.pix_mp;
@@ -580,33 +583,27 @@ static void config_registers(struct rkvdec_ctx *ctx,
580583
else if (sps->chroma_format_idc == 2)
581584
yuv_virstride = 2 * y_virstride;
582585

583-
reg = RKVDEC_Y_HOR_VIRSTRIDE(hor_virstride / 16) |
584-
RKVDEC_UV_HOR_VIRSTRIDE(hor_virstride / 16) |
585-
RKVDEC_SLICE_NUM_LOWBITS(run->num_slices);
586-
writel_relaxed(reg, rkvdec->regs + RKVDEC_REG_PICPAR);
586+
regs->common.reg03.slice_num_lowbits = run->num_slices;
587+
regs->common.reg03.uv_hor_virstride = hor_virstride / 16;
588+
regs->common.reg03.y_hor_virstride = hor_virstride / 16;
587589

588590
/* config rlc base address */
589591
rlc_addr = vb2_dma_contig_plane_dma_addr(&src_buf->vb2_buf, 0);
590-
writel_relaxed(rlc_addr, rkvdec->regs + RKVDEC_REG_STRM_RLC_BASE);
592+
regs->common.strm_rlc_base = rlc_addr;
591593

592594
rlc_len = vb2_get_plane_payload(&src_buf->vb2_buf, 0);
593-
reg = RKVDEC_STRM_LEN(round_up(rlc_len, 16) + 64);
594-
writel_relaxed(reg, rkvdec->regs + RKVDEC_REG_STRM_LEN);
595+
regs->common.stream_len = round_up(rlc_len, 16) + 64;
595596

596597
/* config cabac table */
597598
offset = offsetof(struct rkvdec_hevc_priv_tbl, cabac_table);
598-
writel_relaxed(priv_start_addr + offset,
599-
rkvdec->regs + RKVDEC_REG_CABACTBL_PROB_BASE);
599+
regs->common.cabactbl_base = priv_start_addr + offset;
600600

601601
/* config output base address */
602602
dst_addr = vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0);
603-
writel_relaxed(dst_addr, rkvdec->regs + RKVDEC_REG_DECOUT_BASE);
604-
605-
reg = RKVDEC_Y_VIRSTRIDE(y_virstride / 16);
606-
writel_relaxed(reg, rkvdec->regs + RKVDEC_REG_Y_VIRSTRIDE);
603+
regs->common.decout_base = dst_addr;
607604

608-
reg = RKVDEC_YUV_VIRSTRIDE(yuv_virstride / 16);
609-
writel_relaxed(reg, rkvdec->regs + RKVDEC_REG_YUV_VIRSTRIDE);
605+
regs->common.reg08.y_virstride = y_virstride / 16;
606+
regs->common.reg09.yuv_virstride = yuv_virstride / 16;
610607

611608
/* config ref pic address */
612609
for (i = 0; i < 15; i++) {
@@ -620,33 +617,30 @@ static void config_registers(struct rkvdec_ctx *ctx,
620617
}
621618

622619
refer_addr = vb2_dma_contig_plane_dma_addr(vb_buf, 0);
623-
writel_relaxed(refer_addr | reg,
624-
rkvdec->regs + RKVDEC_REG_H264_BASE_REFER(i));
625620

626-
reg = RKVDEC_POC_REFER(i < decode_params->num_active_dpb_entries ?
627-
dpb[i].pic_order_cnt_val : 0);
628-
writel_relaxed(reg,
629-
rkvdec->regs + RKVDEC_REG_H264_POC_REFER0(i));
621+
regs->h26x.ref0_14_base[i].base_addr = refer_addr >> 4;
622+
regs->h26x.ref0_14_base[i].field_ref = !!(reg & 1);
623+
regs->h26x.ref0_14_base[i].topfield_used_ref = !!(reg & 2);
624+
regs->h26x.ref0_14_base[i].botfield_used_ref = !!(reg & 4);
625+
regs->h26x.ref0_14_base[i].colmv_use_flag_ref = !!(reg & 8);
626+
627+
regs->h26x.ref0_14_poc[i] = i < decode_params->num_active_dpb_entries
628+
? dpb[i].pic_order_cnt_val
629+
: 0;
630630
}
631631

632-
reg = RKVDEC_CUR_POC(sl_params->slice_pic_order_cnt);
633-
writel_relaxed(reg, rkvdec->regs + RKVDEC_REG_CUR_POC0);
632+
regs->h26x.cur_poc = sl_params->slice_pic_order_cnt;
634633

635634
/* config hw pps address */
636635
offset = offsetof(struct rkvdec_hevc_priv_tbl, param_set);
637-
writel_relaxed(priv_start_addr + offset,
638-
rkvdec->regs + RKVDEC_REG_PPS_BASE);
636+
regs->h26x.pps_base = priv_start_addr + offset;
639637

640638
/* config hw rps address */
641639
offset = offsetof(struct rkvdec_hevc_priv_tbl, rps);
642-
writel_relaxed(priv_start_addr + offset,
643-
rkvdec->regs + RKVDEC_REG_RPS_BASE);
644-
645-
reg = RKVDEC_AXI_DDR_RDATA(0);
646-
writel_relaxed(reg, rkvdec->regs + RKVDEC_REG_AXI_DDR_RDATA);
640+
regs->h26x.rps_base = priv_start_addr + offset;
647641

648-
reg = RKVDEC_AXI_DDR_WDATA(0);
649-
writel_relaxed(reg, rkvdec->regs + RKVDEC_REG_AXI_DDR_WDATA);
642+
rkvdec_memcpy_toio(rkvdec->regs, regs,
643+
MIN(sizeof(*regs), sizeof(u32) * rkvdec->variant->num_regs));
650644
}
651645

652646
#define RKVDEC_HEVC_MAX_DEPTH_IN_BYTES 2
@@ -784,8 +778,6 @@ static int rkvdec_hevc_run(struct rkvdec_ctx *ctx)
784778

785779
schedule_delayed_work(&rkvdec->watchdog_work, msecs_to_jiffies(2000));
786780

787-
writel(0, rkvdec->regs + RKVDEC_REG_STRMD_ERR_EN);
788-
writel(0, rkvdec->regs + RKVDEC_REG_H264_ERR_E);
789781
writel(1, rkvdec->regs + RKVDEC_REG_PREF_LUMA_CACHE_COMMAND);
790782
writel(1, rkvdec->regs + RKVDEC_REG_PREF_CHR_CACHE_COMMAND);
791783

0 commit comments

Comments
 (0)