Skip to content

Commit e639055

Browse files
AlisonSchofield authored and davejiang committed
cxl/region: Translate DPA->HPA in unaligned MOD3 regions
The CXL driver implementation of DPA->HPA address translation depends on a region's starting address always being aligned to Host Bridge Interleave Ways * 256MB.

The driver follows the decode methods defined in the CXL Spec[1] and expanded upon in the CXL Driver Writers Guide[2], which describe bit manipulations based on power-of-2 alignment to translate a DPA to an HPA.

With the introduction of MOD3 interleave way support, platforms may create regions at starting addresses that are not power-of-2 aligned. This allows platforms to avoid gaps in the memory map, but addresses within those regions cannot be translated using the existing bit manipulation method.

Introduce an unaligned translation method for DPA->HPA that reconstructs an HPA by restoring the address first at the port level and then at the host bridge level.

[1] CXL Spec 4.0 8.2.4.20.13 Implementation Note Device Decoder Logic
[2] CXL Type 3 Memory Software Guide 1.1 2.13.25 DPA to HPA Translation

Suggested-by: Qing Huang <qing.huang@intel.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Alison Schofield <alison.schofield@intel.com>
Link: https://patch.msgid.link/e7c53215bf69f2ff1ae7e58bcc49ca387b7b0299.1768538962.git.alison.schofield@intel.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
1 parent 4ed7952 commit e639055

1 file changed

Lines changed: 155 additions & 5 deletions

File tree

drivers/cxl/core/region.c

Lines changed: 155 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3112,13 +3112,146 @@ u64 cxl_calculate_hpa_offset(u64 dpa_offset, int pos, u8 eiw, u16 eig)
31123112
}
31133113
EXPORT_SYMBOL_FOR_MODULES(cxl_calculate_hpa_offset, "cxl_translate");
31143114

3115+
/*
 * decode_pos() - split an endpoint position into port and host bridge parts
 * @region_ways: interleave ways of the region
 * @hb_ways: interleave ways at the host bridge level
 * @pos: endpoint position in the region, valid range 0..region_ways - 1
 * @pos_port: output, position of the endpoint below its host bridge
 * @pos_hb: output, position of the host bridge
 *
 * Return: 0 on success, -EINVAL if the ways combination is not a supported
 * 3-6-12 way interleave or if @pos is outside of the region. The outputs
 * are left untouched on failure.
 */
static int decode_pos(int region_ways, int hb_ways, int pos, int *pos_port,
		      int *pos_hb)
{
	int devices_per_hb;

	/*
	 * Decode for 3-6-12 way interleaves as defined in the CXL
	 * Spec 4.0 9.13.1.1 Legal Interleaving Configurations.
	 * Region creation should prevent invalid combinations but
	 * sanity check here to avoid a silent bad decode.
	 */
	switch (hb_ways) {
	case 3:
		if (region_ways != 3 && region_ways != 6 && region_ways != 12)
			return -EINVAL;
		break;
	case 6:
		if (region_ways != 6 && region_ways != 12)
			return -EINVAL;
		break;
	case 12:
		if (region_ways != 12)
			return -EINVAL;
		break;
	default:
		return -EINVAL;
	}

	/*
	 * A position outside of the region would decode to a negative or
	 * out of range port / host bridge position, which is exactly the
	 * silent bad decode the checks above are meant to prevent.
	 */
	if (pos < 0 || pos >= region_ways)
		return -EINVAL;

	/*
	 * Each host bridge contributes an equal number of endpoints
	 * that are laid out contiguously per host bridge. Modulo
	 * selects the port within a host bridge and division selects
	 * the host bridge position.
	 */
	devices_per_hb = region_ways / hb_ways;
	*pos_port = pos % devices_per_hb;
	*pos_hb = pos / devices_per_hb;

	return 0;
}
3154+
3155+
/*
3156+
* restore_parent() reconstruct the address in parent
3157+
*
3158+
* This math, specifically the bitmask creation 'mask = gran - 1' relies
3159+
* on the CXL Spec requirement that interleave granularity is always a
3160+
* power of two.
3161+
*
3162+
* [mask] isolate the offset with the granularity
3163+
* [addr & ~mask] remove the offset leaving the aligned portion
3164+
* [* ways] distribute across all interleave ways
3165+
* [+ (pos * gran)] add the positional offset
3166+
* [+ (addr & mask)] restore the masked offset
3167+
*/
3168+
static u64 restore_parent(u64 addr, u64 pos, u64 gran, u64 ways)
3169+
{
3170+
u64 mask = gran - 1;
3171+
3172+
return ((addr & ~mask) * ways) + (pos * gran) + (addr & mask);
3173+
}
3174+
3175+
/*
3176+
* unaligned_dpa_to_hpa() translates a DPA to HPA when the region resource
3177+
* start address is not aligned at Host Bridge Interleave Ways * 256MB.
3178+
*
3179+
* Unaligned start addresses only occur with MOD3 interleaves. All power-
3180+
* of-two interleaves are guaranteed aligned.
3181+
*/
3182+
static u64 unaligned_dpa_to_hpa(struct cxl_decoder *cxld,
3183+
struct cxl_region_params *p, int pos, u64 dpa)
3184+
{
3185+
int ways_port = p->interleave_ways / cxld->interleave_ways;
3186+
int gran_port = p->interleave_granularity;
3187+
int gran_hb = cxld->interleave_granularity;
3188+
int ways_hb = cxld->interleave_ways;
3189+
int pos_port, pos_hb, gran_shift;
3190+
u64 hpa_port = 0;
3191+
3192+
/* Decode an endpoint 'pos' into port and host-bridge components */
3193+
if (decode_pos(p->interleave_ways, ways_hb, pos, &pos_port, &pos_hb)) {
3194+
dev_dbg(&cxld->dev, "not supported for region ways:%d\n",
3195+
p->interleave_ways);
3196+
return ULLONG_MAX;
3197+
}
3198+
3199+
/* Restore the port parent address if needed */
3200+
if (gran_hb != gran_port)
3201+
hpa_port = restore_parent(dpa, pos_port, gran_port, ways_port);
3202+
else
3203+
hpa_port = dpa;
3204+
3205+
/*
3206+
* Complete the HPA reconstruction by restoring the address as if
3207+
* each HB position is a candidate. Test against expected pos_hb
3208+
* to confirm match.
3209+
*/
3210+
gran_shift = ilog2(gran_hb);
3211+
for (int position = 0; position < ways_hb; position++) {
3212+
u64 shifted, hpa;
3213+
3214+
hpa = restore_parent(hpa_port, position, gran_hb, ways_hb);
3215+
hpa += p->res->start;
3216+
3217+
shifted = hpa >> gran_shift;
3218+
if (do_div(shifted, ways_hb) == pos_hb)
3219+
return hpa;
3220+
}
3221+
3222+
dev_dbg(&cxld->dev, "fail dpa:%#llx region:%pr pos:%d\n", dpa, p->res,
3223+
pos);
3224+
dev_dbg(&cxld->dev, " port-w/g/p:%d/%d/%d hb-w/g/p:%d/%d/%d\n",
3225+
ways_port, gran_port, pos_port, ways_hb, gran_hb, pos_hb);
3226+
3227+
return ULLONG_MAX;
3228+
}
3229+
3230+
static bool region_is_unaligned_mod3(struct cxl_region *cxlr)
3231+
{
3232+
struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
3233+
struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
3234+
struct cxl_region_params *p = &cxlr->params;
3235+
int hbiw = cxld->interleave_ways;
3236+
u64 rem;
3237+
3238+
if (is_power_of_2(hbiw))
3239+
return false;
3240+
3241+
div64_u64_rem(p->res->start, (u64)hbiw * SZ_256M, &rem);
3242+
3243+
return (rem != 0);
3244+
}
3245+
31153246
u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd,
31163247
u64 dpa)
31173248
{
31183249
struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
3250+
struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
31193251
struct cxl_region_params *p = &cxlr->params;
31203252
struct cxl_endpoint_decoder *cxled = NULL;
31213253
u64 dpa_offset, hpa_offset, hpa;
3254+
bool unaligned = false;
31223255
u16 eig = 0;
31233256
u8 eiw = 0;
31243257
int pos;
@@ -3132,15 +3265,32 @@ u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd,
31323265
if (!cxled)
31333266
return ULLONG_MAX;
31343267

3268+
dpa_offset = dpa - cxl_dpa_resource_start(cxled);
3269+
3270+
/* Unaligned calc for MOD3 interleaves not hbiw * 256MB aligned */
3271+
unaligned = region_is_unaligned_mod3(cxlr);
3272+
if (unaligned) {
3273+
hpa = unaligned_dpa_to_hpa(cxld, p, cxled->pos, dpa_offset);
3274+
if (hpa == ULLONG_MAX)
3275+
return ULLONG_MAX;
3276+
3277+
goto skip_aligned;
3278+
}
3279+
/*
3280+
* Aligned calc for all power-of-2 interleaves and for MOD3
3281+
* interleaves that are aligned at hbiw * 256MB
3282+
*/
31353283
pos = cxled->pos;
31363284
ways_to_eiw(p->interleave_ways, &eiw);
31373285
granularity_to_eig(p->interleave_granularity, &eig);
31383286

3139-
dpa_offset = dpa - cxl_dpa_resource_start(cxled);
31403287
hpa_offset = cxl_calculate_hpa_offset(dpa_offset, pos, eiw, eig);
31413288

31423289
/* Apply the hpa_offset to the region base address */
3143-
hpa = hpa_offset + p->res->start + p->cache_size;
3290+
hpa = hpa_offset + p->res->start;
3291+
3292+
skip_aligned:
3293+
hpa += p->cache_size;
31443294

31453295
/* Root decoder translation overrides typical modulo decode */
31463296
if (cxlrd->ops.hpa_to_spa)
@@ -3151,9 +3301,9 @@ u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd,
31513301
"Addr trans fail: hpa 0x%llx not in region\n", hpa);
31523302
return ULLONG_MAX;
31533303
}
3154-
3155-
/* Simple chunk check, by pos & gran, only applies to modulo decodes */
3156-
if (!cxlrd->ops.hpa_to_spa && !cxl_is_hpa_in_chunk(hpa, cxlr, pos))
3304+
/* Chunk check applies to aligned modulo decodes only */
3305+
if (!unaligned && !cxlrd->ops.hpa_to_spa &&
3306+
!cxl_is_hpa_in_chunk(hpa, cxlr, pos))
31573307
return ULLONG_MAX;
31583308

31593309
return hpa;

0 commit comments

Comments
 (0)