Skip to content

Commit 264c285

Browse files
benhor01 authored and
James Morse committed
arm_mpam: resctrl: Add monitor initialisation and domain boilerplate
Add the boilerplate that tells resctrl about the mpam monitors that are available. resctrl expects all (non-telemetry) monitors to be on the L3 and so advertise them there and invent an L3 resctrl resource if required. The L3 cache itself has to exist as the cache ids are used as the domain ids. Bring the resctrl monitor domains online and offline based on the cpus they contain. Support for specific monitor types is left to later. Tested-by: Punit Agrawal <punit.agrawal@oss.qualcomm.com> Reviewed-by: Zeng Heng <zengheng4@huawei.com> Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com> Signed-off-by: Ben Horgan <ben.horgan@arm.com> Reviewed-by: Gavin Shan <gshan@redhat.com> Tested-by: Gavin Shan <gshan@redhat.com> Tested-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Tested-by: Jesse Chick <jessechick@os.amperecomputing.com> Signed-off-by: James Morse <james.morse@arm.com>
1 parent 5dc8f73 commit 264c285

2 files changed

Lines changed: 235 additions & 11 deletions

File tree

drivers/resctrl/mpam_internal.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -336,7 +336,16 @@ struct mpam_msc_ris {
336336

337337
/*
 * Per-domain state: the MPAM components backing a domain together with the
 * resctrl control-domain and L3 monitor-domain structures embedded in it.
 */
struct mpam_resctrl_dom {
338338
struct mpam_component *ctrl_comp;
339+
340+
/*
341+
* There is no single mon_comp because different events may be backed
342+
* by different class/components. mon_comp is indexed by the event
343+
* number.
344+
*/
345+
struct mpam_component *mon_comp[QOS_NUM_EVENTS];
346+
339347
struct rdt_ctrl_domain resctrl_ctrl_dom;
348+
struct rdt_l3_mon_domain resctrl_mon_dom;
340349
};
341350

342351
struct mpam_resctrl_res {
@@ -345,6 +354,12 @@ struct mpam_resctrl_res {
345354
bool cdp_enabled;
346355
};
347356

357+
/*
 * Per-event monitoring state. Holds the MPAM class selected for one resctrl
 * event; currently the class pointer is the only member.
 */
struct mpam_resctrl_mon {
358+
struct mpam_class *class;
359+
360+
/* Placeholder: per-class data that resctrl needs will be added here. */
361+
};
362+
348363
static inline int mpam_alloc_csu_mon(struct mpam_class *class)
349364
{
350365
struct mpam_props *cprops = &class->props;

drivers/resctrl/mpam_resctrl.c

Lines changed: 220 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,23 @@ static struct mpam_resctrl_res mpam_resctrl_controls[RDT_NUM_RESOURCES];
3434
rid < RDT_NUM_RESOURCES; \
3535
rid++, res = &mpam_resctrl_controls[rid])
3636

37+
/*
38+
* The classes we've picked to map to resctrl events.
39+
* Resctrl believes all the world's a Xeon, and these are all on the L3. This
40+
* array lets us find the actual class backing the event counters. e.g.
41+
* the only memory bandwidth counters may be on the memory controller, but to
42+
* make use of them, we pretend they are on L3. Restrict the events considered
43+
* to those supported by MPAM.
44+
* Class pointer may be NULL.
45+
*/
46+
/* Highest resctrl event id considered; the array below has MPAM_MAX_EVENT + 1 slots. */
#define MPAM_MAX_EVENT QOS_L3_MBM_TOTAL_EVENT_ID
47+
static struct mpam_resctrl_mon mpam_resctrl_counters[MPAM_MAX_EVENT + 1];
48+
49+
/*
 * Iterate over mpam_resctrl_counters from QOS_FIRST_EVENT up to and including
 * MPAM_MAX_EVENT. Both @eventid and @mon are assigned by the loop; @mon
 * points at the array slot indexed by @eventid.
 */
#define for_each_mpam_resctrl_mon(mon, eventid) \
50+
for (eventid = QOS_FIRST_EVENT, mon = &mpam_resctrl_counters[eventid]; \
51+
eventid <= MPAM_MAX_EVENT; \
52+
eventid++, mon = &mpam_resctrl_counters[eventid])
53+
3754
/* The lock for modifying resctrl's domain lists from cpuhp callbacks. */
3855
static DEFINE_MUTEX(domain_list_lock);
3956

@@ -63,6 +80,15 @@ bool resctrl_arch_alloc_capable(void)
6380
return false;
6481
}
6582

83+
/*
 * Report whether monitoring is available.
 * Returns the mon_capable flag of the L3 entry in mpam_resctrl_controls.
 */
bool resctrl_arch_mon_capable(void)
84+
{
85+
struct mpam_resctrl_res *res = &mpam_resctrl_controls[RDT_RESOURCE_L3];
86+
struct rdt_resource *l3 = &res->resctrl_res;
87+
88+
/* All monitors are presented as being on the L3 cache */
89+
return l3->mon_capable;
90+
}
91+
6692
bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level rid)
6793
{
6894
return mpam_resctrl_controls[rid].cdp_enabled;
@@ -89,6 +115,8 @@ static void resctrl_reset_task_closids(void)
89115
int resctrl_arch_set_cdp_enabled(enum resctrl_res_level rid, bool enable)
90116
{
91117
u32 partid_i = RESCTRL_RESERVED_CLOSID, partid_d = RESCTRL_RESERVED_CLOSID;
118+
struct mpam_resctrl_res *res = &mpam_resctrl_controls[RDT_RESOURCE_L3];
119+
struct rdt_resource *l3 = &res->resctrl_res;
92120
int cpu;
93121

94122
if (!IS_ENABLED(CONFIG_EXPERT) && enable) {
@@ -110,6 +138,11 @@ int resctrl_arch_set_cdp_enabled(enum resctrl_res_level rid, bool enable)
110138
cdp_enabled = enable;
111139
mpam_resctrl_controls[rid].cdp_enabled = enable;
112140

141+
if (enable)
142+
l3->mon.num_rmid = resctrl_arch_system_num_rmid_idx() / 2;
143+
else
144+
l3->mon.num_rmid = resctrl_arch_system_num_rmid_idx();
145+
113146
/* The mbw_max feature can't hide cdp as it's a per-partid maximum. */
114147
if (cdp_enabled && !mpam_resctrl_controls[RDT_RESOURCE_MBA].cdp_enabled)
115148
mpam_resctrl_controls[RDT_RESOURCE_MBA].resctrl_res.alloc_capable = false;
@@ -674,6 +707,56 @@ static int mpam_resctrl_pick_domain_id(int cpu, struct mpam_component *comp)
674707
return comp->comp_id;
675708
}
676709

710+
/*
 * Advertise one monitoring event to resctrl through the L3 resource.
 *
 * @mon:  counter entry for the event; not dereferenced in this function,
 *        the class is read from mpam_resctrl_counters[@type] instead.
 * @type: resctrl event id being initialised.
 *
 * The caller must hold the cpu hotplug lock (asserted below).
 * Always returns 0. If no L3 cache id is found for the current CPU the
 * function returns early without touching the L3 resource.
 */
static int mpam_resctrl_monitor_init(struct mpam_resctrl_mon *mon,
711+
enum resctrl_event_id type)
712+
{
713+
struct mpam_resctrl_res *res = &mpam_resctrl_controls[RDT_RESOURCE_L3];
714+
struct rdt_resource *l3 = &res->resctrl_res;
715+
716+
lockdep_assert_cpus_held();
717+
718+
/*
719+
* There also needs to be an L3 cache present.
720+
* The check just requires any online CPU and it can't go offline as we
721+
* hold the cpu lock.
722+
*/
723+
if (get_cpu_cacheinfo_id(raw_smp_processor_id(), 3) == -1)
724+
return 0;
725+
726+
/*
727+
* If there are no MPAM resources on L3, force it into existence.
728+
* topology_matches_l3() already ensures this looks like the L3.
729+
* The domain-ids will be fixed up by mpam_resctrl_domain_hdr_init().
730+
*/
731+
if (!res->class) {
732+
pr_warn_once("Faking L3 MSC to enable counters.\n");
733+
res->class = mpam_resctrl_counters[type].class;
734+
}
735+
736+
/*
737+
* Called multiple times, once per event type that has a
738+
* monitoring class.
739+
* Setting name is necessary on monitor only platforms.
740+
*/
741+
l3->name = "L3";
742+
l3->mon_scope = RESCTRL_L3_CACHE;
743+
744+
/*
745+
* num-rmid is the upper bound for the number of monitoring groups that
746+
* can exist simultaneously, including the default monitoring group for
747+
* each control group. Hence, advertise the whole rmid_idx space even
748+
* though each control group has its own pmg/rmid space. Unfortunately,
749+
* this does mean userspace needs to know the architecture to correctly
750+
* interpret this value.
751+
*/
752+
l3->mon.num_rmid = resctrl_arch_system_num_rmid_idx();
753+
754+
/* mon_capable is only set when resctrl_enable_mon_event() returns true. */
if (resctrl_enable_mon_event(type, false, 0, NULL))
755+
l3->mon_capable = true;
756+
757+
return 0;
758+
}
759+
677760
u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_ctrl_domain *d,
678761
u32 closid, enum resctrl_conf_type type)
679762
{
@@ -901,11 +984,26 @@ static void mpam_resctrl_domain_insert(struct list_head *list,
901984
list_add_tail_rcu(&new->list, pos);
902985
}
903986

987+
/*
 * Return the first component on @class's component list whose affinity mask
 * contains @cpu, or NULL if no component matches.
 *
 * The list is walked inside an mpam_srcu read-side section taken with
 * guard(), so the section ends when the function returns.
 * NOTE(review): the returned pointer outlives that SRCU section; presumably
 * the caller's locking keeps the component alive - confirm.
 */
static struct mpam_component *find_component(struct mpam_class *class, int cpu)
988+
{
989+
struct mpam_component *comp;
990+
991+
guard(srcu)(&mpam_srcu);
992+
list_for_each_entry_srcu(comp, &class->components, class_list,
993+
srcu_read_lock_held(&mpam_srcu)) {
994+
if (cpumask_test_cpu(cpu, &comp->affinity))
995+
return comp;
996+
}
997+
998+
return NULL;
999+
}
1000+
9041001
static struct mpam_resctrl_dom *
9051002
mpam_resctrl_alloc_domain(unsigned int cpu, struct mpam_resctrl_res *res)
9061003
{
9071004
int err;
9081005
struct mpam_resctrl_dom *dom;
1006+
struct rdt_l3_mon_domain *mon_d;
9091007
struct rdt_ctrl_domain *ctrl_d;
9101008
struct mpam_class *class = res->class;
9111009
struct mpam_component *comp_iter, *ctrl_comp;
@@ -945,15 +1043,92 @@ mpam_resctrl_alloc_domain(unsigned int cpu, struct mpam_resctrl_res *res)
9451043
} else {
9461044
pr_debug("Skipped control domain online - no controls\n");
9471045
}
1046+
1047+
if (r->mon_capable) {
1048+
struct mpam_component *any_mon_comp;
1049+
struct mpam_resctrl_mon *mon;
1050+
enum resctrl_event_id eventid;
1051+
1052+
/*
1053+
* Even if the monitor domain is backed by a different
1054+
* component, the L3 component IDs need to be used... only
1055+
* there may be no ctrl_comp for the L3.
1056+
* Search each event's class list for a component with
1057+
* overlapping CPUs and set up the dom->mon_comp array.
1058+
*/
1059+
1060+
for_each_mpam_resctrl_mon(mon, eventid) {
1061+
struct mpam_component *mon_comp;
1062+
1063+
if (!mon->class)
1064+
continue; // dummy resource
1065+
1066+
mon_comp = find_component(mon->class, cpu);
1067+
dom->mon_comp[eventid] = mon_comp;
1068+
if (mon_comp)
1069+
any_mon_comp = mon_comp;
1070+
}
1071+
if (!any_mon_comp) {
1072+
WARN_ON_ONCE(0);
1073+
err = -EFAULT;
1074+
goto offline_ctrl_domain;
1075+
}
1076+
1077+
mon_d = &dom->resctrl_mon_dom;
1078+
mpam_resctrl_domain_hdr_init(cpu, any_mon_comp, r->rid, &mon_d->hdr);
1079+
mon_d->hdr.type = RESCTRL_MON_DOMAIN;
1080+
err = resctrl_online_mon_domain(r, &mon_d->hdr);
1081+
if (err)
1082+
goto offline_ctrl_domain;
1083+
1084+
mpam_resctrl_domain_insert(&r->mon_domains, &mon_d->hdr);
1085+
} else {
1086+
pr_debug("Skipped monitor domain online - no monitors\n");
1087+
}
1088+
9481089
return dom;
9491090

1091+
offline_ctrl_domain:
1092+
if (r->alloc_capable) {
1093+
mpam_resctrl_offline_domain_hdr(cpu, &ctrl_d->hdr);
1094+
resctrl_offline_ctrl_domain(r, ctrl_d);
1095+
}
9501096
free_domain:
9511097
kfree(dom);
9521098
dom = ERR_PTR(err);
9531099

9541100
return dom;
9551101
}
9561102

1103+
/*
1104+
* We know all the monitors are associated with the L3, even if there are no
1105+
* controls and therefore no control component. Find the cache-id for the CPU
1106+
* and use that to search for existing resctrl domains.
1107+
* This relies on mpam_resctrl_pick_domain_id() using the L3 cache-id
1108+
* for anything that is not a cache.
*
* Returns the domain whose monitor-domain header id equals the CPU's L3
* cache-id, or NULL when the L3 resource has no class, the cache-id lookup
* returns a negative value, or no listed domain matches.
* The caller must hold the cpu hotplug lock (asserted below).
1109+
*/
1110+
static struct mpam_resctrl_dom *mpam_resctrl_get_mon_domain_from_cpu(int cpu)
1111+
{
1112+
int cache_id;
1113+
struct mpam_resctrl_dom *dom;
1114+
struct mpam_resctrl_res *l3 = &mpam_resctrl_controls[RDT_RESOURCE_L3];
1115+
1116+
lockdep_assert_cpus_held();
1117+
1118+
/* Without a class on the L3 resource there is nothing to search. */
if (!l3->class)
1119+
return NULL;
1120+
cache_id = get_cpu_cacheinfo_id(cpu, 3);
1121+
if (cache_id < 0)
1122+
return NULL;
1123+
1124+
/* Match on the monitor domain's header id, which is compared against the L3 cache-id. */
list_for_each_entry_rcu(dom, &l3->resctrl_res.mon_domains, resctrl_mon_dom.hdr.list) {
1125+
if (dom->resctrl_mon_dom.hdr.id == cache_id)
1126+
return dom;
1127+
}
1128+
1129+
return NULL;
1130+
}
1131+
9571132
static struct mpam_resctrl_dom *
9581133
mpam_resctrl_get_domain_from_cpu(int cpu, struct mpam_resctrl_res *res)
9591134
{
@@ -967,7 +1142,11 @@ mpam_resctrl_get_domain_from_cpu(int cpu, struct mpam_resctrl_res *res)
9671142
return dom;
9681143
}
9691144

970-
return NULL;
1145+
if (r->rid != RDT_RESOURCE_L3)
1146+
return NULL;
1147+
1148+
/* Search the mon domain list too - needed on monitor only platforms. */
1149+
return mpam_resctrl_get_mon_domain_from_cpu(cpu);
9711150
}
9721151

9731152
int mpam_resctrl_online_cpu(unsigned int cpu)
@@ -994,6 +1173,11 @@ int mpam_resctrl_online_cpu(unsigned int cpu)
9941173

9951174
mpam_resctrl_online_domain_hdr(cpu, &ctrl_d->hdr);
9961175
}
1176+
if (r->mon_capable) {
1177+
struct rdt_l3_mon_domain *mon_d = &dom->resctrl_mon_dom;
1178+
1179+
mpam_resctrl_online_domain_hdr(cpu, &mon_d->hdr);
1180+
}
9971181
}
9981182
}
9991183

@@ -1012,8 +1196,9 @@ void mpam_resctrl_offline_cpu(unsigned int cpu)
10121196
guard(mutex)(&domain_list_lock);
10131197
for_each_mpam_resctrl_control(res, rid) {
10141198
struct mpam_resctrl_dom *dom;
1199+
struct rdt_l3_mon_domain *mon_d;
10151200
struct rdt_ctrl_domain *ctrl_d;
1016-
bool ctrl_dom_empty;
1201+
bool ctrl_dom_empty, mon_dom_empty;
10171202
struct rdt_resource *r = &res->resctrl_res;
10181203

10191204
if (!res->class)
@@ -1032,7 +1217,16 @@ void mpam_resctrl_offline_cpu(unsigned int cpu)
10321217
ctrl_dom_empty = true;
10331218
}
10341219

1035-
if (ctrl_dom_empty)
1220+
if (r->mon_capable) {
1221+
mon_d = &dom->resctrl_mon_dom;
1222+
mon_dom_empty = mpam_resctrl_offline_domain_hdr(cpu, &mon_d->hdr);
1223+
if (mon_dom_empty)
1224+
resctrl_offline_mon_domain(&res->resctrl_res, &mon_d->hdr);
1225+
} else {
1226+
mon_dom_empty = true;
1227+
}
1228+
1229+
if (ctrl_dom_empty && mon_dom_empty)
10361230
kfree(dom);
10371231
}
10381232
}
@@ -1042,12 +1236,15 @@ int mpam_resctrl_setup(void)
10421236
int err = 0;
10431237
struct mpam_resctrl_res *res;
10441238
enum resctrl_res_level rid;
1239+
struct mpam_resctrl_mon *mon;
1240+
enum resctrl_event_id eventid;
10451241

10461242
wait_event(wait_cacheinfo_ready, cacheinfo_ready);
10471243

10481244
cpus_read_lock();
10491245
for_each_mpam_resctrl_control(res, rid) {
10501246
INIT_LIST_HEAD_RCU(&res->resctrl_res.ctrl_domains);
1247+
INIT_LIST_HEAD_RCU(&res->resctrl_res.mon_domains);
10511248
res->resctrl_res.rid = rid;
10521249
}
10531250

@@ -1063,25 +1260,37 @@ int mpam_resctrl_setup(void)
10631260
err = mpam_resctrl_control_init(res);
10641261
if (err) {
10651262
pr_debug("Failed to initialise rid %u\n", rid);
1066-
break;
1263+
goto internal_error;
10671264
}
10681265
}
1069-
cpus_read_unlock();
10701266

1071-
if (err) {
1072-
pr_debug("Internal error %d - resctrl not supported\n", err);
1073-
return err;
1267+
for_each_mpam_resctrl_mon(mon, eventid) {
1268+
if (!mon->class)
1269+
continue; // dummy resource
1270+
1271+
err = mpam_resctrl_monitor_init(mon, eventid);
1272+
if (err) {
1273+
pr_debug("Failed to initialise event %u\n", eventid);
1274+
goto internal_error;
1275+
}
10741276
}
10751277

1076-
if (!resctrl_arch_alloc_capable()) {
1077-
pr_debug("No alloc(%u) found - resctrl not supported\n",
1078-
resctrl_arch_alloc_capable());
1278+
cpus_read_unlock();
1279+
1280+
if (!resctrl_arch_alloc_capable() && !resctrl_arch_mon_capable()) {
1281+
pr_debug("No alloc(%u) or monitor(%u) found - resctrl not supported\n",
1282+
resctrl_arch_alloc_capable(), resctrl_arch_mon_capable());
10791283
return -EOPNOTSUPP;
10801284
}
10811285

10821286
/* TODO: call resctrl_init() */
10831287

10841288
return 0;
1289+
1290+
internal_error:
1291+
cpus_read_unlock();
1292+
pr_debug("Internal error %d - resctrl not supported\n", err);
1293+
return err;
10851294
}
10861295

10871296
static int __init __cacheinfo_ready(void)

0 commit comments

Comments
 (0)