Skip to content

Commit 6e1d31c

Browse files
Chen Ridong authored and htejun committed
cpuset: separate generate_sched_domains for v1 and v2
The generate_sched_domains() function currently handles both v1 and v2 logic. However, the underlying mechanisms for building scheduler domains differ significantly between the two versions. For cpuset v2, scheduler domains are straightforwardly derived from valid partitions, whereas cpuset v1 employs a more complex union-find algorithm to merge overlapping cpusets. Co-locating these implementations complicates maintenance.

This patch, along with subsequent ones, aims to separate the v1 and v2 logic. For ease of review, this patch first copies the generate_sched_domains() function into cpuset-v1.c as cpuset1_generate_sched_domains() and removes v2-specific code. Common helpers and top_cpuset are declared in cpuset-internal.h. When operating in v1 mode, the code now calls cpuset1_generate_sched_domains().

Currently there is some code duplication, which will be largely eliminated once v1-specific code is removed from v2 in the following patch.

Signed-off-by: Chen Ridong <chenridong@huawei.com>
Reviewed-by: Waiman Long <longman@redhat.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
1 parent cb33f88 commit 6e1d31c

3 files changed

Lines changed: 185 additions & 27 deletions

File tree

kernel/cgroup/cpuset-internal.h

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include <linux/cpuset.h>
1010
#include <linux/spinlock.h>
1111
#include <linux/union_find.h>
12+
#include <linux/sched/isolation.h>
1213

1314
/* See "Frequency meter" comments, below. */
1415

@@ -185,6 +186,8 @@ struct cpuset {
185186
#endif
186187
};
187188

189+
extern struct cpuset top_cpuset;
190+
188191
static inline struct cpuset *css_cs(struct cgroup_subsys_state *css)
189192
{
190193
return css ? container_of(css, struct cpuset, css) : NULL;
@@ -242,6 +245,21 @@ static inline int is_spread_slab(const struct cpuset *cs)
242245
return test_bit(CS_SPREAD_SLAB, &cs->flags);
243246
}
244247

248+
/*
249+
* Helper routine for generate_sched_domains().
250+
* Do cpusets a, b have overlapping effective cpus_allowed masks?
251+
*/
252+
static inline int cpusets_overlap(struct cpuset *a, struct cpuset *b)
253+
{
254+
return cpumask_intersects(a->effective_cpus, b->effective_cpus);
255+
}
256+
257+
static inline int nr_cpusets(void)
258+
{
259+
/* jump label reference count + the top-level cpuset */
260+
return static_key_count(&cpusets_enabled_key.key) + 1;
261+
}
262+
245263
/**
246264
* cpuset_for_each_child - traverse online children of a cpuset
247265
* @child_cs: loop cursor pointing to the current child
@@ -298,6 +316,9 @@ void cpuset1_init(struct cpuset *cs);
298316
void cpuset1_online_css(struct cgroup_subsys_state *css);
299317
void update_domain_attr_tree(struct sched_domain_attr *dattr,
300318
struct cpuset *root_cs);
319+
int cpuset1_generate_sched_domains(cpumask_var_t **domains,
320+
struct sched_domain_attr **attributes);
321+
301322
#else
302323
static inline void cpuset1_update_task_spread_flags(struct cpuset *cs,
303324
struct task_struct *tsk) {}
@@ -311,6 +332,8 @@ static inline void cpuset1_init(struct cpuset *cs) {}
311332
static inline void cpuset1_online_css(struct cgroup_subsys_state *css) {}
312333
static inline void update_domain_attr_tree(struct sched_domain_attr *dattr,
313334
struct cpuset *root_cs) {}
335+
static inline int cpuset1_generate_sched_domains(cpumask_var_t **domains,
336+
struct sched_domain_attr **attributes) { return 0; };
314337

315338
#endif /* CONFIG_CPUSETS_V1 */
316339

kernel/cgroup/cpuset-v1.c

Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -580,6 +580,164 @@ void update_domain_attr_tree(struct sched_domain_attr *dattr,
580580
rcu_read_unlock();
581581
}
582582

583+
/*
584+
* cpuset1_generate_sched_domains()
585+
*
586+
* Finding the best partition (set of domains):
587+
* The double nested loops below over i, j scan over the load
588+
* balanced cpusets (using the array of cpuset pointers in csa[])
589+
* looking for pairs of cpusets that have overlapping cpus_allowed
590+
* and merging them using a union-find algorithm.
591+
*
592+
* The union of the cpus_allowed masks from the set of all cpusets
593+
* having the same root then form the one element of the partition
594+
* (one sched domain) to be passed to partition_sched_domains().
595+
*/
596+
int cpuset1_generate_sched_domains(cpumask_var_t **domains,
597+
struct sched_domain_attr **attributes)
598+
{
599+
struct cpuset *cp; /* top-down scan of cpusets */
600+
struct cpuset **csa; /* array of all cpuset ptrs */
601+
int csn; /* how many cpuset ptrs in csa so far */
602+
int i, j; /* indices for partition finding loops */
603+
cpumask_var_t *doms; /* resulting partition; i.e. sched domains */
604+
struct sched_domain_attr *dattr; /* attributes for custom domains */
605+
int ndoms = 0; /* number of sched domains in result */
606+
int nslot; /* next empty doms[] struct cpumask slot */
607+
struct cgroup_subsys_state *pos_css;
608+
bool root_load_balance = is_sched_load_balance(&top_cpuset);
609+
int nslot_update;
610+
611+
lockdep_assert_cpuset_lock_held();
612+
613+
doms = NULL;
614+
dattr = NULL;
615+
csa = NULL;
616+
617+
/* Special case for the 99% of systems with one, full, sched domain */
618+
if (root_load_balance) {
619+
ndoms = 1;
620+
doms = alloc_sched_domains(ndoms);
621+
if (!doms)
622+
goto done;
623+
624+
dattr = kmalloc(sizeof(struct sched_domain_attr), GFP_KERNEL);
625+
if (dattr) {
626+
*dattr = SD_ATTR_INIT;
627+
update_domain_attr_tree(dattr, &top_cpuset);
628+
}
629+
cpumask_and(doms[0], top_cpuset.effective_cpus,
630+
housekeeping_cpumask(HK_TYPE_DOMAIN));
631+
632+
goto done;
633+
}
634+
635+
csa = kmalloc_array(nr_cpusets(), sizeof(cp), GFP_KERNEL);
636+
if (!csa)
637+
goto done;
638+
csn = 0;
639+
640+
rcu_read_lock();
641+
if (root_load_balance)
642+
csa[csn++] = &top_cpuset;
643+
cpuset_for_each_descendant_pre(cp, pos_css, &top_cpuset) {
644+
if (cp == &top_cpuset)
645+
continue;
646+
647+
/*
648+
* Continue traversing beyond @cp iff @cp has some CPUs and
649+
* isn't load balancing. The former is obvious. The
650+
* latter: All child cpusets contain a subset of the
651+
* parent's cpus, so just skip them, and then we call
652+
* update_domain_attr_tree() to calc relax_domain_level of
653+
* the corresponding sched domain.
654+
*/
655+
if (!cpumask_empty(cp->cpus_allowed) &&
656+
!(is_sched_load_balance(cp) &&
657+
cpumask_intersects(cp->cpus_allowed,
658+
housekeeping_cpumask(HK_TYPE_DOMAIN))))
659+
continue;
660+
661+
if (is_sched_load_balance(cp) &&
662+
!cpumask_empty(cp->effective_cpus))
663+
csa[csn++] = cp;
664+
665+
/* skip @cp's subtree */
666+
pos_css = css_rightmost_descendant(pos_css);
667+
continue;
668+
}
669+
rcu_read_unlock();
670+
671+
for (i = 0; i < csn; i++)
672+
uf_node_init(&csa[i]->node);
673+
674+
/* Merge overlapping cpusets */
675+
for (i = 0; i < csn; i++) {
676+
for (j = i + 1; j < csn; j++) {
677+
if (cpusets_overlap(csa[i], csa[j]))
678+
uf_union(&csa[i]->node, &csa[j]->node);
679+
}
680+
}
681+
682+
/* Count the total number of domains */
683+
for (i = 0; i < csn; i++) {
684+
if (uf_find(&csa[i]->node) == &csa[i]->node)
685+
ndoms++;
686+
}
687+
688+
/*
689+
* Now we know how many domains to create.
690+
* Convert <csn, csa> to <ndoms, doms> and populate cpu masks.
691+
*/
692+
doms = alloc_sched_domains(ndoms);
693+
if (!doms)
694+
goto done;
695+
696+
/*
697+
* The rest of the code, including the scheduler, can deal with
698+
* dattr==NULL case. No need to abort if alloc fails.
699+
*/
700+
dattr = kmalloc_array(ndoms, sizeof(struct sched_domain_attr),
701+
GFP_KERNEL);
702+
703+
for (nslot = 0, i = 0; i < csn; i++) {
704+
nslot_update = 0;
705+
for (j = i; j < csn; j++) {
706+
if (uf_find(&csa[j]->node) == &csa[i]->node) {
707+
struct cpumask *dp = doms[nslot];
708+
709+
if (i == j) {
710+
nslot_update = 1;
711+
cpumask_clear(dp);
712+
if (dattr)
713+
*(dattr + nslot) = SD_ATTR_INIT;
714+
}
715+
cpumask_or(dp, dp, csa[j]->effective_cpus);
716+
cpumask_and(dp, dp, housekeeping_cpumask(HK_TYPE_DOMAIN));
717+
if (dattr)
718+
update_domain_attr_tree(dattr + nslot, csa[j]);
719+
}
720+
}
721+
if (nslot_update)
722+
nslot++;
723+
}
724+
BUG_ON(nslot != ndoms);
725+
726+
done:
727+
kfree(csa);
728+
729+
/*
730+
* Fallback to the default domain if kmalloc() failed.
731+
* See comments in partition_sched_domains().
732+
*/
733+
if (doms == NULL)
734+
ndoms = 1;
735+
736+
*domains = doms;
737+
*attributes = dattr;
738+
return ndoms;
739+
}
740+
583741
/*
584742
* for the common functions, 'private' gives the type of file
585743
*/

kernel/cgroup/cpuset.c

Lines changed: 4 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,7 @@ static inline void notify_partition_change(struct cpuset *cs, int old_prs)
211211
* If cpu_online_mask is used while a hotunplug operation is happening in
212212
* parallel, we may leave an offline CPU in cpu_allowed or some other masks.
213213
*/
214-
static struct cpuset top_cpuset = {
214+
struct cpuset top_cpuset = {
215215
.flags = BIT(CS_CPU_EXCLUSIVE) |
216216
BIT(CS_MEM_EXCLUSIVE) | BIT(CS_SCHED_LOAD_BALANCE),
217217
.partition_root_state = PRS_ROOT,
@@ -744,21 +744,6 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial)
744744
}
745745

746746
#ifdef CONFIG_SMP
747-
/*
748-
* Helper routine for generate_sched_domains().
749-
* Do cpusets a, b have overlapping effective cpus_allowed masks?
750-
*/
751-
static int cpusets_overlap(struct cpuset *a, struct cpuset *b)
752-
{
753-
return cpumask_intersects(a->effective_cpus, b->effective_cpus);
754-
}
755-
756-
/* Must be called with cpuset_mutex held. */
757-
static inline int nr_cpusets(void)
758-
{
759-
/* jump label reference count + the top-level cpuset */
760-
return static_key_count(&cpusets_enabled_key.key) + 1;
761-
}
762747

763748
/*
764749
* generate_sched_domains()
@@ -798,17 +783,6 @@ static inline int nr_cpusets(void)
798783
* convenient format, that can be easily compared to the prior
799784
* value to determine what partition elements (sched domains)
800785
* were changed (added or removed.)
801-
*
802-
* Finding the best partition (set of domains):
803-
* The double nested loops below over i, j scan over the load
804-
* balanced cpusets (using the array of cpuset pointers in csa[])
805-
* looking for pairs of cpusets that have overlapping cpus_allowed
806-
* and merging them using a union-find algorithm.
807-
*
808-
* The union of the cpus_allowed masks from the set of all cpusets
809-
* having the same root then form the one element of the partition
810-
* (one sched domain) to be passed to partition_sched_domains().
811-
*
812786
*/
813787
static int generate_sched_domains(cpumask_var_t **domains,
814788
struct sched_domain_attr **attributes)
@@ -826,6 +800,9 @@ static int generate_sched_domains(cpumask_var_t **domains,
826800
bool cgrpv2 = cpuset_v2();
827801
int nslot_update;
828802

803+
if (!cgrpv2)
804+
return cpuset1_generate_sched_domains(domains, attributes);
805+
829806
doms = NULL;
830807
dattr = NULL;
831808
csa = NULL;

0 commit comments

Comments (0)