Gentwo Git Trees - linux/.git/commitdiff
cpuset: Treat cpusets in attaching as populated
author Chen Ridong <chenridong@huawei.com>
Fri, 14 Nov 2025 02:08:47 +0000 (02:08 +0000)
committer Tejun Heo <tj@kernel.org>
Fri, 21 Nov 2025 02:25:26 +0000 (16:25 -1000)
Currently, the check for whether a partition is populated does not
account for tasks that are still being attached to the cpuset. This is
a corner case that can leave a task stuck in a partition with no
effective CPUs.

The race condition occurs as follows:

cpu0                            cpu1
                                //cpuset A  with cpu N
migrate task p to A
cpuset_can_attach
// with effective cpus
// check ok

// cpuset_mutex is not held     // clear cpuset.cpus.exclusive
                                // making effective cpus empty
                                update_exclusive_cpumask
                                // tasks_nocpu_error check ok
                                // empty effective cpus, partition valid
cpuset_attach
...
// task p stays in A, with non-effective cpus.

To fix this issue, this patch introduces cpuset_is_populated(), which
also treats a cpuset with an attach in progress as populated. This new
helper is used in validate_change() and partition_is_populated().

Fixes: e2d59900d936 ("cgroup/cpuset: Allow no-task partition to have empty cpuset.cpus.effective")
Signed-off-by: Chen Ridong <chenridong@huawei.com>
Reviewed-by: Waiman Long <longman@redhat.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
kernel/cgroup/cpuset.c

index ec8bebc66469f8c26a49b73974c44cbbe4dc351b..1e3aadc09d3a291f473451b172be87ab95538bf8 100644 (file)
@@ -356,6 +356,15 @@ static inline bool is_in_v2_mode(void)
              (cpuset_cgrp_subsys.root->flags & CGRP_ROOT_CPUSET_V2_MODE);
 }
 
+static inline bool cpuset_is_populated(struct cpuset *cs)
+{
+       lockdep_assert_held(&cpuset_mutex);
+
+       /* Cpusets in the process of attaching should be considered as populated */
+       return cgroup_is_populated(cs->css.cgroup) ||
+               cs->attach_in_progress;
+}
+
 /**
  * partition_is_populated - check if partition has tasks
  * @cs: partition root to be checked
@@ -373,19 +382,29 @@ static inline bool is_in_v2_mode(void)
 static inline bool partition_is_populated(struct cpuset *cs,
                                          struct cpuset *excluded_child)
 {
-       struct cgroup_subsys_state *css;
-       struct cpuset *child;
+       struct cpuset *cp;
+       struct cgroup_subsys_state *pos_css;
 
-       if (cs->css.cgroup->nr_populated_csets)
+       /*
+        * We cannot call cpuset_is_populated(cs) directly, as
+        * nr_populated_domain_children may include populated
+        * csets from descendants that are partitions.
+        */
+       if (cs->css.cgroup->nr_populated_csets ||
+           cs->attach_in_progress)
                return true;
 
        rcu_read_lock();
-       cpuset_for_each_child(child, css, cs) {
-               if (child == excluded_child)
+       cpuset_for_each_descendant_pre(cp, pos_css, cs) {
+               if (cp == cs || cp == excluded_child)
                        continue;
-               if (is_partition_valid(child))
+
+               if (is_partition_valid(cp)) {
+                       pos_css = css_rightmost_descendant(pos_css);
                        continue;
-               if (cgroup_is_populated(child->css.cgroup)) {
+               }
+
+               if (cpuset_is_populated(cp)) {
                        rcu_read_unlock();
                        return true;
                }
@@ -670,7 +689,7 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial)
         * be changed to have empty cpus_allowed or mems_allowed.
         */
        ret = -ENOSPC;
-       if ((cgroup_is_populated(cur->css.cgroup) || cur->attach_in_progress)) {
+       if (cpuset_is_populated(cur)) {
                if (!cpumask_empty(cur->cpus_allowed) &&
                    cpumask_empty(trial->cpus_allowed))
                        goto out;