dect / linux-2.6

Merge branch 'for-3.3' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup

* 'for-3.3' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup: (21 commits)
  cgroup: fix to allow mounting a hierarchy by name
  cgroup: move assignement out of condition in cgroup_attach_proc()
  cgroup: Remove task_lock() from cgroup_post_fork()
  cgroup: add sparse annotation to cgroup_iter_start() and cgroup_iter_end()
  cgroup: mark cgroup_rmdir_waitq and cgroup_attach_proc() as static
  cgroup: only need to check oldcgrp==newgrp once
  cgroup: remove redundant get/put of task struct
  cgroup: remove redundant get/put of old css_set from migrate
  cgroup: Remove unnecessary task_lock before fetching css_set on migration
  cgroup: Drop task_lock(parent) on cgroup_fork()
  cgroups: remove redundant get/put of css_set from css_set_check_fetched()
  resource cgroups: remove bogus cast
  cgroup: kill subsys->can_attach_task(), pre_attach() and attach_task()
  cgroup, cpuset: don't use ss->pre_attach()
  cgroup: don't use subsys->can_attach_task() or ->attach_task()
  cgroup: introduce cgroup_taskset and use it in subsys->can_attach(), cancel_attach() and attach()
  cgroup: improve old cgroup handling in cgroup_attach_proc()
  cgroup: always lock threadgroup during migration
  threadgroup: extend threadgroup_lock() to cover exit and exec
  threadgroup: rename signal->threadgroup_fork_lock to ->group_rwsem
  ...

Fix up conflict in kernel/cgroup.c due to commit e0197aae59e5: "cgroups:
fix a css_set not found bug in cgroup_attach_proc" that already
mentioned that the bug is fixed (differently) in Tejun's cgroup
patchset. This one, in other words.
commit db0c2bf69a
Linus Torvalds, 2012-01-09 12:59:24 -08:00

15 changed files with 471 additions and 350 deletions


@@ -594,53 +594,44 @@ rmdir() will fail with it. From this behavior, pre_destroy() can be
 called multiple times against a cgroup.
 
 int can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
-	       struct task_struct *task)
+	       struct cgroup_taskset *tset)
 (cgroup_mutex held by caller)
 
-Called prior to moving a task into a cgroup; if the subsystem
-returns an error, this will abort the attach operation.  If a NULL
-task is passed, then a successful result indicates that *any*
-unspecified task can be moved into the cgroup. Note that this isn't
-called on a fork. If this method returns 0 (success) then this should
-remain valid while the caller holds cgroup_mutex and it is ensured that either
+Called prior to moving one or more tasks into a cgroup; if the
+subsystem returns an error, this will abort the attach operation.
+@tset contains the tasks to be attached and is guaranteed to have at
+least one task in it.
+
+If there are multiple tasks in the taskset, then:
+  - it's guaranteed that all are from the same thread group
+  - @tset contains all tasks from the thread group whether or not
+    they're switching cgroups
+  - the first task is the leader
+
+Each @tset entry also contains the task's old cgroup and tasks which
+aren't switching cgroup can be skipped easily using the
+cgroup_taskset_for_each() iterator. Note that this isn't called on a
+fork. If this method returns 0 (success) then this should remain valid
+while the caller holds cgroup_mutex and it is ensured that either
 attach() or cancel_attach() will be called in future.
 
-int can_attach_task(struct cgroup *cgrp, struct task_struct *tsk);
-(cgroup_mutex held by caller)
-
-As can_attach, but for operations that must be run once per task to be
-attached (possibly many when using cgroup_attach_proc). Called after
-can_attach.
-
 void cancel_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
-		   struct task_struct *task, bool threadgroup)
+		   struct cgroup_taskset *tset)
 (cgroup_mutex held by caller)
 
 Called when a task attach operation has failed after can_attach() has succeeded.
 A subsystem whose can_attach() has some side-effects should provide this
 function, so that the subsystem can implement a rollback. If not, not necessary.
 This will be called only about subsystems whose can_attach() operation have
-succeeded.
-
-void pre_attach(struct cgroup *cgrp);
-(cgroup_mutex held by caller)
-
-For any non-per-thread attachment work that needs to happen before
-attach_task. Needed by cpuset.
+succeeded. The parameters are identical to can_attach().
 
 void attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
-	    struct cgroup *old_cgrp, struct task_struct *task)
+	    struct cgroup_taskset *tset)
 (cgroup_mutex held by caller)
 
 Called after the task has been attached to the cgroup, to allow any
 post-attachment activity that requires memory allocations or blocking.
-
-void attach_task(struct cgroup *cgrp, struct task_struct *tsk);
-(cgroup_mutex held by caller)
-
-As attach, but for operations that must be run once per task to be attached,
-like can_attach_task. Called before attach. Currently does not support any
-subsystem that might need the old_cgrp for every thread in the group.
+The parameters are identical to can_attach().
 
 void fork(struct cgroup_subsy *ss, struct task_struct *task)
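
To make the callback contract described above concrete, here is an illustrative sketch (not part of this commit) of how a hypothetical subsystem, called "foo" here, might implement the taskset-based can_attach() and attach(); foo_apply_state() and the PF_KTHREAD check are invented purely for the example.

static int foo_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
			  struct cgroup_taskset *tset)
{
	struct task_struct *task;

	/* validate every task that is actually switching into @cgrp */
	cgroup_taskset_for_each(task, cgrp, tset)
		if (task->flags & PF_KTHREAD)	/* example policy only */
			return -EINVAL;
	return 0;
}

static void foo_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
		       struct cgroup_taskset *tset)
{
	struct task_struct *task;

	/* the same taskset is passed again; apply per-task side effects */
	cgroup_taskset_for_each(task, cgrp, tset)
		foo_apply_state(cgrp, task);	/* hypothetical helper */
}

Passing @cgrp as the skip_cgrp argument makes the iterator skip entries whose old cgroup is already @cgrp, which is how the converted subsystems in this merge (blkio, cpuset, cpu, perf, freezer) use it.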


@@ -30,8 +30,10 @@ EXPORT_SYMBOL_GPL(blkio_root_cgroup);
 
 static struct cgroup_subsys_state *blkiocg_create(struct cgroup_subsys *,
 						  struct cgroup *);
-static int blkiocg_can_attach_task(struct cgroup *, struct task_struct *);
-static void blkiocg_attach_task(struct cgroup *, struct task_struct *);
+static int blkiocg_can_attach(struct cgroup_subsys *, struct cgroup *,
+			      struct cgroup_taskset *);
+static void blkiocg_attach(struct cgroup_subsys *, struct cgroup *,
+			   struct cgroup_taskset *);
 static void blkiocg_destroy(struct cgroup_subsys *, struct cgroup *);
 static int blkiocg_populate(struct cgroup_subsys *, struct cgroup *);
 
@@ -44,8 +46,8 @@ static int blkiocg_populate(struct cgroup_subsys *, struct cgroup *);
 struct cgroup_subsys blkio_subsys = {
 	.name = "blkio",
 	.create = blkiocg_create,
-	.can_attach_task = blkiocg_can_attach_task,
-	.attach_task = blkiocg_attach_task,
+	.can_attach = blkiocg_can_attach,
+	.attach = blkiocg_attach,
 	.destroy = blkiocg_destroy,
 	.populate = blkiocg_populate,
 #ifdef CONFIG_BLK_CGROUP
@@ -1626,30 +1628,39 @@ done:
  * of the main cic data structures.  For now we allow a task to change
  * its cgroup only if it's the only owner of its ioc.
  */
-static int blkiocg_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
+static int blkiocg_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
+			      struct cgroup_taskset *tset)
 {
+	struct task_struct *task;
 	struct io_context *ioc;
 	int ret = 0;
 
 	/* task_lock() is needed to avoid races with exit_io_context() */
-	task_lock(tsk);
-	ioc = tsk->io_context;
-	if (ioc && atomic_read(&ioc->nr_tasks) > 1)
-		ret = -EINVAL;
-	task_unlock(tsk);
-
+	cgroup_taskset_for_each(task, cgrp, tset) {
+		task_lock(task);
+		ioc = task->io_context;
+		if (ioc && atomic_read(&ioc->nr_tasks) > 1)
+			ret = -EINVAL;
+		task_unlock(task);
+		if (ret)
+			break;
+	}
 	return ret;
 }
 
-static void blkiocg_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
+static void blkiocg_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
+			   struct cgroup_taskset *tset)
 {
+	struct task_struct *task;
 	struct io_context *ioc;
 
-	task_lock(tsk);
-	ioc = tsk->io_context;
-	if (ioc)
-		ioc->cgroup_changed = 1;
-	task_unlock(tsk);
+	cgroup_taskset_for_each(task, cgrp, tset) {
+		task_lock(task);
+		ioc = task->io_context;
+		if (ioc)
+			ioc->cgroup_changed = 1;
+		task_unlock(task);
+	}
 }
 
 void blkio_policy_register(struct blkio_policy_type *blkiop)


@@ -456,6 +456,28 @@ int cgroup_is_descendant(const struct cgroup *cgrp, struct task_struct *task);
 void cgroup_exclude_rmdir(struct cgroup_subsys_state *css);
 void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css);
 
+/*
+ * Control Group taskset, used to pass around set of tasks to cgroup_subsys
+ * methods.
+ */
+struct cgroup_taskset;
+struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset);
+struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset);
+struct cgroup *cgroup_taskset_cur_cgroup(struct cgroup_taskset *tset);
+int cgroup_taskset_size(struct cgroup_taskset *tset);
+
+/**
+ * cgroup_taskset_for_each - iterate cgroup_taskset
+ * @task: the loop cursor
+ * @skip_cgrp: skip if task's cgroup matches this, %NULL to iterate through all
+ * @tset: taskset to iterate
+ */
+#define cgroup_taskset_for_each(task, skip_cgrp, tset)			\
+	for ((task) = cgroup_taskset_first((tset)); (task);		\
+	     (task) = cgroup_taskset_next((tset)))			\
+		if (!(skip_cgrp) ||					\
+		    cgroup_taskset_cur_cgroup((tset)) != (skip_cgrp))
+
 /*
  * Control Group subsystem type.
  * See Documentation/cgroups/cgroups.txt for details
@@ -467,14 +489,11 @@ struct cgroup_subsys {
 	int (*pre_destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp);
 	void (*destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp);
 	int (*can_attach)(struct cgroup_subsys *ss, struct cgroup *cgrp,
-			  struct task_struct *tsk);
-	int (*can_attach_task)(struct cgroup *cgrp, struct task_struct *tsk);
+			  struct cgroup_taskset *tset);
 	void (*cancel_attach)(struct cgroup_subsys *ss, struct cgroup *cgrp,
-			      struct task_struct *tsk);
-	void (*pre_attach)(struct cgroup *cgrp);
-	void (*attach_task)(struct cgroup *cgrp, struct task_struct *tsk);
+			      struct cgroup_taskset *tset);
 	void (*attach)(struct cgroup_subsys *ss, struct cgroup *cgrp,
-		       struct cgroup *old_cgrp, struct task_struct *tsk);
+		       struct cgroup_taskset *tset);
 	void (*fork)(struct cgroup_subsys *ss, struct task_struct *task);
 	void (*exit)(struct cgroup_subsys *ss, struct cgroup *cgrp,
 		     struct cgroup *old_cgrp, struct task_struct *task);
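
As a usage sketch built only on the declarations above, the iterator macro is equivalent to open-coding the accessors like this; walk_tset() is a made-up name:

static void walk_tset(struct cgroup *skip_cgrp, struct cgroup_taskset *tset)
{
	struct task_struct *task;

	for (task = cgroup_taskset_first(tset); task;
	     task = cgroup_taskset_next(tset)) {
		/* cur_cgroup is the old cgroup of the task just returned */
		if (skip_cgrp && cgroup_taskset_cur_cgroup(tset) == skip_cgrp)
			continue;	/* task is not actually moving */
		/* ... per-task work goes here ... */
	}
}

cgroup_taskset_size() reports the total number of entries in the set, independent of the skip filter.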


@@ -23,11 +23,10 @@ extern struct files_struct init_files;
 extern struct fs_struct init_fs;
 
 #ifdef CONFIG_CGROUPS
-#define INIT_THREADGROUP_FORK_LOCK(sig)					\
-	.threadgroup_fork_lock =					\
-		__RWSEM_INITIALIZER(sig.threadgroup_fork_lock),
+#define INIT_GROUP_RWSEM(sig)						\
+	.group_rwsem = __RWSEM_INITIALIZER(sig.group_rwsem),
 #else
-#define INIT_THREADGROUP_FORK_LOCK(sig)
+#define INIT_GROUP_RWSEM(sig)
 #endif
 
 #define INIT_SIGNALS(sig) {						\
@@ -46,7 +45,7 @@ extern struct fs_struct init_fs;
 	},								\
 	.cred_guard_mutex =						\
 		 __MUTEX_INITIALIZER(sig.cred_guard_mutex),		\
-	INIT_THREADGROUP_FORK_LOCK(sig)					\
+	INIT_GROUP_RWSEM(sig)						\
 }
 
 extern struct nsproxy init_nsproxy;


@@ -637,13 +637,15 @@ struct signal_struct {
 #endif
 #ifdef CONFIG_CGROUPS
 	/*
-	 * The threadgroup_fork_lock prevents threads from forking with
-	 * CLONE_THREAD while held for writing. Use this for fork-sensitive
-	 * threadgroup-wide operations. It's taken for reading in fork.c in
-	 * copy_process().
-	 * Currently only needed write-side by cgroups.
+	 * group_rwsem prevents new tasks from entering the threadgroup and
+	 * member tasks from exiting,a more specifically, setting of
+	 * PF_EXITING.  fork and exit paths are protected with this rwsem
+	 * using threadgroup_change_begin/end().  Users which require
+	 * threadgroup to remain stable should use threadgroup_[un]lock()
+	 * which also takes care of exec path.  Currently, cgroup is the
+	 * only user.
 	 */
-	struct rw_semaphore threadgroup_fork_lock;
+	struct rw_semaphore group_rwsem;
 #endif
 
 	int oom_adj;		/* OOM kill score adjustment (bit shift) */
@@ -2394,29 +2396,62 @@ static inline void unlock_task_sighand(struct task_struct *tsk,
 	spin_unlock_irqrestore(&tsk->sighand->siglock, *flags);
 }
 
-/* See the declaration of threadgroup_fork_lock in signal_struct. */
 #ifdef CONFIG_CGROUPS
-static inline void threadgroup_fork_read_lock(struct task_struct *tsk)
+static inline void threadgroup_change_begin(struct task_struct *tsk)
 {
-	down_read(&tsk->signal->threadgroup_fork_lock);
+	down_read(&tsk->signal->group_rwsem);
 }
-static inline void threadgroup_fork_read_unlock(struct task_struct *tsk)
+static inline void threadgroup_change_end(struct task_struct *tsk)
 {
-	up_read(&tsk->signal->threadgroup_fork_lock);
+	up_read(&tsk->signal->group_rwsem);
 }
-static inline void threadgroup_fork_write_lock(struct task_struct *tsk)
+
+/**
+ * threadgroup_lock - lock threadgroup
+ * @tsk: member task of the threadgroup to lock
+ *
+ * Lock the threadgroup @tsk belongs to.  No new task is allowed to enter
+ * and member tasks aren't allowed to exit (as indicated by PF_EXITING) or
+ * perform exec.  This is useful for cases where the threadgroup needs to
+ * stay stable across blockable operations.
+ *
+ * fork and exit paths explicitly call threadgroup_change_{begin|end}() for
+ * synchronization.  While held, no new task will be added to threadgroup
+ * and no existing live task will have its PF_EXITING set.
+ *
+ * During exec, a task goes and puts its thread group through unusual
+ * changes.  After de-threading, exclusive access is assumed to resources
+ * which are usually shared by tasks in the same group - e.g. sighand may
+ * be replaced with a new one.  Also, the exec'ing task takes over group
+ * leader role including its pid.  Exclude these changes while locked by
+ * grabbing cred_guard_mutex which is used to synchronize exec path.
+ */
+static inline void threadgroup_lock(struct task_struct *tsk)
 {
-	down_write(&tsk->signal->threadgroup_fork_lock);
+	/*
+	 * exec uses exit for de-threading nesting group_rwsem inside
+	 * cred_guard_mutex. Grab cred_guard_mutex first.
+	 */
+	mutex_lock(&tsk->signal->cred_guard_mutex);
+	down_write(&tsk->signal->group_rwsem);
 }
-static inline void threadgroup_fork_write_unlock(struct task_struct *tsk)
+
+/**
+ * threadgroup_unlock - unlock threadgroup
+ * @tsk: member task of the threadgroup to unlock
+ *
+ * Reverse threadgroup_lock().
+ */
+static inline void threadgroup_unlock(struct task_struct *tsk)
 {
-	up_write(&tsk->signal->threadgroup_fork_lock);
+	up_write(&tsk->signal->group_rwsem);
+	mutex_unlock(&tsk->signal->cred_guard_mutex);
 }
 #else
-static inline void threadgroup_fork_read_lock(struct task_struct *tsk) {}
-static inline void threadgroup_fork_read_unlock(struct task_struct *tsk) {}
-static inline void threadgroup_fork_write_lock(struct task_struct *tsk) {}
-static inline void threadgroup_fork_write_unlock(struct task_struct *tsk) {}
+static inline void threadgroup_change_begin(struct task_struct *tsk) {}
+static inline void threadgroup_change_end(struct task_struct *tsk) {}
+static inline void threadgroup_lock(struct task_struct *tsk) {}
+static inline void threadgroup_unlock(struct task_struct *tsk) {}
 #endif
 
 #ifndef __HAVE_THREAD_FUNCTIONS
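
A minimal sketch of the intended call pattern, mirroring attach_task_by_pid() later in this diff: hold threadgroup_lock() across a blocking, group-wide operation so no thread can be forked into, exit from, or exec over the group while it runs. operate_on_threadgroup() is a hypothetical callback.

static int do_threadgroup_op(struct task_struct *tsk)
{
	int ret;

	threadgroup_lock(tsk);	/* no fork/exit/exec in the group from here */
	ret = operate_on_threadgroup(tsk);	/* hypothetical, may sleep */
	threadgroup_unlock(tsk);

	return ret;
}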


@ -63,7 +63,24 @@
#include <linux/atomic.h> #include <linux/atomic.h>
/*
* cgroup_mutex is the master lock. Any modification to cgroup or its
* hierarchy must be performed while holding it.
*
* cgroup_root_mutex nests inside cgroup_mutex and should be held to modify
* cgroupfs_root of any cgroup hierarchy - subsys list, flags,
* release_agent_path and so on. Modifying requires both cgroup_mutex and
* cgroup_root_mutex. Readers can acquire either of the two. This is to
* break the following locking order cycle.
*
* A. cgroup_mutex -> cred_guard_mutex -> s_type->i_mutex_key -> namespace_sem
* B. namespace_sem -> cgroup_mutex
*
* B happens only through cgroup_show_options() and using cgroup_root_mutex
* breaks it.
*/
static DEFINE_MUTEX(cgroup_mutex); static DEFINE_MUTEX(cgroup_mutex);
static DEFINE_MUTEX(cgroup_root_mutex);
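
A sketch of the write-side nesting the comment above requires, mirroring what cgroup_remount() and cgroup_kill_sb() do further down; the update_root_flags() wrapper is invented for illustration.

static void update_root_flags(struct cgroupfs_root *root, unsigned long flags)
{
	mutex_lock(&cgroup_mutex);
	mutex_lock(&cgroup_root_mutex);	/* nests inside cgroup_mutex */
	root->flags = flags;		/* any cgroupfs_root modification */
	mutex_unlock(&cgroup_root_mutex);
	mutex_unlock(&cgroup_mutex);
}

Readers of these fields may take either mutex, which is what allows cgroup_show_options() below to switch to cgroup_root_mutex and break the locking cycle described above.
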
/* /*
* Generate an array of cgroup subsystem pointers. At boot time, this is * Generate an array of cgroup subsystem pointers. At boot time, this is
@ -921,7 +938,7 @@ static void cgroup_d_remove_dir(struct dentry *dentry)
* *
* CGRP_WAIT_ON_RMDIR flag is set under cgroup's inode->i_mutex; * CGRP_WAIT_ON_RMDIR flag is set under cgroup's inode->i_mutex;
*/ */
DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq); static DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq);
static void cgroup_wakeup_rmdir_waiter(struct cgroup *cgrp) static void cgroup_wakeup_rmdir_waiter(struct cgroup *cgrp)
{ {
@ -953,6 +970,7 @@ static int rebind_subsystems(struct cgroupfs_root *root,
int i; int i;
BUG_ON(!mutex_is_locked(&cgroup_mutex)); BUG_ON(!mutex_is_locked(&cgroup_mutex));
BUG_ON(!mutex_is_locked(&cgroup_root_mutex));
removed_bits = root->actual_subsys_bits & ~final_bits; removed_bits = root->actual_subsys_bits & ~final_bits;
added_bits = final_bits & ~root->actual_subsys_bits; added_bits = final_bits & ~root->actual_subsys_bits;
@ -1043,7 +1061,7 @@ static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry)
struct cgroupfs_root *root = dentry->d_sb->s_fs_info; struct cgroupfs_root *root = dentry->d_sb->s_fs_info;
struct cgroup_subsys *ss; struct cgroup_subsys *ss;
mutex_lock(&cgroup_mutex); mutex_lock(&cgroup_root_mutex);
for_each_subsys(root, ss) for_each_subsys(root, ss)
seq_printf(seq, ",%s", ss->name); seq_printf(seq, ",%s", ss->name);
if (test_bit(ROOT_NOPREFIX, &root->flags)) if (test_bit(ROOT_NOPREFIX, &root->flags))
@ -1054,7 +1072,7 @@ static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry)
seq_puts(seq, ",clone_children"); seq_puts(seq, ",clone_children");
if (strlen(root->name)) if (strlen(root->name))
seq_printf(seq, ",name=%s", root->name); seq_printf(seq, ",name=%s", root->name);
mutex_unlock(&cgroup_mutex); mutex_unlock(&cgroup_root_mutex);
return 0; return 0;
} }
@ -1175,10 +1193,10 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
/* /*
* If the 'all' option was specified select all the subsystems, * If the 'all' option was specified select all the subsystems,
* otherwise 'all, 'none' and a subsystem name options were not * otherwise if 'none', 'name=' and a subsystem name options
* specified, let's default to 'all' * were not specified, let's default to 'all'
*/ */
if (all_ss || (!all_ss && !one_ss && !opts->none)) { if (all_ss || (!one_ss && !opts->none && !opts->name)) {
for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
struct cgroup_subsys *ss = subsys[i]; struct cgroup_subsys *ss = subsys[i];
if (ss == NULL) if (ss == NULL)
@ -1269,6 +1287,7 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
mutex_lock(&cgrp->dentry->d_inode->i_mutex); mutex_lock(&cgrp->dentry->d_inode->i_mutex);
mutex_lock(&cgroup_mutex); mutex_lock(&cgroup_mutex);
mutex_lock(&cgroup_root_mutex);
/* See what subsystems are wanted */ /* See what subsystems are wanted */
ret = parse_cgroupfs_options(data, &opts); ret = parse_cgroupfs_options(data, &opts);
@ -1297,6 +1316,7 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
out_unlock: out_unlock:
kfree(opts.release_agent); kfree(opts.release_agent);
kfree(opts.name); kfree(opts.name);
mutex_unlock(&cgroup_root_mutex);
mutex_unlock(&cgroup_mutex); mutex_unlock(&cgroup_mutex);
mutex_unlock(&cgrp->dentry->d_inode->i_mutex); mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
return ret; return ret;
@ -1481,6 +1501,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
int ret = 0; int ret = 0;
struct super_block *sb; struct super_block *sb;
struct cgroupfs_root *new_root; struct cgroupfs_root *new_root;
struct inode *inode;
/* First find the desired set of subsystems */ /* First find the desired set of subsystems */
mutex_lock(&cgroup_mutex); mutex_lock(&cgroup_mutex);
@ -1514,7 +1535,6 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
/* We used the new root structure, so this is a new hierarchy */ /* We used the new root structure, so this is a new hierarchy */
struct list_head tmp_cg_links; struct list_head tmp_cg_links;
struct cgroup *root_cgrp = &root->top_cgroup; struct cgroup *root_cgrp = &root->top_cgroup;
struct inode *inode;
struct cgroupfs_root *existing_root; struct cgroupfs_root *existing_root;
const struct cred *cred; const struct cred *cred;
int i; int i;
@ -1528,18 +1548,14 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
mutex_lock(&inode->i_mutex); mutex_lock(&inode->i_mutex);
mutex_lock(&cgroup_mutex); mutex_lock(&cgroup_mutex);
mutex_lock(&cgroup_root_mutex);
if (strlen(root->name)) { /* Check for name clashes with existing mounts */
/* Check for name clashes with existing mounts */ ret = -EBUSY;
for_each_active_root(existing_root) { if (strlen(root->name))
if (!strcmp(existing_root->name, root->name)) { for_each_active_root(existing_root)
ret = -EBUSY; if (!strcmp(existing_root->name, root->name))
mutex_unlock(&cgroup_mutex); goto unlock_drop;
mutex_unlock(&inode->i_mutex);
goto drop_new_super;
}
}
}
/* /*
* We're accessing css_set_count without locking * We're accessing css_set_count without locking
@ -1549,18 +1565,13 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
* have some link structures left over * have some link structures left over
*/ */
ret = allocate_cg_links(css_set_count, &tmp_cg_links); ret = allocate_cg_links(css_set_count, &tmp_cg_links);
if (ret) { if (ret)
mutex_unlock(&cgroup_mutex); goto unlock_drop;
mutex_unlock(&inode->i_mutex);
goto drop_new_super;
}
ret = rebind_subsystems(root, root->subsys_bits); ret = rebind_subsystems(root, root->subsys_bits);
if (ret == -EBUSY) { if (ret == -EBUSY) {
mutex_unlock(&cgroup_mutex);
mutex_unlock(&inode->i_mutex);
free_cg_links(&tmp_cg_links); free_cg_links(&tmp_cg_links);
goto drop_new_super; goto unlock_drop;
} }
/* /*
* There must be no failure case after here, since rebinding * There must be no failure case after here, since rebinding
@ -1599,6 +1610,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
cred = override_creds(&init_cred); cred = override_creds(&init_cred);
cgroup_populate_dir(root_cgrp); cgroup_populate_dir(root_cgrp);
revert_creds(cred); revert_creds(cred);
mutex_unlock(&cgroup_root_mutex);
mutex_unlock(&cgroup_mutex); mutex_unlock(&cgroup_mutex);
mutex_unlock(&inode->i_mutex); mutex_unlock(&inode->i_mutex);
} else { } else {
@ -1615,6 +1627,10 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
kfree(opts.name); kfree(opts.name);
return dget(sb->s_root); return dget(sb->s_root);
unlock_drop:
mutex_unlock(&cgroup_root_mutex);
mutex_unlock(&cgroup_mutex);
mutex_unlock(&inode->i_mutex);
drop_new_super: drop_new_super:
deactivate_locked_super(sb); deactivate_locked_super(sb);
drop_modules: drop_modules:
@ -1639,6 +1655,7 @@ static void cgroup_kill_sb(struct super_block *sb) {
BUG_ON(!list_empty(&cgrp->sibling)); BUG_ON(!list_empty(&cgrp->sibling));
mutex_lock(&cgroup_mutex); mutex_lock(&cgroup_mutex);
mutex_lock(&cgroup_root_mutex);
/* Rebind all subsystems back to the default hierarchy */ /* Rebind all subsystems back to the default hierarchy */
ret = rebind_subsystems(root, 0); ret = rebind_subsystems(root, 0);
@ -1664,6 +1681,7 @@ static void cgroup_kill_sb(struct super_block *sb) {
root_count--; root_count--;
} }
mutex_unlock(&cgroup_root_mutex);
mutex_unlock(&cgroup_mutex); mutex_unlock(&cgroup_mutex);
kill_litter_super(sb); kill_litter_super(sb);
@ -1739,12 +1757,91 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
} }
EXPORT_SYMBOL_GPL(cgroup_path); EXPORT_SYMBOL_GPL(cgroup_path);
/*
* Control Group taskset
*/
struct task_and_cgroup {
struct task_struct *task;
struct cgroup *cgrp;
};
struct cgroup_taskset {
struct task_and_cgroup single;
struct flex_array *tc_array;
int tc_array_len;
int idx;
struct cgroup *cur_cgrp;
};
/**
* cgroup_taskset_first - reset taskset and return the first task
* @tset: taskset of interest
*
* @tset iteration is initialized and the first task is returned.
*/
struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset)
{
if (tset->tc_array) {
tset->idx = 0;
return cgroup_taskset_next(tset);
} else {
tset->cur_cgrp = tset->single.cgrp;
return tset->single.task;
}
}
EXPORT_SYMBOL_GPL(cgroup_taskset_first);
/**
* cgroup_taskset_next - iterate to the next task in taskset
* @tset: taskset of interest
*
* Return the next task in @tset. Iteration must have been initialized
* with cgroup_taskset_first().
*/
struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset)
{
struct task_and_cgroup *tc;
if (!tset->tc_array || tset->idx >= tset->tc_array_len)
return NULL;
tc = flex_array_get(tset->tc_array, tset->idx++);
tset->cur_cgrp = tc->cgrp;
return tc->task;
}
EXPORT_SYMBOL_GPL(cgroup_taskset_next);
/**
* cgroup_taskset_cur_cgroup - return the matching cgroup for the current task
* @tset: taskset of interest
*
* Return the cgroup for the current (last returned) task of @tset. This
* function must be preceded by either cgroup_taskset_first() or
* cgroup_taskset_next().
*/
struct cgroup *cgroup_taskset_cur_cgroup(struct cgroup_taskset *tset)
{
return tset->cur_cgrp;
}
EXPORT_SYMBOL_GPL(cgroup_taskset_cur_cgroup);
/**
* cgroup_taskset_size - return the number of tasks in taskset
* @tset: taskset of interest
*/
int cgroup_taskset_size(struct cgroup_taskset *tset)
{
return tset->tc_array ? tset->tc_array_len : 1;
}
EXPORT_SYMBOL_GPL(cgroup_taskset_size);
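
The other pattern these accessors enable is a subsystem that only needs one representative task. A hedged sketch ("bar" is a made-up subsystem) following the shape of the devcgroup and memcg conversions later in this commit:

static int bar_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
			  struct cgroup_taskset *tset)
{
	/* per the cgroups.txt update above, the first entry is the
	 * thread group leader, so a per-process check needs only it */
	struct task_struct *task = cgroup_taskset_first(tset);

	if (current != task && !capable(CAP_SYS_ADMIN))
		return -EPERM;
	return 0;
}
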
/* /*
* cgroup_task_migrate - move a task from one cgroup to another. * cgroup_task_migrate - move a task from one cgroup to another.
* *
* 'guarantee' is set if the caller promises that a new css_set for the task * 'guarantee' is set if the caller promises that a new css_set for the task
* will already exist. If not set, this function might sleep, and can fail with * will already exist. If not set, this function might sleep, and can fail with
* -ENOMEM. Otherwise, it can only fail with -ESRCH. * -ENOMEM. Must be called with cgroup_mutex and threadgroup locked.
*/ */
static int cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp, static int cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp,
struct task_struct *tsk, bool guarantee) struct task_struct *tsk, bool guarantee)
@ -1753,14 +1850,12 @@ static int cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp,
struct css_set *newcg; struct css_set *newcg;
/* /*
* get old css_set. we need to take task_lock and refcount it, because * We are synchronized through threadgroup_lock() against PF_EXITING
* an exiting task can change its css_set to init_css_set and drop its * setting such that we can't race against cgroup_exit() changing the
* old one without taking cgroup_mutex. * css_set to init_css_set and dropping the old one.
*/ */
task_lock(tsk); WARN_ON_ONCE(tsk->flags & PF_EXITING);
oldcg = tsk->cgroups; oldcg = tsk->cgroups;
get_css_set(oldcg);
task_unlock(tsk);
/* locate or allocate a new css_set for this task. */ /* locate or allocate a new css_set for this task. */
if (guarantee) { if (guarantee) {
@ -1775,20 +1870,11 @@ static int cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp,
might_sleep(); might_sleep();
/* find_css_set will give us newcg already referenced. */ /* find_css_set will give us newcg already referenced. */
newcg = find_css_set(oldcg, cgrp); newcg = find_css_set(oldcg, cgrp);
if (!newcg) { if (!newcg)
put_css_set(oldcg);
return -ENOMEM; return -ENOMEM;
}
} }
put_css_set(oldcg);
/* if PF_EXITING is set, the tsk->cgroups pointer is no longer safe. */
task_lock(tsk); task_lock(tsk);
if (tsk->flags & PF_EXITING) {
task_unlock(tsk);
put_css_set(newcg);
return -ESRCH;
}
rcu_assign_pointer(tsk->cgroups, newcg); rcu_assign_pointer(tsk->cgroups, newcg);
task_unlock(tsk); task_unlock(tsk);
@ -1814,8 +1900,8 @@ static int cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp,
* @cgrp: the cgroup the task is attaching to * @cgrp: the cgroup the task is attaching to
* @tsk: the task to be attached * @tsk: the task to be attached
* *
* Call holding cgroup_mutex. May take task_lock of * Call with cgroup_mutex and threadgroup locked. May take task_lock of
* the task 'tsk' during call. * @tsk during call.
*/ */
int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
{ {
@ -1823,15 +1909,23 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
struct cgroup_subsys *ss, *failed_ss = NULL; struct cgroup_subsys *ss, *failed_ss = NULL;
struct cgroup *oldcgrp; struct cgroup *oldcgrp;
struct cgroupfs_root *root = cgrp->root; struct cgroupfs_root *root = cgrp->root;
struct cgroup_taskset tset = { };
/* @tsk either already exited or can't exit until the end */
if (tsk->flags & PF_EXITING)
return -ESRCH;
/* Nothing to do if the task is already in that cgroup */ /* Nothing to do if the task is already in that cgroup */
oldcgrp = task_cgroup_from_root(tsk, root); oldcgrp = task_cgroup_from_root(tsk, root);
if (cgrp == oldcgrp) if (cgrp == oldcgrp)
return 0; return 0;
tset.single.task = tsk;
tset.single.cgrp = oldcgrp;
for_each_subsys(root, ss) { for_each_subsys(root, ss) {
if (ss->can_attach) { if (ss->can_attach) {
retval = ss->can_attach(ss, cgrp, tsk); retval = ss->can_attach(ss, cgrp, &tset);
if (retval) { if (retval) {
/* /*
* Remember on which subsystem the can_attach() * Remember on which subsystem the can_attach()
@ -1843,13 +1937,6 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
goto out; goto out;
} }
} }
if (ss->can_attach_task) {
retval = ss->can_attach_task(cgrp, tsk);
if (retval) {
failed_ss = ss;
goto out;
}
}
} }
retval = cgroup_task_migrate(cgrp, oldcgrp, tsk, false); retval = cgroup_task_migrate(cgrp, oldcgrp, tsk, false);
@ -1857,12 +1944,8 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
goto out; goto out;
for_each_subsys(root, ss) { for_each_subsys(root, ss) {
if (ss->pre_attach)
ss->pre_attach(cgrp);
if (ss->attach_task)
ss->attach_task(cgrp, tsk);
if (ss->attach) if (ss->attach)
ss->attach(ss, cgrp, oldcgrp, tsk); ss->attach(ss, cgrp, &tset);
} }
synchronize_rcu(); synchronize_rcu();
@ -1884,7 +1967,7 @@ out:
*/ */
break; break;
if (ss->cancel_attach) if (ss->cancel_attach)
ss->cancel_attach(ss, cgrp, tsk); ss->cancel_attach(ss, cgrp, &tset);
} }
} }
return retval; return retval;
@ -1935,23 +2018,17 @@ static bool css_set_check_fetched(struct cgroup *cgrp,
read_lock(&css_set_lock); read_lock(&css_set_lock);
newcg = find_existing_css_set(cg, cgrp, template); newcg = find_existing_css_set(cg, cgrp, template);
if (newcg)
get_css_set(newcg);
read_unlock(&css_set_lock); read_unlock(&css_set_lock);
/* doesn't exist at all? */ /* doesn't exist at all? */
if (!newcg) if (!newcg)
return false; return false;
/* see if it's already in the list */ /* see if it's already in the list */
list_for_each_entry(cg_entry, newcg_list, links) { list_for_each_entry(cg_entry, newcg_list, links)
if (cg_entry->cg == newcg) { if (cg_entry->cg == newcg)
put_css_set(newcg);
return true; return true;
}
}
/* not found */ /* not found */
put_css_set(newcg);
return false; return false;
} }
@ -1985,21 +2062,21 @@ static int css_set_prefetch(struct cgroup *cgrp, struct css_set *cg,
* @cgrp: the cgroup to attach to * @cgrp: the cgroup to attach to
* @leader: the threadgroup leader task_struct of the group to be attached * @leader: the threadgroup leader task_struct of the group to be attached
* *
* Call holding cgroup_mutex and the threadgroup_fork_lock of the leader. Will * Call holding cgroup_mutex and the group_rwsem of the leader. Will take
* take task_lock of each thread in leader's threadgroup individually in turn. * task_lock of each thread in leader's threadgroup individually in turn.
*/ */
int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
{ {
int retval, i, group_size; int retval, i, group_size;
struct cgroup_subsys *ss, *failed_ss = NULL; struct cgroup_subsys *ss, *failed_ss = NULL;
bool cancel_failed_ss = false;
/* guaranteed to be initialized later, but the compiler needs this */ /* guaranteed to be initialized later, but the compiler needs this */
struct cgroup *oldcgrp = NULL;
struct css_set *oldcg; struct css_set *oldcg;
struct cgroupfs_root *root = cgrp->root; struct cgroupfs_root *root = cgrp->root;
/* threadgroup list cursor and array */ /* threadgroup list cursor and array */
struct task_struct *tsk; struct task_struct *tsk;
struct task_and_cgroup *tc;
struct flex_array *group; struct flex_array *group;
struct cgroup_taskset tset = { };
/* /*
* we need to make sure we have css_sets for all the tasks we're * we need to make sure we have css_sets for all the tasks we're
* going to move -before- we actually start moving them, so that in * going to move -before- we actually start moving them, so that in
@ -2012,13 +2089,12 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
* step 0: in order to do expensive, possibly blocking operations for * step 0: in order to do expensive, possibly blocking operations for
* every thread, we cannot iterate the thread group list, since it needs * every thread, we cannot iterate the thread group list, since it needs
* rcu or tasklist locked. instead, build an array of all threads in the * rcu or tasklist locked. instead, build an array of all threads in the
* group - threadgroup_fork_lock prevents new threads from appearing, * group - group_rwsem prevents new threads from appearing, and if
* and if threads exit, this will just be an over-estimate. * threads exit, this will just be an over-estimate.
*/ */
group_size = get_nr_threads(leader); group_size = get_nr_threads(leader);
/* flex_array supports very large thread-groups better than kmalloc. */ /* flex_array supports very large thread-groups better than kmalloc. */
group = flex_array_alloc(sizeof(struct task_struct *), group_size, group = flex_array_alloc(sizeof(*tc), group_size, GFP_KERNEL);
GFP_KERNEL);
if (!group) if (!group)
return -ENOMEM; return -ENOMEM;
/* pre-allocate to guarantee space while iterating in rcu read-side. */ /* pre-allocate to guarantee space while iterating in rcu read-side. */
@ -2040,49 +2116,53 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
retval = -EAGAIN; retval = -EAGAIN;
goto out_free_group_list; goto out_free_group_list;
} }
/* take a reference on each task in the group to go in the array. */
tsk = leader; tsk = leader;
i = 0; i = 0;
do { do {
struct task_and_cgroup ent;
/* @tsk either already exited or can't exit until the end */
if (tsk->flags & PF_EXITING)
continue;
/* as per above, nr_threads may decrease, but not increase. */ /* as per above, nr_threads may decrease, but not increase. */
BUG_ON(i >= group_size); BUG_ON(i >= group_size);
get_task_struct(tsk);
/* /*
* saying GFP_ATOMIC has no effect here because we did prealloc * saying GFP_ATOMIC has no effect here because we did prealloc
* earlier, but it's good form to communicate our expectations. * earlier, but it's good form to communicate our expectations.
*/ */
retval = flex_array_put_ptr(group, i, tsk, GFP_ATOMIC); ent.task = tsk;
ent.cgrp = task_cgroup_from_root(tsk, root);
/* nothing to do if this task is already in the cgroup */
if (ent.cgrp == cgrp)
continue;
retval = flex_array_put(group, i, &ent, GFP_ATOMIC);
BUG_ON(retval != 0); BUG_ON(retval != 0);
i++; i++;
} while_each_thread(leader, tsk); } while_each_thread(leader, tsk);
/* remember the number of threads in the array for later. */ /* remember the number of threads in the array for later. */
group_size = i; group_size = i;
tset.tc_array = group;
tset.tc_array_len = group_size;
read_unlock(&tasklist_lock); read_unlock(&tasklist_lock);
/* methods shouldn't be called if no task is actually migrating */
retval = 0;
if (!group_size)
goto out_free_group_list;
/* /*
* step 1: check that we can legitimately attach to the cgroup. * step 1: check that we can legitimately attach to the cgroup.
*/ */
for_each_subsys(root, ss) { for_each_subsys(root, ss) {
if (ss->can_attach) { if (ss->can_attach) {
retval = ss->can_attach(ss, cgrp, leader); retval = ss->can_attach(ss, cgrp, &tset);
if (retval) { if (retval) {
failed_ss = ss; failed_ss = ss;
goto out_cancel_attach; goto out_cancel_attach;
} }
} }
/* a callback to be run on every thread in the threadgroup. */
if (ss->can_attach_task) {
/* run on each task in the threadgroup. */
for (i = 0; i < group_size; i++) {
tsk = flex_array_get_ptr(group, i);
retval = ss->can_attach_task(cgrp, tsk);
if (retval) {
failed_ss = ss;
cancel_failed_ss = true;
goto out_cancel_attach;
}
}
}
} }
/* /*
@ -2091,67 +2171,36 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
*/ */
INIT_LIST_HEAD(&newcg_list); INIT_LIST_HEAD(&newcg_list);
for (i = 0; i < group_size; i++) { for (i = 0; i < group_size; i++) {
tsk = flex_array_get_ptr(group, i); tc = flex_array_get(group, i);
/* nothing to do if this task is already in the cgroup */ oldcg = tc->task->cgroups;
oldcgrp = task_cgroup_from_root(tsk, root);
if (cgrp == oldcgrp) /* if we don't already have it in the list get a new one */
continue; if (!css_set_check_fetched(cgrp, tc->task, oldcg,
/* get old css_set pointer */ &newcg_list)) {
task_lock(tsk);
oldcg = tsk->cgroups;
get_css_set(oldcg);
task_unlock(tsk);
/* see if the new one for us is already in the list? */
if (css_set_check_fetched(cgrp, tsk, oldcg, &newcg_list)) {
/* was already there, nothing to do. */
put_css_set(oldcg);
} else {
/* we don't already have it. get new one. */
retval = css_set_prefetch(cgrp, oldcg, &newcg_list); retval = css_set_prefetch(cgrp, oldcg, &newcg_list);
put_css_set(oldcg);
if (retval) if (retval)
goto out_list_teardown; goto out_list_teardown;
} }
} }
/* /*
* step 3: now that we're guaranteed success wrt the css_sets, proceed * step 3: now that we're guaranteed success wrt the css_sets,
* to move all tasks to the new cgroup, calling ss->attach_task for each * proceed to move all tasks to the new cgroup. There are no
* one along the way. there are no failure cases after here, so this is * failure cases after here, so this is the commit point.
* the commit point.
*/ */
for_each_subsys(root, ss) {
if (ss->pre_attach)
ss->pre_attach(cgrp);
}
for (i = 0; i < group_size; i++) { for (i = 0; i < group_size; i++) {
tsk = flex_array_get_ptr(group, i); tc = flex_array_get(group, i);
/* leave current thread as it is if it's already there */ retval = cgroup_task_migrate(cgrp, tc->cgrp, tc->task, true);
oldcgrp = task_cgroup_from_root(tsk, root); BUG_ON(retval);
if (cgrp == oldcgrp)
continue;
/* if the thread is PF_EXITING, it can just get skipped. */
retval = cgroup_task_migrate(cgrp, oldcgrp, tsk, true);
if (retval == 0) {
/* attach each task to each subsystem */
for_each_subsys(root, ss) {
if (ss->attach_task)
ss->attach_task(cgrp, tsk);
}
} else {
BUG_ON(retval != -ESRCH);
}
} }
/* nothing is sensitive to fork() after this point. */ /* nothing is sensitive to fork() after this point. */
/* /*
* step 4: do expensive, non-thread-specific subsystem callbacks. * step 4: do subsystem attach callbacks.
* TODO: if ever a subsystem needs to know the oldcgrp for each task
* being moved, this call will need to be reworked to communicate that.
*/ */
for_each_subsys(root, ss) { for_each_subsys(root, ss) {
if (ss->attach) if (ss->attach)
ss->attach(ss, cgrp, oldcgrp, leader); ss->attach(ss, cgrp, &tset);
} }
/* /*
@ -2171,20 +2220,12 @@ out_cancel_attach:
/* same deal as in cgroup_attach_task */ /* same deal as in cgroup_attach_task */
if (retval) { if (retval) {
for_each_subsys(root, ss) { for_each_subsys(root, ss) {
if (ss == failed_ss) { if (ss == failed_ss)
if (cancel_failed_ss && ss->cancel_attach)
ss->cancel_attach(ss, cgrp, leader);
break; break;
}
if (ss->cancel_attach) if (ss->cancel_attach)
ss->cancel_attach(ss, cgrp, leader); ss->cancel_attach(ss, cgrp, &tset);
} }
} }
/* clean up the array of referenced threads in the group. */
for (i = 0; i < group_size; i++) {
tsk = flex_array_get_ptr(group, i);
put_task_struct(tsk);
}
out_free_group_list: out_free_group_list:
flex_array_free(group); flex_array_free(group);
return retval; return retval;
@ -2192,8 +2233,8 @@ out_free_group_list:
/* /*
* Find the task_struct of the task to attach by vpid and pass it along to the * Find the task_struct of the task to attach by vpid and pass it along to the
* function to attach either it or all tasks in its threadgroup. Will take * function to attach either it or all tasks in its threadgroup. Will lock
* cgroup_mutex; may take task_lock of task. * cgroup_mutex and threadgroup; may take task_lock of task.
*/ */
static int attach_task_by_pid(struct cgroup *cgrp, u64 pid, bool threadgroup) static int attach_task_by_pid(struct cgroup *cgrp, u64 pid, bool threadgroup)
{ {
@ -2220,13 +2261,7 @@ static int attach_task_by_pid(struct cgroup *cgrp, u64 pid, bool threadgroup)
* detect it later. * detect it later.
*/ */
tsk = tsk->group_leader; tsk = tsk->group_leader;
} else if (tsk->flags & PF_EXITING) {
/* optimization for the single-task-only case */
rcu_read_unlock();
cgroup_unlock();
return -ESRCH;
} }
/* /*
* even if we're attaching all tasks in the thread group, we * even if we're attaching all tasks in the thread group, we
* only need to check permissions on one of them. * only need to check permissions on one of them.
@ -2249,13 +2284,15 @@ static int attach_task_by_pid(struct cgroup *cgrp, u64 pid, bool threadgroup)
get_task_struct(tsk); get_task_struct(tsk);
} }
if (threadgroup) { threadgroup_lock(tsk);
threadgroup_fork_write_lock(tsk);
if (threadgroup)
ret = cgroup_attach_proc(cgrp, tsk); ret = cgroup_attach_proc(cgrp, tsk);
threadgroup_fork_write_unlock(tsk); else
} else {
ret = cgroup_attach_task(cgrp, tsk); ret = cgroup_attach_task(cgrp, tsk);
}
threadgroup_unlock(tsk);
put_task_struct(tsk); put_task_struct(tsk);
cgroup_unlock(); cgroup_unlock();
return ret; return ret;
@ -2306,7 +2343,9 @@ static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft,
return -EINVAL; return -EINVAL;
if (!cgroup_lock_live_group(cgrp)) if (!cgroup_lock_live_group(cgrp))
return -ENODEV; return -ENODEV;
mutex_lock(&cgroup_root_mutex);
strcpy(cgrp->root->release_agent_path, buffer); strcpy(cgrp->root->release_agent_path, buffer);
mutex_unlock(&cgroup_root_mutex);
cgroup_unlock(); cgroup_unlock();
return 0; return 0;
} }
@ -2789,6 +2828,7 @@ static void cgroup_enable_task_cg_lists(void)
} }
void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it) void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it)
__acquires(css_set_lock)
{ {
/* /*
* The first time anyone tries to iterate across a cgroup, * The first time anyone tries to iterate across a cgroup,
@ -2828,6 +2868,7 @@ struct task_struct *cgroup_iter_next(struct cgroup *cgrp,
} }
void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it) void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it)
__releases(css_set_lock)
{ {
read_unlock(&css_set_lock); read_unlock(&css_set_lock);
} }
@ -4491,20 +4532,31 @@ static const struct file_operations proc_cgroupstats_operations = {
* *
* A pointer to the shared css_set was automatically copied in * A pointer to the shared css_set was automatically copied in
* fork.c by dup_task_struct(). However, we ignore that copy, since * fork.c by dup_task_struct(). However, we ignore that copy, since
* it was not made under the protection of RCU or cgroup_mutex, so * it was not made under the protection of RCU, cgroup_mutex or
* might no longer be a valid cgroup pointer. cgroup_attach_task() might * threadgroup_change_begin(), so it might no longer be a valid
* have already changed current->cgroups, allowing the previously * cgroup pointer. cgroup_attach_task() might have already changed
* referenced cgroup group to be removed and freed. * current->cgroups, allowing the previously referenced cgroup
* group to be removed and freed.
*
* Outside the pointer validity we also need to process the css_set
* inheritance between threadgoup_change_begin() and
* threadgoup_change_end(), this way there is no leak in any process
* wide migration performed by cgroup_attach_proc() that could otherwise
* miss a thread because it is too early or too late in the fork stage.
* *
* At the point that cgroup_fork() is called, 'current' is the parent * At the point that cgroup_fork() is called, 'current' is the parent
* task, and the passed argument 'child' points to the child task. * task, and the passed argument 'child' points to the child task.
*/ */
void cgroup_fork(struct task_struct *child) void cgroup_fork(struct task_struct *child)
{ {
task_lock(current); /*
* We don't need to task_lock() current because current->cgroups
* can't be changed concurrently here. The parent obviously hasn't
* exited and called cgroup_exit(), and we are synchronized against
* cgroup migration through threadgroup_change_begin().
*/
child->cgroups = current->cgroups; child->cgroups = current->cgroups;
get_css_set(child->cgroups); get_css_set(child->cgroups);
task_unlock(current);
INIT_LIST_HEAD(&child->cg_list); INIT_LIST_HEAD(&child->cg_list);
} }
@ -4546,10 +4598,19 @@ void cgroup_post_fork(struct task_struct *child)
{ {
if (use_task_css_set_links) { if (use_task_css_set_links) {
write_lock(&css_set_lock); write_lock(&css_set_lock);
task_lock(child); if (list_empty(&child->cg_list)) {
if (list_empty(&child->cg_list)) /*
* It's safe to use child->cgroups without task_lock()
* here because we are protected through
* threadgroup_change_begin() against concurrent
* css_set change in cgroup_task_migrate(). Also
* the task can't exit at that point until
* wake_up_new_task() is called, so we are protected
* against cgroup_exit() setting child->cgroup to
* init_css_set.
*/
list_add(&child->cg_list, &child->cgroups->tasks); list_add(&child->cg_list, &child->cgroups->tasks);
task_unlock(child); }
write_unlock(&css_set_lock); write_unlock(&css_set_lock);
} }
} }


@@ -166,13 +166,17 @@ static bool is_task_frozen_enough(struct task_struct *task)
  */
 static int freezer_can_attach(struct cgroup_subsys *ss,
 			      struct cgroup *new_cgroup,
-			      struct task_struct *task)
+			      struct cgroup_taskset *tset)
 {
 	struct freezer *freezer;
+	struct task_struct *task;
 
 	/*
 	 * Anything frozen can't move or be moved to/from.
 	 */
+	cgroup_taskset_for_each(task, new_cgroup, tset)
+		if (cgroup_freezing(task))
+			return -EBUSY;
 
 	freezer = cgroup_freezer(new_cgroup);
 	if (freezer->state != CGROUP_THAWED)
@@ -181,11 +185,6 @@ static int freezer_can_attach(struct cgroup_subsys *ss,
 	return 0;
 }
 
-static int freezer_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
-{
-	return cgroup_freezing(tsk) ? -EBUSY : 0;
-}
-
 static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task)
 {
 	struct freezer *freezer;
@@ -381,10 +380,5 @@ struct cgroup_subsys freezer_subsys = {
 	.populate	= freezer_populate,
 	.subsys_id	= freezer_subsys_id,
 	.can_attach	= freezer_can_attach,
-	.can_attach_task = freezer_can_attach_task,
-	.pre_attach	= NULL,
-	.attach_task	= NULL,
-	.attach		= NULL,
 	.fork		= freezer_fork,
-	.exit		= NULL,
 };


@ -1389,79 +1389,73 @@ static int fmeter_getrate(struct fmeter *fmp)
return val; return val;
} }
/* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */
static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cont,
struct task_struct *tsk)
{
struct cpuset *cs = cgroup_cs(cont);
if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))
return -ENOSPC;
/*
* Kthreads bound to specific cpus cannot be moved to a new cpuset; we
* cannot change their cpu affinity and isolating such threads by their
* set of allowed nodes is unnecessary. Thus, cpusets are not
* applicable for such threads. This prevents checking for success of
* set_cpus_allowed_ptr() on all attached tasks before cpus_allowed may
* be changed.
*/
if (tsk->flags & PF_THREAD_BOUND)
return -EINVAL;
return 0;
}
static int cpuset_can_attach_task(struct cgroup *cgrp, struct task_struct *task)
{
return security_task_setscheduler(task);
}
/* /*
* Protected by cgroup_lock. The nodemasks must be stored globally because * Protected by cgroup_lock. The nodemasks must be stored globally because
* dynamically allocating them is not allowed in pre_attach, and they must * dynamically allocating them is not allowed in can_attach, and they must
* persist among pre_attach, attach_task, and attach. * persist until attach.
*/ */
static cpumask_var_t cpus_attach; static cpumask_var_t cpus_attach;
static nodemask_t cpuset_attach_nodemask_from; static nodemask_t cpuset_attach_nodemask_from;
static nodemask_t cpuset_attach_nodemask_to; static nodemask_t cpuset_attach_nodemask_to;
/* Set-up work for before attaching each task. */ /* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */
static void cpuset_pre_attach(struct cgroup *cont) static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
struct cgroup_taskset *tset)
{ {
struct cpuset *cs = cgroup_cs(cont); struct cpuset *cs = cgroup_cs(cgrp);
struct task_struct *task;
int ret;
if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))
return -ENOSPC;
cgroup_taskset_for_each(task, cgrp, tset) {
/*
* Kthreads bound to specific cpus cannot be moved to a new
* cpuset; we cannot change their cpu affinity and
* isolating such threads by their set of allowed nodes is
* unnecessary. Thus, cpusets are not applicable for such
* threads. This prevents checking for success of
* set_cpus_allowed_ptr() on all attached tasks before
* cpus_allowed may be changed.
*/
if (task->flags & PF_THREAD_BOUND)
return -EINVAL;
if ((ret = security_task_setscheduler(task)))
return ret;
}
/* prepare for attach */
if (cs == &top_cpuset) if (cs == &top_cpuset)
cpumask_copy(cpus_attach, cpu_possible_mask); cpumask_copy(cpus_attach, cpu_possible_mask);
else else
guarantee_online_cpus(cs, cpus_attach); guarantee_online_cpus(cs, cpus_attach);
guarantee_online_mems(cs, &cpuset_attach_nodemask_to); guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
return 0;
} }
/* Per-thread attachment work. */ static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
static void cpuset_attach_task(struct cgroup *cont, struct task_struct *tsk) struct cgroup_taskset *tset)
{
int err;
struct cpuset *cs = cgroup_cs(cont);
/*
* can_attach beforehand should guarantee that this doesn't fail.
* TODO: have a better way to handle failure here
*/
err = set_cpus_allowed_ptr(tsk, cpus_attach);
WARN_ON_ONCE(err);
cpuset_change_task_nodemask(tsk, &cpuset_attach_nodemask_to);
cpuset_update_task_spread_flag(cs, tsk);
}
static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cont,
struct cgroup *oldcont, struct task_struct *tsk)
{ {
struct mm_struct *mm; struct mm_struct *mm;
struct cpuset *cs = cgroup_cs(cont); struct task_struct *task;
struct cpuset *oldcs = cgroup_cs(oldcont); struct task_struct *leader = cgroup_taskset_first(tset);
struct cgroup *oldcgrp = cgroup_taskset_cur_cgroup(tset);
struct cpuset *cs = cgroup_cs(cgrp);
struct cpuset *oldcs = cgroup_cs(oldcgrp);
cgroup_taskset_for_each(task, cgrp, tset) {
/*
* can_attach beforehand should guarantee that this doesn't
* fail. TODO: have a better way to handle failure here
*/
WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach));
cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to);
cpuset_update_task_spread_flag(cs, task);
}
/* /*
* Change mm, possibly for multiple threads in a threadgroup. This is * Change mm, possibly for multiple threads in a threadgroup. This is
@ -1469,7 +1463,7 @@ static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cont,
*/ */
cpuset_attach_nodemask_from = oldcs->mems_allowed; cpuset_attach_nodemask_from = oldcs->mems_allowed;
cpuset_attach_nodemask_to = cs->mems_allowed; cpuset_attach_nodemask_to = cs->mems_allowed;
mm = get_task_mm(tsk); mm = get_task_mm(leader);
if (mm) { if (mm) {
mpol_rebind_mm(mm, &cpuset_attach_nodemask_to); mpol_rebind_mm(mm, &cpuset_attach_nodemask_to);
if (is_memory_migrate(cs)) if (is_memory_migrate(cs))
@ -1925,9 +1919,6 @@ struct cgroup_subsys cpuset_subsys = {
.create = cpuset_create, .create = cpuset_create,
.destroy = cpuset_destroy, .destroy = cpuset_destroy,
.can_attach = cpuset_can_attach, .can_attach = cpuset_can_attach,
.can_attach_task = cpuset_can_attach_task,
.pre_attach = cpuset_pre_attach,
.attach_task = cpuset_attach_task,
.attach = cpuset_attach, .attach = cpuset_attach,
.populate = cpuset_populate, .populate = cpuset_populate,
.post_clone = cpuset_post_clone, .post_clone = cpuset_post_clone,


@@ -6941,10 +6941,13 @@ static int __perf_cgroup_move(void *info)
 	return 0;
 }
 
-static void
-perf_cgroup_attach_task(struct cgroup *cgrp, struct task_struct *task)
+static void perf_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
+			       struct cgroup_taskset *tset)
 {
-	task_function_call(task, __perf_cgroup_move, task);
+	struct task_struct *task;
+
+	cgroup_taskset_for_each(task, cgrp, tset)
+		task_function_call(task, __perf_cgroup_move, task);
 }
 
 static void perf_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp,
@@ -6958,7 +6961,7 @@ static void perf_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp,
 	if (!(task->flags & PF_EXITING))
 		return;
 
-	perf_cgroup_attach_task(cgrp, task);
+	task_function_call(task, __perf_cgroup_move, task);
 }
 
 struct cgroup_subsys perf_subsys = {
@@ -6967,6 +6970,6 @@ struct cgroup_subsys perf_subsys = {
 	.create		= perf_cgroup_create,
 	.destroy	= perf_cgroup_destroy,
 	.exit		= perf_cgroup_exit,
-	.attach_task	= perf_cgroup_attach_task,
+	.attach		= perf_cgroup_attach,
 };
 #endif /* CONFIG_CGROUP_PERF */


@@ -972,7 +972,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	sched_autogroup_fork(sig);
 
 #ifdef CONFIG_CGROUPS
-	init_rwsem(&sig->threadgroup_fork_lock);
+	init_rwsem(&sig->group_rwsem);
 #endif
 
 	sig->oom_adj = current->signal->oom_adj;
@@ -1153,7 +1153,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	p->io_context = NULL;
 	p->audit_context = NULL;
 	if (clone_flags & CLONE_THREAD)
-		threadgroup_fork_read_lock(current);
+		threadgroup_change_begin(current);
 	cgroup_fork(p);
 #ifdef CONFIG_NUMA
 	p->mempolicy = mpol_dup(p->mempolicy);
@@ -1368,7 +1368,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	proc_fork_connector(p);
 	cgroup_post_fork(p);
 	if (clone_flags & CLONE_THREAD)
-		threadgroup_fork_read_unlock(current);
+		threadgroup_change_end(current);
 	perf_event_fork(p);
 
 	return p;
@@ -1403,7 +1403,7 @@ bad_fork_cleanup_policy:
 bad_fork_cleanup_cgroup:
 #endif
 	if (clone_flags & CLONE_THREAD)
-		threadgroup_fork_read_unlock(current);
+		threadgroup_change_end(current);
 	cgroup_exit(p, cgroup_callbacks_done);
 	delayacct_tsk_free(p);
 	module_put(task_thread_info(p)->exec_domain->module);


@@ -159,8 +159,7 @@ int res_counter_memparse_write_strategy(const char *buf,
 		return 0;
 	}
 
-	/* FIXME - make memparse() take const char* args */
-	*res = memparse((char *)buf, &end);
+	*res = memparse(buf, &end);
 	if (*end != '\0')
 		return -EINVAL;


@@ -7563,24 +7563,31 @@ cpu_cgroup_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
 	sched_destroy_group(tg);
 }
 
-static int
-cpu_cgroup_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
+static int cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
+				 struct cgroup_taskset *tset)
 {
+	struct task_struct *task;
+
+	cgroup_taskset_for_each(task, cgrp, tset) {
 #ifdef CONFIG_RT_GROUP_SCHED
-	if (!sched_rt_can_attach(cgroup_tg(cgrp), tsk))
-		return -EINVAL;
+		if (!sched_rt_can_attach(cgroup_tg(cgrp), task))
+			return -EINVAL;
 #else
-	/* We don't support RT-tasks being in separate groups */
-	if (tsk->sched_class != &fair_sched_class)
-		return -EINVAL;
+		/* We don't support RT-tasks being in separate groups */
+		if (task->sched_class != &fair_sched_class)
+			return -EINVAL;
 #endif
+	}
 	return 0;
 }
 
-static void
-cpu_cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
+static void cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
+			      struct cgroup_taskset *tset)
 {
-	sched_move_task(tsk);
+	struct task_struct *task;
+
+	cgroup_taskset_for_each(task, cgrp, tset)
+		sched_move_task(task);
 }
 
 static void
@@ -7915,8 +7922,8 @@ struct cgroup_subsys cpu_cgroup_subsys = {
 	.name		= "cpu",
 	.create		= cpu_cgroup_create,
 	.destroy	= cpu_cgroup_destroy,
-	.can_attach_task = cpu_cgroup_can_attach_task,
-	.attach_task	= cpu_cgroup_attach_task,
+	.can_attach	= cpu_cgroup_can_attach,
+	.attach		= cpu_cgroup_attach,
 	.exit		= cpu_cgroup_exit,
 	.populate	= cpu_cgroup_populate,
 	.subsys_id	= cpu_cgroup_subsys_id,


@@ -2355,8 +2355,15 @@ void exit_signals(struct task_struct *tsk)
 	int group_stop = 0;
 	sigset_t unblocked;
 
+	/*
+	 * @tsk is about to have PF_EXITING set - lock out users which
+	 * expect stable threadgroup.
+	 */
+	threadgroup_change_begin(tsk);
+
 	if (thread_group_empty(tsk) || signal_group_exit(tsk->signal)) {
 		tsk->flags |= PF_EXITING;
+		threadgroup_change_end(tsk);
 		return;
 	}
 
@@ -2366,6 +2373,9 @@ void exit_signals(struct task_struct *tsk)
 	 * see wants_signal(), do_signal_stop().
 	 */
 	tsk->flags |= PF_EXITING;
+
+	threadgroup_change_end(tsk);
+
 	if (!signal_pending(tsk))
 		goto out;


@@ -5391,8 +5391,9 @@ static void mem_cgroup_clear_mc(void)
 
 static int mem_cgroup_can_attach(struct cgroup_subsys *ss,
 				struct cgroup *cgroup,
-				struct task_struct *p)
+				struct cgroup_taskset *tset)
 {
+	struct task_struct *p = cgroup_taskset_first(tset);
 	int ret = 0;
 	struct mem_cgroup *memcg = mem_cgroup_from_cont(cgroup);
 
@@ -5430,7 +5431,7 @@ static int mem_cgroup_can_attach(struct cgroup_subsys *ss,
 
 static void mem_cgroup_cancel_attach(struct cgroup_subsys *ss,
 				struct cgroup *cgroup,
-				struct task_struct *p)
+				struct cgroup_taskset *tset)
 {
 	mem_cgroup_clear_mc();
 }
@@ -5547,9 +5548,9 @@ retry:
 
 static void mem_cgroup_move_task(struct cgroup_subsys *ss,
 				struct cgroup *cont,
-				struct cgroup *old_cont,
-				struct task_struct *p)
+				struct cgroup_taskset *tset)
 {
+	struct task_struct *p = cgroup_taskset_first(tset);
 	struct mm_struct *mm = get_task_mm(p);
 
 	if (mm) {
@@ -5564,19 +5565,18 @@ static void mem_cgroup_move_task(struct cgroup_subsys *ss,
 #else	/* !CONFIG_MMU */
 static int mem_cgroup_can_attach(struct cgroup_subsys *ss,
 				struct cgroup *cgroup,
-				struct task_struct *p)
+				struct cgroup_taskset *tset)
 {
 	return 0;
 }
 static void mem_cgroup_cancel_attach(struct cgroup_subsys *ss,
 				struct cgroup *cgroup,
-				struct task_struct *p)
+				struct cgroup_taskset *tset)
 {
 }
 static void mem_cgroup_move_task(struct cgroup_subsys *ss,
 				struct cgroup *cont,
-				struct cgroup *old_cont,
-				struct task_struct *p)
+				struct cgroup_taskset *tset)
 {
 }
 #endif


@@ -62,11 +62,12 @@ static inline struct dev_cgroup *task_devcgroup(struct task_struct *task)
 struct cgroup_subsys devices_subsys;
 
 static int devcgroup_can_attach(struct cgroup_subsys *ss,
-		struct cgroup *new_cgroup, struct task_struct *task)
+			struct cgroup *new_cgrp, struct cgroup_taskset *set)
 {
-	if (current != task && !capable(CAP_SYS_ADMIN))
-		return -EPERM;
+	struct task_struct *task = cgroup_taskset_first(set);
 
+	if (current != task && !capable(CAP_SYS_ADMIN))
+		return -EPERM;
 	return 0;
 }