diff --git a/kernel/sched.c b/kernel/sched.c index bfb8ad8ed17..7178b8c2351 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -151,7 +151,8 @@ static inline int task_has_rt_policy(struct task_struct *p) */ struct rt_prio_array { DECLARE_BITMAP(bitmap, MAX_RT_PRIO+1); /* include 1 bit for delimiter */ - struct list_head queue[MAX_RT_PRIO]; + struct list_head xqueue[MAX_RT_PRIO]; /* exclusive queue */ + struct list_head squeue[MAX_RT_PRIO]; /* shared queue */ }; struct rt_bandwidth { @@ -7542,7 +7543,8 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq) array = &rt_rq->active; for (i = 0; i < MAX_RT_PRIO; i++) { - INIT_LIST_HEAD(array->queue + i); + INIT_LIST_HEAD(array->xqueue + i); + INIT_LIST_HEAD(array->squeue + i); __clear_bit(i, array->bitmap); } /* delimiter for bitsearch: */ diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 3432d573205..fefed39fafd 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -458,7 +458,13 @@ static void enqueue_rt_entity(struct sched_rt_entity *rt_se) if (group_rq && rt_rq_throttled(group_rq)) return; - list_add_tail(&rt_se->run_list, array->queue + rt_se_prio(rt_se)); + if (rt_se->nr_cpus_allowed == 1) + list_add_tail(&rt_se->run_list, + array->xqueue + rt_se_prio(rt_se)); + else + list_add_tail(&rt_se->run_list, + array->squeue + rt_se_prio(rt_se)); + __set_bit(rt_se_prio(rt_se), array->bitmap); inc_rt_tasks(rt_se, rt_rq); @@ -470,7 +476,8 @@ static void dequeue_rt_entity(struct sched_rt_entity *rt_se) struct rt_prio_array *array = &rt_rq->active; list_del_init(&rt_se->run_list); - if (list_empty(array->queue + rt_se_prio(rt_se))) + if (list_empty(array->squeue + rt_se_prio(rt_se)) + && list_empty(array->xqueue + rt_se_prio(rt_se))) __clear_bit(rt_se_prio(rt_se), array->bitmap); dec_rt_tasks(rt_se, rt_rq); @@ -537,13 +544,19 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep) /* * Put task to the end of the run list without the overhead of dequeue * followed by enqueue. + * + * Note: We always enqueue the task to the shared-queue, regardless of its + * previous position w.r.t. exclusive vs shared. This is so that exclusive RR + * tasks fairly round-robin with all tasks on the runqueue, not just other + * exclusive tasks. */ static void requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se) { struct rt_prio_array *array = &rt_rq->active; - list_move_tail(&rt_se->run_list, array->queue + rt_se_prio(rt_se)); + list_del_init(&rt_se->run_list); + list_add_tail(&rt_se->run_list, array->squeue + rt_se_prio(rt_se)); } static void requeue_task_rt(struct rq *rq, struct task_struct *p) @@ -601,13 +614,46 @@ static int select_task_rq_rt(struct task_struct *p, int sync) } #endif /* CONFIG_SMP */ +static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq, + struct rt_rq *rt_rq); + /* * Preempt the current task with a newly woken task if needed: */ static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p) { - if (p->prio < rq->curr->prio) + if (p->prio < rq->curr->prio) { resched_task(rq->curr); + return; + } + +#ifdef CONFIG_SMP + /* + * If: + * + * - the newly woken task is of equal priority to the current task + * - the newly woken task is non-migratable while current is migratable + * - current will be preempted on the next reschedule + * + * we should check to see if current can readily move to a different + * cpu. If so, we will reschedule to allow the push logic to try + * to move current somewhere else, making room for our non-migratable + * task. + */ + if((p->prio == rq->curr->prio) + && p->rt.nr_cpus_allowed == 1 + && rq->curr->rt.nr_cpus_allowed != 1 + && pick_next_rt_entity(rq, &rq->rt) != &rq->curr->rt) { + cpumask_t mask; + + if (cpupri_find(&rq->rd->cpupri, rq->curr, &mask)) + /* + * There appears to be other cpus that can accept + * current, so lets reschedule to try and push it away + */ + resched_task(rq->curr); + } +#endif } static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq, @@ -621,8 +667,15 @@ static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq, idx = sched_find_first_bit(array->bitmap); BUG_ON(idx >= MAX_RT_PRIO); - queue = array->queue + idx; - next = list_entry(queue->next, struct sched_rt_entity, run_list); + queue = array->xqueue + idx; + if (!list_empty(queue)) + next = list_entry(queue->next, struct sched_rt_entity, + run_list); + else { + queue = array->squeue + idx; + next = list_entry(queue->next, struct sched_rt_entity, + run_list); + } return next; } @@ -692,7 +745,7 @@ static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu) continue; if (next && next->prio < idx) continue; - list_for_each_entry(rt_se, array->queue + idx, run_list) { + list_for_each_entry(rt_se, array->squeue + idx, run_list) { struct task_struct *p = rt_task_of(rt_se); if (pick_rt_task(rq, p, cpu)) { next = p; @@ -1146,6 +1199,14 @@ static void set_cpus_allowed_rt(struct task_struct *p, } update_rt_migration(rq); + + if (unlikely(weight == 1 || p->rt.nr_cpus_allowed == 1)) + /* + * If either the new or old weight is a "1", we need + * to requeue to properly move between shared and + * exclusive queues. + */ + requeue_task_rt(rq, p); } p->cpus_allowed = *new_mask;