diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 3146ed3f6ec..0d21f79b2fe 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1407,6 +1407,31 @@ config SMP_ON_UP If you don't know what to do here, say Y. +config ARM_CPU_TOPOLOGY + bool "Support cpu topology definition" + depends on SMP && CPU_V7 + default y + help + Support ARM cpu topology definition. The MPIDR register defines + affinity between processors which is then used to describe the cpu + topology of an ARM System. + +config SCHED_MC + bool "Multi-core scheduler support" + depends on ARM_CPU_TOPOLOGY + help + Multi-core scheduler support improves the CPU scheduler's decision + making when dealing with multi-core CPU chips at a cost of slightly + increased overhead in some places. If unsure say N here. + +config SCHED_SMT + bool "SMT scheduler support" + depends on ARM_CPU_TOPOLOGY + help + Improves the CPU scheduler's decision making when dealing with + MultiThreading at a cost of slightly increased overhead in some + places. If unsure say N here. + config HAVE_ARM_SCU bool help diff --git a/arch/arm/common/gic.c b/arch/arm/common/gic.c index 3227ca952a1..666b278e56d 100644 --- a/arch/arm/common/gic.c +++ b/arch/arm/common/gic.c @@ -180,7 +180,7 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val, return -EINVAL; mask = 0xff << shift; - bit = 1 << (cpu + shift); + bit = 1 << (cpu_logical_map(cpu) + shift); spin_lock(&irq_controller_lock); val = readl_relaxed(reg) & ~mask; @@ -259,9 +259,15 @@ static void __init gic_dist_init(struct gic_chip_data *gic, unsigned int irq_start) { unsigned int gic_irqs, irq_limit, i; + u32 cpumask; void __iomem *base = gic->dist_base; - u32 cpumask = 1 << smp_processor_id(); + u32 cpu = 0; +#ifdef CONFIG_SMP + cpu = cpu_logical_map(smp_processor_id()); +#endif + + cpumask = 1 << cpu; cpumask |= cpumask << 8; cpumask |= cpumask << 16; @@ -382,7 +388,12 @@ void __cpuinit gic_enable_ppi(unsigned int irq) #ifdef CONFIG_SMP void gic_raise_softirq(const struct cpumask *mask, unsigned int irq) { - unsigned long map = *cpus_addr(*mask); + int cpu; + unsigned long map = 0; + + /* Convert our logical CPU mask into a physical one. */ + for_each_cpu(cpu, mask) + map |= 1 << cpu_logical_map(cpu); /* * Ensure that stores to Normal memory are visible to the diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h index cd4458f6417..cb47d28cbe1 100644 --- a/arch/arm/include/asm/cputype.h +++ b/arch/arm/include/asm/cputype.h @@ -8,6 +8,7 @@ #define CPUID_CACHETYPE 1 #define CPUID_TCM 2 #define CPUID_TLBTYPE 3 +#define CPUID_MPIDR 5 #define CPUID_EXT_PFR0 "c1, 0" #define CPUID_EXT_PFR1 "c1, 1" @@ -70,6 +71,11 @@ static inline unsigned int __attribute_const__ read_cpuid_tcmstatus(void) return read_cpuid(CPUID_TCM); } +static inline unsigned int __attribute_const__ read_cpuid_mpidr(void) +{ + return read_cpuid(CPUID_MPIDR); +} + /* * Intel's XScale3 core supports some v6 features (supersections, L2) * but advertises itself as v5 as it does not support the v6 ISA. For diff --git a/arch/arm/include/asm/exception.h b/arch/arm/include/asm/exception.h new file mode 100644 index 00000000000..5abaf5bbd98 --- /dev/null +++ b/arch/arm/include/asm/exception.h @@ -0,0 +1,19 @@ +/* + * Annotations for marking C functions as exception handlers. + * + * These should only be used for C functions that are called from the low + * level exception entry code and not any intervening C code. + */ +#ifndef __ASM_ARM_EXCEPTION_H +#define __ASM_ARM_EXCEPTION_H + +#include + +#define __exception __attribute__((section(".exception.text"))) +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +#define __exception_irq_entry __irq_entry +#else +#define __exception_irq_entry __exception +#endif + +#endif /* __ASM_ARM_EXCEPTION_H */ diff --git a/arch/arm/include/asm/localtimer.h b/arch/arm/include/asm/localtimer.h index 080d74f8128..3306f281333 100644 --- a/arch/arm/include/asm/localtimer.h +++ b/arch/arm/include/asm/localtimer.h @@ -22,6 +22,10 @@ void percpu_timer_setup(void); */ asmlinkage void do_local_timer(struct pt_regs *); +/* + * Called from C code + */ +void handle_local_timer(struct pt_regs *); #ifdef CONFIG_LOCAL_TIMERS diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h index e42d96a45d3..0a17b62538c 100644 --- a/arch/arm/include/asm/smp.h +++ b/arch/arm/include/asm/smp.h @@ -32,6 +32,11 @@ extern void show_ipi_list(struct seq_file *, int); */ asmlinkage void do_IPI(int ipinr, struct pt_regs *regs); +/* + * Called from C code, this handles an IPI. + */ +void handle_IPI(int ipinr, struct pt_regs *regs); + /* * Setup the set of possible CPUs (via set_cpu_possible) */ @@ -65,6 +70,12 @@ extern void platform_secondary_init(unsigned int cpu); */ extern void platform_smp_prepare_cpus(unsigned int); +/* + * Logical CPU mapping. + */ +extern int __cpu_logical_map[NR_CPUS]; +#define cpu_logical_map(cpu) __cpu_logical_map[cpu] + /* * Initial data for bringing up a secondary CPU. */ diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h index 832888d0c20..ed6b0499a10 100644 --- a/arch/arm/include/asm/system.h +++ b/arch/arm/include/asm/system.h @@ -62,13 +62,6 @@ #include -#define __exception __attribute__((section(".exception.text"))) -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -#define __exception_irq_entry __irq_entry -#else -#define __exception_irq_entry __exception -#endif - struct thread_info; struct task_struct; diff --git a/arch/arm/include/asm/topology.h b/arch/arm/include/asm/topology.h index accbd7cad9b..a7e457ed27c 100644 --- a/arch/arm/include/asm/topology.h +++ b/arch/arm/include/asm/topology.h @@ -1,6 +1,39 @@ #ifndef _ASM_ARM_TOPOLOGY_H #define _ASM_ARM_TOPOLOGY_H +#ifdef CONFIG_ARM_CPU_TOPOLOGY + +#include + +struct cputopo_arm { + int thread_id; + int core_id; + int socket_id; + cpumask_t thread_sibling; + cpumask_t core_sibling; +}; + +extern struct cputopo_arm cpu_topology[NR_CPUS]; + +#define topology_physical_package_id(cpu) (cpu_topology[cpu].socket_id) +#define topology_core_id(cpu) (cpu_topology[cpu].core_id) +#define topology_core_cpumask(cpu) (&cpu_topology[cpu].core_sibling) +#define topology_thread_cpumask(cpu) (&cpu_topology[cpu].thread_sibling) + +#define mc_capable() (cpu_topology[0].socket_id != -1) +#define smt_capable() (cpu_topology[0].thread_id != -1) + +void init_cpu_topology(void); +void store_cpu_topology(unsigned int cpuid); +const struct cpumask *cpu_coregroup_mask(unsigned int cpu); + +#else + +static inline void init_cpu_topology(void) { } +static inline void store_cpu_topology(unsigned int cpuid) { } + +#endif + #include #endif /* _ASM_ARM_TOPOLOGY_H */ diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index f7887dc53c1..c687bceba7d 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -66,6 +66,7 @@ obj-$(CONFIG_IWMMXT) += iwmmxt.o obj-$(CONFIG_CPU_HAS_PMU) += pmu.o obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o AFLAGS_iwmmxt.o := -Wa,-mcpu=iwmmxt +obj-$(CONFIG_ARM_CPU_TOPOLOGY) += topology.o ifneq ($(CONFIG_ARCH_EBSA110),y) obj-y += io.o diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c index de3dcab8610..53919b230e8 100644 --- a/arch/arm/kernel/irq.c +++ b/arch/arm/kernel/irq.c @@ -35,8 +35,8 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index d88ff0230e8..35417d0fb8a 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include @@ -31,6 +30,8 @@ #include #include #include +#include +#include #include #include #include @@ -39,6 +40,7 @@ #include #include #include +#include /* * as from 2.5, kernels no longer have an init_tasks structure @@ -259,6 +261,20 @@ void __ref cpu_die(void) } #endif /* CONFIG_HOTPLUG_CPU */ +int __cpu_logical_map[NR_CPUS]; + +void __init smp_setup_processor_id(void) +{ + int i; + u32 cpu = is_smp() ? read_cpuid_mpidr() & 0xff : 0; + + cpu_logical_map(0) = cpu; + for (i = 1; i < NR_CPUS; ++i) + cpu_logical_map(i) = i == cpu ? 0 : i; + + printk(KERN_INFO "Booting Linux on physical CPU %d\n", cpu); +} + /* * Called by both boot and secondaries to move global data into * per-processor storage. @@ -268,6 +284,8 @@ static void __cpuinit smp_store_cpu_info(unsigned int cpuid) struct cpuinfo_arm *cpu_info = &per_cpu(cpu_data, cpuid); cpu_info->loops_per_jiffy = loops_per_jiffy; + + store_cpu_topology(cpuid); } /* @@ -358,6 +376,8 @@ void __init smp_prepare_cpus(unsigned int max_cpus) { unsigned int ncores = num_possible_cpus(); + init_cpu_topology(); + smp_store_cpu_info(smp_processor_id()); /* @@ -459,6 +479,11 @@ static void ipi_timer(void) #ifdef CONFIG_LOCAL_TIMERS asmlinkage void __exception_irq_entry do_local_timer(struct pt_regs *regs) +{ + handle_local_timer(regs); +} + +void handle_local_timer(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); int cpu = smp_processor_id(); @@ -566,6 +591,11 @@ static void ipi_cpu_stop(unsigned int cpu) * Main handler for inter-processor interrupts */ asmlinkage void __exception_irq_entry do_IPI(int ipinr, struct pt_regs *regs) +{ + handle_IPI(ipinr, regs); +} + +void handle_IPI(int ipinr, struct pt_regs *regs) { unsigned int cpu = smp_processor_id(); struct pt_regs *old_regs = set_irq_regs(regs); diff --git a/arch/arm/kernel/smp_scu.c b/arch/arm/kernel/smp_scu.c index 7fcddb75c87..8f5dd796335 100644 --- a/arch/arm/kernel/smp_scu.c +++ b/arch/arm/kernel/smp_scu.c @@ -34,7 +34,7 @@ unsigned int __init scu_get_core_count(void __iomem *scu_base) /* * Enable the SCU */ -void __init scu_enable(void __iomem *scu_base) +void scu_enable(void __iomem *scu_base) { u32 scu_ctrl; diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c new file mode 100644 index 00000000000..1040c00405d --- /dev/null +++ b/arch/arm/kernel/topology.c @@ -0,0 +1,148 @@ +/* + * arch/arm/kernel/topology.c + * + * Copyright (C) 2011 Linaro Limited. + * Written by: Vincent Guittot + * + * based on arch/sh/kernel/topology.c + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define MPIDR_SMP_BITMASK (0x3 << 30) +#define MPIDR_SMP_VALUE (0x2 << 30) + +#define MPIDR_MT_BITMASK (0x1 << 24) + +/* + * These masks reflect the current use of the affinity levels. + * The affinity level can be up to 16 bits according to ARM ARM + */ + +#define MPIDR_LEVEL0_MASK 0x3 +#define MPIDR_LEVEL0_SHIFT 0 + +#define MPIDR_LEVEL1_MASK 0xF +#define MPIDR_LEVEL1_SHIFT 8 + +#define MPIDR_LEVEL2_MASK 0xFF +#define MPIDR_LEVEL2_SHIFT 16 + +struct cputopo_arm cpu_topology[NR_CPUS]; + +const struct cpumask *cpu_coregroup_mask(unsigned int cpu) +{ + return &cpu_topology[cpu].core_sibling; +} + +/* + * store_cpu_topology is called at boot when only one cpu is running + * and with the mutex cpu_hotplug.lock locked, when several cpus have booted, + * which prevents simultaneous write access to cpu_topology array + */ +void store_cpu_topology(unsigned int cpuid) +{ + struct cputopo_arm *cpuid_topo = &cpu_topology[cpuid]; + unsigned int mpidr; + unsigned int cpu; + + /* If the cpu topology has been already set, just return */ + if (cpuid_topo->core_id != -1) + return; + + mpidr = read_cpuid_mpidr(); + + /* create cpu topology mapping */ + if ((mpidr & MPIDR_SMP_BITMASK) == MPIDR_SMP_VALUE) { + /* + * This is a multiprocessor system + * multiprocessor format & multiprocessor mode field are set + */ + + if (mpidr & MPIDR_MT_BITMASK) { + /* core performance interdependency */ + cpuid_topo->thread_id = (mpidr >> MPIDR_LEVEL0_SHIFT) + & MPIDR_LEVEL0_MASK; + cpuid_topo->core_id = (mpidr >> MPIDR_LEVEL1_SHIFT) + & MPIDR_LEVEL1_MASK; + cpuid_topo->socket_id = (mpidr >> MPIDR_LEVEL2_SHIFT) + & MPIDR_LEVEL2_MASK; + } else { + /* largely independent cores */ + cpuid_topo->thread_id = -1; + cpuid_topo->core_id = (mpidr >> MPIDR_LEVEL0_SHIFT) + & MPIDR_LEVEL0_MASK; + cpuid_topo->socket_id = (mpidr >> MPIDR_LEVEL1_SHIFT) + & MPIDR_LEVEL1_MASK; + } + } else { + /* + * This is an uniprocessor system + * we are in multiprocessor format but uniprocessor system + * or in the old uniprocessor format + */ + cpuid_topo->thread_id = -1; + cpuid_topo->core_id = 0; + cpuid_topo->socket_id = -1; + } + + /* update core and thread sibling masks */ + for_each_possible_cpu(cpu) { + struct cputopo_arm *cpu_topo = &cpu_topology[cpu]; + + if (cpuid_topo->socket_id == cpu_topo->socket_id) { + cpumask_set_cpu(cpuid, &cpu_topo->core_sibling); + if (cpu != cpuid) + cpumask_set_cpu(cpu, + &cpuid_topo->core_sibling); + + if (cpuid_topo->core_id == cpu_topo->core_id) { + cpumask_set_cpu(cpuid, + &cpu_topo->thread_sibling); + if (cpu != cpuid) + cpumask_set_cpu(cpu, + &cpuid_topo->thread_sibling); + } + } + } + smp_wmb(); + + printk(KERN_INFO "CPU%u: thread %d, cpu %d, socket %d, mpidr %x\n", + cpuid, cpu_topology[cpuid].thread_id, + cpu_topology[cpuid].core_id, + cpu_topology[cpuid].socket_id, mpidr); +} + +/* + * init_cpu_topology is called at boot when only one cpu is running + * which prevent simultaneous write access to cpu_topology array + */ +void init_cpu_topology(void) +{ + unsigned int cpu; + + /* init core mask */ + for_each_possible_cpu(cpu) { + struct cputopo_arm *cpu_topo = &(cpu_topology[cpu]); + + cpu_topo->thread_id = -1; + cpu_topo->core_id = -1; + cpu_topo->socket_id = -1; + cpumask_clear(&cpu_topo->core_sibling); + cpumask_clear(&cpu_topo->thread_sibling); + } + smp_wmb(); +} diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index bc9f9da782c..210382555af 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -27,6 +27,7 @@ #include #include +#include #include #include #include diff --git a/arch/arm/mach-pxa/irq.c b/arch/arm/mach-pxa/irq.c index b09e848eb6c..ca607571782 100644 --- a/arch/arm/mach-pxa/irq.c +++ b/arch/arm/mach-pxa/irq.c @@ -19,6 +19,8 @@ #include #include +#include + #include #include #include diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index 3b5ea68acbb..aa33949fef6 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -20,6 +20,7 @@ #include #include +#include #include #include #include