From f7e416eb923d0f9ff1e5e9b5c0e8b75ddd7db865 Mon Sep 17 00:00:00 2001 From: Venkatraman Sathiyamoorthy Date: Fri, 3 Aug 2012 07:58:33 +0100 Subject: [PATCH 1/9] ARM: 7482/1: topology: fix section mismatch warning for init_cpu_topology Get rid of this warning.. arch/arm/kernel/built-in.o(.text+0xac78): Section mismatch in reference from the function init_cpu_topology() to the function .init.text:parse_dt_topology() The function init_cpu_topology() references the function __init parse_dt_topology(). This is often because init_cpu_topology lacks a __init annotation or the annotation of parse_dt_topology is wrong. Signed-off-by: Venkatraman S Signed-off-by: Russell King --- arch/arm/kernel/topology.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c index 198b08456e9..26c12c6440f 100644 --- a/arch/arm/kernel/topology.c +++ b/arch/arm/kernel/topology.c @@ -321,7 +321,7 @@ void store_cpu_topology(unsigned int cpuid) * init_cpu_topology is called at boot when only one cpu is running * which prevent simultaneous write access to cpu_topology array */ -void init_cpu_topology(void) +void __init init_cpu_topology(void) { unsigned int cpu; From 3d9fb0038a9b02febb01efc79a4a5d97f1822a90 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 3 Aug 2012 17:24:14 +0100 Subject: [PATCH 2/9] ARM: 7483/1: vfp: only advertise VFPv4 in hwcaps if CONFIG_VFPv3 is enabled VFPv4 support depends on the VFPv3 context save/restore code, so only advertise support in the hwcaps if the kernel can actually handle it. Cc: # 3.1+ Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/vfp/vfpmodule.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c index fb849d044bd..c834b32af27 100644 --- a/arch/arm/vfp/vfpmodule.c +++ b/arch/arm/vfp/vfpmodule.c @@ -719,8 +719,10 @@ static int __init vfp_init(void) if ((fmrx(MVFR1) & 0x000fff00) == 0x00011100) elf_hwcap |= HWCAP_NEON; #endif +#ifdef CONFIG_VFPv3 if ((fmrx(MVFR1) & 0xf0000000) == 0x10000000) elf_hwcap |= HWCAP_VFPv4; +#endif } } return 0; From f1898f6be9a2e3690fb62d8307849b86a89cc36c Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Mon, 6 Aug 2012 18:43:42 +0100 Subject: [PATCH 3/9] ARM: 7484/1: Don't enable GENERIC_LOCKBREAK with ticket spinlocks Now that ARM has implemented its spinlocks with tickets we don't need to use the generic lockbreak algorithm. Remove the Kconfig from ARM so that we use the arch_spin_is_contended() definition from the asm header. This also saves a word in each lock because we don't need the break_lock member anymore. Signed-off-by: Stephen Boyd Acked-by: Will Deacon Signed-off-by: Russell King --- arch/arm/Kconfig | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index e91c7cdc6fe..e4191ccbdb7 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -126,11 +126,6 @@ config TRACE_IRQFLAGS_SUPPORT bool default y -config GENERIC_LOCKBREAK - bool - default y - depends on SMP && PREEMPT - config RWSEM_GENERIC_SPINLOCK bool default y From 237ec6f2e51d2fc2ff37c7c5f1ccc9264d09c85b Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Tue, 7 Aug 2012 19:05:10 +0100 Subject: [PATCH 4/9] ARM: 7486/1: sched_clock: update epoch_cyc on resume Many clocks that are used to provide sched_clock will reset during suspend. If read_sched_clock returns 0 after suspend, sched_clock will appear to jump forward. This patch resets cd.epoch_cyc to the current value of read_sched_clock during resume, which causes sched_clock() just after suspend to return the same value as sched_clock() just before suspend. In addition, during the window where epoch_ns has been updated before suspend, but epoch_cyc has not been updated after suspend, it is unknown whether the clock has reset or not, and sched_clock() could return a bogus value. Add a suspended flag, and return the pre-suspend epoch_ns value during this period. The new behavior is triggered by calling setup_sched_clock_needs_suspend instead of setup_sched_clock. Signed-off-by: Colin Cross Reviewed-by: Linus Walleij Signed-off-by: Russell King --- arch/arm/include/asm/sched_clock.h | 2 ++ arch/arm/kernel/sched_clock.c | 24 ++++++++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/arch/arm/include/asm/sched_clock.h b/arch/arm/include/asm/sched_clock.h index e3f75726343..05b8e82ec9f 100644 --- a/arch/arm/include/asm/sched_clock.h +++ b/arch/arm/include/asm/sched_clock.h @@ -10,5 +10,7 @@ extern void sched_clock_postinit(void); extern void setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate); +extern void setup_sched_clock_needs_suspend(u32 (*read)(void), int bits, + unsigned long rate); #endif diff --git a/arch/arm/kernel/sched_clock.c b/arch/arm/kernel/sched_clock.c index 27d186abbc0..f4515393248 100644 --- a/arch/arm/kernel/sched_clock.c +++ b/arch/arm/kernel/sched_clock.c @@ -21,6 +21,8 @@ struct clock_data { u32 epoch_cyc_copy; u32 mult; u32 shift; + bool suspended; + bool needs_suspend; }; static void sched_clock_poll(unsigned long wrap_ticks); @@ -49,6 +51,9 @@ static unsigned long long cyc_to_sched_clock(u32 cyc, u32 mask) u64 epoch_ns; u32 epoch_cyc; + if (cd.suspended) + return cd.epoch_ns; + /* * Load the epoch_cyc and epoch_ns atomically. We do this by * ensuring that we always write epoch_cyc, epoch_ns and @@ -98,6 +103,13 @@ static void sched_clock_poll(unsigned long wrap_ticks) update_sched_clock(); } +void __init setup_sched_clock_needs_suspend(u32 (*read)(void), int bits, + unsigned long rate) +{ + setup_sched_clock(read, bits, rate); + cd.needs_suspend = true; +} + void __init setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate) { unsigned long r, w; @@ -169,11 +181,23 @@ void __init sched_clock_postinit(void) static int sched_clock_suspend(void) { sched_clock_poll(sched_clock_timer.data); + if (cd.needs_suspend) + cd.suspended = true; return 0; } +static void sched_clock_resume(void) +{ + if (cd.needs_suspend) { + cd.epoch_cyc = read_sched_clock(); + cd.epoch_cyc_copy = cd.epoch_cyc; + cd.suspended = false; + } +} + static struct syscore_ops sched_clock_ops = { .suspend = sched_clock_suspend, + .resume = sched_clock_resume, }; static int __init sched_clock_syscore_init(void) From 47f1204329237a0f8655f5a9f14a38ac81946ca1 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 10 Aug 2012 17:51:18 +0100 Subject: [PATCH 5/9] ARM: 7487/1: mm: avoid setting nG bit for user mappings that aren't present Swap entries are encoding in ptes such that !pte_present(pte) and pte_file(pte). The remaining bits of the descriptor are used to identify the swapfile and offset within it to the swap entry. When writing such a pte for a user virtual address, set_pte_at unconditionally sets the nG bit, which (in the case of LPAE) will corrupt the swapfile offset and lead to a BUG: [ 140.494067] swap_free: Unused swap offset entry 000763b4 [ 140.509989] BUG: Bad page map in process rs:main Q:Reg pte:0ec76800 pmd:8f92e003 This patch fixes the problem by only setting the nG bit for user mappings that are actually present. Cc: Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/include/asm/pgtable.h | 40 ++++++++++++++++++---------------- arch/arm/mm/flush.c | 2 -- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index f66626d71e7..d88f9f049e9 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -195,25 +195,6 @@ static inline pte_t *pmd_page_vaddr(pmd_t pmd) #define pte_clear(mm,addr,ptep) set_pte_ext(ptep, __pte(0), 0) -#if __LINUX_ARM_ARCH__ < 6 -static inline void __sync_icache_dcache(pte_t pteval) -{ -} -#else -extern void __sync_icache_dcache(pte_t pteval); -#endif - -static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pteval) -{ - if (addr >= TASK_SIZE) - set_pte_ext(ptep, pteval, 0); - else { - __sync_icache_dcache(pteval); - set_pte_ext(ptep, pteval, PTE_EXT_NG); - } -} - #define pte_none(pte) (!pte_val(pte)) #define pte_present(pte) (pte_val(pte) & L_PTE_PRESENT) #define pte_write(pte) (!(pte_val(pte) & L_PTE_RDONLY)) @@ -226,6 +207,27 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, ((pte_val(pte) & (L_PTE_PRESENT | L_PTE_USER)) == \ (L_PTE_PRESENT | L_PTE_USER)) +#if __LINUX_ARM_ARCH__ < 6 +static inline void __sync_icache_dcache(pte_t pteval) +{ +} +#else +extern void __sync_icache_dcache(pte_t pteval); +#endif + +static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pteval) +{ + unsigned long ext = 0; + + if (addr < TASK_SIZE && pte_present_user(pteval)) { + __sync_icache_dcache(pteval); + ext |= PTE_EXT_NG; + } + + set_pte_ext(ptep, pteval, ext); +} + #define PTE_BIT_FUNC(fn,op) \ static inline pte_t pte_##fn(pte_t pte) { pte_val(pte) op; return pte; } diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c index 77458548e03..40ca11ed6e5 100644 --- a/arch/arm/mm/flush.c +++ b/arch/arm/mm/flush.c @@ -231,8 +231,6 @@ void __sync_icache_dcache(pte_t pteval) struct page *page; struct address_space *mapping; - if (!pte_present_user(pteval)) - return; if (cache_is_vipt_nonaliasing() && !pte_exec(pteval)) /* only flush non-aliasing VIPT caches for exec mappings */ return; From f5f2025ef3e2cdb593707cbf87378761f17befbe Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 10 Aug 2012 17:51:19 +0100 Subject: [PATCH 6/9] ARM: 7488/1: mm: use 5 bits for swapfile type encoding Page migration encodes the pfn in the offset field of a swp_entry_t. For LPAE, we support physical addresses of up to 36 bits (due to sparsemem limitations with the size of page flags), requiring 24 bits to represent a pfn. A further 3 bits are used to encode a swp_entry into a pte, leaving 5 bits for the type field. Furthermore, the core code defines MAX_SWAPFILES_SHIFT as 5, so the additional type bit does not get used. This patch reduces the width of the type field to 5 bits, allowing us to create up to 31 swapfiles of 64GB each. Cc: Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/include/asm/pgtable.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index d88f9f049e9..41dc31f834c 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -253,13 +253,13 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) * * 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 - * <--------------- offset --------------------> <- type --> 0 0 0 + * <--------------- offset ----------------------> < type -> 0 0 0 * - * This gives us up to 63 swap files and 32GB per swap file. Note that + * This gives us up to 31 swap files and 64GB per swap file. Note that * the offset field is always non-zero. */ #define __SWP_TYPE_SHIFT 3 -#define __SWP_TYPE_BITS 6 +#define __SWP_TYPE_BITS 5 #define __SWP_TYPE_MASK ((1 << __SWP_TYPE_BITS) - 1) #define __SWP_OFFSET_SHIFT (__SWP_TYPE_BITS + __SWP_TYPE_SHIFT) From 730a8128cd8978467eb1cf546b11014acb57d433 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 10 Aug 2012 19:13:36 +0100 Subject: [PATCH 7/9] ARM: 7489/1: errata: fix workaround for erratum #720789 on UP systems Commit 5a783cbc4836 ("ARM: 7478/1: errata: extend workaround for erratum #720789") added workarounds for erratum #720789 to the range TLB invalidation functions with the observation that the erratum only affects SMP platforms. However, when running an SMP_ON_UP kernel on a uniprocessor platform we must take care to preserve the ASID as the workaround is not required. This patch ensures that we don't set the ASID to 0 when flushing the TLB on such a system, preserving the original behaviour with the workaround disabled. Cc: Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/mm/tlb-v7.S | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/mm/tlb-v7.S b/arch/arm/mm/tlb-v7.S index c2021139cb5..ea94765acf9 100644 --- a/arch/arm/mm/tlb-v7.S +++ b/arch/arm/mm/tlb-v7.S @@ -38,10 +38,10 @@ ENTRY(v7wbi_flush_user_tlb_range) dsb mov r0, r0, lsr #PAGE_SHIFT @ align address mov r1, r1, lsr #PAGE_SHIFT -#ifdef CONFIG_ARM_ERRATA_720789 - mov r3, #0 -#else asid r3, r3 @ mask ASID +#ifdef CONFIG_ARM_ERRATA_720789 + ALT_SMP(W(mov) r3, #0 ) + ALT_UP(W(nop) ) #endif orr r0, r3, r0, lsl #PAGE_SHIFT @ Create initial MVA mov r1, r1, lsl #PAGE_SHIFT From 080fc66fb5b6feae19fbbe8c6cbd7b11a4dac636 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 13 Aug 2012 11:44:13 +0100 Subject: [PATCH 8/9] ARM: Bring back ARMv3 IO and user access code This partially reverts 357c9c1f07d4546bc3fbc0fd1044d96b114d14ed (ARM: Remove support for ARMv3 ARM610 and ARM710 CPUs). Although we only support StrongARM on the RiscPC, we need to keep the ARMv3 user access code for this platform because the bus does not understand half-word load/stores. Reported-by: Arnd Bergmann Signed-off-by: Russell King --- arch/arm/lib/Makefile | 23 +- arch/arm/lib/io-readsw-armv3.S | 106 ++++++ arch/arm/lib/io-writesw-armv3.S | 126 +++++++ arch/arm/lib/uaccess.S | 564 ++++++++++++++++++++++++++++++++ 4 files changed, 816 insertions(+), 3 deletions(-) create mode 100644 arch/arm/lib/io-readsw-armv3.S create mode 100644 arch/arm/lib/io-writesw-armv3.S create mode 100644 arch/arm/lib/uaccess.S diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile index 2473fd1fd51..af72969820b 100644 --- a/arch/arm/lib/Makefile +++ b/arch/arm/lib/Makefile @@ -16,13 +16,30 @@ lib-y := backtrace.o changebit.o csumipv6.o csumpartial.o \ call_with_stack.o mmu-y := clear_user.o copy_page.o getuser.o putuser.o -mmu-y += copy_from_user.o copy_to_user.o + +# the code in uaccess.S is not preemption safe and +# probably faster on ARMv3 only +ifeq ($(CONFIG_PREEMPT),y) + mmu-y += copy_from_user.o copy_to_user.o +else +ifneq ($(CONFIG_CPU_32v3),y) + mmu-y += copy_from_user.o copy_to_user.o +else + mmu-y += uaccess.o +endif +endif # using lib_ here won't override already available weak symbols obj-$(CONFIG_UACCESS_WITH_MEMCPY) += uaccess_with_memcpy.o -lib-$(CONFIG_MMU) += $(mmu-y) -lib-y += io-readsw-armv4.o io-writesw-armv4.o +lib-$(CONFIG_MMU) += $(mmu-y) + +ifeq ($(CONFIG_CPU_32v3),y) + lib-y += io-readsw-armv3.o io-writesw-armv3.o +else + lib-y += io-readsw-armv4.o io-writesw-armv4.o +endif + lib-$(CONFIG_ARCH_RPC) += ecard.o io-acorn.o floppydma.o lib-$(CONFIG_ARCH_SHARK) += io-shark.o diff --git a/arch/arm/lib/io-readsw-armv3.S b/arch/arm/lib/io-readsw-armv3.S new file mode 100644 index 00000000000..88487c8c4f2 --- /dev/null +++ b/arch/arm/lib/io-readsw-armv3.S @@ -0,0 +1,106 @@ +/* + * linux/arch/arm/lib/io-readsw-armv3.S + * + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include + +.Linsw_bad_alignment: + adr r0, .Linsw_bad_align_msg + mov r2, lr + b panic +.Linsw_bad_align_msg: + .asciz "insw: bad buffer alignment (0x%p, lr=0x%08lX)\n" + .align + +.Linsw_align: tst r1, #1 + bne .Linsw_bad_alignment + + ldr r3, [r0] + strb r3, [r1], #1 + mov r3, r3, lsr #8 + strb r3, [r1], #1 + + subs r2, r2, #1 + moveq pc, lr + +ENTRY(__raw_readsw) + teq r2, #0 @ do we have to check for the zero len? + moveq pc, lr + tst r1, #3 + bne .Linsw_align + +.Linsw_aligned: mov ip, #0xff + orr ip, ip, ip, lsl #8 + stmfd sp!, {r4, r5, r6, lr} + + subs r2, r2, #8 + bmi .Lno_insw_8 + +.Linsw_8_lp: ldr r3, [r0] + and r3, r3, ip + ldr r4, [r0] + orr r3, r3, r4, lsl #16 + + ldr r4, [r0] + and r4, r4, ip + ldr r5, [r0] + orr r4, r4, r5, lsl #16 + + ldr r5, [r0] + and r5, r5, ip + ldr r6, [r0] + orr r5, r5, r6, lsl #16 + + ldr r6, [r0] + and r6, r6, ip + ldr lr, [r0] + orr r6, r6, lr, lsl #16 + + stmia r1!, {r3 - r6} + + subs r2, r2, #8 + bpl .Linsw_8_lp + + tst r2, #7 + ldmeqfd sp!, {r4, r5, r6, pc} + +.Lno_insw_8: tst r2, #4 + beq .Lno_insw_4 + + ldr r3, [r0] + and r3, r3, ip + ldr r4, [r0] + orr r3, r3, r4, lsl #16 + + ldr r4, [r0] + and r4, r4, ip + ldr r5, [r0] + orr r4, r4, r5, lsl #16 + + stmia r1!, {r3, r4} + +.Lno_insw_4: tst r2, #2 + beq .Lno_insw_2 + + ldr r3, [r0] + and r3, r3, ip + ldr r4, [r0] + orr r3, r3, r4, lsl #16 + + str r3, [r1], #4 + +.Lno_insw_2: tst r2, #1 + ldrne r3, [r0] + strneb r3, [r1], #1 + movne r3, r3, lsr #8 + strneb r3, [r1] + + ldmfd sp!, {r4, r5, r6, pc} + + diff --git a/arch/arm/lib/io-writesw-armv3.S b/arch/arm/lib/io-writesw-armv3.S new file mode 100644 index 00000000000..49b800419e3 --- /dev/null +++ b/arch/arm/lib/io-writesw-armv3.S @@ -0,0 +1,126 @@ +/* + * linux/arch/arm/lib/io-writesw-armv3.S + * + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include + +.Loutsw_bad_alignment: + adr r0, .Loutsw_bad_align_msg + mov r2, lr + b panic +.Loutsw_bad_align_msg: + .asciz "outsw: bad buffer alignment (0x%p, lr=0x%08lX)\n" + .align + +.Loutsw_align: tst r1, #1 + bne .Loutsw_bad_alignment + + add r1, r1, #2 + + ldr r3, [r1, #-4] + mov r3, r3, lsr #16 + orr r3, r3, r3, lsl #16 + str r3, [r0] + subs r2, r2, #1 + moveq pc, lr + +ENTRY(__raw_writesw) + teq r2, #0 @ do we have to check for the zero len? + moveq pc, lr + tst r1, #3 + bne .Loutsw_align + + stmfd sp!, {r4, r5, r6, lr} + + subs r2, r2, #8 + bmi .Lno_outsw_8 + +.Loutsw_8_lp: ldmia r1!, {r3, r4, r5, r6} + + mov ip, r3, lsl #16 + orr ip, ip, ip, lsr #16 + str ip, [r0] + + mov ip, r3, lsr #16 + orr ip, ip, ip, lsl #16 + str ip, [r0] + + mov ip, r4, lsl #16 + orr ip, ip, ip, lsr #16 + str ip, [r0] + + mov ip, r4, lsr #16 + orr ip, ip, ip, lsl #16 + str ip, [r0] + + mov ip, r5, lsl #16 + orr ip, ip, ip, lsr #16 + str ip, [r0] + + mov ip, r5, lsr #16 + orr ip, ip, ip, lsl #16 + str ip, [r0] + + mov ip, r6, lsl #16 + orr ip, ip, ip, lsr #16 + str ip, [r0] + + mov ip, r6, lsr #16 + orr ip, ip, ip, lsl #16 + str ip, [r0] + + subs r2, r2, #8 + bpl .Loutsw_8_lp + + tst r2, #7 + ldmeqfd sp!, {r4, r5, r6, pc} + +.Lno_outsw_8: tst r2, #4 + beq .Lno_outsw_4 + + ldmia r1!, {r3, r4} + + mov ip, r3, lsl #16 + orr ip, ip, ip, lsr #16 + str ip, [r0] + + mov ip, r3, lsr #16 + orr ip, ip, ip, lsl #16 + str ip, [r0] + + mov ip, r4, lsl #16 + orr ip, ip, ip, lsr #16 + str ip, [r0] + + mov ip, r4, lsr #16 + orr ip, ip, ip, lsl #16 + str ip, [r0] + +.Lno_outsw_4: tst r2, #2 + beq .Lno_outsw_2 + + ldr r3, [r1], #4 + + mov ip, r3, lsl #16 + orr ip, ip, ip, lsr #16 + str ip, [r0] + + mov ip, r3, lsr #16 + orr ip, ip, ip, lsl #16 + str ip, [r0] + +.Lno_outsw_2: tst r2, #1 + + ldrne r3, [r1] + + movne ip, r3, lsl #16 + orrne ip, ip, ip, lsr #16 + strne ip, [r0] + + ldmfd sp!, {r4, r5, r6, pc} diff --git a/arch/arm/lib/uaccess.S b/arch/arm/lib/uaccess.S new file mode 100644 index 00000000000..5c908b1cb8e --- /dev/null +++ b/arch/arm/lib/uaccess.S @@ -0,0 +1,564 @@ +/* + * linux/arch/arm/lib/uaccess.S + * + * Copyright (C) 1995, 1996,1997,1998 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Routines to block copy data to/from user memory + * These are highly optimised both for the 4k page size + * and for various alignments. + */ +#include +#include +#include +#include + + .text + +#define PAGE_SHIFT 12 + +/* Prototype: int __copy_to_user(void *to, const char *from, size_t n) + * Purpose : copy a block to user memory from kernel memory + * Params : to - user memory + * : from - kernel memory + * : n - number of bytes to copy + * Returns : Number of bytes NOT copied. + */ + +.Lc2u_dest_not_aligned: + rsb ip, ip, #4 + cmp ip, #2 + ldrb r3, [r1], #1 +USER( TUSER( strb) r3, [r0], #1) @ May fault + ldrgeb r3, [r1], #1 +USER( TUSER( strgeb) r3, [r0], #1) @ May fault + ldrgtb r3, [r1], #1 +USER( TUSER( strgtb) r3, [r0], #1) @ May fault + sub r2, r2, ip + b .Lc2u_dest_aligned + +ENTRY(__copy_to_user) + stmfd sp!, {r2, r4 - r7, lr} + cmp r2, #4 + blt .Lc2u_not_enough + ands ip, r0, #3 + bne .Lc2u_dest_not_aligned +.Lc2u_dest_aligned: + + ands ip, r1, #3 + bne .Lc2u_src_not_aligned +/* + * Seeing as there has to be at least 8 bytes to copy, we can + * copy one word, and force a user-mode page fault... + */ + +.Lc2u_0fupi: subs r2, r2, #4 + addmi ip, r2, #4 + bmi .Lc2u_0nowords + ldr r3, [r1], #4 +USER( TUSER( str) r3, [r0], #4) @ May fault + mov ip, r0, lsl #32 - PAGE_SHIFT @ On each page, use a ld/st??t instruction + rsb ip, ip, #0 + movs ip, ip, lsr #32 - PAGE_SHIFT + beq .Lc2u_0fupi +/* + * ip = max no. of bytes to copy before needing another "strt" insn + */ + cmp r2, ip + movlt ip, r2 + sub r2, r2, ip + subs ip, ip, #32 + blt .Lc2u_0rem8lp + +.Lc2u_0cpy8lp: ldmia r1!, {r3 - r6} + stmia r0!, {r3 - r6} @ Shouldnt fault + ldmia r1!, {r3 - r6} + subs ip, ip, #32 + stmia r0!, {r3 - r6} @ Shouldnt fault + bpl .Lc2u_0cpy8lp + +.Lc2u_0rem8lp: cmn ip, #16 + ldmgeia r1!, {r3 - r6} + stmgeia r0!, {r3 - r6} @ Shouldnt fault + tst ip, #8 + ldmneia r1!, {r3 - r4} + stmneia r0!, {r3 - r4} @ Shouldnt fault + tst ip, #4 + ldrne r3, [r1], #4 + TUSER( strne) r3, [r0], #4 @ Shouldnt fault + ands ip, ip, #3 + beq .Lc2u_0fupi +.Lc2u_0nowords: teq ip, #0 + beq .Lc2u_finished +.Lc2u_nowords: cmp ip, #2 + ldrb r3, [r1], #1 +USER( TUSER( strb) r3, [r0], #1) @ May fault + ldrgeb r3, [r1], #1 +USER( TUSER( strgeb) r3, [r0], #1) @ May fault + ldrgtb r3, [r1], #1 +USER( TUSER( strgtb) r3, [r0], #1) @ May fault + b .Lc2u_finished + +.Lc2u_not_enough: + movs ip, r2 + bne .Lc2u_nowords +.Lc2u_finished: mov r0, #0 + ldmfd sp!, {r2, r4 - r7, pc} + +.Lc2u_src_not_aligned: + bic r1, r1, #3 + ldr r7, [r1], #4 + cmp ip, #2 + bgt .Lc2u_3fupi + beq .Lc2u_2fupi +.Lc2u_1fupi: subs r2, r2, #4 + addmi ip, r2, #4 + bmi .Lc2u_1nowords + mov r3, r7, pull #8 + ldr r7, [r1], #4 + orr r3, r3, r7, push #24 +USER( TUSER( str) r3, [r0], #4) @ May fault + mov ip, r0, lsl #32 - PAGE_SHIFT + rsb ip, ip, #0 + movs ip, ip, lsr #32 - PAGE_SHIFT + beq .Lc2u_1fupi + cmp r2, ip + movlt ip, r2 + sub r2, r2, ip + subs ip, ip, #16 + blt .Lc2u_1rem8lp + +.Lc2u_1cpy8lp: mov r3, r7, pull #8 + ldmia r1!, {r4 - r7} + subs ip, ip, #16 + orr r3, r3, r4, push #24 + mov r4, r4, pull #8 + orr r4, r4, r5, push #24 + mov r5, r5, pull #8 + orr r5, r5, r6, push #24 + mov r6, r6, pull #8 + orr r6, r6, r7, push #24 + stmia r0!, {r3 - r6} @ Shouldnt fault + bpl .Lc2u_1cpy8lp + +.Lc2u_1rem8lp: tst ip, #8 + movne r3, r7, pull #8 + ldmneia r1!, {r4, r7} + orrne r3, r3, r4, push #24 + movne r4, r4, pull #8 + orrne r4, r4, r7, push #24 + stmneia r0!, {r3 - r4} @ Shouldnt fault + tst ip, #4 + movne r3, r7, pull #8 + ldrne r7, [r1], #4 + orrne r3, r3, r7, push #24 + TUSER( strne) r3, [r0], #4 @ Shouldnt fault + ands ip, ip, #3 + beq .Lc2u_1fupi +.Lc2u_1nowords: mov r3, r7, get_byte_1 + teq ip, #0 + beq .Lc2u_finished + cmp ip, #2 +USER( TUSER( strb) r3, [r0], #1) @ May fault + movge r3, r7, get_byte_2 +USER( TUSER( strgeb) r3, [r0], #1) @ May fault + movgt r3, r7, get_byte_3 +USER( TUSER( strgtb) r3, [r0], #1) @ May fault + b .Lc2u_finished + +.Lc2u_2fupi: subs r2, r2, #4 + addmi ip, r2, #4 + bmi .Lc2u_2nowords + mov r3, r7, pull #16 + ldr r7, [r1], #4 + orr r3, r3, r7, push #16 +USER( TUSER( str) r3, [r0], #4) @ May fault + mov ip, r0, lsl #32 - PAGE_SHIFT + rsb ip, ip, #0 + movs ip, ip, lsr #32 - PAGE_SHIFT + beq .Lc2u_2fupi + cmp r2, ip + movlt ip, r2 + sub r2, r2, ip + subs ip, ip, #16 + blt .Lc2u_2rem8lp + +.Lc2u_2cpy8lp: mov r3, r7, pull #16 + ldmia r1!, {r4 - r7} + subs ip, ip, #16 + orr r3, r3, r4, push #16 + mov r4, r4, pull #16 + orr r4, r4, r5, push #16 + mov r5, r5, pull #16 + orr r5, r5, r6, push #16 + mov r6, r6, pull #16 + orr r6, r6, r7, push #16 + stmia r0!, {r3 - r6} @ Shouldnt fault + bpl .Lc2u_2cpy8lp + +.Lc2u_2rem8lp: tst ip, #8 + movne r3, r7, pull #16 + ldmneia r1!, {r4, r7} + orrne r3, r3, r4, push #16 + movne r4, r4, pull #16 + orrne r4, r4, r7, push #16 + stmneia r0!, {r3 - r4} @ Shouldnt fault + tst ip, #4 + movne r3, r7, pull #16 + ldrne r7, [r1], #4 + orrne r3, r3, r7, push #16 + TUSER( strne) r3, [r0], #4 @ Shouldnt fault + ands ip, ip, #3 + beq .Lc2u_2fupi +.Lc2u_2nowords: mov r3, r7, get_byte_2 + teq ip, #0 + beq .Lc2u_finished + cmp ip, #2 +USER( TUSER( strb) r3, [r0], #1) @ May fault + movge r3, r7, get_byte_3 +USER( TUSER( strgeb) r3, [r0], #1) @ May fault + ldrgtb r3, [r1], #0 +USER( TUSER( strgtb) r3, [r0], #1) @ May fault + b .Lc2u_finished + +.Lc2u_3fupi: subs r2, r2, #4 + addmi ip, r2, #4 + bmi .Lc2u_3nowords + mov r3, r7, pull #24 + ldr r7, [r1], #4 + orr r3, r3, r7, push #8 +USER( TUSER( str) r3, [r0], #4) @ May fault + mov ip, r0, lsl #32 - PAGE_SHIFT + rsb ip, ip, #0 + movs ip, ip, lsr #32 - PAGE_SHIFT + beq .Lc2u_3fupi + cmp r2, ip + movlt ip, r2 + sub r2, r2, ip + subs ip, ip, #16 + blt .Lc2u_3rem8lp + +.Lc2u_3cpy8lp: mov r3, r7, pull #24 + ldmia r1!, {r4 - r7} + subs ip, ip, #16 + orr r3, r3, r4, push #8 + mov r4, r4, pull #24 + orr r4, r4, r5, push #8 + mov r5, r5, pull #24 + orr r5, r5, r6, push #8 + mov r6, r6, pull #24 + orr r6, r6, r7, push #8 + stmia r0!, {r3 - r6} @ Shouldnt fault + bpl .Lc2u_3cpy8lp + +.Lc2u_3rem8lp: tst ip, #8 + movne r3, r7, pull #24 + ldmneia r1!, {r4, r7} + orrne r3, r3, r4, push #8 + movne r4, r4, pull #24 + orrne r4, r4, r7, push #8 + stmneia r0!, {r3 - r4} @ Shouldnt fault + tst ip, #4 + movne r3, r7, pull #24 + ldrne r7, [r1], #4 + orrne r3, r3, r7, push #8 + TUSER( strne) r3, [r0], #4 @ Shouldnt fault + ands ip, ip, #3 + beq .Lc2u_3fupi +.Lc2u_3nowords: mov r3, r7, get_byte_3 + teq ip, #0 + beq .Lc2u_finished + cmp ip, #2 +USER( TUSER( strb) r3, [r0], #1) @ May fault + ldrgeb r3, [r1], #1 +USER( TUSER( strgeb) r3, [r0], #1) @ May fault + ldrgtb r3, [r1], #0 +USER( TUSER( strgtb) r3, [r0], #1) @ May fault + b .Lc2u_finished +ENDPROC(__copy_to_user) + + .pushsection .fixup,"ax" + .align 0 +9001: ldmfd sp!, {r0, r4 - r7, pc} + .popsection + +/* Prototype: unsigned long __copy_from_user(void *to,const void *from,unsigned long n); + * Purpose : copy a block from user memory to kernel memory + * Params : to - kernel memory + * : from - user memory + * : n - number of bytes to copy + * Returns : Number of bytes NOT copied. + */ +.Lcfu_dest_not_aligned: + rsb ip, ip, #4 + cmp ip, #2 +USER( TUSER( ldrb) r3, [r1], #1) @ May fault + strb r3, [r0], #1 +USER( TUSER( ldrgeb) r3, [r1], #1) @ May fault + strgeb r3, [r0], #1 +USER( TUSER( ldrgtb) r3, [r1], #1) @ May fault + strgtb r3, [r0], #1 + sub r2, r2, ip + b .Lcfu_dest_aligned + +ENTRY(__copy_from_user) + stmfd sp!, {r0, r2, r4 - r7, lr} + cmp r2, #4 + blt .Lcfu_not_enough + ands ip, r0, #3 + bne .Lcfu_dest_not_aligned +.Lcfu_dest_aligned: + ands ip, r1, #3 + bne .Lcfu_src_not_aligned + +/* + * Seeing as there has to be at least 8 bytes to copy, we can + * copy one word, and force a user-mode page fault... + */ + +.Lcfu_0fupi: subs r2, r2, #4 + addmi ip, r2, #4 + bmi .Lcfu_0nowords +USER( TUSER( ldr) r3, [r1], #4) + str r3, [r0], #4 + mov ip, r1, lsl #32 - PAGE_SHIFT @ On each page, use a ld/st??t instruction + rsb ip, ip, #0 + movs ip, ip, lsr #32 - PAGE_SHIFT + beq .Lcfu_0fupi +/* + * ip = max no. of bytes to copy before needing another "strt" insn + */ + cmp r2, ip + movlt ip, r2 + sub r2, r2, ip + subs ip, ip, #32 + blt .Lcfu_0rem8lp + +.Lcfu_0cpy8lp: ldmia r1!, {r3 - r6} @ Shouldnt fault + stmia r0!, {r3 - r6} + ldmia r1!, {r3 - r6} @ Shouldnt fault + subs ip, ip, #32 + stmia r0!, {r3 - r6} + bpl .Lcfu_0cpy8lp + +.Lcfu_0rem8lp: cmn ip, #16 + ldmgeia r1!, {r3 - r6} @ Shouldnt fault + stmgeia r0!, {r3 - r6} + tst ip, #8 + ldmneia r1!, {r3 - r4} @ Shouldnt fault + stmneia r0!, {r3 - r4} + tst ip, #4 + TUSER( ldrne) r3, [r1], #4 @ Shouldnt fault + strne r3, [r0], #4 + ands ip, ip, #3 + beq .Lcfu_0fupi +.Lcfu_0nowords: teq ip, #0 + beq .Lcfu_finished +.Lcfu_nowords: cmp ip, #2 +USER( TUSER( ldrb) r3, [r1], #1) @ May fault + strb r3, [r0], #1 +USER( TUSER( ldrgeb) r3, [r1], #1) @ May fault + strgeb r3, [r0], #1 +USER( TUSER( ldrgtb) r3, [r1], #1) @ May fault + strgtb r3, [r0], #1 + b .Lcfu_finished + +.Lcfu_not_enough: + movs ip, r2 + bne .Lcfu_nowords +.Lcfu_finished: mov r0, #0 + add sp, sp, #8 + ldmfd sp!, {r4 - r7, pc} + +.Lcfu_src_not_aligned: + bic r1, r1, #3 +USER( TUSER( ldr) r7, [r1], #4) @ May fault + cmp ip, #2 + bgt .Lcfu_3fupi + beq .Lcfu_2fupi +.Lcfu_1fupi: subs r2, r2, #4 + addmi ip, r2, #4 + bmi .Lcfu_1nowords + mov r3, r7, pull #8 +USER( TUSER( ldr) r7, [r1], #4) @ May fault + orr r3, r3, r7, push #24 + str r3, [r0], #4 + mov ip, r1, lsl #32 - PAGE_SHIFT + rsb ip, ip, #0 + movs ip, ip, lsr #32 - PAGE_SHIFT + beq .Lcfu_1fupi + cmp r2, ip + movlt ip, r2 + sub r2, r2, ip + subs ip, ip, #16 + blt .Lcfu_1rem8lp + +.Lcfu_1cpy8lp: mov r3, r7, pull #8 + ldmia r1!, {r4 - r7} @ Shouldnt fault + subs ip, ip, #16 + orr r3, r3, r4, push #24 + mov r4, r4, pull #8 + orr r4, r4, r5, push #24 + mov r5, r5, pull #8 + orr r5, r5, r6, push #24 + mov r6, r6, pull #8 + orr r6, r6, r7, push #24 + stmia r0!, {r3 - r6} + bpl .Lcfu_1cpy8lp + +.Lcfu_1rem8lp: tst ip, #8 + movne r3, r7, pull #8 + ldmneia r1!, {r4, r7} @ Shouldnt fault + orrne r3, r3, r4, push #24 + movne r4, r4, pull #8 + orrne r4, r4, r7, push #24 + stmneia r0!, {r3 - r4} + tst ip, #4 + movne r3, r7, pull #8 +USER( TUSER( ldrne) r7, [r1], #4) @ May fault + orrne r3, r3, r7, push #24 + strne r3, [r0], #4 + ands ip, ip, #3 + beq .Lcfu_1fupi +.Lcfu_1nowords: mov r3, r7, get_byte_1 + teq ip, #0 + beq .Lcfu_finished + cmp ip, #2 + strb r3, [r0], #1 + movge r3, r7, get_byte_2 + strgeb r3, [r0], #1 + movgt r3, r7, get_byte_3 + strgtb r3, [r0], #1 + b .Lcfu_finished + +.Lcfu_2fupi: subs r2, r2, #4 + addmi ip, r2, #4 + bmi .Lcfu_2nowords + mov r3, r7, pull #16 +USER( TUSER( ldr) r7, [r1], #4) @ May fault + orr r3, r3, r7, push #16 + str r3, [r0], #4 + mov ip, r1, lsl #32 - PAGE_SHIFT + rsb ip, ip, #0 + movs ip, ip, lsr #32 - PAGE_SHIFT + beq .Lcfu_2fupi + cmp r2, ip + movlt ip, r2 + sub r2, r2, ip + subs ip, ip, #16 + blt .Lcfu_2rem8lp + + +.Lcfu_2cpy8lp: mov r3, r7, pull #16 + ldmia r1!, {r4 - r7} @ Shouldnt fault + subs ip, ip, #16 + orr r3, r3, r4, push #16 + mov r4, r4, pull #16 + orr r4, r4, r5, push #16 + mov r5, r5, pull #16 + orr r5, r5, r6, push #16 + mov r6, r6, pull #16 + orr r6, r6, r7, push #16 + stmia r0!, {r3 - r6} + bpl .Lcfu_2cpy8lp + +.Lcfu_2rem8lp: tst ip, #8 + movne r3, r7, pull #16 + ldmneia r1!, {r4, r7} @ Shouldnt fault + orrne r3, r3, r4, push #16 + movne r4, r4, pull #16 + orrne r4, r4, r7, push #16 + stmneia r0!, {r3 - r4} + tst ip, #4 + movne r3, r7, pull #16 +USER( TUSER( ldrne) r7, [r1], #4) @ May fault + orrne r3, r3, r7, push #16 + strne r3, [r0], #4 + ands ip, ip, #3 + beq .Lcfu_2fupi +.Lcfu_2nowords: mov r3, r7, get_byte_2 + teq ip, #0 + beq .Lcfu_finished + cmp ip, #2 + strb r3, [r0], #1 + movge r3, r7, get_byte_3 + strgeb r3, [r0], #1 +USER( TUSER( ldrgtb) r3, [r1], #0) @ May fault + strgtb r3, [r0], #1 + b .Lcfu_finished + +.Lcfu_3fupi: subs r2, r2, #4 + addmi ip, r2, #4 + bmi .Lcfu_3nowords + mov r3, r7, pull #24 +USER( TUSER( ldr) r7, [r1], #4) @ May fault + orr r3, r3, r7, push #8 + str r3, [r0], #4 + mov ip, r1, lsl #32 - PAGE_SHIFT + rsb ip, ip, #0 + movs ip, ip, lsr #32 - PAGE_SHIFT + beq .Lcfu_3fupi + cmp r2, ip + movlt ip, r2 + sub r2, r2, ip + subs ip, ip, #16 + blt .Lcfu_3rem8lp + +.Lcfu_3cpy8lp: mov r3, r7, pull #24 + ldmia r1!, {r4 - r7} @ Shouldnt fault + orr r3, r3, r4, push #8 + mov r4, r4, pull #24 + orr r4, r4, r5, push #8 + mov r5, r5, pull #24 + orr r5, r5, r6, push #8 + mov r6, r6, pull #24 + orr r6, r6, r7, push #8 + stmia r0!, {r3 - r6} + subs ip, ip, #16 + bpl .Lcfu_3cpy8lp + +.Lcfu_3rem8lp: tst ip, #8 + movne r3, r7, pull #24 + ldmneia r1!, {r4, r7} @ Shouldnt fault + orrne r3, r3, r4, push #8 + movne r4, r4, pull #24 + orrne r4, r4, r7, push #8 + stmneia r0!, {r3 - r4} + tst ip, #4 + movne r3, r7, pull #24 +USER( TUSER( ldrne) r7, [r1], #4) @ May fault + orrne r3, r3, r7, push #8 + strne r3, [r0], #4 + ands ip, ip, #3 + beq .Lcfu_3fupi +.Lcfu_3nowords: mov r3, r7, get_byte_3 + teq ip, #0 + beq .Lcfu_finished + cmp ip, #2 + strb r3, [r0], #1 +USER( TUSER( ldrgeb) r3, [r1], #1) @ May fault + strgeb r3, [r0], #1 +USER( TUSER( ldrgtb) r3, [r1], #1) @ May fault + strgtb r3, [r0], #1 + b .Lcfu_finished +ENDPROC(__copy_from_user) + + .pushsection .fixup,"ax" + .align 0 + /* + * We took an exception. r0 contains a pointer to + * the byte not copied. + */ +9001: ldr r2, [sp], #4 @ void *to + sub r2, r0, r2 @ bytes copied + ldr r1, [sp], #4 @ unsigned long count + subs r4, r1, r2 @ bytes left to copy + movne r1, r4 + blne __memzero + mov r0, r4 + ldmfd sp!, {r4 - r7, pc} + .popsection + From 89868730a756feca7e4b21a97c86487cd565ed22 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Mon, 13 Aug 2012 19:57:31 +0100 Subject: [PATCH 9/9] ARM: 7490/1: Drop duplicate select for GENERIC_IRQ_PROBE Seems that Thomas' and my patches collided during the last merge window. Signed-off-by: Stephen Boyd Signed-off-by: Russell King --- arch/arm/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index e4191ccbdb7..6d6e18fee9f 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -38,7 +38,6 @@ config ARM select HARDIRQS_SW_RESEND select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW - select GENERIC_IRQ_PROBE select ARCH_WANT_IPC_PARSE_VERSION select HARDIRQS_SW_RESEND select CPU_PM if (SUSPEND || CPU_IDLE)