From 34e1169d996ab148490c01b65b4ee371cf8ffba2 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 16 Oct 2012 07:31:07 +1030 Subject: [PATCH] module: add syscall to load module from fd As part of the effort to create a stronger boundary between root and kernel, Chrome OS wants to be able to enforce that kernel modules are being loaded only from our read-only crypto-hash verified (dm_verity) root filesystem. Since the init_module syscall hands the kernel a module as a memory blob, no reasoning about the origin of the blob can be made. Earlier proposals for appending signatures to kernel modules would not be useful in Chrome OS, since it would involve adding an additional set of keys to our kernel and builds for no good reason: we already trust the contents of our root filesystem. We don't need to verify those kernel modules a second time. Having to do signature checking on module loading would slow us down and be redundant. All we need to know is where a module is coming from so we can say yes/no to loading it. If a file descriptor is used as the source of a kernel module, many more things can be reasoned about. In Chrome OS's case, we could enforce that the module lives on the filesystem we expect it to live on. In the case of IMA (or other LSMs), it would be possible, for example, to examine extended attributes that may contain signatures over the contents of the module. This introduces a new syscall (on x86), similar to init_module, that has only two arguments. The first argument is used as a file descriptor to the module and the second argument is a pointer to the NULL terminated string of module arguments. Signed-off-by: Kees Cook Cc: Andrew Morton Signed-off-by: Rusty Russell (merge fixes) --- arch/x86/syscalls/syscall_32.tbl | 1 + arch/x86/syscalls/syscall_64.tbl | 1 + include/linux/syscalls.h | 1 + kernel/module.c | 485 ++++++++++++++++++------------- kernel/sys_ni.c | 1 + 5 files changed, 282 insertions(+), 207 deletions(-) diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl index a47103fbc69..83b3838417e 100644 --- a/arch/x86/syscalls/syscall_32.tbl +++ b/arch/x86/syscalls/syscall_32.tbl @@ -356,3 +356,4 @@ 347 i386 process_vm_readv sys_process_vm_readv compat_sys_process_vm_readv 348 i386 process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev 349 i386 kcmp sys_kcmp +350 i386 finit_module sys_finit_module diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl index a582bfed95b..7c58c84b7bc 100644 --- a/arch/x86/syscalls/syscall_64.tbl +++ b/arch/x86/syscalls/syscall_64.tbl @@ -319,6 +319,7 @@ 310 64 process_vm_readv sys_process_vm_readv 311 64 process_vm_writev sys_process_vm_writev 312 common kcmp sys_kcmp +313 common finit_module sys_finit_module # # x32-specific system call numbers start at 512 to avoid cache impact diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 727f0cd7392..32bc035bcd6 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -868,4 +868,5 @@ asmlinkage long sys_process_vm_writev(pid_t pid, asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2); +asmlinkage long sys_finit_module(int fd, const char __user *uargs); #endif diff --git a/kernel/module.c b/kernel/module.c index 6e48c3a4359..6d2c4e4ca1f 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -2425,18 +2426,17 @@ static inline void kmemleak_load_module(const struct module *mod, #endif #ifdef CONFIG_MODULE_SIG -static int module_sig_check(struct load_info *info, - const void *mod, unsigned long *_len) +static int module_sig_check(struct load_info *info) { int err = -ENOKEY; - unsigned long markerlen = sizeof(MODULE_SIG_STRING) - 1; - unsigned long len = *_len; + const unsigned long markerlen = sizeof(MODULE_SIG_STRING) - 1; + const void *mod = info->hdr; - if (len > markerlen && - memcmp(mod + len - markerlen, MODULE_SIG_STRING, markerlen) == 0) { + if (info->len > markerlen && + memcmp(mod + info->len - markerlen, MODULE_SIG_STRING, markerlen) == 0) { /* We truncate the module to discard the signature */ - *_len -= markerlen; - err = mod_verify_sig(mod, _len); + info->len -= markerlen; + err = mod_verify_sig(mod, &info->len); } if (!err) { @@ -2454,59 +2454,97 @@ static int module_sig_check(struct load_info *info, return err; } #else /* !CONFIG_MODULE_SIG */ -static int module_sig_check(struct load_info *info, - void *mod, unsigned long *len) +static int module_sig_check(struct load_info *info) { return 0; } #endif /* !CONFIG_MODULE_SIG */ -/* Sets info->hdr, info->len and info->sig_ok. */ -static int copy_and_check(struct load_info *info, - const void __user *umod, unsigned long len, - const char __user *uargs) +/* Sanity checks against invalid binaries, wrong arch, weird elf version. */ +static int elf_header_check(struct load_info *info) { - int err; - Elf_Ehdr *hdr; + if (info->len < sizeof(*(info->hdr))) + return -ENOEXEC; - if (len < sizeof(*hdr)) + if (memcmp(info->hdr->e_ident, ELFMAG, SELFMAG) != 0 + || info->hdr->e_type != ET_REL + || !elf_check_arch(info->hdr) + || info->hdr->e_shentsize != sizeof(Elf_Shdr)) + return -ENOEXEC; + + if (info->hdr->e_shoff >= info->len + || (info->hdr->e_shnum * sizeof(Elf_Shdr) > + info->len - info->hdr->e_shoff)) + return -ENOEXEC; + + return 0; +} + +/* Sets info->hdr and info->len. */ +static int copy_module_from_user(const void __user *umod, unsigned long len, + struct load_info *info) +{ + info->len = len; + if (info->len < sizeof(*(info->hdr))) return -ENOEXEC; /* Suck in entire file: we'll want most of it. */ - if ((hdr = vmalloc(len)) == NULL) + info->hdr = vmalloc(info->len); + if (!info->hdr) return -ENOMEM; - if (copy_from_user(hdr, umod, len) != 0) { - err = -EFAULT; - goto free_hdr; + if (copy_from_user(info->hdr, umod, info->len) != 0) { + vfree(info->hdr); + return -EFAULT; } - err = module_sig_check(info, hdr, &len); - if (err) - goto free_hdr; - - /* Sanity checks against insmoding binaries or wrong arch, - weird elf version */ - if (memcmp(hdr->e_ident, ELFMAG, SELFMAG) != 0 - || hdr->e_type != ET_REL - || !elf_check_arch(hdr) - || hdr->e_shentsize != sizeof(Elf_Shdr)) { - err = -ENOEXEC; - goto free_hdr; - } - - if (hdr->e_shoff >= len || - hdr->e_shnum * sizeof(Elf_Shdr) > len - hdr->e_shoff) { - err = -ENOEXEC; - goto free_hdr; - } - - info->hdr = hdr; - info->len = len; return 0; +} -free_hdr: - vfree(hdr); +/* Sets info->hdr and info->len. */ +static int copy_module_from_fd(int fd, struct load_info *info) +{ + struct file *file; + int err; + struct kstat stat; + loff_t pos; + ssize_t bytes = 0; + + file = fget(fd); + if (!file) + return -ENOEXEC; + + err = vfs_getattr(file->f_vfsmnt, file->f_dentry, &stat); + if (err) + goto out; + + if (stat.size > INT_MAX) { + err = -EFBIG; + goto out; + } + info->hdr = vmalloc(stat.size); + if (!info->hdr) { + err = -ENOMEM; + goto out; + } + + pos = 0; + while (pos < stat.size) { + bytes = kernel_read(file, pos, (char *)(info->hdr) + pos, + stat.size - pos); + if (bytes < 0) { + vfree(info->hdr); + err = bytes; + goto out; + } + if (bytes == 0) + break; + pos += bytes; + } + info->len = pos; + +out: + fput(file); return err; } @@ -2945,156 +2983,6 @@ static bool finished_loading(const char *name) return ret; } -/* Allocate and load the module: note that size of section 0 is always - zero, and we rely on this for optional sections. */ -static struct module *load_module(void __user *umod, - unsigned long len, - const char __user *uargs) -{ - struct load_info info = { NULL, }; - struct module *mod, *old; - long err; - - pr_debug("load_module: umod=%p, len=%lu, uargs=%p\n", - umod, len, uargs); - - /* Copy in the blobs from userspace, check they are vaguely sane. */ - err = copy_and_check(&info, umod, len, uargs); - if (err) - return ERR_PTR(err); - - /* Figure out module layout, and allocate all the memory. */ - mod = layout_and_allocate(&info); - if (IS_ERR(mod)) { - err = PTR_ERR(mod); - goto free_copy; - } - -#ifdef CONFIG_MODULE_SIG - mod->sig_ok = info.sig_ok; - if (!mod->sig_ok) - add_taint_module(mod, TAINT_FORCED_MODULE); -#endif - - /* Now module is in final location, initialize linked lists, etc. */ - err = module_unload_init(mod); - if (err) - goto free_module; - - /* Now we've got everything in the final locations, we can - * find optional sections. */ - find_module_sections(mod, &info); - - err = check_module_license_and_versions(mod); - if (err) - goto free_unload; - - /* Set up MODINFO_ATTR fields */ - setup_modinfo(mod, &info); - - /* Fix up syms, so that st_value is a pointer to location. */ - err = simplify_symbols(mod, &info); - if (err < 0) - goto free_modinfo; - - err = apply_relocations(mod, &info); - if (err < 0) - goto free_modinfo; - - err = post_relocation(mod, &info); - if (err < 0) - goto free_modinfo; - - flush_module_icache(mod); - - /* Now copy in args */ - mod->args = strndup_user(uargs, ~0UL >> 1); - if (IS_ERR(mod->args)) { - err = PTR_ERR(mod->args); - goto free_arch_cleanup; - } - - /* Mark state as coming so strong_try_module_get() ignores us. */ - mod->state = MODULE_STATE_COMING; - - /* Now sew it into the lists so we can get lockdep and oops - * info during argument parsing. No one should access us, since - * strong_try_module_get() will fail. - * lockdep/oops can run asynchronous, so use the RCU list insertion - * function to insert in a way safe to concurrent readers. - * The mutex protects against concurrent writers. - */ -again: - mutex_lock(&module_mutex); - if ((old = find_module(mod->name)) != NULL) { - if (old->state == MODULE_STATE_COMING) { - /* Wait in case it fails to load. */ - mutex_unlock(&module_mutex); - err = wait_event_interruptible(module_wq, - finished_loading(mod->name)); - if (err) - goto free_arch_cleanup; - goto again; - } - err = -EEXIST; - goto unlock; - } - - /* This has to be done once we're sure module name is unique. */ - dynamic_debug_setup(info.debug, info.num_debug); - - /* Find duplicate symbols */ - err = verify_export_symbols(mod); - if (err < 0) - goto ddebug; - - module_bug_finalize(info.hdr, info.sechdrs, mod); - list_add_rcu(&mod->list, &modules); - mutex_unlock(&module_mutex); - - /* Module is ready to execute: parsing args may do that. */ - err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp, - -32768, 32767, &ddebug_dyndbg_module_param_cb); - if (err < 0) - goto unlink; - - /* Link in to syfs. */ - err = mod_sysfs_setup(mod, &info, mod->kp, mod->num_kp); - if (err < 0) - goto unlink; - - /* Get rid of temporary copy. */ - free_copy(&info); - - /* Done! */ - trace_module_load(mod); - return mod; - - unlink: - mutex_lock(&module_mutex); - /* Unlink carefully: kallsyms could be walking list. */ - list_del_rcu(&mod->list); - module_bug_cleanup(mod); - wake_up_all(&module_wq); - ddebug: - dynamic_debug_remove(info.debug); - unlock: - mutex_unlock(&module_mutex); - synchronize_sched(); - kfree(mod->args); - free_arch_cleanup: - module_arch_cleanup(mod); - free_modinfo: - free_modinfo(mod); - free_unload: - module_unload_free(mod); - free_module: - module_deallocate(mod, &info); - free_copy: - free_copy(&info); - return ERR_PTR(err); -} - /* Call module constructors. */ static void do_mod_ctors(struct module *mod) { @@ -3107,21 +2995,10 @@ static void do_mod_ctors(struct module *mod) } /* This is where the real work happens */ -SYSCALL_DEFINE3(init_module, void __user *, umod, - unsigned long, len, const char __user *, uargs) +static int do_init_module(struct module *mod) { - struct module *mod; int ret = 0; - /* Must have permission */ - if (!capable(CAP_SYS_MODULE) || modules_disabled) - return -EPERM; - - /* Do all the hard work */ - mod = load_module(umod, len, uargs); - if (IS_ERR(mod)) - return PTR_ERR(mod); - blocking_notifier_call_chain(&module_notify_list, MODULE_STATE_COMING, mod); @@ -3191,6 +3068,200 @@ SYSCALL_DEFINE3(init_module, void __user *, umod, return 0; } +static int may_init_module(void) +{ + if (!capable(CAP_SYS_MODULE) || modules_disabled) + return -EPERM; + + return 0; +} + +/* Allocate and load the module: note that size of section 0 is always + zero, and we rely on this for optional sections. */ +static int load_module(struct load_info *info, const char __user *uargs) +{ + struct module *mod, *old; + long err; + + err = module_sig_check(info); + if (err) + goto free_copy; + + err = elf_header_check(info); + if (err) + goto free_copy; + + /* Figure out module layout, and allocate all the memory. */ + mod = layout_and_allocate(info); + if (IS_ERR(mod)) { + err = PTR_ERR(mod); + goto free_copy; + } + +#ifdef CONFIG_MODULE_SIG + mod->sig_ok = info->sig_ok; + if (!mod->sig_ok) + add_taint_module(mod, TAINT_FORCED_MODULE); +#endif + + /* Now module is in final location, initialize linked lists, etc. */ + err = module_unload_init(mod); + if (err) + goto free_module; + + /* Now we've got everything in the final locations, we can + * find optional sections. */ + find_module_sections(mod, info); + + err = check_module_license_and_versions(mod); + if (err) + goto free_unload; + + /* Set up MODINFO_ATTR fields */ + setup_modinfo(mod, info); + + /* Fix up syms, so that st_value is a pointer to location. */ + err = simplify_symbols(mod, info); + if (err < 0) + goto free_modinfo; + + err = apply_relocations(mod, info); + if (err < 0) + goto free_modinfo; + + err = post_relocation(mod, info); + if (err < 0) + goto free_modinfo; + + flush_module_icache(mod); + + /* Now copy in args */ + mod->args = strndup_user(uargs, ~0UL >> 1); + if (IS_ERR(mod->args)) { + err = PTR_ERR(mod->args); + goto free_arch_cleanup; + } + + /* Mark state as coming so strong_try_module_get() ignores us. */ + mod->state = MODULE_STATE_COMING; + + /* Now sew it into the lists so we can get lockdep and oops + * info during argument parsing. No one should access us, since + * strong_try_module_get() will fail. + * lockdep/oops can run asynchronous, so use the RCU list insertion + * function to insert in a way safe to concurrent readers. + * The mutex protects against concurrent writers. + */ +again: + mutex_lock(&module_mutex); + if ((old = find_module(mod->name)) != NULL) { + if (old->state == MODULE_STATE_COMING) { + /* Wait in case it fails to load. */ + mutex_unlock(&module_mutex); + err = wait_event_interruptible(module_wq, + finished_loading(mod->name)); + if (err) + goto free_arch_cleanup; + goto again; + } + err = -EEXIST; + goto unlock; + } + + /* This has to be done once we're sure module name is unique. */ + dynamic_debug_setup(info->debug, info->num_debug); + + /* Find duplicate symbols */ + err = verify_export_symbols(mod); + if (err < 0) + goto ddebug; + + module_bug_finalize(info->hdr, info->sechdrs, mod); + list_add_rcu(&mod->list, &modules); + mutex_unlock(&module_mutex); + + /* Module is ready to execute: parsing args may do that. */ + err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp, + -32768, 32767, &ddebug_dyndbg_module_param_cb); + if (err < 0) + goto unlink; + + /* Link in to syfs. */ + err = mod_sysfs_setup(mod, info, mod->kp, mod->num_kp); + if (err < 0) + goto unlink; + + /* Get rid of temporary copy. */ + free_copy(info); + + /* Done! */ + trace_module_load(mod); + + return do_init_module(mod); + + unlink: + mutex_lock(&module_mutex); + /* Unlink carefully: kallsyms could be walking list. */ + list_del_rcu(&mod->list); + module_bug_cleanup(mod); + wake_up_all(&module_wq); + ddebug: + dynamic_debug_remove(info->debug); + unlock: + mutex_unlock(&module_mutex); + synchronize_sched(); + kfree(mod->args); + free_arch_cleanup: + module_arch_cleanup(mod); + free_modinfo: + free_modinfo(mod); + free_unload: + module_unload_free(mod); + free_module: + module_deallocate(mod, info); + free_copy: + free_copy(info); + return err; +} + +SYSCALL_DEFINE3(init_module, void __user *, umod, + unsigned long, len, const char __user *, uargs) +{ + int err; + struct load_info info = { }; + + err = may_init_module(); + if (err) + return err; + + pr_debug("init_module: umod=%p, len=%lu, uargs=%p\n", + umod, len, uargs); + + err = copy_module_from_user(umod, len, &info); + if (err) + return err; + + return load_module(&info, uargs); +} + +SYSCALL_DEFINE2(finit_module, int, fd, const char __user *, uargs) +{ + int err; + struct load_info info = { }; + + err = may_init_module(); + if (err) + return err; + + pr_debug("finit_module: fd=%d, uargs=%p\n", fd, uargs); + + err = copy_module_from_fd(fd, &info); + if (err) + return err; + + return load_module(&info, uargs); +} + static inline int within(unsigned long addr, void *start, unsigned long size) { return ((void *)addr >= start && (void *)addr < start + size); diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index dbff751e408..395084d4ce1 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -25,6 +25,7 @@ cond_syscall(sys_swapoff); cond_syscall(sys_kexec_load); cond_syscall(compat_sys_kexec_load); cond_syscall(sys_init_module); +cond_syscall(sys_finit_module); cond_syscall(sys_delete_module); cond_syscall(sys_socketpair); cond_syscall(sys_bind);